forked from luck/tmp_suning_uos_patched
b7176c261c
Right now, drivers like ARM SMMU are using dma_alloc_coherent() to get coherent DMA buffers to save their command queues and page tables. As there is only one default CMA in the whole system, SMMUs on nodes other than node0 will get remote memory. This leads to significant latency. This patch provides per-numa CMA so that drivers like SMMU can get local memory. Tests show localizing CMA can decrease dma_unmap latency much. For instance, before this patch, SMMU on node2 has to wait for more than 560ns for the completion of CMD_SYNC in an empty command queue; with this patch, it needs 240ns only. A positive side effect of this patch would be improving performance even further for those users who are worried about performance more than DMA security and use iommu.passthrough=1 to skip IOMMU. With local CMA, all drivers can get local coherent DMA buffers. Also, this patch changes the default CONFIG_CMA_AREAS to 19 in NUMA. As 1+CONFIG_CMA_AREAS should be quite enough for most servers on the market even they enable both hugetlb_cma and pernuma_cma. 2 numa nodes: 2(hugetlb) + 2(pernuma) + 1(default global cma) = 5 4 numa nodes: 4(hugetlb) + 4(pernuma) + 1(default global cma) = 9 8 numa nodes: 8(hugetlb) + 8(pernuma) + 1(default global cma) = 17 Signed-off-by: Barry Song <song.bao.hua@hisilicon.com> Signed-off-by: Christoph Hellwig <hch@lst.de>
224 lines
5.5 KiB
Plaintext
224 lines
5.5 KiB
Plaintext
# SPDX-License-Identifier: GPL-2.0-only
|
|
|
|
config NO_DMA
|
|
bool
|
|
|
|
config HAS_DMA
|
|
bool
|
|
depends on !NO_DMA
|
|
default y
|
|
|
|
config DMA_OPS
|
|
bool
|
|
|
|
#
|
|
# IOMMU drivers that can bypass the IOMMU code and optionally use the direct
|
|
# mapping fast path should select this option and set the dma_ops_bypass
|
|
# flag in struct device where applicable
|
|
#
|
|
config DMA_OPS_BYPASS
|
|
bool
|
|
|
|
config NEED_SG_DMA_LENGTH
|
|
bool
|
|
|
|
config NEED_DMA_MAP_STATE
|
|
bool
|
|
|
|
config ARCH_DMA_ADDR_T_64BIT
|
|
def_bool 64BIT || PHYS_ADDR_T_64BIT
|
|
|
|
config ARCH_HAS_DMA_COHERENCE_H
|
|
bool
|
|
|
|
config ARCH_HAS_DMA_SET_MASK
|
|
bool
|
|
|
|
#
|
|
# Select this option if the architecture needs special handling for
|
|
# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what
|
|
# people thing of when saying write combine, so very few platforms should
|
|
# need to enable this.
|
|
#
|
|
config ARCH_HAS_DMA_WRITE_COMBINE
|
|
bool
|
|
|
|
config DMA_DECLARE_COHERENT
|
|
bool
|
|
|
|
config ARCH_HAS_SETUP_DMA_OPS
|
|
bool
|
|
|
|
config ARCH_HAS_TEARDOWN_DMA_OPS
|
|
bool
|
|
|
|
config ARCH_HAS_SYNC_DMA_FOR_DEVICE
|
|
bool
|
|
|
|
config ARCH_HAS_SYNC_DMA_FOR_CPU
|
|
bool
|
|
select NEED_DMA_MAP_STATE
|
|
|
|
config ARCH_HAS_SYNC_DMA_FOR_CPU_ALL
|
|
bool
|
|
|
|
config ARCH_HAS_DMA_PREP_COHERENT
|
|
bool
|
|
|
|
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
|
|
bool
|
|
|
|
config DMA_NONCOHERENT_CACHE_SYNC
|
|
bool
|
|
|
|
config DMA_VIRT_OPS
|
|
bool
|
|
depends on HAS_DMA
|
|
select DMA_OPS
|
|
|
|
config SWIOTLB
|
|
bool
|
|
select NEED_DMA_MAP_STATE
|
|
|
|
#
|
|
# Should be selected if we can mmap non-coherent mappings to userspace.
|
|
# The only thing that is really required is a way to set an uncached bit
|
|
# in the pagetables
|
|
#
|
|
config DMA_NONCOHERENT_MMAP
|
|
default y if !MMU
|
|
bool
|
|
|
|
config DMA_COHERENT_POOL
|
|
select GENERIC_ALLOCATOR
|
|
bool
|
|
|
|
config DMA_REMAP
|
|
bool
|
|
depends on MMU
|
|
select DMA_NONCOHERENT_MMAP
|
|
|
|
config DMA_DIRECT_REMAP
|
|
bool
|
|
select DMA_REMAP
|
|
select DMA_COHERENT_POOL
|
|
|
|
config DMA_CMA
|
|
bool "DMA Contiguous Memory Allocator"
|
|
depends on HAVE_DMA_CONTIGUOUS && CMA
|
|
help
|
|
This enables the Contiguous Memory Allocator which allows drivers
|
|
to allocate big physically-contiguous blocks of memory for use with
|
|
hardware components that do not support I/O map nor scatter-gather.
|
|
|
|
You can disable CMA by specifying "cma=0" on the kernel's command
|
|
line.
|
|
|
|
For more information see <include/linux/dma-contiguous.h>.
|
|
If unsure, say "n".
|
|
|
|
if DMA_CMA
|
|
|
|
config DMA_PERNUMA_CMA
|
|
bool "Enable separate DMA Contiguous Memory Area for each NUMA Node"
|
|
default NUMA && ARM64
|
|
help
|
|
Enable this option to get pernuma CMA areas so that devices like
|
|
ARM64 SMMU can get local memory by DMA coherent APIs.
|
|
|
|
You can set the size of pernuma CMA by specifying "cma_pernuma=size"
|
|
on the kernel's command line.
|
|
|
|
comment "Default contiguous memory area size:"
|
|
|
|
config CMA_SIZE_MBYTES
|
|
int "Size in Mega Bytes"
|
|
depends on !CMA_SIZE_SEL_PERCENTAGE
|
|
default 0 if X86
|
|
default 16
|
|
help
|
|
Defines the size (in MiB) of the default memory area for Contiguous
|
|
Memory Allocator. If the size of 0 is selected, CMA is disabled by
|
|
default, but it can be enabled by passing cma=size[MG] to the kernel.
|
|
|
|
|
|
config CMA_SIZE_PERCENTAGE
|
|
int "Percentage of total memory"
|
|
depends on !CMA_SIZE_SEL_MBYTES
|
|
default 0 if X86
|
|
default 10
|
|
help
|
|
Defines the size of the default memory area for Contiguous Memory
|
|
Allocator as a percentage of the total memory in the system.
|
|
If 0 percent is selected, CMA is disabled by default, but it can be
|
|
enabled by passing cma=size[MG] to the kernel.
|
|
|
|
choice
|
|
prompt "Selected region size"
|
|
default CMA_SIZE_SEL_MBYTES
|
|
|
|
config CMA_SIZE_SEL_MBYTES
|
|
bool "Use mega bytes value only"
|
|
|
|
config CMA_SIZE_SEL_PERCENTAGE
|
|
bool "Use percentage value only"
|
|
|
|
config CMA_SIZE_SEL_MIN
|
|
bool "Use lower value (minimum)"
|
|
|
|
config CMA_SIZE_SEL_MAX
|
|
bool "Use higher value (maximum)"
|
|
|
|
endchoice
|
|
|
|
config CMA_ALIGNMENT
|
|
int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
|
|
range 4 12
|
|
default 8
|
|
help
|
|
DMA mapping framework by default aligns all buffers to the smallest
|
|
PAGE_SIZE order which is greater than or equal to the requested buffer
|
|
size. This works well for buffers up to a few hundreds kilobytes, but
|
|
for larger buffers it just a memory waste. With this parameter you can
|
|
specify the maximum PAGE_SIZE order for contiguous buffers. Larger
|
|
buffers will be aligned only to this specified order. The order is
|
|
expressed as a power of two multiplied by the PAGE_SIZE.
|
|
|
|
For example, if your system defaults to 4KiB pages, the order value
|
|
of 8 means that the buffers will be aligned up to 1MiB only.
|
|
|
|
If unsure, leave the default value "8".
|
|
|
|
endif
|
|
|
|
config DMA_API_DEBUG
|
|
bool "Enable debugging of DMA-API usage"
|
|
select NEED_DMA_MAP_STATE
|
|
help
|
|
Enable this option to debug the use of the DMA API by device drivers.
|
|
With this option you will be able to detect common bugs in device
|
|
drivers like double-freeing of DMA mappings or freeing mappings that
|
|
were never allocated.
|
|
|
|
This option causes a performance degradation. Use only if you want to
|
|
debug device drivers and dma interactions.
|
|
|
|
If unsure, say N.
|
|
|
|
config DMA_API_DEBUG_SG
|
|
bool "Debug DMA scatter-gather usage"
|
|
default y
|
|
depends on DMA_API_DEBUG
|
|
help
|
|
Perform extra checking that callers of dma_map_sg() have respected the
|
|
appropriate segment length/boundary limits for the given device when
|
|
preparing DMA scatterlists.
|
|
|
|
This is particularly likely to have been overlooked in cases where the
|
|
dma_map_sg() API is used for general bulk mapping of pages rather than
|
|
preparing literal scatter-gather descriptors, where there is a risk of
|
|
unexpected behaviour from DMA API implementations if the scatterlist
|
|
is technically out-of-spec.
|
|
|
|
If unsure, say N.
|