From b7176c261cdbced87bed9562577333150ed05b01 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:07 +1200 Subject: [PATCH 01/63] dma-contiguous: provide the ability to reserve per-numa CMA Right now, drivers like ARM SMMU are using dma_alloc_coherent() to get coherent DMA buffers to save their command queues and page tables. As there is only one default CMA in the whole system, SMMUs on nodes other than node0 will get remote memory. This leads to significant latency. This patch provides per-numa CMA so that drivers like SMMU can get local memory. Tests show localizing CMA can decrease dma_unmap latency much. For instance, before this patch, SMMU on node2 has to wait for more than 560ns for the completion of CMD_SYNC in an empty command queue; with this patch, it needs 240ns only. A positive side effect of this patch would be improving performance even further for those users who are worried about performance more than DMA security and use iommu.passthrough=1 to skip IOMMU. With local CMA, all drivers can get local coherent DMA buffers. Also, this patch changes the default CONFIG_CMA_AREAS to 19 in NUMA. As 1+CONFIG_CMA_AREAS should be quite enough for most servers on the market even they enable both hugetlb_cma and pernuma_cma. 2 numa nodes: 2(hugetlb) + 2(pernuma) + 1(default global cma) = 5 4 numa nodes: 4(hugetlb) + 4(pernuma) + 1(default global cma) = 9 8 numa nodes: 8(hugetlb) + 8(pernuma) + 1(default global cma) = 17 Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- .../admin-guide/kernel-parameters.txt | 11 ++ include/linux/dma-contiguous.h | 6 ++ kernel/dma/Kconfig | 11 ++ kernel/dma/contiguous.c | 100 ++++++++++++++++-- mm/Kconfig | 3 +- 5 files changed, 120 insertions(+), 11 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1068742a6df..e464cf0b5025 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -599,6 +599,17 @@ altogether. For more information, see include/linux/dma-contiguous.h + cma_pernuma=nn[MG] + [ARM64,KNL] + Sets the size of kernel per-numa memory area for + contiguous memory allocations. A value of 0 disables + per-numa CMA altogether. And If this option is not + specificed, the default value is 0. + With per-numa CMA enabled, DMA users on node nid will + first try to allocate buffer from the pernuma area + which is located in node nid, if the allocation fails, + they will fallback to the global default memory area. + cmo_free_hint= [PPC] Format: { yes | no } Specify whether pages are marked as being inactive when they are freed. This is used in CMO environments diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 03f8e98e3bcc..fe55e004f1f4 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -171,6 +171,12 @@ static inline void dma_free_contiguous(struct device *dev, struct page *page, #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif + #endif #endif diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 847a9d1fa634..0ddfb5510fe4 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -118,6 +118,17 @@ config DMA_CMA If unsure, say "n". if DMA_CMA + +config DMA_PERNUMA_CMA + bool "Enable separate DMA Contiguous Memory Area for each NUMA Node" + default NUMA && ARM64 + help + Enable this option to get pernuma CMA areas so that devices like + ARM64 SMMU can get local memory by DMA coherent APIs. + + You can set the size of pernuma CMA by specifying "cma_pernuma=size" + on the kernel's command line. + comment "Default contiguous memory area size:" config CMA_SIZE_MBYTES diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index cff7e60968b9..aa53384fd7dc 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -69,6 +69,19 @@ static int __init early_cma(char *p) } early_param("cma", early_cma); +#ifdef CONFIG_DMA_PERNUMA_CMA + +static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES]; +static phys_addr_t pernuma_size_bytes __initdata; + +static int __init early_cma_pernuma(char *p) +{ + pernuma_size_bytes = memparse(p, &p); + return 0; +} +early_param("cma_pernuma", early_cma_pernuma); +#endif + #ifdef CONFIG_CMA_SIZE_PERCENTAGE static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) @@ -96,6 +109,34 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void) #endif +#ifdef CONFIG_DMA_PERNUMA_CMA +void __init dma_pernuma_cma_reserve(void) +{ + int nid; + + if (!pernuma_size_bytes) + return; + + for_each_online_node(nid) { + int ret; + char name[20]; + struct cma **cma = &dma_contiguous_pernuma_area[nid]; + + snprintf(name, sizeof(name), "pernuma%d", nid); + ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0, + 0, false, name, cma, nid); + if (ret) { + pr_warn("%s: reservation failed: err %d, node %d", __func__, + ret, nid); + continue; + } + + pr_debug("%s: reserved %llu MiB on node %d\n", __func__, + (unsigned long long)pernuma_size_bytes / SZ_1M, nid); + } +} +#endif + /** * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling * @limit: End address of the reserved memory (optional, 0 for any). @@ -228,23 +269,44 @@ static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp) * @size: Requested allocation size. * @gfp: Allocation flags. * - * This function allocates contiguous memory buffer for specified device. It - * tries to use device specific contiguous memory area if available, or the - * default global one. + * tries to use device specific contiguous memory area if available, or it + * tries to use per-numa cma, if the allocation fails, it will fallback to + * try default global one. * - * Note that it byapss one-page size of allocations from the global area as - * the addresses within one page are always contiguous, so there is no need - * to waste CMA pages for that kind; it also helps reduce fragmentations. + * Note that it bypass one-page size of allocations from the per-numa and + * global area as the addresses within one page are always contiguous, so + * there is no need to waste CMA pages for that kind; it also helps reduce + * fragmentations. */ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) { +#ifdef CONFIG_DMA_PERNUMA_CMA + int nid = dev_to_node(dev); +#endif + /* CMA can be used only in the context which permits sleeping */ if (!gfpflags_allow_blocking(gfp)) return NULL; if (dev->cma_area) return cma_alloc_aligned(dev->cma_area, size, gfp); - if (size <= PAGE_SIZE || !dma_contiguous_default_area) + if (size <= PAGE_SIZE) return NULL; + +#ifdef CONFIG_DMA_PERNUMA_CMA + if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) { + struct cma *cma = dma_contiguous_pernuma_area[nid]; + struct page *page; + + if (cma) { + page = cma_alloc_aligned(cma, size, gfp); + if (page) + return page; + } + } +#endif + if (!dma_contiguous_default_area) + return NULL; + return cma_alloc_aligned(dma_contiguous_default_area, size, gfp); } @@ -261,9 +323,27 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp) */ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) { - if (!cma_release(dev_get_cma_area(dev), page, - PAGE_ALIGN(size) >> PAGE_SHIFT)) - __free_pages(page, get_order(size)); + unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; + + /* if dev has its own cma, free page from there */ + if (dev->cma_area) { + if (cma_release(dev->cma_area, page, count)) + return; + } else { + /* + * otherwise, page is from either per-numa cma or default cma + */ +#ifdef CONFIG_DMA_PERNUMA_CMA + if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)], + page, count)) + return; +#endif + if (cma_release(dma_contiguous_default_area, page, count)) + return; + } + + /* not in any cma, free from buddy */ + __free_pages(page, get_order(size)); } /* diff --git a/mm/Kconfig b/mm/Kconfig index 6c974888f86f..d75a0107f61f 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -516,13 +516,14 @@ config CMA_DEBUGFS config CMA_AREAS int "Maximum count of the CMA areas" depends on CMA + default 19 if NUMA default 7 help CMA allows to create CMA areas for particular purpose, mainly, used as device private area. This parameter sets the maximum number of CMA area in the system. - If unsure, leave the default value "7". + If unsure, leave the default value "7" in UMA and "19" in NUMA. config MEM_SOFT_DIRTY bool "Track memory changes" From c6303ab9b91e7ca20a49ff494338309259ed7c65 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:08 +1200 Subject: [PATCH 02/63] arm64: mm: reserve per-numa CMA to localize coherent dma buffers Right now, smmu is using dma_alloc_coherent() to get memory to save queues and tables. Typically, on ARM64 server, there is a default CMA located at node0, which could be far away from node2, node3 etc. with this patch, smmu will get memory from local numa node to save command queues and page tables. that means dma_unmap latency will be shrunk much. Meanwhile, when iommu.passthrough is on, device drivers which call dma_ alloc_coherent() will also get local memory and avoid the travel between numa nodes. Acked-by: Will Deacon Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- arch/arm64/mm/init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 481d22c32a2e..f1c75957ff3c 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -429,6 +429,8 @@ void __init bootmem_init(void) arm64_hugetlb_cma_reserve(); #endif + dma_pernuma_cma_reserve(); + /* * sparse_init() tries to allocate memory from memblock, so must be * done after the fixed reservations From 2281f797f5524abb8fff66bf8540b4f4687332a2 Mon Sep 17 00:00:00 2001 From: Barry Song Date: Mon, 24 Aug 2020 11:03:09 +1200 Subject: [PATCH 03/63] mm: cma: use CMA_MAX_NAME to define the length of cma name array CMA_MAX_NAME should be visible to CMA's users as they might need it to set the name of CMA areas and avoid hardcoding the size locally. So this patch moves CMA_MAX_NAME from local header file to include/linux header file and removes the hardcode in both hugetlb.c and contiguous.c. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- include/linux/cma.h | 2 ++ kernel/dma/contiguous.c | 2 +- mm/cma.h | 2 -- mm/hugetlb.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index 6ff79fefd01f..217999c8a762 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -18,6 +18,8 @@ #endif +#define CMA_MAX_NAME 64 + struct cma; extern unsigned long totalcma_pages; diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index aa53384fd7dc..f4c150810fd2 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -119,7 +119,7 @@ void __init dma_pernuma_cma_reserve(void) for_each_online_node(nid) { int ret; - char name[20]; + char name[CMA_MAX_NAME]; struct cma **cma = &dma_contiguous_pernuma_area[nid]; snprintf(name, sizeof(name), "pernuma%d", nid); diff --git a/mm/cma.h b/mm/cma.h index 20f6e24bc477..42ae082cb067 100644 --- a/mm/cma.h +++ b/mm/cma.h @@ -4,8 +4,6 @@ #include -#define CMA_MAX_NAME 64 - struct cma { unsigned long base_pfn; unsigned long count; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a301c2d672bf..9eec0ea9ba68 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5683,12 +5683,12 @@ void __init hugetlb_cma_reserve(int order) reserved = 0; for_each_node_state(nid, N_ONLINE) { int res; - char name[20]; + char name[CMA_MAX_NAME]; size = min(per_node, hugetlb_cma_size - reserved); size = round_up(size, PAGE_SIZE << order); - snprintf(name, 20, "hugetlb%d", nid); + snprintf(name, sizeof(name), "hugetlb%d", nid); res = cma_declare_contiguous_nid(0, size, 0, PAGE_SIZE << order, 0, false, name, &hugetlb_cma[nid], nid); From 1e9d90dbed120ec98517428ffff4dacd9797e39d Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 1 Sep 2020 15:16:45 -0700 Subject: [PATCH 04/63] dma-mapping: introduce dma_get_seg_boundary_nr_pages() We found that callers of dma_get_seg_boundary mostly do an ALIGN with page mask and then do a page shift to get number of pages: ALIGN(boundary + 1, 1 << shift) >> shift However, the boundary might be as large as ULONG_MAX, which means that a device has no specific boundary limit. So either "+ 1" or passing it to ALIGN() would potentially overflow. According to kernel defines: #define ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) #define ALIGN(x, a) ALIGN_MASK(x, (typeof(x))(a) - 1) We can simplify the logic here into a helper function doing: ALIGN(boundary + 1, 1 << shift) >> shift = ALIGN_MASK(b + 1, (1 << s) - 1) >> s = {[b + 1 + (1 << s) - 1] & ~[(1 << s) - 1]} >> s = [b + 1 + (1 << s) - 1] >> s = [b + (1 << s)] >> s = (b >> s) + 1 This patch introduces and applies dma_get_seg_boundary_nr_pages() as an overflow-free helper for the dma_get_seg_boundary() callers to get numbers of pages. It also takes care of the NULL dev case for non-DMA API callers. Suggested-by: Christoph Hellwig Signed-off-by: Nicolin Chen Acked-by: Niklas Schnelle Acked-by: Michael Ellerman (powerpc) Signed-off-by: Christoph Hellwig --- arch/alpha/kernel/pci_iommu.c | 7 +------ arch/ia64/hp/common/sba_iommu.c | 3 +-- arch/powerpc/kernel/iommu.c | 11 ++--------- arch/s390/pci/pci_dma.c | 6 ++---- arch/sparc/kernel/iommu-common.c | 10 +++------- arch/sparc/kernel/iommu.c | 3 +-- arch/sparc/kernel/pci_sun4v.c | 3 +-- arch/x86/kernel/amd_gart_64.c | 3 +-- drivers/parisc/ccio-dma.c | 3 +-- drivers/parisc/sba_iommu.c | 3 +-- include/linux/dma-mapping.h | 19 +++++++++++++++++++ 11 files changed, 33 insertions(+), 38 deletions(-) diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 81037907268d..6f7de4f4e191 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -141,12 +141,7 @@ iommu_arena_find_pages(struct device *dev, struct pci_iommu_arena *arena, unsigned long boundary_size; base = arena->dma_base >> PAGE_SHIFT; - if (dev) { - boundary_size = dma_get_seg_boundary(dev) + 1; - boundary_size >>= PAGE_SHIFT; - } else { - boundary_size = 1UL << (32 - PAGE_SHIFT); - } + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); /* Search forward for the first mask-aligned sequence of N free ptes */ ptes = arena->ptes; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 656a4888c300..b49b73a95067 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -485,8 +485,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); ASSERT(res_ptr < res_end); - boundary_size = (unsigned long long)dma_get_seg_boundary(dev) + 1; - boundary_size = ALIGN(boundary_size, 1ULL << iovp_shift) >> iovp_shift; + boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift); BUG_ON(ioc->ibase & ~iovp_mask); shift = ioc->ibase >> iovp_shift; diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 9704f3f76e63..5b69a6a72a0e 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -172,7 +172,6 @@ static unsigned long iommu_range_alloc(struct device *dev, int largealloc = npages > 15; int pass = 0; unsigned long align_mask; - unsigned long boundary_size; unsigned long flags; unsigned int pool_nr; struct iommu_pool *pool; @@ -236,15 +235,9 @@ static unsigned long iommu_range_alloc(struct device *dev, } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << tbl->it_page_shift); - else - boundary_size = ALIGN(1UL << 32, 1 << tbl->it_page_shift); - /* 4GB boundary for iseries_hv_alloc and iseries_hv_map */ - n = iommu_area_alloc(tbl->it_map, limit, start, npages, tbl->it_offset, - boundary_size >> tbl->it_page_shift, align_mask); + dma_get_seg_boundary_nr_pages(dev, tbl->it_page_shift), + align_mask); if (n == -1) { if (likely(pass == 0)) { /* First try the pool from the start */ diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 64b1399a73f0..4a37d8f4de9d 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -261,13 +261,11 @@ static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, zdev->start_dma >> PAGE_SHIFT, - boundary_size, 0); + dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), + 0); } static dma_addr_t dma_alloc_address(struct device *dev, int size) diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c index 59cb16691322..23ca75f09277 100644 --- a/arch/sparc/kernel/iommu-common.c +++ b/arch/sparc/kernel/iommu-common.c @@ -166,13 +166,6 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, } } - if (dev) - boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - 1 << iommu->table_shift); - else - boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); - - boundary_size = boundary_size >> iommu->table_shift; /* * if the skip_span_boundary_check had been set during init, we set * things up so that iommu_is_span_boundary() merely checks if the @@ -181,6 +174,9 @@ unsigned long iommu_tbl_range_alloc(struct device *dev, if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { shift = 0; boundary_size = iommu->poolsize * iommu->nr_pools; + } else { + boundary_size = dma_get_seg_boundary_nr_pages(dev, + iommu->table_shift); } n = iommu_area_alloc(iommu->map, limit, start, npages, shift, boundary_size, align_mask); diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 4ae7388b1bff..c3e4e2df26a8 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -472,8 +472,7 @@ static int dma_4u_map_sg(struct device *dev, struct scatterlist *sglist, outs->dma_length = 0; max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); base_shift = iommu->tbl.table_map_base >> IO_PAGE_SHIFT; for_each_sg(sglist, s, nelems, i) { unsigned long paddr, npages, entry, out_entry = 0, slen; diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 14b93c5564e3..6b92dd51c002 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -508,8 +508,7 @@ static int dma_4v_map_sg(struct device *dev, struct scatterlist *sglist, iommu_batch_start(dev, prot, ~0UL); max_seg_size = dma_get_max_seg_size(dev); - seg_boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, - IO_PAGE_SIZE) >> IO_PAGE_SHIFT; + seg_boundary_size = dma_get_seg_boundary_nr_pages(dev, IO_PAGE_SHIFT); mask = *dev->dma_mask; if (!iommu_use_atu(iommu, mask)) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index e89031e9c847..bccc5357bffd 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -96,8 +96,7 @@ static unsigned long alloc_iommu(struct device *dev, int size, base_index = ALIGN(iommu_bus_base & dma_get_seg_boundary(dev), PAGE_SIZE) >> PAGE_SHIFT; - boundary_size = ALIGN((u64)dma_get_seg_boundary(dev) + 1, - PAGE_SIZE) >> PAGE_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT); spin_lock_irqsave(&iommu_bitmap_lock, flags); offset = iommu_area_alloc(iommu_gart_bitmap, iommu_pages, next_bit, diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index a5507f75b524..ba16b7f8f806 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -356,8 +356,7 @@ ccio_alloc_range(struct ioc *ioc, struct device *dev, size_t size) ** ggg sacrifices another 710 to the computer gods. */ - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); if (pages_needed <= 8) { /* diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index d4314fba0269..959bda193b96 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -342,8 +342,7 @@ sba_search_bitmap(struct ioc *ioc, struct device *dev, unsigned long shift; int ret; - boundary_size = ALIGN((unsigned long long)dma_get_seg_boundary(dev) + 1, - 1ULL << IOVP_SHIFT) >> IOVP_SHIFT; + boundary_size = dma_get_seg_boundary_nr_pages(dev, IOVP_SHIFT); #if defined(ZX1_SUPPORT) BUG_ON(ioc->ibase & ~IOVP_MASK); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 52635e91143b..faab0a8210b9 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -632,6 +632,25 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) return DMA_BIT_MASK(32); } +/** + * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units + * @dev: device to guery the boundary for + * @page_shift: ilog() of the IOMMU page size + * + * Return the segment boundary in IOMMU page units (which may be different from + * the CPU page size) for the passed in device. + * + * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for + * non-DMA API callers. + */ +static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, + unsigned int page_shift) +{ + if (!dev) + return (U32_MAX >> page_shift) + 1; + return (dma_get_seg_boundary(dev) >> page_shift) + 1; +} + static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) { if (dev->dma_parms) { From 135ba11a7a07b4ce9197d9fa4b196329a57f1e06 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 1 Sep 2020 15:16:46 -0700 Subject: [PATCH 05/63] dma-mapping: set default segment_boundary_mask to ULONG_MAX The default segment_boundary_mask was set to DMA_BIT_MAKS(32) a decade ago by referencing SCSI/block subsystem, as a 32-bit mask was good enough for most of the devices. Now more and more drivers set dma_masks above DMA_BIT_MAKS(32) while only a handful of them call dma_set_seg_boundary(). This means that most drivers have a 4GB segmention boundary because DMA API returns a 32-bit default value, though they might not really have such a limit. The default segment_boundary_mask should mean "no limit" since the device doesn't explicitly set the mask. But a 32-bit mask certainly limits those devices capable of 32+ bits addressing. So this patch sets default segment_boundary_mask to ULONG_MAX. Signed-off-by: Nicolin Chen Acked-by: Niklas Schnelle Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index faab0a8210b9..df0bff2ea750 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -629,7 +629,7 @@ static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; - return DMA_BIT_MASK(32); + return ULONG_MAX; } /** From 3d842b51a04808fc1494006b97e0d33402d28507 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 18:34:27 +0200 Subject: [PATCH 06/63] dma-mapping: remove the dma_dummy_ops export dma_dummy_ops is only used by the ACPI code, which can't be modular. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/dummy.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index 05607642c888..6974b1bd7d0b 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -36,4 +36,3 @@ const struct dma_map_ops dma_dummy_ops = { .map_sg = dma_dummy_map_sg, .dma_supported = dma_dummy_supported, }; -EXPORT_SYMBOL(dma_dummy_ops); From ec91ccb27408abd842faf6137b3a0df8d6ebbdbb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 18:35:24 +0200 Subject: [PATCH 07/63] dma-debug: remove most exports Now that the main dma mapping entry points are out of line most of the symbols in dma-debug.c can only be called from built-in code. Remove the unused exports. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/debug.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 8e9f7b301c6d..6f53c2e03aa4 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1235,7 +1235,6 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_page); void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { @@ -1290,7 +1289,6 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, return; check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_page); void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int mapped_ents, int direction) @@ -1328,7 +1326,6 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, add_dma_entry(entry); } } -EXPORT_SYMBOL(debug_dma_map_sg); static int get_nr_mapped_entries(struct device *dev, struct dma_debug_entry *ref) @@ -1380,7 +1377,6 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, check_unmap(&ref); } } -EXPORT_SYMBOL(debug_dma_unmap_sg); void debug_dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t dma_addr, void *virt) @@ -1466,7 +1462,6 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, add_dma_entry(entry); } -EXPORT_SYMBOL(debug_dma_map_resource); void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, size_t size, int direction) @@ -1484,7 +1479,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, check_unmap(&ref); } -EXPORT_SYMBOL(debug_dma_unmap_resource); void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, int direction) @@ -1503,7 +1497,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, check_sync(dev, &ref, true); } -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); void debug_dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, @@ -1523,7 +1516,6 @@ void debug_dma_sync_single_for_device(struct device *dev, check_sync(dev, &ref, false); } -EXPORT_SYMBOL(debug_dma_sync_single_for_device); void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1556,7 +1548,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, true); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction) @@ -1588,7 +1579,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, check_sync(dev, &ref, false); } } -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); static int __init dma_debug_driver_setup(char *str) { From cbf1449ba5aec9cf4c68b69f899391a8d42e9b8f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 15:18:23 +0200 Subject: [PATCH 08/63] MIPS: make dma_sync_*_for_cpu a little less overzealous When transferring DMA ownership back to the CPU there should never be any writeback from the cache, as the buffer was owned by the device until now. Instead it should just be invalidated for the mapping directions where the device could have written data. Note that the changes rely on the fact that kmap_atomic is stubbed out for the !HIGHMEM case to simplify the code a bit. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer --- arch/mips/mm/dma-noncoherent.c | 44 +++++++++++++++++++++------------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 563c2c0d0c81..97a14adbafc9 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -55,22 +55,34 @@ void *arch_dma_set_uncached(void *addr, size_t size) return (void *)(__pa(addr) + UNCAC_BASE); } -static inline void dma_sync_virt(void *addr, size_t size, +static inline void dma_sync_virt_for_device(void *addr, size_t size, enum dma_data_direction dir) { switch (dir) { case DMA_TO_DEVICE: dma_cache_wback((unsigned long)addr, size); break; - case DMA_FROM_DEVICE: dma_cache_inv((unsigned long)addr, size); break; - case DMA_BIDIRECTIONAL: dma_cache_wback_inv((unsigned long)addr, size); break; + default: + BUG(); + } +} +static inline void dma_sync_virt_for_cpu(void *addr, size_t size, + enum dma_data_direction dir) +{ + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + dma_cache_inv((unsigned long)addr, size); + break; default: BUG(); } @@ -82,7 +94,7 @@ static inline void dma_sync_virt(void *addr, size_t size, * configured then the bulk of this loop gets optimized out. */ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) + enum dma_data_direction dir, bool for_device) { struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); unsigned long offset = paddr & ~PAGE_MASK; @@ -90,18 +102,20 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, do { size_t len = left; + void *addr; if (PageHighMem(page)) { - void *addr; - if (offset + len > PAGE_SIZE) len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + if (for_device) + dma_sync_virt_for_device(addr + offset, len, dir); + else + dma_sync_virt_for_cpu(addr + offset, len, dir); + kunmap_atomic(addr); - addr = kmap_atomic(page); - dma_sync_virt(addr + offset, len, dir); - kunmap_atomic(addr); - } else - dma_sync_virt(page_address(page) + offset, size, dir); offset = 0; page++; left -= len; @@ -111,7 +125,7 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, true); } #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU @@ -119,16 +133,14 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, enum dma_data_direction dir) { if (cpu_needs_post_dma_flush()) - dma_sync_phys(paddr, size, dir); + dma_sync_phys(paddr, size, dir, false); } #endif void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { - BUG_ON(direction == DMA_NONE); - - dma_sync_virt(vaddr, size, direction); + dma_sync_virt_for_device(vaddr, size, direction); } #ifdef CONFIG_DMA_PERDEV_COHERENT From a4877c44a48e25f98f767a0fd54acacac55d1aac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 19:31:25 +0200 Subject: [PATCH 09/63] MIPS/jazzdma: remove the unused vdma_remap function Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer --- arch/mips/include/asm/jazzdma.h | 2 - arch/mips/jazz/jazzdma.c | 70 --------------------------------- 2 files changed, 72 deletions(-) diff --git a/arch/mips/include/asm/jazzdma.h b/arch/mips/include/asm/jazzdma.h index d13f940022d5..c831da7fa898 100644 --- a/arch/mips/include/asm/jazzdma.h +++ b/arch/mips/include/asm/jazzdma.h @@ -10,8 +10,6 @@ */ extern unsigned long vdma_alloc(unsigned long paddr, unsigned long size); extern int vdma_free(unsigned long laddr); -extern int vdma_remap(unsigned long laddr, unsigned long paddr, - unsigned long size); extern unsigned long vdma_phys2log(unsigned long paddr); extern unsigned long vdma_log2phys(unsigned long laddr); extern void vdma_stats(void); /* for debugging only */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 014773f0bfcd..fe40dbed04c1 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -209,76 +209,6 @@ int vdma_free(unsigned long laddr) EXPORT_SYMBOL(vdma_free); -/* - * Map certain page(s) to another physical address. - * Caller must have allocated the page(s) before. - */ -int vdma_remap(unsigned long laddr, unsigned long paddr, unsigned long size) -{ - int first, pages; - - if (laddr > 0xffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid logical address: %08lx\n", - laddr); - return -EINVAL; /* invalid logical address */ - } - if (paddr > 0x1fffffff) { - if (vdma_debug) - printk - ("vdma_map: Invalid physical address: %08lx\n", - paddr); - return -EINVAL; /* invalid physical address */ - } - - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - if (vdma_debug) - printk("vdma_remap: first=%x, pages=%x\n", first, pages); - if (first + pages > VDMA_PGTBL_ENTRIES) { - if (vdma_debug) - printk("vdma_alloc: Invalid size: %08lx\n", size); - return -EINVAL; - } - - paddr &= ~(VDMA_PAGESIZE - 1); - while (pages > 0 && first < VDMA_PGTBL_ENTRIES) { - if (pgtbl[first].owner != laddr) { - if (vdma_debug) - printk("Trying to remap other's pages.\n"); - return -EPERM; /* not owner */ - } - pgtbl[first].frame = paddr; - paddr += VDMA_PAGESIZE; - first++; - pages--; - } - - /* - * Update translation table - */ - r4030_write_reg32(JAZZ_R4030_TRSTBL_INV, 0); - - if (vdma_debug > 2) { - int i; - pages = (((paddr & (VDMA_PAGESIZE - 1)) + size) >> 12) + 1; - first = laddr >> 12; - printk("LADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", i << 12); - printk("\nPADDR: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].frame); - printk("\nOWNER: "); - for (i = first; i < first + pages; i++) - printk("%08x ", pgtbl[i].owner); - printk("\n"); - } - - return 0; -} - /* * Translate a physical address to a logical address. * This will return the logical address of the first From 170780be324da2c32ec0f7e9de2ebd0c4370ade5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 16:51:47 +0200 Subject: [PATCH 10/63] MIPS/jazzdma: decouple from dma-direct The jazzdma ops implement support for a very basic IOMMU. Thus we really should not use the dma-direct code that takes physical address limits into account. This survived through the great MIPS DMA ops cleanup mostly because I was lazy, but now it is time to fully split the implementations. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer --- arch/mips/jazz/jazzdma.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index fe40dbed04c1..dab4d058cea9 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -492,26 +491,39 @@ int vdma_get_enable(int channel) static void *jazz_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { + struct page *page; void *ret; - ret = dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); - if (!ret) - return NULL; + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; + size = PAGE_ALIGN(size); + page = alloc_pages(gfp, get_order(size)); + if (!page) + return NULL; + ret = page_address(page); + memset(ret, 0, size); *dma_handle = vdma_alloc(virt_to_phys(ret), size); - if (*dma_handle == DMA_MAPPING_ERROR) { - dma_direct_free_pages(dev, size, ret, *dma_handle, attrs); - return NULL; - } + if (*dma_handle == DMA_MAPPING_ERROR) + goto out_free_pages; - return ret; + if (attrs & DMA_ATTR_NON_CONSISTENT) + return ret; + arch_dma_prep_coherent(page, size); + return (void *)(UNCAC_BASE + __pa(ret)); + +out_free_pages: + __free_pages(page, get_order(size)); + return NULL; } static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - dma_direct_free_pages(dev, size, vaddr, dma_handle, attrs); + if (!(attrs & DMA_ATTR_NON_CONSISTENT)) + vaddr = __va(vaddr - UNCAC_BASE); + __free_pages(virt_to_page(vaddr), get_order(size)); } static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, @@ -608,7 +620,6 @@ const struct dma_map_ops jazz_dma_ops = { .sync_single_for_device = jazz_dma_sync_single_for_device, .sync_sg_for_cpu = jazz_dma_sync_sg_for_cpu, .sync_sg_for_device = jazz_dma_sync_sg_for_device, - .dma_supported = dma_direct_supported, .cache_sync = arch_dma_cache_sync, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, From ef1a85b6ca093abb00fbc38ea55fd9ae08ab05ef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 29 Aug 2020 10:40:28 +0200 Subject: [PATCH 11/63] dma-mapping: fix DMA_OPS dependencies Driver that select DMA_OPS need to depend on HAS_DMA support to work. The vop driver was missing that dependency, so add it, and also add a another depends in DMA_OPS itself. That won't fix the issue due to how the Kconfig dependencies work, but at least produce a warning about unmet dependencies. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- drivers/misc/mic/Kconfig | 1 + kernel/dma/Kconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/misc/mic/Kconfig b/drivers/misc/mic/Kconfig index b9bb086785db..8a7c2c5711d5 100644 --- a/drivers/misc/mic/Kconfig +++ b/drivers/misc/mic/Kconfig @@ -35,6 +35,7 @@ config SCIF_BUS config VOP_BUS tristate "VOP Bus Driver" + depends on HAS_DMA select DMA_OPS help This option is selected by any driver which registers a diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 0ddfb5510fe4..e7b801649f65 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -9,6 +9,7 @@ config HAS_DMA default y config DMA_OPS + depends on HAS_DMA bool # From abdaf11ac18925ce8cc229e62e35b342d548ece2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 16:41:50 +0200 Subject: [PATCH 12/63] dma-mapping: add (back) arch_dma_mark_clean for ia64 Add back a hook to optimize dcache flushing after reading executable code using DMA. This gets ia64 out of the business of pretending to be dma incoherent just for this optimization. Signed-off-by: Christoph Hellwig --- arch/ia64/Kconfig | 3 +-- arch/ia64/kernel/dma-mapping.c | 14 +------------- arch/ia64/mm/init.c | 3 +-- include/linux/dma-direct.h | 3 +++ include/linux/dma-noncoherent.h | 8 ++++++++ kernel/dma/Kconfig | 6 ++++++ kernel/dma/direct.c | 3 +++ 7 files changed, 23 insertions(+), 17 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 5b4ec80bf586..513ba0c5d336 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_HAS_DMA_MARK_CLEAN select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select ACPI @@ -32,8 +33,6 @@ config IA64 select TTY select HAVE_ARCH_TRACEHOOK select HAVE_VIRT_CPU_ACCOUNTING - select DMA_NONCOHERENT_MMAP - select ARCH_HAS_SYNC_DMA_FOR_CPU select VIRT_TO_BUS select GENERIC_IRQ_PROBE select GENERIC_PENDING_IRQ if SMP diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 09ef9ce9988d..f640ed6fe1d5 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include /* Set this to 1 if there is a HW IOMMU in the system */ @@ -7,15 +7,3 @@ int iommu_detected __read_mostly; const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); - -void *arch_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} - -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -} diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0b3fb4c7af29..02e5aa08294e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -73,8 +73,7 @@ __ia64_sync_icache_dcache (pte_t pte) * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to * flush them when they get mapped into an executable vm-area. */ -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_dma_mark_clean(phys_addr_t paddr, size_t size) { unsigned long pfn = PHYS_PFN(paddr); diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 6e87225600ae..95e3e28bd93f 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -150,6 +150,9 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); } static inline dma_addr_t dma_direct_map_page(struct device *dev, diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index ca09a4e07d2d..b9bc6c557ea4 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -108,6 +108,14 @@ static inline void arch_dma_prep_coherent(struct page *page, size_t size) } #endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + void *arch_dma_set_uncached(void *addr, size_t size); void arch_dma_clear_uncached(void *addr, size_t size); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index e7b801649f65..281785feb874 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -44,6 +44,12 @@ config ARCH_HAS_DMA_SET_MASK config ARCH_HAS_DMA_WRITE_COMBINE bool +# +# Select if the architectures provides the arch_dma_mark_clean hook +# +config ARCH_HAS_DMA_MARK_CLEAN + bool + config DMA_DECLARE_COHERENT bool diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index db6ef07aec3b..949c1cbf08b2 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -345,6 +345,9 @@ void dma_direct_sync_sg_for_cpu(struct device *dev, if (unlikely(is_swiotlb_buffer(paddr))) swiotlb_tbl_sync_single(dev, paddr, sg->length, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, sg->length); } if (!dev_is_dma_coherent(dev)) From 2f5388a29be82a62529d146499e70987e856f6f7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:06:40 +0200 Subject: [PATCH 13/63] dma-direct: remove dma_direct_{alloc,free}_pages Just merge these helpers into the main dma_direct_{alloc,free} routines, as the additional checks are always false for the two callers. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/x86/kernel/amd_gart_64.c | 6 +++--- include/linux/dma-direct.h | 4 ---- kernel/dma/direct.c | 39 ++++++++++++++--------------------- kernel/dma/pool.c | 2 +- 4 files changed, 19 insertions(+), 32 deletions(-) diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index bccc5357bffd..153374b996a2 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -467,7 +467,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, { void *vaddr; - vaddr = dma_direct_alloc_pages(dev, size, dma_addr, flag, attrs); + vaddr = dma_direct_alloc(dev, size, dma_addr, flag, attrs); if (!vaddr || !force_iommu || dev->coherent_dma_mask <= DMA_BIT_MASK(24)) return vaddr; @@ -479,7 +479,7 @@ gart_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_addr, goto out_free; return vaddr; out_free: - dma_direct_free_pages(dev, size, vaddr, *dma_addr, attrs); + dma_direct_free(dev, size, vaddr, *dma_addr, attrs); return NULL; } @@ -489,7 +489,7 @@ gart_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_addr, unsigned long attrs) { gart_unmap_page(dev, dma_addr, size, DMA_BIDIRECTIONAL, 0); - dma_direct_free_pages(dev, size, vaddr, dma_addr, attrs); + dma_direct_free(dev, size, vaddr, dma_addr, attrs); } static int no_agp; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 95e3e28bd93f..20eceb2e4f91 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -77,10 +77,6 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); -void *dma_direct_alloc_pages(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 949c1cbf08b2..1d564bea5857 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -151,13 +151,18 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, return page; } -void *dma_direct_alloc_pages(struct device *dev, size_t size, +void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { struct page *page; void *ret; int err; + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + dma_alloc_need_uncached(dev, attrs)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + size = PAGE_ALIGN(size); if (dma_should_alloc_from_pool(dev, gfp, attrs)) { @@ -256,11 +261,18 @@ void *dma_direct_alloc_pages(struct device *dev, size_t size, return NULL; } -void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) +void dma_direct_free(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { unsigned int page_order = get_order(size); + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + dma_alloc_need_uncached(dev, attrs)) { + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + return; + } + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ if (dma_should_free_from_pool(dev, attrs) && dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) @@ -284,27 +296,6 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr, dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); } -void *dma_direct_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs); -} - -void dma_direct_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); - else - dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs); -} - #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index 1281c0f0442b..fe11643ff9cc 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -115,7 +115,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size, #endif /* * Memory in the atomic DMA pools must be unencrypted, the pools do not - * shrink so no re-encryption occurs in dma_direct_free_pages(). + * shrink so no re-encryption occurs in dma_direct_free(). */ ret = set_memory_decrypted((unsigned long)page_to_virt(page), 1 << order); From 3773dfe6ea4d220dcccb03827fca94ec3d39bc17 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:14:28 +0200 Subject: [PATCH 14/63] dma-direct: lift gfp_t manipulation out of__dma_direct_alloc_pages Move the detailed gfp_t setup from __dma_direct_alloc_pages into the caller to clean things up a little. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/direct.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 1d564bea5857..12e9f5f75dfe 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -108,7 +108,7 @@ static inline bool dma_should_free_from_pool(struct device *dev, } static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, - gfp_t gfp, unsigned long attrs) + gfp_t gfp) { int node = dev_to_node(dev); struct page *page = NULL; @@ -116,11 +116,6 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, WARN_ON_ONCE(!PAGE_ALIGNED(size)); - if (attrs & DMA_ATTR_NO_WARN) - gfp |= __GFP_NOWARN; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, &phys_limit); page = dma_alloc_contiguous(dev, size, gfp); @@ -164,6 +159,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); size = PAGE_ALIGN(size); + if (attrs & DMA_ATTR_NO_WARN) + gfp |= __GFP_NOWARN; if (dma_should_alloc_from_pool(dev, gfp, attrs)) { u64 phys_mask; @@ -177,7 +174,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, goto done; } - page = __dma_direct_alloc_pages(dev, size, gfp, attrs); + /* we always manually zero the memory once we are done */ + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); if (!page) return NULL; From 96eb89caf753fd0c26d239d8483d92632fb5be15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:20:52 +0200 Subject: [PATCH 15/63] dma-direct: use phys_to_dma_direct in dma_direct_alloc Replace the currently open code copy. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/direct.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 12e9f5f75dfe..57a6e7d7cf8f 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -240,10 +240,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, goto out_encrypt_pages; } done: - if (force_dma_unencrypted(dev)) - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - else - *dma_handle = phys_to_dma(dev, page_to_phys(page)); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return ret; out_encrypt_pages: From 7bc5c428a660d4d1bc95ba54bf4cb6bccf8c3029 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 17:56:22 +0200 Subject: [PATCH 16/63] dma-direct: remove __dma_to_phys There is no harm in just always clearing the SME encryption bit, while significantly simplifying the interface. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm/include/asm/dma-direct.h | 2 +- arch/mips/bmips/dma.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/include/asm/dma-direct.h | 2 +- arch/mips/loongson2ef/fuloong-2e/dma.c | 2 +- arch/mips/loongson2ef/lemote-2f/dma.c | 2 +- arch/mips/loongson64/dma.c | 2 +- arch/mips/pci/pci-ar2315.c | 2 +- arch/mips/pci/pci-xtalk-bridge.c | 2 +- arch/mips/sgi-ip32/ip32-dma.c | 2 +- arch/powerpc/include/asm/dma-direct.h | 2 +- include/linux/dma-direct.h | 14 +++++--------- kernel/dma/direct.c | 6 +----- 13 files changed, 17 insertions(+), 25 deletions(-) diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 7c3001a6a775..a8cee87a93e8 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -8,7 +8,7 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { unsigned int offset = dev_addr & ~PAGE_MASK; return __pfn_to_phys(dma_to_pfn(dev, dev_addr)) + offset; diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index df56bf4179e3..ba2a5d33dfd3 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -52,7 +52,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) return pa; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { struct bmips_dma_range *r; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 14ea680d180e..388b13ba2558 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -177,7 +177,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h index 14e352651ce9..8e178651c638 100644 --- a/arch/mips/include/asm/dma-direct.h +++ b/arch/mips/include/asm/dma-direct.h @@ -3,6 +3,6 @@ #define _MIPS_DMA_DIRECT_H 1 dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); #endif /* _MIPS_DMA_DIRECT_H */ diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c index e122292bf666..83fadeb3fd7d 100644 --- a/arch/mips/loongson2ef/fuloong-2e/dma.c +++ b/arch/mips/loongson2ef/fuloong-2e/dma.c @@ -6,7 +6,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & 0x7fffffff; } diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c index abf0e39d7e46..302b43a14eee 100644 --- a/arch/mips/loongson2ef/lemote-2f/dma.c +++ b/arch/mips/loongson2ef/lemote-2f/dma.c @@ -6,7 +6,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr | 0x80000000; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { if (dma_addr > 0x8fffffff) return dma_addr; diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index dbfe6e82fddd..b3dc5d0bd2b1 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -13,7 +13,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return ((nid << 44) ^ paddr) | (nid << node_id_offset); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index 490953f51528..d88395684f48 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -175,7 +175,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr + ar2315_dev_offset(dev); } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr - ar2315_dev_offset(dev); } diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index 9b3cc775c55e..f1b37f32b553 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -33,7 +33,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return bc->baddr + paddr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { return dma_addr & ~(0xffUL << 56); } diff --git a/arch/mips/sgi-ip32/ip32-dma.c b/arch/mips/sgi-ip32/ip32-dma.c index fa7b17cb5385..160317294d97 100644 --- a/arch/mips/sgi-ip32/ip32-dma.c +++ b/arch/mips/sgi-ip32/ip32-dma.c @@ -27,7 +27,7 @@ dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return dma_addr; } -phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) +phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { phys_addr_t paddr = dma_addr & RAM_OFFSET_MASK; diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index abc154d784b0..95b09313d2a4 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -7,7 +7,7 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return paddr + dev->archdata.dma_offset; } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { return daddr - dev->archdata.dma_offset; } diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 20eceb2e4f91..f00e262ab6b1 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -24,11 +24,12 @@ static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } -static inline phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr; + phys_addr_t paddr = (phys_addr_t)dev_addr + + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); - return paddr + ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + return __sme_clr(paddr); } #endif /* !CONFIG_ARCH_HAS_PHYS_TO_DMA */ @@ -44,7 +45,7 @@ static inline bool force_dma_unencrypted(struct device *dev) /* * If memory encryption is supported, phys_to_dma will set the memory encryption * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma - * and __dma_to_phys versions should only be used on non-encrypted memory for + * version should only be used on non-encrypted memory for * special occasions like DMA coherent buffers. */ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) @@ -52,11 +53,6 @@ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return __sme_set(__phys_to_dma(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) -{ - return __sme_clr(__dma_to_phys(dev, daddr)); -} - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 57a6e7d7cf8f..bfb479c8a370 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -48,11 +48,6 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, { u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit); - if (force_dma_unencrypted(dev)) - *phys_limit = __dma_to_phys(dev, dma_limit); - else - *phys_limit = dma_to_phys(dev, dma_limit); - /* * Optimistically try the zone that the physical address mask falls * into first. If that returns memory that isn't actually addressable @@ -61,6 +56,7 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ + *phys_limit = dma_to_phys(dev, dma_limit); if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_limit <= DMA_BIT_MASK(32)) From 5ceda74093a5c1c3f42a02b894df031f3bbc9af1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 17:34:03 +0200 Subject: [PATCH 17/63] dma-direct: rename and cleanup __phys_to_dma The __phys_to_dma vs phys_to_dma distinction isn't exactly obvious. Try to improve the situation by renaming __phys_to_dma to phys_to_dma_unencryped, and not forcing architectures that want to override phys_to_dma to actually provide __phys_to_dma. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm/include/asm/dma-direct.h | 2 +- arch/mips/bmips/dma.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 2 +- arch/mips/include/asm/dma-direct.h | 2 +- arch/mips/loongson2ef/fuloong-2e/dma.c | 2 +- arch/mips/loongson2ef/lemote-2f/dma.c | 2 +- arch/mips/loongson64/dma.c | 2 +- arch/mips/pci/pci-ar2315.c | 2 +- arch/mips/pci/pci-xtalk-bridge.c | 2 +- arch/mips/sgi-ip32/ip32-dma.c | 2 +- arch/powerpc/include/asm/dma-direct.h | 2 +- drivers/iommu/intel/iommu.c | 2 +- include/linux/dma-direct.h | 28 +++++++++++++++----------- kernel/dma/direct.c | 8 ++++---- kernel/dma/swiotlb.c | 4 ++-- 15 files changed, 34 insertions(+), 30 deletions(-) diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index a8cee87a93e8..bca0de567534 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,7 +2,7 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; return pfn_to_dma(dev, __phys_to_pfn(paddr)) + offset; diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index ba2a5d33dfd3..49061b870680 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -40,7 +40,7 @@ static struct bmips_dma_range *bmips_dma_ranges; #define FLUSH_RAC 0x100 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t pa) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t pa) { struct bmips_dma_range *r; diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index 388b13ba2558..232fa1017b1e 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -168,7 +168,7 @@ void __init octeon_pci_dma_init(void) } #endif /* CONFIG_PCI */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { #ifdef CONFIG_PCI if (dev && dev_is_pci(dev)) diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h index 8e178651c638..9a640118316c 100644 --- a/arch/mips/include/asm/dma-direct.h +++ b/arch/mips/include/asm/dma-direct.h @@ -2,7 +2,7 @@ #ifndef _MIPS_DMA_DIRECT_H #define _MIPS_DMA_DIRECT_H 1 -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); #endif /* _MIPS_DMA_DIRECT_H */ diff --git a/arch/mips/loongson2ef/fuloong-2e/dma.c b/arch/mips/loongson2ef/fuloong-2e/dma.c index 83fadeb3fd7d..cea167d8aba8 100644 --- a/arch/mips/loongson2ef/fuloong-2e/dma.c +++ b/arch/mips/loongson2ef/fuloong-2e/dma.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } diff --git a/arch/mips/loongson2ef/lemote-2f/dma.c b/arch/mips/loongson2ef/lemote-2f/dma.c index 302b43a14eee..3c9e99456357 100644 --- a/arch/mips/loongson2ef/lemote-2f/dma.c +++ b/arch/mips/loongson2ef/lemote-2f/dma.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr | 0x80000000; } diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index b3dc5d0bd2b1..364f2f27c872 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -4,7 +4,7 @@ #include #include -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ diff --git a/arch/mips/pci/pci-ar2315.c b/arch/mips/pci/pci-ar2315.c index d88395684f48..cef4a47ab063 100644 --- a/arch/mips/pci/pci-ar2315.c +++ b/arch/mips/pci/pci-ar2315.c @@ -170,7 +170,7 @@ static inline dma_addr_t ar2315_dev_offset(struct device *dev) return 0; } -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + ar2315_dev_offset(dev); } diff --git a/arch/mips/pci/pci-xtalk-bridge.c b/arch/mips/pci/pci-xtalk-bridge.c index f1b37f32b553..50f7d42cca5a 100644 --- a/arch/mips/pci/pci-xtalk-bridge.c +++ b/arch/mips/pci/pci-xtalk-bridge.c @@ -25,7 +25,7 @@ /* * Common phys<->dma mapping for platforms using pci xtalk bridge */ -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { struct pci_dev *pdev = to_pci_dev(dev); struct bridge_controller *bc = BRIDGE_CONTROLLER(pdev->bus); diff --git a/arch/mips/sgi-ip32/ip32-dma.c b/arch/mips/sgi-ip32/ip32-dma.c index 160317294d97..20c6da9d76bc 100644 --- a/arch/mips/sgi-ip32/ip32-dma.c +++ b/arch/mips/sgi-ip32/ip32-dma.c @@ -18,7 +18,7 @@ #define RAM_OFFSET_MASK 0x3fffffffUL -dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { dma_addr_t dma_addr = paddr & RAM_OFFSET_MASK; diff --git a/arch/powerpc/include/asm/dma-direct.h b/arch/powerpc/include/asm/dma-direct.h index 95b09313d2a4..128304cbee1d 100644 --- a/arch/powerpc/include/asm/dma-direct.h +++ b/arch/powerpc/include/asm/dma-direct.h @@ -2,7 +2,7 @@ #ifndef ASM_POWERPC_DMA_DIRECT_H #define ASM_POWERPC_DMA_DIRECT_H 1 -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { return paddr + dev->archdata.dma_offset; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index f8177c59d229..7983c13b9eef 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3736,7 +3736,7 @@ bounce_map_single(struct device *dev, phys_addr_t paddr, size_t size, */ if (!IS_ALIGNED(paddr | size, VTD_PAGE_SIZE)) { tlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, aligned_size, dir, attrs); if (tlb_addr == DMA_MAPPING_ERROR) { goto swiotlb_error; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index f00e262ab6b1..805010ea5346 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -16,14 +16,29 @@ extern unsigned int zone_dma_bits; #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include +#ifndef phys_to_dma_unencrypted +#define phys_to_dma_unencrypted phys_to_dma +#endif #else -static inline dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) +static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, + phys_addr_t paddr) { dma_addr_t dev_addr = (dma_addr_t)paddr; return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); } +/* + * If memory encryption is supported, phys_to_dma will set the memory encryption + * bit in the DMA address, and dma_to_phys will clear it. + * phys_to_dma_unencrypted is for use on special unencrypted memory like swiotlb + * buffers. + */ +static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) +{ + return __sme_set(phys_to_dma_unencrypted(dev, paddr)); +} + static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) { phys_addr_t paddr = (phys_addr_t)dev_addr + @@ -42,17 +57,6 @@ static inline bool force_dma_unencrypted(struct device *dev) } #endif /* CONFIG_ARCH_HAS_FORCE_DMA_UNENCRYPTED */ -/* - * If memory encryption is supported, phys_to_dma will set the memory encryption - * bit in the DMA address, and dma_to_phys will clear it. The raw __phys_to_dma - * version should only be used on non-encrypted memory for - * special occasions like DMA coherent buffers. - */ -static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) -{ - return __sme_set(__phys_to_dma(dev, paddr)); -} - static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, bool is_ram) { diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index bfb479c8a370..54db9cfdaecc 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -25,7 +25,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev, phys_addr_t phys) { if (force_dma_unencrypted(dev)) - return __phys_to_dma(dev, phys); + return phys_to_dma_unencrypted(dev, phys); return phys_to_dma(dev, phys); } @@ -438,13 +438,13 @@ int dma_direct_supported(struct device *dev, u64 mask) return 1; /* - * This check needs to be against the actual bit mask value, so - * use __phys_to_dma() here so that the SME encryption mask isn't + * This check needs to be against the actual bit mask value, so use + * phys_to_dma_unencrypted() here so that the SME encryption mask isn't * part of the check. */ if (IS_ENABLED(CONFIG_ZONE_DMA)) min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits)); - return mask >= __phys_to_dma(dev, min_mask); + return mask >= phys_to_dma_unencrypted(dev, min_mask); } size_t dma_direct_max_mapping_size(struct device *dev) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index c19379fabd20..4ea72d145cd2 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -670,13 +670,13 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size, swiotlb_force); swiotlb_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), + phys_to_dma_unencrypted(dev, io_tlb_start), paddr, size, size, dir, attrs); if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR) return DMA_MAPPING_ERROR; /* Ensure that the address returned is DMA'ble */ - dma_addr = __phys_to_dma(dev, swiotlb_addr); + dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr); if (unlikely(!dma_capable(dev, dma_addr, size, true))) { swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC); From 545d29272f38ba4791cca2a5a86fe6766f462f18 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 09:30:44 +0200 Subject: [PATCH 18/63] dma-mapping: move dma_common_{mmap,get_sgtable} out of mapping.c Add a new file that contains helpers for misc DMA ops, which is only built when CONFIG_DMA_OPS is set. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/Makefile | 1 + kernel/dma/mapping.c | 47 +----------------------------------- kernel/dma/ops_helpers.c | 51 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 46 deletions(-) create mode 100644 kernel/dma/ops_helpers.c diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile index 32c7c1942bbd..dc755ab68aab 100644 --- a/kernel/dma/Makefile +++ b/kernel/dma/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_HAS_DMA) += mapping.o direct.o +obj-$(CONFIG_DMA_OPS) += ops_helpers.o obj-$(CONFIG_DMA_OPS) += dummy.o obj-$(CONFIG_DMA_CMA) += contiguous.o obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 0d129421e75f..848c95c27d79 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -8,7 +8,7 @@ #include /* for max_pfn */ #include #include -#include +#include #include #include #include @@ -295,22 +295,6 @@ void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, } EXPORT_SYMBOL(dma_sync_sg_for_device); -/* - * Create scatter-list for the already allocated DMA buffer. - */ -int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - struct page *page = virt_to_page(cpu_addr); - int ret; - - ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - return ret; -} - /* * The whole dma_get_sgtable() idea is fundamentally unsafe - it seems * that the intention is to allow exporting memory allocated via the @@ -358,35 +342,6 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) } #endif /* CONFIG_MMU */ -/* - * Create userspace mapping for the DMA-coherent memory. - */ -int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ -#ifdef CONFIG_MMU - unsigned long user_count = vma_pages(vma); - unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned long off = vma->vm_pgoff; - int ret = -ENXIO; - - vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - if (off >= count || user_count > count - off) - return -ENXIO; - - return remap_pfn_range(vma, vma->vm_start, - page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, - user_count << PAGE_SHIFT, vma->vm_page_prot); -#else - return -ENXIO; -#endif /* CONFIG_MMU */ -} - /** * dma_can_mmap - check if a given device supports dma_mmap_* * @dev: device to check diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c new file mode 100644 index 000000000000..e443c69be429 --- /dev/null +++ b/kernel/dma/ops_helpers.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for DMA ops implementations. These generally rely on the fact that + * the allocated memory contains normal pages in the direct kernel mapping. + */ +#include + +/* + * Create scatter-list for the already allocated DMA buffer. + */ +int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ + struct page *page = virt_to_page(cpu_addr); + int ret; + + ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); + return ret; +} + +/* + * Create userspace mapping for the DMA-coherent memory. + */ +int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) +{ +#ifdef CONFIG_MMU + unsigned long user_count = vma_pages(vma); + unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; + unsigned long off = vma->vm_pgoff; + int ret = -ENXIO; + + vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs); + + if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) + return ret; + + if (off >= count || user_count > count - off) + return -ENXIO; + + return remap_pfn_range(vma, vma->vm_start, + page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff, + user_count << PAGE_SHIFT, vma->vm_page_prot); +#else + return -ENXIO; +#endif /* CONFIG_MMU */ +} From a92df4f62fda02e6b141e2b0bb52ccc486264b1c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 19:32:42 +0200 Subject: [PATCH 19/63] dma-mapping: move the dma_declare_coherent_memory documentation dma_declare_coherent_memory should not be in a DMA API guide aimed at driver writers (that is consumers of the API). Move it to a comment near the function instead. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- Documentation/core-api/dma-api.rst | 24 ------------------------ kernel/dma/coherent.c | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 3b3abbbb4b9a..90239348b30f 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -586,30 +586,6 @@ the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and continuing on for size. Again, you *must* observe the cache line boundaries when doing this. -:: - - int - dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); - -Declare region of memory to be handed out by dma_alloc_coherent() when -it's asked for coherent memory for this device. - -phys_addr is the CPU physical address to which the memory is currently -assigned (this will be ioremapped so the CPU can access the region). - -device_addr is the DMA address the device needs to be programmed -with to actually address this memory (this will be handed out as the -dma_addr_t in dma_alloc_coherent()). - -size is the size of the area (must be multiples of PAGE_SIZE). - -As a simplification for the platforms, only *one* such region of -memory may be declared per device. - -For reasons of efficiency, most platforms choose to track the declared -region only at the granularity of a page. For smaller allocations, -you should use the dma_pool() API. Part III - Debug drivers use of the DMA-API ------------------------------------------- diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index 2a0c4985f38e..f85d14bbfcbe 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -107,6 +107,23 @@ static int dma_assign_coherent_memory(struct device *dev, return 0; } +/* + * Declare a region of memory to be handed out by dma_alloc_coherent() when it + * is asked for coherent memory for this device. This shall only be used + * from platform code, usually based on the device tree description. + * + * phys_addr is the CPU physical address to which the memory is currently + * assigned (this will be ioremapped so the CPU can access the region). + * + * device_addr is the DMA address the device needs to be programmed with to + * actually address this memory (this will be handed out as the dma_addr_t in + * dma_alloc_coherent()). + * + * size is the size of the area (must be a multiple of PAGE_SIZE). + * + * As a simplification for the platforms, only *one* such region of memory may + * be declared per device. + */ int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) { From f959dcd6ddfd29235030e8026471ac1b022ad2b0 Mon Sep 17 00:00:00 2001 From: Thomas Tai Date: Thu, 17 Sep 2020 18:43:03 +0200 Subject: [PATCH 20/63] dma-direct: Fix potential NULL pointer dereference When booting the kernel v5.9-rc4 on a VM, the kernel would panic when printing a warning message in swiotlb_map(). The dev->dma_mask must not be a NULL pointer when calling the dma mapping layer. A NULL pointer check can potentially avoid the panic. Signed-off-by: Thomas Tai Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 3 --- kernel/dma/mapping.c | 11 +++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 805010ea5346..2929685e88aa 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -62,9 +62,6 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, { dma_addr_t end = addr + size - 1; - if (!dev->dma_mask) - return false; - if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 848c95c27d79..9a045e51df17 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -144,6 +144,10 @@ dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + if (dma_map_direct(dev, ops)) addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); else @@ -179,6 +183,10 @@ int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, int ents; BUG_ON(!valid_dma_direction(dir)); + + if (WARN_ON_ONCE(!dev->dma_mask)) + return 0; + if (dma_map_direct(dev, ops)) ents = dma_direct_map_sg(dev, sg, nents, dir, attrs); else @@ -213,6 +221,9 @@ dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, BUG_ON(!valid_dma_direction(dir)); + if (WARN_ON_ONCE(!dev->dma_mask)) + return DMA_MAPPING_ERROR; + /* Don't allow RAM to be mapped */ if (WARN_ON_ONCE(pfn_valid(PHYS_PFN(phys_addr)))) return DMA_MAPPING_ERROR; From f982438e82bbecb32200d5f22f405ec64f1fdd1b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 08:26:37 +0200 Subject: [PATCH 21/63] ARM/dma-mapping: remove a __arch_page_to_dma #error The __arch_page_to_dma hook is long gone. Signed-off-by: Christoph Hellwig --- arch/arm/include/asm/dma-mapping.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bdd80ddbca34..70d956776560 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -23,10 +23,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -#ifdef __arch_page_to_dma -#error Please update to __arch_pfn_to_dma -#endif - /* * dma_to_pfn/pfn_to_dma/dma_to_virt/virt_to_dma are architecture private * functions used internally by the DMA-mapping API to provide DMA From 002a26fb55282f7832e132d046d22e2c55f47103 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 09:02:56 +0200 Subject: [PATCH 22/63] ARM/dma-mapping: remove dma_to_virt dma_to_virt is entirely unused, remove it. Signed-off-by: Christoph Hellwig --- arch/arm/include/asm/dma-mapping.h | 18 +----------------- arch/arm/mach-omap1/include/mach/memory.h | 4 ---- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 70d956776560..cf2535fb8891 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -24,7 +24,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) } /* - * dma_to_pfn/pfn_to_dma/dma_to_virt/virt_to_dma are architecture private + * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private * functions used internally by the DMA-mapping API to provide DMA * addresses. They must not be used by drivers. */ @@ -46,17 +46,6 @@ static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) return pfn; } -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - if (dev) { - unsigned long pfn = dma_to_pfn(dev, addr); - - return phys_to_virt(__pfn_to_phys(pfn)); - } - - return (void *)__bus_to_virt((unsigned long)addr); -} - static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) { if (dev) @@ -76,11 +65,6 @@ static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) return __arch_dma_to_pfn(dev, addr); } -static inline void *dma_to_virt(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_virt(dev, addr); -} - static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) { return __arch_virt_to_dma(dev, addr); diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 1142560e0078..e43697c3297b 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -41,10 +41,6 @@ __phys_to_pfn(__dma); \ }) -#define __arch_dma_to_virt(dev, addr) ({ (void *) (is_lbus_device(dev) ? \ - lbus_to_virt(addr) : \ - __phys_to_virt(addr)); }) - #define __arch_virt_to_dma(dev, addr) ({ unsigned long __addr = (unsigned long)(addr); \ (dma_addr_t) (is_lbus_device(dev) ? \ virt_to_lbus(__addr) : \ From 3799e402a4f0d69690e0838c4a68cab26d06283a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Sep 2020 08:37:11 +0200 Subject: [PATCH 23/63] ARM/dma-mapping: move various helpers from dma-mapping.h to dma-direct.h Move the helpers to translate to and from direct mapping DMA addresses to dma-direct.h. This not only is the most logical place, but the new placement also avoids dependency loops with pending commits. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm/common/dmabounce.c | 2 +- arch/arm/include/asm/dma-direct.h | 50 ++++++++++++++++++++++++++++++ arch/arm/include/asm/dma-mapping.h | 50 ------------------------------ 3 files changed, 51 insertions(+), 51 deletions(-) diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index f4b719bde763..d3e00ea92088 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index bca0de567534..fbcf4367b5cb 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -2,6 +2,56 @@ #ifndef ASM_ARM_DMA_DIRECT_H #define ASM_ARM_DMA_DIRECT_H 1 +#include + +/* + * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private + * functions used internally by the DMA-mapping API to provide DMA + * addresses. They must not be used by drivers. + */ +#ifndef __arch_pfn_to_dma +static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) +{ + if (dev) + pfn -= dev->dma_pfn_offset; + return (dma_addr_t)__pfn_to_bus(pfn); +} + +static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) +{ + unsigned long pfn = __bus_to_pfn(addr); + + if (dev) + pfn += dev->dma_pfn_offset; + + return pfn; +} + +static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) +{ + if (dev) + return pfn_to_dma(dev, virt_to_pfn(addr)); + + return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); +} + +#else +static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) +{ + return __arch_pfn_to_dma(dev, pfn); +} + +static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) +{ + return __arch_dma_to_pfn(dev, addr); +} + +static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) +{ + return __arch_virt_to_dma(dev, addr); +} +#endif + static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index cf2535fb8891..0a1a536368c3 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -8,8 +8,6 @@ #include #include -#include - #include #include @@ -23,54 +21,6 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return NULL; } -/* - * dma_to_pfn/pfn_to_dma/virt_to_dma are architecture private - * functions used internally by the DMA-mapping API to provide DMA - * addresses. They must not be used by drivers. - */ -#ifndef __arch_pfn_to_dma -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - if (dev) - pfn -= dev->dma_pfn_offset; - return (dma_addr_t)__pfn_to_bus(pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - unsigned long pfn = __bus_to_pfn(addr); - - if (dev) - pfn += dev->dma_pfn_offset; - - return pfn; -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - if (dev) - return pfn_to_dma(dev, virt_to_pfn(addr)); - - return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); -} - -#else -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - return __arch_pfn_to_dma(dev, pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_pfn(dev, addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - return __arch_virt_to_dma(dev, addr); -} -#endif - /** * arm_dma_alloc - allocate consistent memory for DMA * @dev: valid struct device pointer, or NULL for ISA and EISA-like devices From cf141ae989e2ff119cd320326da5923b480d1641 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 8 Sep 2020 19:24:00 +0200 Subject: [PATCH 24/63] ARM/keystone: move the DMA offset handling under ifdef CONFIG_ARM_LPAE The DMA offset notifier can only be used if PHYS_OFFSET is at least KEYSTONE_HIGH_PHYS_START, which can't be represented by a 32-bit phys_addr_t. Currently the code compiles fine despite that, a pending change to the DMA offset handling would create a compiler warning for this case. Add an ifdef to not compile the code except for LPAE configs. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- arch/arm/mach-keystone/keystone.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index 638808c4e122..dcd031ba84c2 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -24,6 +24,7 @@ #include "keystone.h" +#ifdef CONFIG_ARM_LPAE static unsigned long keystone_dma_pfn_offset __read_mostly; static int keystone_platform_notifier(struct notifier_block *nb, @@ -48,14 +49,17 @@ static int keystone_platform_notifier(struct notifier_block *nb, static struct notifier_block platform_nb = { .notifier_call = keystone_platform_notifier, }; +#endif /* CONFIG_ARM_LPAE */ static void __init keystone_init(void) { +#ifdef CONFIG_ARM_LPAE if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START); bus_register_notifier(&platform_bus_type, &platform_nb); } +#endif keystone_pm_runtime_init(); } From 6eb0233ec2d0df288fe8515d5b0b2b15562e05bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 10 Sep 2020 18:03:49 +0200 Subject: [PATCH 25/63] usb: don't inherity DMA properties for USB devices As the comment in usb_alloc_dev correctly states, drivers can't use the DMA API on usb device, and at least calling dma_set_mask on them is highly dangerous. Unlike what the comment states upper level drivers also can't really use the presence of a dma mask to check for DMA support, as the dma_mask is set by default for most busses. Setting the dma_mask comes from "[PATCH] usbcore dma updates (and doc)" in BitKeeper times, as it seems like it was primarily for setting the NETIF_F_HIGHDMA flag in USB drivers, something that has long been fixed up since. Setting the dma_pfn_offset comes from commit b44bbc46a8bb ("usb: core: setup dma_pfn_offset for USB devices and, interfaces"), which worked around the fact that the scsi_calculate_bounce_limits functions wasn't going through the proper driver interface to query DMA information, but that function was removed in commit 21e07dba9fb1 ("scsi: reduce use of block bounce buffers") years ago. Signed-off-by: Christoph Hellwig Reviewed-by: Greg Kroah-Hartman --- drivers/usb/core/message.c | 6 ------ drivers/usb/core/usb.c | 12 ------------ 2 files changed, 18 deletions(-) diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c index 6197938dcc2d..9e45732dc1d1 100644 --- a/drivers/usb/core/message.c +++ b/drivers/usb/core/message.c @@ -1954,12 +1954,6 @@ int usb_set_configuration(struct usb_device *dev, int configuration) intf->dev.bus = &usb_bus_type; intf->dev.type = &usb_if_device_type; intf->dev.groups = usb_interface_groups; - /* - * Please refer to usb_alloc_dev() to see why we set - * dma_mask and dma_pfn_offset. - */ - intf->dev.dma_mask = dev->dev.dma_mask; - intf->dev.dma_pfn_offset = dev->dev.dma_pfn_offset; INIT_WORK(&intf->reset_ws, __usb_queue_reset_device); intf->minor = -1; device_initialize(&intf->dev); diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c index bafc113f2b3e..9b4ac4415f1a 100644 --- a/drivers/usb/core/usb.c +++ b/drivers/usb/core/usb.c @@ -599,18 +599,6 @@ struct usb_device *usb_alloc_dev(struct usb_device *parent, dev->dev.bus = &usb_bus_type; dev->dev.type = &usb_device_type; dev->dev.groups = usb_device_groups; - /* - * Fake a dma_mask/offset for the USB device: - * We cannot really use the dma-mapping API (dma_alloc_* and - * dma_map_*) for USB devices but instead need to use - * usb_alloc_coherent and pass data in 'urb's, but some subsystems - * manually look into the mask/offset pair to determine whether - * they need bounce buffers. - * Note: calling dma_set_mask() on a USB device would set the - * mask for the entire HCD, so don't do that. - */ - dev->dev.dma_mask = bus->sysdev->dma_mask; - dev->dev.dma_pfn_offset = bus->sysdev->dma_pfn_offset; set_dev_node(&dev->dev, dev_to_node(bus->sysdev)); dev->state = USB_STATE_ATTACHED; dev->lpm_disable_count = 1; From e0d072782c734d27f5af062c62266f2598f68542 Mon Sep 17 00:00:00 2001 From: Jim Quinlan Date: Thu, 17 Sep 2020 18:43:40 +0200 Subject: [PATCH 26/63] dma-mapping: introduce DMA range map, supplanting dma_pfn_offset The new field 'dma_range_map' in struct device is used to facilitate the use of single or multiple offsets between mapping regions of cpu addrs and dma addrs. It subsumes the role of "dev->dma_pfn_offset" which was only capable of holding a single uniform offset and had no region bounds checking. The function of_dma_get_range() has been modified so that it takes a single argument -- the device node -- and returns a map, NULL, or an error code. The map is an array that holds the information regarding the DMA regions. Each range entry contains the address offset, the cpu_start address, the dma_start address, and the size of the region. of_dma_configure() is the typical manner to set range offsets but there are a number of ad hoc assignments to "dev->dma_pfn_offset" in the kernel driver code. These cases now invoke the function dma_direct_set_offset(dev, cpu_addr, dma_addr, size). Signed-off-by: Jim Quinlan [hch: various interface cleanups] Signed-off-by: Christoph Hellwig Reviewed-by: Mathieu Poirier Tested-by: Mathieu Poirier Tested-by: Nathan Chancellor --- arch/arm/include/asm/dma-direct.h | 9 +-- arch/arm/mach-keystone/keystone.c | 17 ++--- arch/arm/mach-omap1/include/mach/memory.h | 4 + arch/sh/drivers/pci/pcie-sh7786.c | 9 ++- arch/x86/pci/sta2x11-fixup.c | 6 +- drivers/acpi/arm64/iort.c | 6 +- drivers/base/core.c | 2 + drivers/gpu/drm/sun4i/sun4i_backend.c | 8 +- drivers/iommu/io-pgtable-arm.c | 2 +- .../platform/sunxi/sun4i-csi/sun4i_csi.c | 9 ++- .../platform/sunxi/sun6i-csi/sun6i_csi.c | 11 ++- drivers/of/address.c | 73 ++++++++----------- drivers/of/device.c | 44 ++++++----- drivers/of/of_private.h | 11 +-- drivers/of/unittest.c | 34 ++++++--- drivers/remoteproc/remoteproc_core.c | 24 +++++- .../staging/media/sunxi/cedrus/cedrus_hw.c | 10 ++- include/linux/device.h | 4 +- include/linux/dma-direct.h | 53 ++++++++++++-- include/linux/dma-mapping.h | 9 ++- kernel/dma/coherent.c | 7 +- kernel/dma/direct.c | 51 ++++++++++++- 22 files changed, 285 insertions(+), 118 deletions(-) diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index fbcf4367b5cb..436544aeb834 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -12,8 +12,8 @@ #ifndef __arch_pfn_to_dma static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) { - if (dev) - pfn -= dev->dma_pfn_offset; + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_phys_to_dma(dev, PFN_PHYS(pfn))); return (dma_addr_t)__pfn_to_bus(pfn); } @@ -21,9 +21,8 @@ static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) { unsigned long pfn = __bus_to_pfn(addr); - if (dev) - pfn += dev->dma_pfn_offset; - + if (dev && dev->dma_range_map) + pfn = PFN_DOWN(translate_dma_to_phys(dev, PFN_PHYS(pfn))); return pfn; } diff --git a/arch/arm/mach-keystone/keystone.c b/arch/arm/mach-keystone/keystone.c index dcd031ba84c2..09a65c2dfd73 100644 --- a/arch/arm/mach-keystone/keystone.c +++ b/arch/arm/mach-keystone/keystone.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include @@ -25,8 +26,6 @@ #include "keystone.h" #ifdef CONFIG_ARM_LPAE -static unsigned long keystone_dma_pfn_offset __read_mostly; - static int keystone_platform_notifier(struct notifier_block *nb, unsigned long event, void *data) { @@ -39,9 +38,12 @@ static int keystone_platform_notifier(struct notifier_block *nb, return NOTIFY_BAD; if (!dev->of_node) { - dev->dma_pfn_offset = keystone_dma_pfn_offset; - dev_err(dev, "set dma_pfn_offset%08lx\n", - dev->dma_pfn_offset); + int ret = dma_direct_set_offset(dev, KEYSTONE_HIGH_PHYS_START, + KEYSTONE_LOW_PHYS_START, + KEYSTONE_HIGH_PHYS_SIZE); + dev_err(dev, "set dma_offset%08llx%s\n", + KEYSTONE_HIGH_PHYS_START - KEYSTONE_LOW_PHYS_START, + ret ? " failed" : ""); } return NOTIFY_OK; } @@ -54,11 +56,8 @@ static struct notifier_block platform_nb = { static void __init keystone_init(void) { #ifdef CONFIG_ARM_LPAE - if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) { - keystone_dma_pfn_offset = PFN_DOWN(KEYSTONE_HIGH_PHYS_START - - KEYSTONE_LOW_PHYS_START); + if (PHYS_OFFSET >= KEYSTONE_HIGH_PHYS_START) bus_register_notifier(&platform_bus_type, &platform_nb); - } #endif keystone_pm_runtime_init(); } diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index e43697c3297b..1142560e0078 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -41,6 +41,10 @@ __phys_to_pfn(__dma); \ }) +#define __arch_dma_to_virt(dev, addr) ({ (void *) (is_lbus_device(dev) ? \ + lbus_to_virt(addr) : \ + __phys_to_virt(addr)); }) + #define __arch_virt_to_dma(dev, addr) ({ unsigned long __addr = (unsigned long)(addr); \ (dma_addr_t) (is_lbus_device(dev) ? \ virt_to_lbus(__addr) : \ diff --git a/arch/sh/drivers/pci/pcie-sh7786.c b/arch/sh/drivers/pci/pcie-sh7786.c index e0b568aaa701..4468289ab2ca 100644 --- a/arch/sh/drivers/pci/pcie-sh7786.c +++ b/arch/sh/drivers/pci/pcie-sh7786.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -31,6 +32,8 @@ struct sh7786_pcie_port { static struct sh7786_pcie_port *sh7786_pcie_ports; static unsigned int nr_ports; static unsigned long dma_pfn_offset; +size_t memsize; +u64 memstart; static struct sh7786_pcie_hwops { int (*core_init)(void); @@ -301,7 +304,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) struct pci_channel *chan = port->hose; unsigned int data; phys_addr_t memstart, memend; - size_t memsize; int ret, i, win; /* Begin initialization */ @@ -368,8 +370,6 @@ static int __init pcie_init(struct sh7786_pcie_port *port) memstart = ALIGN_DOWN(memstart, memsize); memsize = roundup_pow_of_two(memend - memstart); - dma_pfn_offset = memstart >> PAGE_SHIFT; - /* * If there's more than 512MB of memory, we need to roll over to * LAR1/LAMR1. @@ -487,7 +487,8 @@ int pcibios_map_platform_irq(const struct pci_dev *pdev, u8 slot, u8 pin) void pcibios_bus_add_device(struct pci_dev *pdev) { - pdev->dev.dma_pfn_offset = dma_pfn_offset; + dma_direct_set_offset(&pdev->dev, __pa(memory_start), + __pa(memory_start) - memstart, memsize); } static int __init sh7786_pcie_core_init(void) diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index c313d784efab..324a207f9995 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -133,7 +133,7 @@ static void sta2x11_map_ep(struct pci_dev *pdev) struct sta2x11_instance *instance = sta2x11_pdev_to_instance(pdev); struct device *dev = &pdev->dev; u32 amba_base, max_amba_addr; - int i; + int i, ret; if (!instance) return; @@ -141,7 +141,9 @@ static void sta2x11_map_ep(struct pci_dev *pdev) pci_read_config_dword(pdev, AHB_BASE(0), &amba_base); max_amba_addr = amba_base + STA2X11_AMBA_SIZE - 1; - dev->dma_pfn_offset = PFN_DOWN(-amba_base); + ret = dma_direct_set_offset(dev, 0, amba_base, STA2X11_AMBA_SIZE); + if (ret) + dev_err(dev, "sta2x11: could not set DMA offset\n"); dev->bus_dma_limit = max_amba_addr; pci_set_consistent_dma_mask(pdev, max_amba_addr); diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index ec782e4a0fe4..de18c07ca02c 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -18,6 +18,7 @@ #include #include #include +#include #define IORT_TYPE_MASK(type) (1 << (type)) #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP) @@ -1184,8 +1185,9 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) *dma_addr = dmaaddr; *dma_size = size; - dev->dma_pfn_offset = PFN_DOWN(offset); - dev_dbg(dev, "dma_pfn_offset(%#08llx)\n", offset); + ret = dma_direct_set_offset(dev, dmaaddr + offset, dmaaddr, size); + + dev_dbg(dev, "dma_offset(%#08llx)%s\n", offset, ret ? " failed!" : ""); } static void __init acpi_iort_register_irq(int hwirq, const char *name, diff --git a/drivers/base/core.c b/drivers/base/core.c index f6f620aa9408..b893056e3945 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1792,6 +1792,8 @@ static void device_release(struct kobject *kobj) */ devres_release_all(dev); + kfree(dev->dma_range_map); + if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c index 072ea113e6be..05e9f0d28196 100644 --- a/drivers/gpu/drm/sun4i/sun4i_backend.c +++ b/drivers/gpu/drm/sun4i/sun4i_backend.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -811,8 +812,13 @@ static int sun4i_backend_bind(struct device *dev, struct device *master, * because of an old DT, we need to set the DMA offset by hand * on our device since the RAM mapping is at 0 for the DMA bus, * unlike the CPU. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ - drm->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(drm->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; } backend->engine.node = dev->of_node; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index dc7bcf858b6d..d77e881516a4 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -751,7 +751,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) return NULL; - if (!selftest_running && cfg->iommu_dev->dma_pfn_offset) { + if (!selftest_running && cfg->iommu_dev->dma_range_map) { dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n"); return NULL; } diff --git a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c index 5319eb1ab309..307997ee7f96 100644 --- a/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c +++ b/drivers/media/platform/sunxi/sun4i-csi/sun4i_csi.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include @@ -182,8 +183,14 @@ static int sun4i_csi_probe(struct platform_device *pdev) if (ret) return ret; } else { + /* + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. + */ #ifdef PHYS_PFN_OFFSET - csi->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + ret = dma_direct_set_offset(csi->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; #endif } diff --git a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c index 28e89340fed9..e69e14379fc6 100644 --- a/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c +++ b/drivers/media/platform/sunxi/sun6i-csi/sun6i_csi.c @@ -899,8 +899,15 @@ static int sun6i_csi_probe(struct platform_device *pdev) return -ENOMEM; sdev->dev = &pdev->dev; - /* The DMA bus has the memory mapped at 0 */ - sdev->dev->dma_pfn_offset = PHYS_OFFSET >> PAGE_SHIFT; + /* + * The DMA bus has the memory mapped at 0. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. + */ + ret = dma_direct_set_offset(sdev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; ret = sun6i_csi_resource_request(sdev, pdev); if (ret) diff --git a/drivers/of/address.c b/drivers/of/address.c index da4f7341323f..eb9ab4f1e80b 100644 --- a/drivers/of/address.c +++ b/drivers/of/address.c @@ -13,6 +13,7 @@ #include #include #include +#include /* for bus_dma_region */ #include "of_private.h" @@ -937,33 +938,33 @@ void __iomem *of_io_request_and_map(struct device_node *np, int index, } EXPORT_SYMBOL(of_io_request_and_map); +#ifdef CONFIG_HAS_DMA /** - * of_dma_get_range - Get DMA range info + * of_dma_get_range - Get DMA range info and put it into a map array * @np: device node to get DMA range info - * @dma_addr: pointer to store initial DMA address of DMA range - * @paddr: pointer to store initial CPU address of DMA range - * @size: pointer to store size of DMA range + * @map: dma range structure to return * * Look in bottom up direction for the first "dma-ranges" property - * and parse it. - * dma-ranges format: + * and parse it. Put the information into a DMA offset map array. + * + * dma-ranges format: * DMA addr (dma_addr) : naddr cells * CPU addr (phys_addr_t) : pna cells * size : nsize cells * - * It returns -ENODEV if "dma-ranges" property was not found - * for this device in DT. + * It returns -ENODEV if "dma-ranges" property was not found for this + * device in the DT. */ -int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size) +int of_dma_get_range(struct device_node *np, const struct bus_dma_region **map) { struct device_node *node = of_node_get(np); const __be32 *ranges = NULL; - int len; - int ret = 0; bool found_dma_ranges = false; struct of_range_parser parser; struct of_range range; - u64 dma_start = U64_MAX, dma_end = 0, dma_offset = 0; + struct bus_dma_region *r; + int len, num_ranges = 0; + int ret = 0; while (node) { ranges = of_get_property(node, "dma-ranges", &len); @@ -989,49 +990,39 @@ int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *siz } of_dma_range_parser_init(&parser, node); + for_each_of_range(&parser, &range) + num_ranges++; + r = kcalloc(num_ranges + 1, sizeof(*r), GFP_KERNEL); + if (!r) { + ret = -ENOMEM; + goto out; + } + + /* + * Record all info in the generic DMA ranges array for struct device. + */ + *map = r; + of_dma_range_parser_init(&parser, node); for_each_of_range(&parser, &range) { pr_debug("dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", range.bus_addr, range.cpu_addr, range.size); - - if (dma_offset && range.cpu_addr - range.bus_addr != dma_offset) { - pr_warn("Can't handle multiple dma-ranges with different offsets on node(%pOF)\n", node); - /* Don't error out as we'd break some existing DTs */ - continue; - } if (range.cpu_addr == OF_BAD_ADDR) { pr_err("translation of DMA address(%llx) to CPU address failed node(%pOF)\n", range.bus_addr, node); continue; } - dma_offset = range.cpu_addr - range.bus_addr; - - /* Take lower and upper limits */ - if (range.bus_addr < dma_start) - dma_start = range.bus_addr; - if (range.bus_addr + range.size > dma_end) - dma_end = range.bus_addr + range.size; + r->cpu_start = range.cpu_addr; + r->dma_start = range.bus_addr; + r->size = range.size; + r->offset = range.cpu_addr - range.bus_addr; + r++; } - - if (dma_start >= dma_end) { - ret = -EINVAL; - pr_debug("Invalid DMA ranges configuration on node(%pOF)\n", - node); - goto out; - } - - *dma_addr = dma_start; - *size = dma_end - dma_start; - *paddr = dma_start + dma_offset; - - pr_debug("final: dma_addr(%llx) cpu_addr(%llx) size(%llx)\n", - *dma_addr, *paddr, *size); - out: of_node_put(node); - return ret; } +#endif /* CONFIG_HAS_DMA */ /** * of_dma_is_coherent - Check if device is coherent diff --git a/drivers/of/device.c b/drivers/of/device.c index b439c1e05434..6e3ae7ebc33e 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include /* for bus_dma_region */ #include #include #include @@ -90,14 +90,14 @@ int of_device_add(struct platform_device *ofdev) int of_dma_configure_id(struct device *dev, struct device_node *np, bool force_dma, const u32 *id) { - u64 dma_addr, paddr, size = 0; - int ret; - bool coherent; - unsigned long offset; const struct iommu_ops *iommu; - u64 mask, end; + const struct bus_dma_region *map = NULL; + dma_addr_t dma_start = 0; + u64 mask, end, size = 0; + bool coherent; + int ret; - ret = of_dma_get_range(np, &dma_addr, &paddr, &size); + ret = of_dma_get_range(np, &map); if (ret < 0) { /* * For legacy reasons, we have to assume some devices need @@ -106,26 +106,35 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, */ if (!force_dma) return ret == -ENODEV ? 0 : ret; - - dma_addr = offset = 0; } else { - offset = PFN_DOWN(paddr - dma_addr); + const struct bus_dma_region *r = map; + dma_addr_t dma_end = 0; + + /* Determine the overall bounds of all DMA regions */ + for (dma_start = ~(dma_addr_t)0; r->size; r++) { + /* Take lower and upper limits */ + if (r->dma_start < dma_start) + dma_start = r->dma_start; + if (r->dma_start + r->size > dma_end) + dma_end = r->dma_start + r->size; + } + size = dma_end - dma_start; /* * Add a work around to treat the size as mask + 1 in case * it is defined in DT as a mask. */ if (size & 1) { - dev_warn(dev, "Invalid size 0x%llx for dma-range\n", + dev_warn(dev, "Invalid size 0x%llx for dma-range(s)\n", size); size = size + 1; } if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); + kfree(map); return -EINVAL; } - dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset); } /* @@ -144,13 +153,11 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, else if (!size) size = 1ULL << 32; - dev->dma_pfn_offset = offset; - /* * Limit coherent and dma mask based on size and default mask * set by the driver. */ - end = dma_addr + size - 1; + end = dma_start + size - 1; mask = DMA_BIT_MASK(ilog2(end) + 1); dev->coherent_dma_mask &= mask; *dev->dma_mask &= mask; @@ -163,14 +170,17 @@ int of_dma_configure_id(struct device *dev, struct device_node *np, coherent ? " " : " not "); iommu = of_iommu_configure(dev, np, id); - if (PTR_ERR(iommu) == -EPROBE_DEFER) + if (PTR_ERR(iommu) == -EPROBE_DEFER) { + kfree(map); return -EPROBE_DEFER; + } dev_dbg(dev, "device is%sbehind an iommu\n", iommu ? " " : " not "); - arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent); + arch_setup_dma_ops(dev, dma_start, size, iommu, coherent); + dev->dma_range_map = map; return 0; } EXPORT_SYMBOL_GPL(of_dma_configure_id); diff --git a/drivers/of/of_private.h b/drivers/of/of_private.h index edc682249c00..d9e6a324de0a 100644 --- a/drivers/of/of_private.h +++ b/drivers/of/of_private.h @@ -157,12 +157,13 @@ extern void __of_sysfs_remove_bin_file(struct device_node *np, extern int of_bus_n_addr_cells(struct device_node *np); extern int of_bus_n_size_cells(struct device_node *np); -#ifdef CONFIG_OF_ADDRESS -extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size); +struct bus_dma_region; +#if defined(CONFIG_OF_ADDRESS) && defined(CONFIG_HAS_DMA) +int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map); #else -static inline int of_dma_get_range(struct device_node *np, u64 *dma_addr, - u64 *paddr, u64 *size) +static inline int of_dma_get_range(struct device_node *np, + const struct bus_dma_region **map) { return -ENODEV; } diff --git a/drivers/of/unittest.c b/drivers/of/unittest.c index 9b7e84bdc7d4..06cc988faf78 100644 --- a/drivers/of/unittest.c +++ b/drivers/of/unittest.c @@ -7,6 +7,7 @@ #include #include +#include /* to test phys_to_dma/dma_to_phys */ #include #include #include @@ -869,10 +870,11 @@ static void __init of_unittest_changeset(void) } static void __init of_unittest_dma_ranges_one(const char *path, - u64 expect_dma_addr, u64 expect_paddr, u64 expect_size) + u64 expect_dma_addr, u64 expect_paddr) { +#ifdef CONFIG_HAS_DMA struct device_node *np; - u64 dma_addr, paddr, size; + const struct bus_dma_region *map = NULL; int rc; np = of_find_node_by_path(path); @@ -881,28 +883,40 @@ static void __init of_unittest_dma_ranges_one(const char *path, return; } - rc = of_dma_get_range(np, &dma_addr, &paddr, &size); + rc = of_dma_get_range(np, &map); unittest(!rc, "of_dma_get_range failed on node %pOF rc=%i\n", np, rc); + if (!rc) { - unittest(size == expect_size, - "of_dma_get_range wrong size on node %pOF size=%llx\n", np, size); + phys_addr_t paddr; + dma_addr_t dma_addr; + struct device dev_bogus; + + dev_bogus.dma_range_map = map; + paddr = dma_to_phys(&dev_bogus, expect_dma_addr); + dma_addr = phys_to_dma(&dev_bogus, expect_paddr); + unittest(paddr == expect_paddr, - "of_dma_get_range wrong phys addr (%llx) on node %pOF", paddr, np); + "of_dma_get_range: wrong phys addr %pap (expecting %llx) on node %pOF\n", + &paddr, expect_paddr, np); unittest(dma_addr == expect_dma_addr, - "of_dma_get_range wrong DMA addr (%llx) on node %pOF", dma_addr, np); + "of_dma_get_range: wrong DMA addr %pad (expecting %llx) on node %pOF\n", + &dma_addr, expect_dma_addr, np); + + kfree(map); } of_node_put(np); +#endif } static void __init of_unittest_parse_dma_ranges(void) { of_unittest_dma_ranges_one("/testcase-data/address-tests/device@70000000", - 0x0, 0x20000000, 0x40000000); + 0x0, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/bus@80000000/device@1000", - 0x100000000, 0x20000000, 0x2000000000); + 0x100000000, 0x20000000); of_unittest_dma_ranges_one("/testcase-data/address-tests/pci@90000000", - 0x80000000, 0x20000000, 0x10000000); + 0x80000000, 0x20000000); } static void __init of_unittest_pci_dma_ranges(void) diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 7f90eeea67e2..8157dd491d28 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -23,6 +23,7 @@ #include #include #include +#include /* XXX: pokes into bus_dma_range */ #include #include #include @@ -458,6 +459,25 @@ static void rproc_rvdev_release(struct device *dev) kfree(rvdev); } +static int copy_dma_range_map(struct device *to, struct device *from) +{ + const struct bus_dma_region *map = from->dma_range_map, *new_map, *r; + int num_ranges = 0; + + if (!map) + return 0; + + for (r = map; r->size; r++) + num_ranges++; + + new_map = kmemdup(map, array_size(num_ranges + 1, sizeof(*map)), + GFP_KERNEL); + if (!new_map) + return -ENOMEM; + to->dma_range_map = new_map; + return 0; +} + /** * rproc_handle_vdev() - handle a vdev fw resource * @rproc: the remote processor @@ -529,7 +549,9 @@ static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc, /* Initialise vdev subdevice */ snprintf(name, sizeof(name), "vdev%dbuffer", rvdev->index); rvdev->dev.parent = &rproc->dev; - rvdev->dev.dma_pfn_offset = rproc->dev.parent->dma_pfn_offset; + ret = copy_dma_range_map(&rvdev->dev, rproc->dev.parent); + if (ret) + return ret; rvdev->dev.release = rproc_rvdev_release; dev_set_name(&rvdev->dev, "%s#%s", dev_name(rvdev->dev.parent), name); dev_set_drvdata(&rvdev->dev, rvdev); diff --git a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c index 1744e6fcc999..bcf050a04ffc 100644 --- a/drivers/staging/media/sunxi/cedrus/cedrus_hw.c +++ b/drivers/staging/media/sunxi/cedrus/cedrus_hw.c @@ -227,11 +227,17 @@ int cedrus_hw_probe(struct cedrus_dev *dev) * the RAM offset to the physcal addresses. * * This information will eventually be obtained from device-tree. + * + * XXX(hch): this has no business in a driver and needs to move + * to the device tree. */ #ifdef PHYS_PFN_OFFSET - if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) - dev->dev->dma_pfn_offset = PHYS_PFN_OFFSET; + if (!(variant->quirks & CEDRUS_QUIRK_NO_DMA_OFFSET)) { + ret = dma_direct_set_offset(dev->dev, PHYS_OFFSET, 0, SZ_4G); + if (ret) + return ret; + } #endif ret = of_reserved_mem_device_init(dev->dev); diff --git a/include/linux/device.h b/include/linux/device.h index ca18da4768e3..1c78621fc3c0 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -466,7 +466,7 @@ struct dev_links_info { * such descriptors. * @bus_dma_limit: Limit of an upstream bridge or bus which imposes a smaller * DMA limit than the device itself supports. - * @dma_pfn_offset: offset of DMA memory range relatively of RAM + * @dma_range_map: map for DMA memory ranges relative to that of RAM * @dma_parms: A low level driver may set these to teach IOMMU code about * segment limitations. * @dma_pools: Dma pools (if dma'ble device). @@ -561,7 +561,7 @@ struct device { 64 bit addresses for consistent allocations such descriptors. */ u64 bus_dma_limit; /* upstream dma constraint */ - unsigned long dma_pfn_offset; + const struct bus_dma_region *dma_range_map; struct device_dma_parameters *dma_parms; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 2929685e88aa..83f797e0cb78 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -14,6 +14,41 @@ extern unsigned int zone_dma_bits; +/* + * Record the mapping of CPU physical to DMA addresses for a given region. + */ +struct bus_dma_region { + phys_addr_t cpu_start; + dma_addr_t dma_start; + u64 size; + u64 offset; +}; + +static inline dma_addr_t translate_phys_to_dma(struct device *dev, + phys_addr_t paddr) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (paddr >= m->cpu_start && paddr - m->cpu_start < m->size) + return (dma_addr_t)paddr - m->offset; + + /* make sure dma_capable fails when no translation is available */ + return DMA_MAPPING_ERROR; +} + +static inline phys_addr_t translate_dma_to_phys(struct device *dev, + dma_addr_t dma_addr) +{ + const struct bus_dma_region *m; + + for (m = dev->dma_range_map; m->size; m++) + if (dma_addr >= m->dma_start && dma_addr - m->dma_start < m->size) + return (phys_addr_t)dma_addr + m->offset; + + return (phys_addr_t)-1; +} + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #ifndef phys_to_dma_unencrypted @@ -23,9 +58,9 @@ extern unsigned int zone_dma_bits; static inline dma_addr_t phys_to_dma_unencrypted(struct device *dev, phys_addr_t paddr) { - dma_addr_t dev_addr = (dma_addr_t)paddr; - - return dev_addr - ((dma_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + if (dev->dma_range_map) + return translate_phys_to_dma(dev, paddr); + return paddr; } /* @@ -39,10 +74,14 @@ static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) return __sme_set(phys_to_dma_unencrypted(dev, paddr)); } -static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dev_addr) +static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t dma_addr) { - phys_addr_t paddr = (phys_addr_t)dev_addr + - ((phys_addr_t)dev->dma_pfn_offset << PAGE_SHIFT); + phys_addr_t paddr; + + if (dev->dma_range_map) + paddr = translate_dma_to_phys(dev, dma_addr); + else + paddr = dma_addr; return __sme_clr(paddr); } @@ -62,6 +101,8 @@ static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size, { dma_addr_t end = addr + size - 1; + if (addr == DMA_MAPPING_ERROR) + return false; if (is_ram && !IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) && min(addr, end) < phys_to_dma(dev, PFN_PHYS(min_low_pfn))) return false; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index df0bff2ea750..bb138ac6f5e6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -730,4 +730,11 @@ static inline int dma_mmap_wc(struct device *dev, #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif -#endif +/* + * Legacy interface to set up the dma offset map. Drivers really should not + * actually use it, but we have a few legacy cases left. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size); + +#endif /* _LINUX_DMA_MAPPING_H */ diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index f85d14bbfcbe..c0685196fb6d 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include struct dma_coherent_mem { void *virt_base; @@ -32,9 +32,8 @@ static inline dma_addr_t dma_get_device_base(struct device *dev, struct dma_coherent_mem * mem) { if (mem->use_dev_dma_pfn_offset) - return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT; - else - return mem->device_base; + return phys_to_dma(dev, PFN_PHYS(mem->pfn_base)); + return mem->device_base; } static int dma_init_coherent_memory(phys_addr_t phys_addr, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 54db9cfdaecc..750659f7447c 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -13,6 +13,7 @@ #include #include #include +#include /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it @@ -66,8 +67,12 @@ static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) { - return phys_to_dma_direct(dev, phys) + size - 1 <= - min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); + dma_addr_t dma_addr = phys_to_dma_direct(dev, phys); + + if (dma_addr == DMA_MAPPING_ERROR) + return false; + return dma_addr + size - 1 <= + min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } /* @@ -461,3 +466,45 @@ bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr) return !dev_is_dma_coherent(dev) || is_swiotlb_buffer(dma_to_phys(dev, dma_addr)); } + +/** + * dma_direct_set_offset - Assign scalar offset for a single DMA range. + * @dev: device pointer; needed to "own" the alloced memory. + * @cpu_start: beginning of memory region covered by this offset. + * @dma_start: beginning of DMA/PCI region covered by this offset. + * @size: size of the region. + * + * This is for the simple case of a uniform offset which cannot + * be discovered by "dma-ranges". + * + * It returns -ENOMEM if out of memory, -EINVAL if a map + * already exists, 0 otherwise. + * + * Note: any call to this from a driver is a bug. The mapping needs + * to be described by the device tree or other firmware interfaces. + */ +int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, + dma_addr_t dma_start, u64 size) +{ + struct bus_dma_region *map; + u64 offset = (u64)cpu_start - (u64)dma_start; + + if (dev->dma_range_map) { + dev_err(dev, "attempt to add DMA range to existing map\n"); + return -EINVAL; + } + + if (!offset) + return 0; + + map = kcalloc(2, sizeof(*map), GFP_KERNEL); + if (!map) + return -ENOMEM; + map[0].cpu_start = cpu_start; + map[0].dma_start = dma_start; + map[0].offset = offset; + map[0].size = size; + dev->dma_range_map = map; + return 0; +} +EXPORT_SYMBOL_GPL(dma_direct_set_offset); From a97740f81874c8063c12c24f34d25f10c4f5e9aa Mon Sep 17 00:00:00 2001 From: Xu Wang Date: Mon, 21 Sep 2020 07:09:26 +0000 Subject: [PATCH 27/63] dma-debug: convert comma to semicolon Replace a comma between expression statements by a semicolon. Signed-off-by: Xu Wang Signed-off-by: Christoph Hellwig --- kernel/dma/debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 6f53c2e03aa4..4211800d9f3e 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -1219,7 +1219,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_single; entry->pfn = page_to_pfn(page); - entry->offset = offset, + entry->offset = offset; entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1308,7 +1308,7 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, + entry->offset = s->offset; entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; From b9bb694b9f62f4b31652223ed3ca38cf98bbb370 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 21 Sep 2020 16:08:01 +0100 Subject: [PATCH 28/63] iommu/io-pgtable-arm: Clean up faulty sanity check Checking for a nonzero dma_pfn_offset was a quick shortcut to validate whether the DMA == phys assumption could hold at all. Checking for a non-NULL dma_range_map is not quite equivalent, since a map may be present to describe a limited DMA window even without an offset, and thus this check can now yield false positives. However, it only ever served to short-circuit going all the way through to __arm_lpae_alloc_pages(), failing the canonical test there, and having a bit more to clean up. As such, we can simply remove it without loss of correctness. Reported-by: Naresh Kamboju Signed-off-by: Robin Murphy Signed-off-by: Christoph Hellwig --- drivers/iommu/io-pgtable-arm.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index d77e881516a4..f87cbb822a3d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -751,11 +751,6 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg) if (cfg->oas > ARM_LPAE_MAX_ADDR_BITS) return NULL; - if (!selftest_running && cfg->iommu_dev->dma_range_map) { - dev_err(cfg->iommu_dev, "Cannot accommodate DMA offset for IOMMU page tables\n"); - return NULL; - } - data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return NULL; From 7ae10eb903d6ed9b3069fe1dc175b36d86667d09 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:29:50 +0200 Subject: [PATCH 29/63] dma-mapping: remove DMA_MASK_NONE This value is only used by a PCMCIA driver and not very useful. Signed-off-by: Christoph Hellwig Acked-by: Dominik Brodowski --- drivers/pcmcia/ds.c | 2 +- include/linux/dma-mapping.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c index 09d06b082f8b..72114907c0e4 100644 --- a/drivers/pcmcia/ds.c +++ b/drivers/pcmcia/ds.c @@ -516,7 +516,7 @@ static struct pcmcia_device *pcmcia_device_add(struct pcmcia_socket *s, p_dev->dev.parent = s->dev.parent; p_dev->dev.release = pcmcia_release_dev; /* by default don't allow DMA */ - p_dev->dma_mask = DMA_MASK_NONE; + p_dev->dma_mask = 0; p_dev->dev.dma_mask = &p_dev->dma_mask; dev_set_name(&p_dev->dev, "%d.%d", p_dev->socket->sock, p_dev->device_no); if (!dev_name(&p_dev->dev)) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index bb138ac6f5e6..e074588d753f 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,8 +138,6 @@ extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#define DMA_MASK_NONE 0x0ULL - static inline int valid_dma_direction(int dma_direction) { return ((dma_direction == DMA_BIDIRECTIONAL) || From db4268f8c575617bacdeff862c7de674dbf65075 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:31:32 +0200 Subject: [PATCH 30/63] dma-mapping: move valid_dma_direction to dma-direction.h Move the valid_dma_direction helper to a more suitable header, and clean it up to use the proper enum as well as removing pointless braces. Signed-off-by: Christoph Hellwig --- include/linux/dma-direction.h | 8 +++++++- include/linux/dma-mapping.h | 7 ------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/include/linux/dma-direction.h b/include/linux/dma-direction.h index 9c96e30e6a0b..a2fe4571bc92 100644 --- a/include/linux/dma-direction.h +++ b/include/linux/dma-direction.h @@ -9,4 +9,10 @@ enum dma_data_direction { DMA_NONE = 3, }; -#endif +static inline int valid_dma_direction(enum dma_data_direction dir) +{ + return dir == DMA_BIDIRECTIONAL || dir == DMA_TO_DEVICE || + dir == DMA_FROM_DEVICE; +} + +#endif /* _LINUX_DMA_DIRECTION_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e074588d753f..51e93d44b826 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -138,13 +138,6 @@ extern const struct dma_map_ops dma_dummy_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -static inline int valid_dma_direction(int dma_direction) -{ - return ((dma_direction == DMA_BIDIRECTIONAL) || - (dma_direction == DMA_TO_DEVICE) || - (dma_direction == DMA_FROM_DEVICE)); -} - #ifdef CONFIG_DMA_DECLARE_COHERENT /* * These three functions are only for dma allocator. From eba304c6861613a649ba46cfab835b1eddeacd8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 09:33:42 +0200 Subject: [PATCH 31/63] dma-mapping: better document dma_addr_t and DMA_MAPPING_ERROR Move the comment documenting dma_addr_t away from the dma_map_ops definition which isn't very related to it, and toward DMA_MAPPING_ERROR, which is somewhat related. Add a little blurb about DMA_MAPPING_ERROR as well. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 51e93d44b826..943479fb77f6 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -67,12 +67,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * A dma_addr_t can hold any valid DMA or bus address for the platform. - * It can be given to a device to use as a DMA source or target. A CPU cannot - * reference a dma_addr_t directly because there may be translation between - * its physical address space and the bus address space. - */ struct dma_map_ops { void* (*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -131,6 +125,16 @@ struct dma_map_ops { unsigned long (*get_merge_boundary)(struct device *dev); }; +/* + * A dma_addr_t can hold any valid DMA or bus address for the platform. It can + * be given to a device to use as a DMA source or target. It is specific to a + * given device and there may be a translation between the CPU physical address + * space and the bus address space. + * + * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not + * be used directly in drivers, but checked for using dma_mapping_error() + * instead. + */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) extern const struct dma_map_ops dma_virt_ops; From 38225f2ef2f4263fc65acae63fdd4e8489c9ffcf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Sep 2020 09:33:59 +0200 Subject: [PATCH 32/63] ARM/omap1: switch to use dma_direct_set_offset for lbus DMA offsets Switch the omap1510 platform ohci device to use dma_direct_set_offset to set the DMA offset instead of using direct hooks into the DMA mapping code and remove the now unused hooks. Signed-off-by: Christoph Hellwig Tested-by: Janusz Krzysztofik Acked-by: Tony Lindgren --- arch/arm/include/asm/dma-direct.h | 18 ------------- arch/arm/mach-omap1/include/mach/memory.h | 31 ----------------------- arch/arm/mach-omap1/usb.c | 22 ++++++++++++++++ 3 files changed, 22 insertions(+), 49 deletions(-) diff --git a/arch/arm/include/asm/dma-direct.h b/arch/arm/include/asm/dma-direct.h index 436544aeb834..77fcb7ee5ec9 100644 --- a/arch/arm/include/asm/dma-direct.h +++ b/arch/arm/include/asm/dma-direct.h @@ -9,7 +9,6 @@ * functions used internally by the DMA-mapping API to provide DMA * addresses. They must not be used by drivers. */ -#ifndef __arch_pfn_to_dma static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) { if (dev && dev->dma_range_map) @@ -34,23 +33,6 @@ static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) return (dma_addr_t)__virt_to_bus((unsigned long)(addr)); } -#else -static inline dma_addr_t pfn_to_dma(struct device *dev, unsigned long pfn) -{ - return __arch_pfn_to_dma(dev, pfn); -} - -static inline unsigned long dma_to_pfn(struct device *dev, dma_addr_t addr) -{ - return __arch_dma_to_pfn(dev, addr); -} - -static inline dma_addr_t virt_to_dma(struct device *dev, void *addr) -{ - return __arch_virt_to_dma(dev, addr); -} -#endif - static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { unsigned int offset = paddr & ~PAGE_MASK; diff --git a/arch/arm/mach-omap1/include/mach/memory.h b/arch/arm/mach-omap1/include/mach/memory.h index 1142560e0078..36bc0000cb6a 100644 --- a/arch/arm/mach-omap1/include/mach/memory.h +++ b/arch/arm/mach-omap1/include/mach/memory.h @@ -14,42 +14,11 @@ * OMAP-1510 bus address is translated into a Local Bus address if the * OMAP bus type is lbus. We do the address translation based on the * device overriding the defaults used in the dma-mapping API. - * Note that the is_lbus_device() test is not very efficient on 1510 - * because of the strncmp(). */ -#if defined(CONFIG_ARCH_OMAP15XX) && !defined(__ASSEMBLER__) /* * OMAP-1510 Local Bus address offset */ #define OMAP1510_LB_OFFSET UL(0x30000000) -#define virt_to_lbus(x) ((x) - PAGE_OFFSET + OMAP1510_LB_OFFSET) -#define lbus_to_virt(x) ((x) - OMAP1510_LB_OFFSET + PAGE_OFFSET) -#define is_lbus_device(dev) (cpu_is_omap15xx() && dev && (strncmp(dev_name(dev), "ohci", 4) == 0)) - -#define __arch_pfn_to_dma(dev, pfn) \ - ({ dma_addr_t __dma = __pfn_to_phys(pfn); \ - if (is_lbus_device(dev)) \ - __dma = __dma - PHYS_OFFSET + OMAP1510_LB_OFFSET; \ - __dma; }) - -#define __arch_dma_to_pfn(dev, addr) \ - ({ dma_addr_t __dma = addr; \ - if (is_lbus_device(dev)) \ - __dma += PHYS_OFFSET - OMAP1510_LB_OFFSET; \ - __phys_to_pfn(__dma); \ - }) - -#define __arch_dma_to_virt(dev, addr) ({ (void *) (is_lbus_device(dev) ? \ - lbus_to_virt(addr) : \ - __phys_to_virt(addr)); }) - -#define __arch_virt_to_dma(dev, addr) ({ unsigned long __addr = (unsigned long)(addr); \ - (dma_addr_t) (is_lbus_device(dev) ? \ - virt_to_lbus(__addr) : \ - __virt_to_phys(__addr)); }) - -#endif /* CONFIG_ARCH_OMAP15XX */ - #endif diff --git a/arch/arm/mach-omap1/usb.c b/arch/arm/mach-omap1/usb.c index d8e9bbda8f7b..ba8566204ea9 100644 --- a/arch/arm/mach-omap1/usb.c +++ b/arch/arm/mach-omap1/usb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -542,6 +543,25 @@ static u32 __init omap1_usb2_init(unsigned nwires, unsigned alt_pingroup) /* ULPD_APLL_CTRL */ #define APLL_NDPLL_SWITCH (1 << 0) +static int omap_1510_usb_ohci_notifier(struct notifier_block *nb, + unsigned long event, void *data) +{ + struct device *dev = data; + + if (event != BUS_NOTIFY_ADD_DEVICE) + return NOTIFY_DONE; + + if (strncmp(dev_name(dev), "ohci", 4) == 0 && + dma_direct_set_offset(dev, PHYS_OFFSET, OMAP1510_LB_OFFSET, + (u64)-1)) + WARN_ONCE(1, "failed to set DMA offset\n"); + return NOTIFY_OK; +} + +static struct notifier_block omap_1510_usb_ohci_nb = { + .notifier_call = omap_1510_usb_ohci_notifier, +}; + static void __init omap_1510_usb_init(struct omap_usb_config *config) { unsigned int val; @@ -600,6 +620,8 @@ static void __init omap_1510_usb_init(struct omap_usb_config *config) if (config->register_host) { int status; + bus_register_notifier(&platform_bus_type, + &omap_1510_usb_ohci_nb); ohci_device.dev.platform_data = config; status = platform_device_register(&ohci_device); if (status) From 43ee5b6daa6c45246098493dab2c229d196c9cf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 19:17:20 +0200 Subject: [PATCH 33/63] mm: turn alloc_pages into an inline function To prevent a compiler error when a method call alloc_pages is added (which I plan to for the dma_map_ops). Signed-off-by: Christoph Hellwig --- include/linux/gfp.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 67a0774e080b..dd2577c54071 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -550,8 +550,10 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ alloc_pages_vma(gfp_mask, order, vma, addr, numa_node_id(), true) #else -#define alloc_pages(gfp_mask, order) \ - alloc_pages_node(numa_node_id(), gfp_mask, order) +static inline struct page *alloc_pages(gfp_t gfp_mask, unsigned int order) +{ + return alloc_pages_node(numa_node_id(), gfp_mask, order); +} #define alloc_pages_vma(gfp_mask, order, vma, addr, node, false)\ alloc_pages(gfp_mask, order) #define alloc_hugepage_vma(gfp_mask, vma, addr, order) \ From 08281bd3e9b970faf6297a23cd65ff7dd7cbb8ca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 15:29:48 +0200 Subject: [PATCH 34/63] drm/exynos: stop setting DMA_ATTR_NON_CONSISTENT DMA_ATTR_NON_CONSISTENT is a no-op except on PA-RISC and a few MIPS configs, so don't set it in this ARM specific driver. Signed-off-by: Christoph Hellwig --- drivers/gpu/drm/exynos/exynos_drm_gem.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c index efa476858db5..07073222b8f6 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_gem.c +++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c @@ -42,8 +42,6 @@ static int exynos_drm_alloc_buf(struct exynos_drm_gem *exynos_gem, bool kvmap) if (exynos_gem->flags & EXYNOS_BO_WC || !(exynos_gem->flags & EXYNOS_BO_CACHABLE)) attr |= DMA_ATTR_WRITE_COMBINE; - else - attr |= DMA_ATTR_NON_CONSISTENT; /* FBDev emulation requires kernel mapping */ if (!kvmap) From e0ec8a4d6432104bf0e985ccfa475f8742c78cc6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 15:30:40 +0200 Subject: [PATCH 35/63] drm/nouveau/gk20a: stop setting DMA_ATTR_NON_CONSISTENT DMA_ATTR_NON_CONSISTENT is a no-op except on PA-RISC and a few MIPS configs, so don't set it in this ARM specific driver part. Signed-off-by: Christoph Hellwig --- drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c index 985f2990ab0d..13d4d7ac0697 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c @@ -594,8 +594,7 @@ gk20a_instmem_new(struct nvkm_device *device, int index, nvkm_info(&imem->base.subdev, "using IOMMU\n"); } else { - imem->attrs = DMA_ATTR_NON_CONSISTENT | - DMA_ATTR_WEAK_ORDERING | + imem->attrs = DMA_ATTR_WEAK_ORDERING | DMA_ATTR_WRITE_COMBINE; nvkm_info(&imem->base.subdev, "using DMA API\n"); From 33b16dc8b805ad5c5e04fed55c1669d115102dc1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 15:32:34 +0200 Subject: [PATCH 36/63] net/au1000-eth: stop using DMA_ATTR_NON_CONSISTENT The au1000-eth driver contains none of the manual cache synchronization required for using DMA_ATTR_NON_CONSISTENT. From what I can tell it can be used on both dma coherent and non-coherent DMA platforms, but I suspect it has been buggy on the non-coherent platforms all along. Signed-off-by: Christoph Hellwig --- drivers/net/ethernet/amd/au1000_eth.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index 75dbd221dc59..19e195420e24 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -1131,10 +1131,9 @@ static int au1000_probe(struct platform_device *pdev) /* Allocate the data buffers * Snooping works fine with eth on all au1xxx */ - aup->vaddr = (u32)dma_alloc_attrs(&pdev->dev, MAX_BUF_SIZE * + aup->vaddr = (u32)dma_alloc_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - &aup->dma_addr, 0, - DMA_ATTR_NON_CONSISTENT); + &aup->dma_addr, 0); if (!aup->vaddr) { dev_err(&pdev->dev, "failed to allocate data buffers\n"); err = -ENOMEM; @@ -1310,9 +1309,8 @@ static int au1000_probe(struct platform_device *pdev) err_remap2: iounmap(aup->mac); err_remap1: - dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - (void *)aup->vaddr, aup->dma_addr, - DMA_ATTR_NON_CONSISTENT); + dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + (void *)aup->vaddr, aup->dma_addr); err_vaddr: free_netdev(dev); err_alloc: @@ -1344,9 +1342,8 @@ static int au1000_remove(struct platform_device *pdev) if (aup->tx_db_inuse[i]) au1000_ReleaseDB(aup, aup->tx_db_inuse[i]); - dma_free_attrs(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), - (void *)aup->vaddr, aup->dma_addr, - DMA_ATTR_NON_CONSISTENT); + dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS), + (void *)aup->vaddr, aup->dma_addr); iounmap(aup->macdma); iounmap(aup->mac); From 48d15814dd0fc429e3205b87f1af6cc472018478 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 17 Aug 2020 15:58:21 +0200 Subject: [PATCH 37/63] lib82596: move DMA allocation into the callers of i82596_probe This allows us to get rid of the LIB82596_DMA_ATTR defined and prepare for untangling the coherent vs non-coherent DMA allocation API. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer (SNI part) --- drivers/net/ethernet/i825xx/lasi_82596.c | 24 ++++++++++------ drivers/net/ethernet/i825xx/lib82596.c | 36 ++++++++---------------- drivers/net/ethernet/i825xx/sni_82596.c | 21 +++++++++----- 3 files changed, 41 insertions(+), 40 deletions(-) diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index aec7e98bcc85..a12218e940a2 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,8 +96,6 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ -#define LIB82596_DMA_ATTR DMA_ATTR_NON_CONSISTENT - #define DMA_WBACK(ndev, addr, len) \ do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) @@ -155,7 +153,7 @@ lan_init_chip(struct parisc_device *dev) { struct net_device *netdevice; struct i596_private *lp; - int retval; + int retval = -ENOMEM; int i; if (!dev->irq) { @@ -186,12 +184,22 @@ lan_init_chip(struct parisc_device *dev) lp = netdev_priv(netdevice); lp->options = dev->id.sversion == 0x72 ? OPT_SWAP_PORT : 0; + lp->dma = dma_alloc_attrs(&dev->dev, sizeof(struct i596_dma), + &lp->dma_addr, GFP_KERNEL, + DMA_ATTR_NON_CONSISTENT); + if (!lp->dma) + goto out_free_netdev; retval = i82596_probe(netdevice); - if (retval) { - free_netdev(netdevice); - return -ENODEV; - } + if (retval) + goto out_free_dma; + return 0; + +out_free_dma: + dma_free_attrs(&dev->dev, sizeof(struct i596_dma), lp->dma, + lp->dma_addr, DMA_ATTR_NON_CONSISTENT); +out_free_netdev: + free_netdev(netdevice); return retval; } @@ -202,7 +210,7 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) unregister_netdev (dev); dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, LIB82596_DMA_ATTR); + lp->dma_addr, DMA_ATTR_NON_CONSISTENT); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index b03757e169e4..b4e4b3eb5758 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -1047,9 +1047,8 @@ static const struct net_device_ops i596_netdev_ops = { static int i82596_probe(struct net_device *dev) { - int i; struct i596_private *lp = netdev_priv(dev); - struct i596_dma *dma; + int ret; /* This lot is ensure things have been cache line aligned. */ BUILD_BUG_ON(sizeof(struct i596_rfd) != 32); @@ -1063,41 +1062,28 @@ static int i82596_probe(struct net_device *dev) if (!dev->base_addr || !dev->irq) return -ENODEV; - dma = dma_alloc_attrs(dev->dev.parent, sizeof(struct i596_dma), - &lp->dma_addr, GFP_KERNEL, - LIB82596_DMA_ATTR); - if (!dma) { - printk(KERN_ERR "%s: Couldn't get shared memory\n", __FILE__); - return -ENOMEM; - } - dev->netdev_ops = &i596_netdev_ops; dev->watchdog_timeo = TX_TIMEOUT; - memset(dma, 0, sizeof(struct i596_dma)); - lp->dma = dma; - - dma->scb.command = 0; - dma->scb.cmd = I596_NULL; - dma->scb.rfd = I596_NULL; + memset(lp->dma, 0, sizeof(struct i596_dma)); + lp->dma->scb.command = 0; + lp->dma->scb.cmd = I596_NULL; + lp->dma->scb.rfd = I596_NULL; spin_lock_init(&lp->lock); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + DMA_WBACK_INV(dev, lp->dma, sizeof(struct i596_dma)); - i = register_netdev(dev); - if (i) { - dma_free_attrs(dev->dev.parent, sizeof(struct i596_dma), - dma, lp->dma_addr, LIB82596_DMA_ATTR); - return i; - } + ret = register_netdev(dev); + if (ret) + return ret; DEB(DEB_PROBE, printk(KERN_INFO "%s: 82596 at %#3lx, %pM IRQ %d.\n", dev->name, dev->base_addr, dev->dev_addr, dev->irq)); DEB(DEB_INIT, printk(KERN_INFO "%s: dma at 0x%p (%d bytes), lp->scb at 0x%p\n", - dev->name, dma, (int)sizeof(struct i596_dma), - &dma->scb)); + dev->name, lp->dma, (int)sizeof(struct i596_dma), + &lp->dma->scb)); return 0; } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 22f5887578b2..4b9ac0c65577 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -24,8 +24,6 @@ static const char sni_82596_string[] = "snirm_82596"; -#define LIB82596_DMA_ATTR 0 - #define DMA_WBACK(priv, addr, len) do { } while (0) #define DMA_INV(priv, addr, len) do { } while (0) #define DMA_WBACK_INV(priv, addr, len) do { } while (0) @@ -134,10 +132,19 @@ static int sni_82596_probe(struct platform_device *dev) lp->ca = ca_addr; lp->mpu_port = mpu_addr; - retval = i82596_probe(netdevice); - if (retval == 0) - return 0; + lp->dma = dma_alloc_coherent(&dev->dev, sizeof(struct i596_dma), + &lp->dma_addr, GFP_KERNEL); + if (!lp->dma) + goto probe_failed; + retval = i82596_probe(netdevice); + if (retval) + goto probe_failed_free_dma; + return 0; + +probe_failed_free_dma: + dma_free_coherent(&dev->dev, sizeof(struct i596_dma), lp->dma, + lp->dma_addr); probe_failed: free_netdev(netdevice); probe_failed_free_ca: @@ -153,8 +160,8 @@ static int sni_82596_driver_remove(struct platform_device *pdev) struct i596_private *lp = netdev_priv(dev); unregister_netdev(dev); - dma_free_attrs(dev->dev.parent, sizeof(struct i596_private), lp->dma, - lp->dma_addr, LIB82596_DMA_ATTR); + dma_free_coherent(&pdev->dev, sizeof(struct i596_private), lp->dma, + lp->dma_addr); iounmap(lp->ca); iounmap(lp->mpu_port); free_netdev (dev); From 91af2dd255ac9284ee2942a359fa8d2b49849e0b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 16:07:21 +0200 Subject: [PATCH 38/63] 53c700: improve non-coherent DMA handling Switch the 53c700 driver to only use non-coherent descriptor memory if it really has to because dma_alloc_coherent fails. This doesn't matter for any of the platforms it runs on currently, but that will change soon. To help with this two new helpers to transfer ownership to and from the device are added that abstract the syncing of the non-coherent memory. The two current bidirectional cases are mapped to transfers to the device, as that appears to what they are used for. Note that for parisc, which is the only architecture this driver needs to use non-coherent memory on, the direction argument of dma_cache_sync is ignored, so this will not change behavior in any way. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer --- drivers/scsi/53c700.c | 113 +++++++++++++++++++++++------------------- drivers/scsi/53c700.h | 17 ++++--- 2 files changed, 72 insertions(+), 58 deletions(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index 84b57a8f86bf..c59226d7e2f6 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -269,6 +269,20 @@ NCR_700_get_SXFER(struct scsi_device *SDp) spi_period(SDp->sdev_target)); } +static inline void dma_sync_to_dev(struct NCR_700_Host_Parameters *h, + void *addr, size_t size) +{ + if (h->noncoherent) + dma_cache_sync(h->dev, addr, size, DMA_TO_DEVICE); +} + +static inline void dma_sync_from_dev(struct NCR_700_Host_Parameters *h, + void *addr, size_t size) +{ + if (h->noncoherent) + dma_cache_sync(h->dev, addr, size, DMA_FROM_DEVICE); +} + struct Scsi_Host * NCR_700_detect(struct scsi_host_template *tpnt, struct NCR_700_Host_Parameters *hostdata, struct device *dev) @@ -283,9 +297,13 @@ NCR_700_detect(struct scsi_host_template *tpnt, if(tpnt->sdev_attrs == NULL) tpnt->sdev_attrs = NCR_700_dev_attrs; - memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); - if(memory == NULL) { + memory = dma_alloc_coherent(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL); + if (!memory) { + hostdata->noncoherent = 1; + memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, + GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + } + if (!memory) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); return NULL; } @@ -339,11 +357,11 @@ NCR_700_detect(struct scsi_host_template *tpnt, for (j = 0; j < PATCHES; j++) script[LABELPATCHES[j]] = bS_to_host(pScript + SCRIPT[LABELPATCHES[j]]); /* now patch up fixed addresses. */ - script_patch_32(hostdata->dev, script, MessageLocation, + script_patch_32(hostdata, script, MessageLocation, pScript + MSGOUT_OFFSET); - script_patch_32(hostdata->dev, script, StatusAddress, + script_patch_32(hostdata, script, StatusAddress, pScript + STATUS_OFFSET); - script_patch_32(hostdata->dev, script, ReceiveMsgAddress, + script_patch_32(hostdata, script, ReceiveMsgAddress, pScript + MSGIN_OFFSET); hostdata->script = script; @@ -395,8 +413,12 @@ NCR_700_release(struct Scsi_Host *host) struct NCR_700_Host_Parameters *hostdata = (struct NCR_700_Host_Parameters *)host->hostdata[0]; - dma_free_attrs(hostdata->dev, TOTAL_MEM_SIZE, hostdata->script, - hostdata->pScript, DMA_ATTR_NON_CONSISTENT); + if (hostdata->noncoherent) + dma_free_attrs(hostdata->dev, TOTAL_MEM_SIZE, hostdata->script, + hostdata->pScript, DMA_ATTR_NON_CONSISTENT); + else + dma_free_coherent(hostdata->dev, TOTAL_MEM_SIZE, + hostdata->script, hostdata->pScript); return 1; } @@ -804,8 +826,8 @@ process_extended_message(struct Scsi_Host *host, shost_printk(KERN_WARNING, host, "Unexpected SDTR msg\n"); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ @@ -817,9 +839,8 @@ process_extended_message(struct Scsi_Host *host, printk(KERN_INFO "scsi%d: (%d:%d), Unsolicited WDTR after CMD, Rejecting\n", host->host_no, pun, lun); hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); resume_offset = hostdata->pScript + Ent_SendMessageWithATN; break; @@ -832,9 +853,8 @@ process_extended_message(struct Scsi_Host *host, printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -917,9 +937,8 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata printk("\n"); /* just reject it */ hostdata->msgout[0] = A_REJECT_MSG; - dma_cache_sync(hostdata->dev, hostdata->msgout, 1, DMA_TO_DEVICE); - script_patch_16(hostdata->dev, hostdata->script, MessageCount, - 1); + dma_sync_to_dev(hostdata, hostdata->msgout, 1); + script_patch_16(hostdata, hostdata->script, MessageCount, 1); /* SendMsgOut returns, so set up the return * address */ resume_offset = hostdata->pScript + Ent_SendMessageWithATN; @@ -928,7 +947,7 @@ process_message(struct Scsi_Host *host, struct NCR_700_Host_Parameters *hostdata } NCR_700_writel(temp, host, TEMP_REG); /* set us up to receive another message */ - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, DMA_FROM_DEVICE); + dma_sync_from_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); return resume_offset; } @@ -1008,8 +1027,8 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, slot->SG[1].ins = bS_to_host(SCRIPT_RETURN); slot->SG[1].pAddr = 0; slot->resume_offset = hostdata->pScript; - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG[0])*2, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, SCp->sense_buffer, SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG[0])*2); + dma_sync_from_dev(hostdata, SCp->sense_buffer, SCSI_SENSE_BUFFERSIZE); /* queue the command for reissue */ slot->state = NCR_700_SLOT_QUEUED; @@ -1129,11 +1148,11 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->cmd = slot->cmnd; /* re-patch for this command */ - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->dev, hostdata->script, + script_patch_16(hostdata, hostdata->script, CommandCount, slot->cmnd->cmd_len); - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); @@ -1144,14 +1163,14 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, * should therefore always clear ACK */ NCR_700_writeb(NCR_700_get_SXFER(hostdata->cmd->device), host, SXFER_REG); - dma_cache_sync(hostdata->dev, hostdata->msgin, - MSG_ARRAY_SIZE, DMA_FROM_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->msgout, - MSG_ARRAY_SIZE, DMA_TO_DEVICE); + dma_sync_from_dev(hostdata, hostdata->msgin, + MSG_ARRAY_SIZE); + dma_sync_to_dev(hostdata, hostdata->msgout, + MSG_ARRAY_SIZE); /* I'm just being paranoid here, the command should * already have been flushed from the cache */ - dma_cache_sync(hostdata->dev, slot->cmnd->cmnd, - slot->cmnd->cmd_len, DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->cmnd->cmnd, + slot->cmnd->cmd_len); @@ -1214,8 +1233,7 @@ process_script_interrupt(__u32 dsps, __u32 dsp, struct scsi_cmnd *SCp, hostdata->reselection_id = reselection_id; /* just in case we have a stale simple tag message, clear it */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->dev, hostdata->msgin, - MSG_ARRAY_SIZE, DMA_BIDIRECTIONAL); + dma_sync_to_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); if(hostdata->tag_negotiated & (1<pScript + Ent_GetReselectionWithTag; } else { @@ -1329,8 +1347,7 @@ process_selection(struct Scsi_Host *host, __u32 dsp) hostdata->cmd = NULL; /* clear any stale simple tag message */ hostdata->msgin[1] = 0; - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, - DMA_BIDIRECTIONAL); + dma_sync_to_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); if(id == 0xff) { /* Selected as target, Ignore */ @@ -1427,30 +1444,26 @@ NCR_700_start_command(struct scsi_cmnd *SCp) NCR_700_set_flag(SCp->device, NCR_700_DEV_BEGIN_SYNC_NEGOTIATION); } - script_patch_16(hostdata->dev, hostdata->script, MessageCount, count); + script_patch_16(hostdata, hostdata->script, MessageCount, count); + script_patch_ID(hostdata, hostdata->script, Device_ID, 1<dev, hostdata->script, - Device_ID, 1<dev, hostdata->script, CommandAddress, + script_patch_32_abs(hostdata, hostdata->script, CommandAddress, slot->pCmd); - script_patch_16(hostdata->dev, hostdata->script, CommandCount, - SCp->cmd_len); + script_patch_16(hostdata, hostdata->script, CommandCount, SCp->cmd_len); /* finally plumb the beginning of the SG list into the script * */ - script_patch_32_abs(hostdata->dev, hostdata->script, + script_patch_32_abs(hostdata, hostdata->script, SGScriptStartAddress, to32bit(&slot->pSG[0].ins)); NCR_700_clear_fifo(SCp->device->host); if(slot->resume_offset == 0) slot->resume_offset = hostdata->pScript; /* now perform all the writebacks and invalidates */ - dma_cache_sync(hostdata->dev, hostdata->msgout, count, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->msgin, MSG_ARRAY_SIZE, - DMA_FROM_DEVICE); - dma_cache_sync(hostdata->dev, SCp->cmnd, SCp->cmd_len, DMA_TO_DEVICE); - dma_cache_sync(hostdata->dev, hostdata->status, 1, DMA_FROM_DEVICE); + dma_sync_to_dev(hostdata, hostdata->msgout, count); + dma_sync_from_dev(hostdata, hostdata->msgin, MSG_ARRAY_SIZE); + dma_sync_to_dev(hostdata, SCp->cmnd, SCp->cmd_len); + dma_sync_from_dev(hostdata, hostdata->status, 1); /* set the synchronous period/offset */ NCR_700_writeb(NCR_700_get_SXFER(SCp->device), @@ -1626,7 +1639,7 @@ NCR_700_intr(int irq, void *dev_id) slot->SG[i].ins = bS_to_host(SCRIPT_NOP); slot->SG[i].pAddr = 0; } - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG)); /* and pretend we disconnected after * the command phase */ resume_offset = hostdata->pScript + Ent_MsgInDuringData; @@ -1878,7 +1891,7 @@ NCR_700_queuecommand_lck(struct scsi_cmnd *SCp, void (*done)(struct scsi_cmnd *) } slot->SG[i].ins = bS_to_host(SCRIPT_RETURN); slot->SG[i].pAddr = 0; - dma_cache_sync(hostdata->dev, slot->SG, sizeof(slot->SG), DMA_TO_DEVICE); + dma_sync_to_dev(hostdata, slot->SG, sizeof(slot->SG)); DEBUG((" SETTING %p to %x\n", (&slot->pSG[i].ins), slot->SG[i].ins)); diff --git a/drivers/scsi/53c700.h b/drivers/scsi/53c700.h index 05fe439b66af..c9f8c497babb 100644 --- a/drivers/scsi/53c700.h +++ b/drivers/scsi/53c700.h @@ -209,6 +209,7 @@ struct NCR_700_Host_Parameters { #endif __u32 chip710:1; /* set if really a 710 not 700 */ __u32 burst_length:4; /* set to 0 to disable 710 bursting */ + __u32 noncoherent:1; /* needs to use non-coherent DMA */ /* NOTHING BELOW HERE NEEDS ALTERING */ __u32 fast:1; /* if we can alter the SCSI bus clock @@ -422,33 +423,33 @@ struct NCR_700_Host_Parameters { #define NCR_710_MIN_XFERP 0 #define NCR_700_MIN_PERIOD 25 /* for SDTR message, 100ns */ -#define script_patch_32(dev, script, symbol, value) \ +#define script_patch_32(h, script, symbol, value) \ { \ int i; \ dma_addr_t da = value; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ __u32 val = bS_to_cpu((script)[A_##symbol##_used[i]]) + da; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching %s at %d to %pad\n", \ #symbol, A_##symbol##_used[i], &da)); \ } \ } -#define script_patch_32_abs(dev, script, symbol, value) \ +#define script_patch_32_abs(h, script, symbol, value) \ { \ int i; \ dma_addr_t da = value; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ (script)[A_##symbol##_used[i]] = bS_to_host(da); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching %s at %d to %pad\n", \ #symbol, A_##symbol##_used[i], &da)); \ } \ } /* Used for patching the SCSI ID in the SELECT instruction */ -#define script_patch_ID(dev, script, symbol, value) \ +#define script_patch_ID(h, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -456,13 +457,13 @@ struct NCR_700_Host_Parameters { val &= 0xff00ffff; \ val |= ((value) & 0xff) << 16; \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching ID field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ } -#define script_patch_16(dev, script, symbol, value) \ +#define script_patch_16(h, script, symbol, value) \ { \ int i; \ for(i=0; i< (sizeof(A_##symbol##_used) / sizeof(__u32)); i++) { \ @@ -470,7 +471,7 @@ struct NCR_700_Host_Parameters { val &= 0xffff0000; \ val |= ((value) & 0xffff); \ (script)[A_##symbol##_used[i]] = bS_to_host(val); \ - dma_cache_sync((dev), &(script)[A_##symbol##_used[i]], 4, DMA_TO_DEVICE); \ + dma_sync_to_dev((h), &(script)[A_##symbol##_used[i]], 4); \ DEBUG((" script, patching short field %s at %d to 0x%x\n", \ #symbol, A_##symbol##_used[i], val)); \ } \ From 0d71675f87dc406d4c284729b8d36be050ad0d15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:31:30 +0200 Subject: [PATCH 39/63] dma-mapping: add a new dma_alloc_noncoherent API Add a new API to allocate and free memory that is guaranteed to be addressable by a device, but which potentially is not cache coherent for DMA. To transfer ownership to and from the device, the existing streaming DMA API calls dma_sync_single_for_device and dma_sync_single_for_cpu must be used. For now the new calls are implemented on top of dma_alloc_attrs just like the old-noncoherent API, but once all drivers are switched to the new API it will be replaced with a better working implementation that is available on all architectures. Signed-off-by: Christoph Hellwig --- Documentation/core-api/dma-api.rst | 75 ++++++++++++++---------------- include/linux/dma-mapping.h | 12 +++++ 2 files changed, 48 insertions(+), 39 deletions(-) diff --git a/Documentation/core-api/dma-api.rst b/Documentation/core-api/dma-api.rst index 90239348b30f..ea0413276ddb 100644 --- a/Documentation/core-api/dma-api.rst +++ b/Documentation/core-api/dma-api.rst @@ -516,48 +516,56 @@ routines, e.g.::: } -Part II - Advanced dma usage ----------------------------- +Part II - Non-coherent DMA allocations +-------------------------------------- -Warning: These pieces of the DMA API should not be used in the -majority of cases, since they cater for unlikely corner cases that -don't belong in usual drivers. +These APIs allow to allocate pages in the kernel direct mapping that are +guaranteed to be DMA addressable. This means that unlike dma_alloc_coherent, +virt_to_page can be called on the resulting address, and the resulting +struct page can be used for everything a struct page is suitable for. -If you don't understand how cache line coherency works between a -processor and an I/O device, you should not be using this part of the -API at all. +If you don't understand how cache line coherency works between a processor and +an I/O device, you should not be using this part of the API. :: void * - dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t flag, unsigned long attrs) + dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp) -Identical to dma_alloc_coherent() except that when the -DMA_ATTR_NON_CONSISTENT flags is passed in the attrs argument, the -platform will choose to return either consistent or non-consistent memory -as it sees fit. By using this API, you are guaranteeing to the platform -that you have all the correct and necessary sync points for this memory -in the driver should it choose to return non-consistent memory. +This routine allocates a region of bytes of consistent memory. It +returns a pointer to the allocated region (in the processor's virtual address +space) or NULL if the allocation failed. The returned memory may or may not +be in the kernels direct mapping. Drivers must not call virt_to_page on +the returned memory region. -Note: where the platform can return consistent memory, it will -guarantee that the sync points become nops. +It also returns a which may be cast to an unsigned integer the +same width as the bus and given to the device as the DMA address base of +the region. -Warning: Handling non-consistent memory is a real pain. You should -only use this API if you positively know your driver will be -required to work on one of the rare (usually non-PCI) architectures -that simply cannot make consistent memory. +The dir parameter specified if data is read and/or written by the device, +see dma_map_single() for details. + +The gfp parameter allows the caller to specify the ``GFP_`` flags (see +kmalloc()) for the allocation, but rejects flags used to specify a memory +zone such as GFP_DMA or GFP_HIGHMEM. + +Before giving the memory to the device, dma_sync_single_for_device() needs +to be called, and before reading memory written by the device, +dma_sync_single_for_cpu(), just like for streaming DMA mappings that are +reused. :: void - dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_handle, unsigned long attrs) + dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_handle, enum dma_data_direction dir) -Free memory allocated by the dma_alloc_attrs(). All common -parameters must be identical to those otherwise passed to dma_free_coherent, -and the attrs argument must be identical to the attrs passed to -dma_alloc_attrs(). +Free a region of memory previously allocated using dma_alloc_noncoherent(). +dev, size and dma_handle and dir must all be the same as those passed into +dma_alloc_noncoherent(). cpu_addr must be the virtual address returned by +the dma_alloc_noncoherent(). :: @@ -575,17 +583,6 @@ memory or doing partial flushes. into the width returned by this call. It will also always be a power of two for easy alignment. -:: - - void - dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) - -Do a partial sync of memory that was allocated by dma_alloc_attrs() with -the DMA_ATTR_NON_CONSISTENT flag starting at virtual address vaddr and -continuing on for size. Again, you *must* observe the cache line -boundaries when doing this. - Part III - Debug drivers use of the DMA-API ------------------------------------------- diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 943479fb77f6..cb607b381adf 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -384,6 +384,18 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) } #endif /* CONFIG_HAS_DMA */ +static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + return dma_alloc_attrs(dev, size, dma_handle, gfp, + DMA_ATTR_NON_CONSISTENT); +} +static inline void dma_free_noncoherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) +{ + dma_free_attrs(dev, size, vaddr, dma_handle, DMA_ATTR_NON_CONSISTENT); +} + static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { From bd9b848557b0e6bfefa64e661196aaea82c983aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:24:21 +0200 Subject: [PATCH 40/63] sgiwd93: convert to dma_alloc_noncoherent Use the new non-coherent DMA API including proper ownership transfers. This also means we can allocate the memory as DMA_TO_DEVICE instead of bidirectional. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer --- drivers/scsi/sgiwd93.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/scsi/sgiwd93.c b/drivers/scsi/sgiwd93.c index 3bdf0deb8f15..cf1030c9dda1 100644 --- a/drivers/scsi/sgiwd93.c +++ b/drivers/scsi/sgiwd93.c @@ -95,7 +95,7 @@ void fill_hpc_entries(struct ip22_hostdata *hd, struct scsi_cmnd *cmd, int din) */ hcp->desc.pbuf = 0; hcp->desc.cntinfo = HPCDMA_EOX; - dma_cache_sync(hd->dev, hd->cpu, + dma_sync_single_for_device(hd->dev, hd->dma, (unsigned long)(hcp + 1) - (unsigned long)hd->cpu, DMA_TO_DEVICE); } @@ -234,8 +234,8 @@ static int sgiwd93_probe(struct platform_device *pdev) hdata = host_to_hostdata(host); hdata->dev = &pdev->dev; - hdata->cpu = dma_alloc_attrs(&pdev->dev, HPC_DMA_SIZE, &hdata->dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + hdata->cpu = dma_alloc_noncoherent(&pdev->dev, HPC_DMA_SIZE, + &hdata->dma, DMA_TO_DEVICE, GFP_KERNEL); if (!hdata->cpu) { printk(KERN_WARNING "sgiwd93: Could not allocate memory for " "host %d buffer.\n", unit); @@ -274,8 +274,8 @@ static int sgiwd93_probe(struct platform_device *pdev) out_irq: free_irq(irq, host); out_free: - dma_free_attrs(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, + DMA_TO_DEVICE); out_put: scsi_host_put(host); out: @@ -291,8 +291,8 @@ static int sgiwd93_remove(struct platform_device *pdev) scsi_remove_host(host); free_irq(pd->irq, host); - dma_free_attrs(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, HPC_DMA_SIZE, hdata->cpu, hdata->dma, + DMA_TO_DEVICE); scsi_host_put(host); return 0; } From ed4bc1890b4984d0af447ad3cc1f93541623f8f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:26:46 +0200 Subject: [PATCH 41/63] hal2: convert to dma_alloc_noncoherent Use the new non-coherent DMA API including proper ownership transfers. This also means we can allocate the buffer memory with the proper direction instead of bidirectional. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer --- sound/mips/hal2.c | 58 ++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/sound/mips/hal2.c b/sound/mips/hal2.c index ec84bc4c3a6e..9ac9b58d7c8c 100644 --- a/sound/mips/hal2.c +++ b/sound/mips/hal2.c @@ -441,7 +441,8 @@ static inline void hal2_stop_adc(struct snd_hal2 *hal2) hal2->adc.pbus.pbus->pbdma_ctrl = HPC3_PDMACTRL_LD; } -static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) +static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec, + enum dma_data_direction buffer_dir) { struct device *dev = hal2->card->dev; struct hal2_desc *desc; @@ -449,15 +450,15 @@ static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) int count = H2_BUF_SIZE / H2_BLOCK_SIZE; int i; - codec->buffer = dma_alloc_attrs(dev, H2_BUF_SIZE, &buffer_dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + codec->buffer = dma_alloc_noncoherent(dev, H2_BUF_SIZE, &buffer_dma, + buffer_dir, GFP_KERNEL); if (!codec->buffer) return -ENOMEM; - desc = dma_alloc_attrs(dev, count * sizeof(struct hal2_desc), - &desc_dma, GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + desc = dma_alloc_noncoherent(dev, count * sizeof(struct hal2_desc), + &desc_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); if (!desc) { - dma_free_attrs(dev, H2_BUF_SIZE, codec->buffer, buffer_dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(dev, H2_BUF_SIZE, codec->buffer, buffer_dma, + buffer_dir); return -ENOMEM; } codec->buffer_dma = buffer_dma; @@ -470,20 +471,22 @@ static int hal2_alloc_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) desc_dma : desc_dma + (i + 1) * sizeof(struct hal2_desc); desc++; } - dma_cache_sync(dev, codec->desc, count * sizeof(struct hal2_desc), - DMA_TO_DEVICE); + dma_sync_single_for_device(dev, codec->desc_dma, + count * sizeof(struct hal2_desc), + DMA_BIDIRECTIONAL); codec->desc_count = count; return 0; } -static void hal2_free_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec) +static void hal2_free_dmabuf(struct snd_hal2 *hal2, struct hal2_codec *codec, + enum dma_data_direction buffer_dir) { struct device *dev = hal2->card->dev; - dma_free_attrs(dev, codec->desc_count * sizeof(struct hal2_desc), - codec->desc, codec->desc_dma, DMA_ATTR_NON_CONSISTENT); - dma_free_attrs(dev, H2_BUF_SIZE, codec->buffer, codec->buffer_dma, - DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(dev, codec->desc_count * sizeof(struct hal2_desc), + codec->desc, codec->desc_dma, DMA_BIDIRECTIONAL); + dma_free_noncoherent(dev, H2_BUF_SIZE, codec->buffer, codec->buffer_dma, + buffer_dir); } static const struct snd_pcm_hardware hal2_pcm_hw = { @@ -509,21 +512,16 @@ static int hal2_playback_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - int err; runtime->hw = hal2_pcm_hw; - - err = hal2_alloc_dmabuf(hal2, &hal2->dac); - if (err) - return err; - return 0; + return hal2_alloc_dmabuf(hal2, &hal2->dac, DMA_TO_DEVICE); } static int hal2_playback_close(struct snd_pcm_substream *substream) { struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - hal2_free_dmabuf(hal2, &hal2->dac); + hal2_free_dmabuf(hal2, &hal2->dac, DMA_TO_DEVICE); return 0; } @@ -579,7 +577,9 @@ static void hal2_playback_transfer(struct snd_pcm_substream *substream, unsigned char *buf = hal2->dac.buffer + rec->hw_data; memcpy(buf, substream->runtime->dma_area + rec->sw_data, bytes); - dma_cache_sync(hal2->card->dev, buf, bytes, DMA_TO_DEVICE); + dma_sync_single_for_device(hal2->card->dev, + hal2->dac.buffer_dma + rec->hw_data, bytes, + DMA_TO_DEVICE); } @@ -597,22 +597,16 @@ static int hal2_capture_open(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - struct hal2_codec *adc = &hal2->adc; - int err; runtime->hw = hal2_pcm_hw; - - err = hal2_alloc_dmabuf(hal2, adc); - if (err) - return err; - return 0; + return hal2_alloc_dmabuf(hal2, &hal2->adc, DMA_FROM_DEVICE); } static int hal2_capture_close(struct snd_pcm_substream *substream) { struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); - hal2_free_dmabuf(hal2, &hal2->adc); + hal2_free_dmabuf(hal2, &hal2->adc, DMA_FROM_DEVICE); return 0; } @@ -667,7 +661,9 @@ static void hal2_capture_transfer(struct snd_pcm_substream *substream, struct snd_hal2 *hal2 = snd_pcm_substream_chip(substream); unsigned char *buf = hal2->adc.buffer + rec->hw_data; - dma_cache_sync(hal2->card->dev, buf, bytes, DMA_FROM_DEVICE); + dma_sync_single_for_cpu(hal2->card->dev, + hal2->adc.buffer_dma + rec->hw_data, bytes, + DMA_FROM_DEVICE); memcpy(substream->runtime->dma_area + rec->sw_data, buf, bytes); } From 00718b23a4730c5b155fa6f1deba532c921470c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:24:11 +0200 Subject: [PATCH 42/63] lib82596: convert to dma_alloc_noncoherent Use the new non-coherent DMA API including proper ownership transfers. This includes moving the DMA helpers to lib82596 based of an ifdef to avoid include order problems. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer (SNI part) --- drivers/net/ethernet/i825xx/lasi_82596.c | 25 ++--- drivers/net/ethernet/i825xx/lib82596.c | 114 ++++++++++++++--------- drivers/net/ethernet/i825xx/sni_82596.c | 4 - 3 files changed, 80 insertions(+), 63 deletions(-) diff --git a/drivers/net/ethernet/i825xx/lasi_82596.c b/drivers/net/ethernet/i825xx/lasi_82596.c index a12218e940a2..96c6f4f36904 100644 --- a/drivers/net/ethernet/i825xx/lasi_82596.c +++ b/drivers/net/ethernet/i825xx/lasi_82596.c @@ -96,21 +96,14 @@ #define OPT_SWAP_PORT 0x0001 /* Need to wordswp on the MPU port */ -#define DMA_WBACK(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_TO_DEVICE); } while (0) - -#define DMA_INV(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_FROM_DEVICE); } while (0) - -#define DMA_WBACK_INV(ndev, addr, len) \ - do { dma_cache_sync((ndev)->dev.parent, (void *)addr, len, DMA_BIDIRECTIONAL); } while (0) - #define SYSBUS 0x0000006c /* big endian CPU, 82596 "big" endian mode */ #define SWAP32(x) (((u32)(x)<<16) | ((((u32)(x)))>>16)) #define SWAP16(x) (x) +#define NONCOHERENT_DMA 1 + #include "lib82596.c" MODULE_AUTHOR("Richard Hirst"); @@ -184,9 +177,9 @@ lan_init_chip(struct parisc_device *dev) lp = netdev_priv(netdevice); lp->options = dev->id.sversion == 0x72 ? OPT_SWAP_PORT : 0; - lp->dma = dma_alloc_attrs(&dev->dev, sizeof(struct i596_dma), - &lp->dma_addr, GFP_KERNEL, - DMA_ATTR_NON_CONSISTENT); + lp->dma = dma_alloc_noncoherent(&dev->dev, + sizeof(struct i596_dma), &lp->dma_addr, + DMA_BIDIRECTIONAL, GFP_KERNEL); if (!lp->dma) goto out_free_netdev; @@ -196,8 +189,8 @@ lan_init_chip(struct parisc_device *dev) return 0; out_free_dma: - dma_free_attrs(&dev->dev, sizeof(struct i596_dma), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&dev->dev, sizeof(struct i596_dma), + lp->dma, lp->dma_addr, DMA_BIDIRECTIONAL); out_free_netdev: free_netdev(netdevice); return retval; @@ -209,8 +202,8 @@ static int __exit lan_remove_chip(struct parisc_device *pdev) struct i596_private *lp = netdev_priv(dev); unregister_netdev (dev); - dma_free_attrs(&pdev->dev, sizeof(struct i596_private), lp->dma, - lp->dma_addr, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, sizeof(struct i596_private), lp->dma, + lp->dma_addr, DMA_BIDIRECTIONAL); free_netdev (dev); return 0; } diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c index b4e4b3eb5758..ca2fb303fcc6 100644 --- a/drivers/net/ethernet/i825xx/lib82596.c +++ b/drivers/net/ethernet/i825xx/lib82596.c @@ -365,13 +365,44 @@ static int max_cmd_backlog = TX_RING_SIZE-1; static void i596_poll_controller(struct net_device *dev); #endif +static inline dma_addr_t virt_to_dma(struct i596_private *lp, volatile void *v) +{ + return lp->dma_addr + ((unsigned long)v - (unsigned long)lp->dma); +} + +#ifdef NONCOHERENT_DMA +static inline void dma_sync_dev(struct net_device *ndev, volatile void *addr, + size_t len) +{ + dma_sync_single_for_device(ndev->dev.parent, + virt_to_dma(netdev_priv(ndev), addr), len, + DMA_BIDIRECTIONAL); +} + +static inline void dma_sync_cpu(struct net_device *ndev, volatile void *addr, + size_t len) +{ + dma_sync_single_for_cpu(ndev->dev.parent, + virt_to_dma(netdev_priv(ndev), addr), len, + DMA_BIDIRECTIONAL); +} +#else +static inline void dma_sync_dev(struct net_device *ndev, volatile void *addr, + size_t len) +{ +} +static inline void dma_sync_cpu(struct net_device *ndev, volatile void *addr, + size_t len) +{ +} +#endif /* NONCOHERENT_DMA */ static inline int wait_istat(struct net_device *dev, struct i596_dma *dma, int delcnt, char *str) { - DMA_INV(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_cpu(dev, &(dma->iscp), sizeof(struct i596_iscp)); while (--delcnt && dma->iscp.stat) { udelay(10); - DMA_INV(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_cpu(dev, &(dma->iscp), sizeof(struct i596_iscp)); } if (!delcnt) { printk(KERN_ERR "%s: %s, iscp.stat %04x, didn't clear\n", @@ -384,10 +415,10 @@ static inline int wait_istat(struct net_device *dev, struct i596_dma *dma, int d static inline int wait_cmd(struct net_device *dev, struct i596_dma *dma, int delcnt, char *str) { - DMA_INV(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_cpu(dev, &(dma->scb), sizeof(struct i596_scb)); while (--delcnt && dma->scb.command) { udelay(10); - DMA_INV(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_cpu(dev, &(dma->scb), sizeof(struct i596_scb)); } if (!delcnt) { printk(KERN_ERR "%s: %s, status %4.4x, cmd %4.4x.\n", @@ -451,12 +482,9 @@ static void i596_display_data(struct net_device *dev) SWAP32(rbd->b_data), SWAP16(rbd->size)); rbd = rbd->v_next; } while (rbd != lp->rbd_head); - DMA_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_cpu(dev, dma, sizeof(struct i596_dma)); } - -#define virt_to_dma(lp, v) ((lp)->dma_addr + (dma_addr_t)((unsigned long)(v)-(unsigned long)((lp)->dma))) - static inline int init_rx_bufs(struct net_device *dev) { struct i596_private *lp = netdev_priv(dev); @@ -508,7 +536,7 @@ static inline int init_rx_bufs(struct net_device *dev) rfd->b_next = SWAP32(virt_to_dma(lp, dma->rfds)); rfd->cmd = SWAP16(CMD_EOL|CMD_FLEX); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, dma, sizeof(struct i596_dma)); return 0; } @@ -547,7 +575,7 @@ static void rebuild_rx_bufs(struct net_device *dev) lp->rbd_head = dma->rbds; dma->rfds[0].rbd = SWAP32(virt_to_dma(lp, dma->rbds)); - DMA_WBACK_INV(dev, dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, dma, sizeof(struct i596_dma)); } @@ -575,9 +603,9 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk(KERN_DEBUG "%s: starting i82596.\n", dev->name)); - DMA_WBACK(dev, &(dma->scp), sizeof(struct i596_scp)); - DMA_WBACK(dev, &(dma->iscp), sizeof(struct i596_iscp)); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scp), sizeof(struct i596_scp)); + dma_sync_dev(dev, &(dma->iscp), sizeof(struct i596_iscp)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); mpu_port(dev, PORT_ALTSCP, virt_to_dma(lp, &dma->scp)); ca(dev); @@ -596,24 +624,24 @@ static int init_i596_mem(struct net_device *dev) rebuild_rx_bufs(dev); dma->scb.command = 0; - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdConfigure\n", dev->name)); memcpy(dma->cf_cmd.i596_config, init_setup, 14); dma->cf_cmd.cmd.command = SWAP16(CmdConfigure); - DMA_WBACK(dev, &(dma->cf_cmd), sizeof(struct cf_cmd)); + dma_sync_dev(dev, &(dma->cf_cmd), sizeof(struct cf_cmd)); i596_add_cmd(dev, &dma->cf_cmd.cmd); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdSASetup\n", dev->name)); memcpy(dma->sa_cmd.eth_addr, dev->dev_addr, ETH_ALEN); dma->sa_cmd.cmd.command = SWAP16(CmdSASetup); - DMA_WBACK(dev, &(dma->sa_cmd), sizeof(struct sa_cmd)); + dma_sync_dev(dev, &(dma->sa_cmd), sizeof(struct sa_cmd)); i596_add_cmd(dev, &dma->sa_cmd.cmd); DEB(DEB_INIT, printk(KERN_DEBUG "%s: queuing CmdTDR\n", dev->name)); dma->tdr_cmd.cmd.command = SWAP16(CmdTDR); - DMA_WBACK(dev, &(dma->tdr_cmd), sizeof(struct tdr_cmd)); + dma_sync_dev(dev, &(dma->tdr_cmd), sizeof(struct tdr_cmd)); i596_add_cmd(dev, &dma->tdr_cmd.cmd); spin_lock_irqsave (&lp->lock, flags); @@ -625,7 +653,7 @@ static int init_i596_mem(struct net_device *dev) DEB(DEB_INIT, printk(KERN_DEBUG "%s: Issuing RX_START\n", dev->name)); dma->scb.command = SWAP16(RX_START); dma->scb.rfd = SWAP32(virt_to_dma(lp, dma->rfds)); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); ca(dev); @@ -659,13 +687,13 @@ static inline int i596_rx(struct net_device *dev) rfd = lp->rfd_head; /* Ref next frame to check */ - DMA_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_cpu(dev, rfd, sizeof(struct i596_rfd)); while (rfd->stat & SWAP16(STAT_C)) { /* Loop while complete frames */ if (rfd->rbd == I596_NULL) rbd = NULL; else if (rfd->rbd == lp->rbd_head->b_addr) { rbd = lp->rbd_head; - DMA_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_cpu(dev, rbd, sizeof(struct i596_rbd)); } else { printk(KERN_ERR "%s: rbd chain broken!\n", dev->name); /* XXX Now what? */ @@ -713,7 +741,7 @@ static inline int i596_rx(struct net_device *dev) DMA_FROM_DEVICE); rbd->v_data = newskb->data; rbd->b_data = SWAP32(dma_addr); - DMA_WBACK_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_dev(dev, rbd, sizeof(struct i596_rbd)); } else { skb = netdev_alloc_skb_ip_align(dev, pkt_len); } @@ -765,7 +793,7 @@ static inline int i596_rx(struct net_device *dev) if (rbd != NULL && (rbd->count & SWAP16(0x4000))) { rbd->count = 0; lp->rbd_head = rbd->v_next; - DMA_WBACK_INV(dev, rbd, sizeof(struct i596_rbd)); + dma_sync_dev(dev, rbd, sizeof(struct i596_rbd)); } /* Tidy the frame descriptor, marking it as end of list */ @@ -779,14 +807,14 @@ static inline int i596_rx(struct net_device *dev) lp->dma->scb.rfd = rfd->b_next; lp->rfd_head = rfd->v_next; - DMA_WBACK_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_dev(dev, rfd, sizeof(struct i596_rfd)); /* Remove end-of-list from old end descriptor */ rfd->v_prev->cmd = SWAP16(CMD_FLEX); - DMA_WBACK_INV(dev, rfd->v_prev, sizeof(struct i596_rfd)); + dma_sync_dev(dev, rfd->v_prev, sizeof(struct i596_rfd)); rfd = lp->rfd_head; - DMA_INV(dev, rfd, sizeof(struct i596_rfd)); + dma_sync_cpu(dev, rfd, sizeof(struct i596_rfd)); } DEB(DEB_RXFRAME, printk(KERN_DEBUG "frames %d\n", frames)); @@ -827,12 +855,12 @@ static inline void i596_cleanup_cmd(struct net_device *dev, struct i596_private ptr->v_next = NULL; ptr->b_next = I596_NULL; } - DMA_WBACK_INV(dev, ptr, sizeof(struct i596_cmd)); + dma_sync_dev(dev, ptr, sizeof(struct i596_cmd)); } wait_cmd(dev, lp->dma, 100, "i596_cleanup_cmd timed out"); lp->dma->scb.cmd = I596_NULL; - DMA_WBACK(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); } @@ -850,7 +878,7 @@ static inline void i596_reset(struct net_device *dev, struct i596_private *lp) /* FIXME: this command might cause an lpmc */ lp->dma->scb.command = SWAP16(CUC_ABORT | RX_ABORT); - DMA_WBACK(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); ca(dev); /* wait for shutdown */ @@ -878,20 +906,20 @@ static void i596_add_cmd(struct net_device *dev, struct i596_cmd *cmd) cmd->command |= SWAP16(CMD_EOL | CMD_INTR); cmd->v_next = NULL; cmd->b_next = I596_NULL; - DMA_WBACK(dev, cmd, sizeof(struct i596_cmd)); + dma_sync_dev(dev, cmd, sizeof(struct i596_cmd)); spin_lock_irqsave (&lp->lock, flags); if (lp->cmd_head != NULL) { lp->cmd_tail->v_next = cmd; lp->cmd_tail->b_next = SWAP32(virt_to_dma(lp, &cmd->status)); - DMA_WBACK(dev, lp->cmd_tail, sizeof(struct i596_cmd)); + dma_sync_dev(dev, lp->cmd_tail, sizeof(struct i596_cmd)); } else { lp->cmd_head = cmd; wait_cmd(dev, dma, 100, "i596_add_cmd timed out"); dma->scb.cmd = SWAP32(virt_to_dma(lp, &cmd->status)); dma->scb.command = SWAP16(CUC_START); - DMA_WBACK(dev, &(dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(dma->scb), sizeof(struct i596_scb)); ca(dev); } lp->cmd_tail = cmd; @@ -956,7 +984,7 @@ static void i596_tx_timeout (struct net_device *dev, unsigned int txqueue) /* Issue a channel attention signal */ DEB(DEB_ERRORS, printk(KERN_DEBUG "Kicking board.\n")); lp->dma->scb.command = SWAP16(CUC_START | RX_START); - DMA_WBACK_INV(dev, &(lp->dma->scb), sizeof(struct i596_scb)); + dma_sync_dev(dev, &(lp->dma->scb), sizeof(struct i596_scb)); ca (dev); lp->last_restart = dev->stats.tx_packets; } @@ -1014,8 +1042,8 @@ static netdev_tx_t i596_start_xmit(struct sk_buff *skb, struct net_device *dev) tbd->data = SWAP32(tx_cmd->dma_addr); DEB(DEB_TXADDR, print_eth(skb->data, "tx-queued")); - DMA_WBACK_INV(dev, tx_cmd, sizeof(struct tx_cmd)); - DMA_WBACK_INV(dev, tbd, sizeof(struct i596_tbd)); + dma_sync_dev(dev, tx_cmd, sizeof(struct tx_cmd)); + dma_sync_dev(dev, tbd, sizeof(struct i596_tbd)); i596_add_cmd(dev, &tx_cmd->cmd); dev->stats.tx_packets++; @@ -1071,7 +1099,7 @@ static int i82596_probe(struct net_device *dev) lp->dma->scb.rfd = I596_NULL; spin_lock_init(&lp->lock); - DMA_WBACK_INV(dev, lp->dma, sizeof(struct i596_dma)); + dma_sync_dev(dev, lp->dma, sizeof(struct i596_dma)); ret = register_netdev(dev); if (ret) @@ -1141,7 +1169,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) dev->name, status & 0x0700)); while (lp->cmd_head != NULL) { - DMA_INV(dev, lp->cmd_head, sizeof(struct i596_cmd)); + dma_sync_cpu(dev, lp->cmd_head, sizeof(struct i596_cmd)); if (!(lp->cmd_head->status & SWAP16(STAT_C))) break; @@ -1223,7 +1251,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } ptr->v_next = NULL; ptr->b_next = I596_NULL; - DMA_WBACK(dev, ptr, sizeof(struct i596_cmd)); + dma_sync_dev(dev, ptr, sizeof(struct i596_cmd)); lp->last_cmd = jiffies; } @@ -1237,13 +1265,13 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) ptr->command &= SWAP16(0x1fff); ptr = ptr->v_next; - DMA_WBACK_INV(dev, prev, sizeof(struct i596_cmd)); + dma_sync_dev(dev, prev, sizeof(struct i596_cmd)); } if (lp->cmd_head != NULL) ack_cmd |= CUC_START; dma->scb.cmd = SWAP32(virt_to_dma(lp, &lp->cmd_head->status)); - DMA_WBACK_INV(dev, &dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &dma->scb, sizeof(struct i596_scb)); } if ((status & 0x1000) || (status & 0x4000)) { if ((status & 0x4000)) @@ -1268,7 +1296,7 @@ static irqreturn_t i596_interrupt(int irq, void *dev_id) } wait_cmd(dev, dma, 100, "i596 interrupt, timeout"); dma->scb.command = SWAP16(ack_cmd); - DMA_WBACK(dev, &dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &dma->scb, sizeof(struct i596_scb)); /* DANGER: I suspect that some kind of interrupt acknowledgement aside from acking the 82596 might be needed @@ -1299,7 +1327,7 @@ static int i596_close(struct net_device *dev) wait_cmd(dev, lp->dma, 100, "close1 timed out"); lp->dma->scb.command = SWAP16(CUC_ABORT | RX_ABORT); - DMA_WBACK(dev, &lp->dma->scb, sizeof(struct i596_scb)); + dma_sync_dev(dev, &lp->dma->scb, sizeof(struct i596_scb)); ca(dev); @@ -1358,7 +1386,7 @@ static void set_multicast_list(struct net_device *dev) dev->name); else { dma->cf_cmd.cmd.command = SWAP16(CmdConfigure); - DMA_WBACK_INV(dev, &dma->cf_cmd, sizeof(struct cf_cmd)); + dma_sync_dev(dev, &dma->cf_cmd, sizeof(struct cf_cmd)); i596_add_cmd(dev, &dma->cf_cmd.cmd); } } @@ -1390,7 +1418,7 @@ static void set_multicast_list(struct net_device *dev) dev->name, cp)); cp += ETH_ALEN; } - DMA_WBACK_INV(dev, &dma->mc_cmd, sizeof(struct mc_cmd)); + dma_sync_dev(dev, &dma->mc_cmd, sizeof(struct mc_cmd)); i596_add_cmd(dev, &cmd->cmd); } } diff --git a/drivers/net/ethernet/i825xx/sni_82596.c b/drivers/net/ethernet/i825xx/sni_82596.c index 4b9ac0c65577..27937c5d7956 100644 --- a/drivers/net/ethernet/i825xx/sni_82596.c +++ b/drivers/net/ethernet/i825xx/sni_82596.c @@ -24,10 +24,6 @@ static const char sni_82596_string[] = "snirm_82596"; -#define DMA_WBACK(priv, addr, len) do { } while (0) -#define DMA_INV(priv, addr, len) do { } while (0) -#define DMA_WBACK_INV(priv, addr, len) do { } while (0) - #define SYSBUS 0x00004400 /* big endian CPU, 82596 little endian */ From 7f3bb7f53a404c883bc289eedaa5e3c6ff9e2b47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:24:11 +0200 Subject: [PATCH 43/63] sgiseeq: convert to dma_alloc_noncoherent Use the new non-coherent DMA API including proper ownership transfers. This includes adding additional calls to dma_sync_desc_dev as the old syncing was rather ad-hoc. Thanks to Thomas Bogendoerfer for debugging the ownership transfer issues. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer --- drivers/net/ethernet/seeq/sgiseeq.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c index 8507ff242014..37ff25a84030 100644 --- a/drivers/net/ethernet/seeq/sgiseeq.c +++ b/drivers/net/ethernet/seeq/sgiseeq.c @@ -112,14 +112,18 @@ struct sgiseeq_private { static inline void dma_sync_desc_cpu(struct net_device *dev, void *addr) { - dma_cache_sync(dev->dev.parent, addr, sizeof(struct sgiseeq_rx_desc), - DMA_FROM_DEVICE); + struct sgiseeq_private *sp = netdev_priv(dev); + + dma_sync_single_for_cpu(dev->dev.parent, VIRT_TO_DMA(sp, addr), + sizeof(struct sgiseeq_rx_desc), DMA_BIDIRECTIONAL); } static inline void dma_sync_desc_dev(struct net_device *dev, void *addr) { - dma_cache_sync(dev->dev.parent, addr, sizeof(struct sgiseeq_rx_desc), - DMA_TO_DEVICE); + struct sgiseeq_private *sp = netdev_priv(dev); + + dma_sync_single_for_device(dev->dev.parent, VIRT_TO_DMA(sp, addr), + sizeof(struct sgiseeq_rx_desc), DMA_BIDIRECTIONAL); } static inline void hpc3_eth_reset(struct hpc3_ethregs *hregs) @@ -403,6 +407,8 @@ static inline void sgiseeq_rx(struct net_device *dev, struct sgiseeq_private *sp rd = &sp->rx_desc[sp->rx_new]; dma_sync_desc_cpu(dev, rd); } + dma_sync_desc_dev(dev, rd); + dma_sync_desc_cpu(dev, &sp->rx_desc[orig_end]); sp->rx_desc[orig_end].rdma.cntinfo &= ~(HPCDMA_EOR); dma_sync_desc_dev(dev, &sp->rx_desc[orig_end]); @@ -443,6 +449,7 @@ static inline void kick_tx(struct net_device *dev, dma_sync_desc_cpu(dev, td); } if (td->tdma.cntinfo & HPCDMA_XIU) { + dma_sync_desc_dev(dev, td); hregs->tx_ndptr = VIRT_TO_DMA(sp, td); hregs->tx_ctrl = HPC3_ETXCTRL_ACTIVE; } @@ -476,6 +483,7 @@ static inline void sgiseeq_tx(struct net_device *dev, struct sgiseeq_private *sp if (!(td->tdma.cntinfo & (HPCDMA_XIU))) break; if (!(td->tdma.cntinfo & (HPCDMA_ETXD))) { + dma_sync_desc_dev(dev, td); if (!(status & HPC3_ETXCTRL_ACTIVE)) { hregs->tx_ndptr = VIRT_TO_DMA(sp, td); hregs->tx_ctrl = HPC3_ETXCTRL_ACTIVE; @@ -740,8 +748,8 @@ static int sgiseeq_probe(struct platform_device *pdev) sp = netdev_priv(dev); /* Make private data page aligned */ - sr = dma_alloc_attrs(&pdev->dev, sizeof(*sp->srings), &sp->srings_dma, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + sr = dma_alloc_noncoherent(&pdev->dev, sizeof(*sp->srings), + &sp->srings_dma, DMA_BIDIRECTIONAL, GFP_KERNEL); if (!sr) { printk(KERN_ERR "Sgiseeq: Page alloc failed, aborting.\n"); err = -ENOMEM; @@ -802,8 +810,8 @@ static int sgiseeq_probe(struct platform_device *pdev) return 0; err_out_free_attrs: - dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings, - sp->srings_dma, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings, + sp->srings_dma, DMA_BIDIRECTIONAL); err_out_free_dev: free_netdev(dev); @@ -817,8 +825,8 @@ static int sgiseeq_remove(struct platform_device *pdev) struct sgiseeq_private *sp = netdev_priv(dev); unregister_netdev(dev); - dma_free_attrs(&pdev->dev, sizeof(*sp->srings), sp->srings, - sp->srings_dma, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(&pdev->dev, sizeof(*sp->srings), sp->srings, + sp->srings_dma, DMA_BIDIRECTIONAL); free_netdev(dev); return 0; From d69d8adc5bf045e686338918292aa417c5282852 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Aug 2020 16:38:25 +0200 Subject: [PATCH 44/63] 53c700: convert to dma_alloc_noncoherent Use the new non-coherent DMA API including proper ownership transfers. Signed-off-by: Christoph Hellwig Tested-by: Thomas Bogendoerfer --- drivers/scsi/53c700.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c index c59226d7e2f6..5117d90ccd9e 100644 --- a/drivers/scsi/53c700.c +++ b/drivers/scsi/53c700.c @@ -269,18 +269,25 @@ NCR_700_get_SXFER(struct scsi_device *SDp) spi_period(SDp->sdev_target)); } +static inline dma_addr_t virt_to_dma(struct NCR_700_Host_Parameters *h, void *p) +{ + return h->pScript + ((uintptr_t)p - (uintptr_t)h->script); +} + static inline void dma_sync_to_dev(struct NCR_700_Host_Parameters *h, void *addr, size_t size) { if (h->noncoherent) - dma_cache_sync(h->dev, addr, size, DMA_TO_DEVICE); + dma_sync_single_for_device(h->dev, virt_to_dma(h, addr), + size, DMA_BIDIRECTIONAL); } static inline void dma_sync_from_dev(struct NCR_700_Host_Parameters *h, void *addr, size_t size) { if (h->noncoherent) - dma_cache_sync(h->dev, addr, size, DMA_FROM_DEVICE); + dma_sync_single_for_device(h->dev, virt_to_dma(h, addr), size, + DMA_BIDIRECTIONAL); } struct Scsi_Host * @@ -300,8 +307,8 @@ NCR_700_detect(struct scsi_host_template *tpnt, memory = dma_alloc_coherent(dev, TOTAL_MEM_SIZE, &pScript, GFP_KERNEL); if (!memory) { hostdata->noncoherent = 1; - memory = dma_alloc_attrs(dev, TOTAL_MEM_SIZE, &pScript, - GFP_KERNEL, DMA_ATTR_NON_CONSISTENT); + memory = dma_alloc_noncoherent(dev, TOTAL_MEM_SIZE, &pScript, + DMA_BIDIRECTIONAL, GFP_KERNEL); } if (!memory) { printk(KERN_ERR "53c700: Failed to allocate memory for driver, detaching\n"); @@ -414,8 +421,9 @@ NCR_700_release(struct Scsi_Host *host) (struct NCR_700_Host_Parameters *)host->hostdata[0]; if (hostdata->noncoherent) - dma_free_attrs(hostdata->dev, TOTAL_MEM_SIZE, hostdata->script, - hostdata->pScript, DMA_ATTR_NON_CONSISTENT); + dma_free_noncoherent(hostdata->dev, TOTAL_MEM_SIZE, + hostdata->script, hostdata->pScript, + DMA_BIDIRECTIONAL); else dma_free_coherent(hostdata->dev, TOTAL_MEM_SIZE, hostdata->script, hostdata->pScript); From 5a84292271402cffe0717bc58e2ad9a0c7977272 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:28:13 +0200 Subject: [PATCH 45/63] dma-mapping: remove dma_cache_sync All users are gone now, remove the API. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer (MIPS part) --- arch/mips/Kconfig | 1 - arch/mips/jazz/jazzdma.c | 1 - arch/mips/mm/dma-noncoherent.c | 6 ------ arch/parisc/Kconfig | 1 - arch/parisc/kernel/pci-dma.c | 6 ------ include/linux/dma-mapping.h | 8 -------- include/linux/dma-noncoherent.h | 10 ---------- kernel/dma/Kconfig | 3 --- kernel/dma/mapping.c | 14 -------------- 9 files changed, 50 deletions(-) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 8f328298f8cc..92b0fde3d882 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1135,7 +1135,6 @@ config DMA_NONCOHERENT select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_DMA_SET_UNCACHED select DMA_NONCOHERENT_MMAP - select DMA_NONCOHERENT_CACHE_SYNC select NEED_DMA_MAP_STATE config SYS_HAS_EARLY_PRINTK diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index dab4d058cea9..2bf849caf507 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -620,7 +620,6 @@ const struct dma_map_ops jazz_dma_ops = { .sync_single_for_device = jazz_dma_sync_single_for_device, .sync_sg_for_cpu = jazz_dma_sync_sg_for_cpu, .sync_sg_for_device = jazz_dma_sync_sg_for_device, - .cache_sync = arch_dma_cache_sync, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, }; diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 97a14adbafc9..f34ad1f09799 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -137,12 +137,6 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, } #endif -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - dma_sync_virt_for_device(vaddr, size, direction); -} - #ifdef CONFIG_DMA_PERDEV_COHERENT void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3b0f53dd70bc..ed15da1da174 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -195,7 +195,6 @@ config PA11 depends on PA7000 || PA7100LC || PA7200 || PA7300LC select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYNC_DMA_FOR_DEVICE - select DMA_NONCOHERENT_CACHE_SYNC config PREFETCH def_bool y diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 38c68e131bbe..ce38c0b91581 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -454,9 +454,3 @@ void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, { flush_kernel_dcache_range((unsigned long)phys_to_virt(paddr), size); } - -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - flush_kernel_dcache_range((unsigned long)vaddr, size); -} diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index cb607b381adf..233bb8dcbe02 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -117,8 +117,6 @@ struct dma_map_ops { void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); - void (*cache_sync)(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); int (*dma_supported)(struct device *dev, u64 mask); u64 (*get_required_mask)(struct device *dev); size_t (*max_mapping_size)(struct device *dev); @@ -249,8 +247,6 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir); int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); @@ -334,10 +330,6 @@ static inline void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { } -static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ -} static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index b9bc6c557ea4..0888656369a4 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -62,16 +62,6 @@ static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, } #endif /* CONFIG_MMU */ -#ifdef CONFIG_DMA_NONCOHERENT_CACHE_SYNC -void arch_dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction); -#else -static inline void arch_dma_cache_sync(struct device *dev, void *vaddr, - size_t size, enum dma_data_direction direction) -{ -} -#endif /* CONFIG_DMA_NONCOHERENT_CACHE_SYNC */ - #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, enum dma_data_direction dir); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 281785feb874..c5f717021f56 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -75,9 +75,6 @@ config ARCH_HAS_DMA_PREP_COHERENT config ARCH_HAS_FORCE_DMA_UNENCRYPTED bool -config DMA_NONCOHERENT_CACHE_SYNC - bool - config DMA_VIRT_OPS bool depends on HAS_DMA diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 9a045e51df17..673ea6759c15 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -529,20 +529,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) EXPORT_SYMBOL(dma_set_coherent_mask); #endif -void dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ - const struct dma_map_ops *ops = get_dma_ops(dev); - - BUG_ON(!valid_dma_direction(dir)); - - if (dma_alloc_direct(dev, ops)) - arch_dma_cache_sync(dev, vaddr, size, dir); - else if (ops->cache_sync) - ops->cache_sync(dev, vaddr, size, dir); -} -EXPORT_SYMBOL(dma_cache_sync); - size_t dma_max_mapping_size(struct device *dev) { const struct dma_map_ops *ops = get_dma_ops(dev); From efa70f2fdc842e63a0a13223e0e83cedcc2117f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:34:33 +0200 Subject: [PATCH 46/63] dma-mapping: add a new dma_alloc_pages API This API is the equivalent of alloc_pages, except that the returned memory is guaranteed to be DMA addressable by the passed in device. The implementation will also be used to provide a more sensible replacement for DMA_ATTR_NON_CONSISTENT flag. Additionally dma_alloc_noncoherent is switched over to use dma_alloc_pages as its backend. Signed-off-by: Christoph Hellwig Acked-by: Thomas Bogendoerfer (MIPS part) --- Documentation/core-api/dma-attributes.rst | 8 --- arch/alpha/kernel/pci_iommu.c | 2 + arch/arm/mm/dma-mapping-nommu.c | 2 + arch/arm/mm/dma-mapping.c | 4 ++ arch/ia64/hp/common/sba_iommu.c | 2 + arch/mips/jazz/jazzdma.c | 7 +-- arch/powerpc/kernel/dma-iommu.c | 2 + arch/powerpc/platforms/ps3/system-bus.c | 4 ++ arch/powerpc/platforms/pseries/vio.c | 2 + arch/s390/pci/pci_dma.c | 2 + arch/x86/kernel/amd_gart_64.c | 2 + drivers/iommu/dma-iommu.c | 2 + drivers/iommu/intel/iommu.c | 4 ++ drivers/parisc/ccio-dma.c | 2 + drivers/parisc/sba_iommu.c | 2 + drivers/xen/swiotlb-xen.c | 2 + include/linux/dma-direct.h | 5 ++ include/linux/dma-mapping.h | 34 ++++++------ include/linux/dma-noncoherent.h | 3 -- kernel/dma/direct.c | 52 ++++++++++++++++++- kernel/dma/mapping.c | 63 +++++++++++++++++++++-- kernel/dma/ops_helpers.c | 35 +++++++++++++ kernel/dma/virt.c | 2 + 23 files changed, 206 insertions(+), 37 deletions(-) diff --git a/Documentation/core-api/dma-attributes.rst b/Documentation/core-api/dma-attributes.rst index 29dcbe8826e8..1887d92e8e92 100644 --- a/Documentation/core-api/dma-attributes.rst +++ b/Documentation/core-api/dma-attributes.rst @@ -25,14 +25,6 @@ Since it is optional for platforms to implement DMA_ATTR_WRITE_COMBINE, those that do not will simply ignore the attribute and exhibit default behavior. -DMA_ATTR_NON_CONSISTENT ------------------------ - -DMA_ATTR_NON_CONSISTENT lets the platform to choose to return either -consistent or non-consistent memory as it sees fit. By using this API, -you are guaranteeing to the platform that you have all the correct and -necessary sync points for this memory in the driver. - DMA_ATTR_NO_KERNEL_MAPPING -------------------------- diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 6f7de4f4e191..447e0fd0ed38 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -952,5 +952,7 @@ const struct dma_map_ops alpha_pci_ops = { .dma_supported = alpha_pci_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 287ef898a55e..43c6d66b6e73 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -176,6 +176,8 @@ static void arm_nommu_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist const struct dma_map_ops arm_nommu_dma_ops = { .alloc = arm_nommu_dma_alloc, .free = arm_nommu_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_nommu_dma_mmap, .map_page = arm_nommu_dma_map_page, .unmap_page = arm_nommu_dma_unmap_page, diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8a8949174b1c..7738b4d23f69 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -199,6 +199,8 @@ static int arm_dma_supported(struct device *dev, u64 mask) const struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_dma_map_page, @@ -226,6 +228,8 @@ static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, const struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, .mmap = arm_coherent_dma_mmap, .get_sgtable = arm_dma_get_sgtable, .map_page = arm_coherent_dma_map_page, diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index b49b73a95067..cafbb848a34e 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2070,6 +2070,8 @@ static const struct dma_map_ops sba_dma_ops = { .dma_supported = sba_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; static int __init diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index 2bf849caf507..f53bc043334c 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -506,9 +506,6 @@ static void *jazz_dma_alloc(struct device *dev, size_t size, *dma_handle = vdma_alloc(virt_to_phys(ret), size); if (*dma_handle == DMA_MAPPING_ERROR) goto out_free_pages; - - if (attrs & DMA_ATTR_NON_CONSISTENT) - return ret; arch_dma_prep_coherent(page, size); return (void *)(UNCAC_BASE + __pa(ret)); @@ -521,8 +518,6 @@ static void jazz_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { vdma_free(dma_handle); - if (!(attrs & DMA_ATTR_NON_CONSISTENT)) - vaddr = __va(vaddr - UNCAC_BASE); __free_pages(virt_to_page(vaddr), get_order(size)); } @@ -622,5 +617,7 @@ const struct dma_map_ops jazz_dma_ops = { .sync_sg_for_device = jazz_dma_sync_sg_for_device, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(jazz_dma_ops); diff --git a/arch/powerpc/kernel/dma-iommu.c b/arch/powerpc/kernel/dma-iommu.c index 9053fc9d20c7..a1c744194018 100644 --- a/arch/powerpc/kernel/dma-iommu.c +++ b/arch/powerpc/kernel/dma-iommu.c @@ -138,4 +138,6 @@ const struct dma_map_ops dma_iommu_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 3542b7bd6a46..7bc5f9be3e12 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -696,6 +696,8 @@ static const struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; static const struct dma_map_ops ps3_ioc0_dma_ops = { @@ -708,6 +710,8 @@ static const struct dma_map_ops ps3_ioc0_dma_ops = { .unmap_page = ps3_unmap_page, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; /** diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 0487b26f6f1a..98ed7b09b3fe 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -608,6 +608,8 @@ static const struct dma_map_ops vio_dma_mapping_ops = { .get_required_mask = dma_iommu_get_required_mask, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; /** diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4a37d8f4de9d..9291023e9469 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -668,6 +668,8 @@ const struct dma_map_ops s390_pci_dma_ops = { .unmap_page = s390_dma_unmap_pages, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 153374b996a2..c96dcaa572eb 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -677,6 +677,8 @@ static const struct dma_map_ops gart_dma_ops = { .get_sgtable = dma_common_get_sgtable, .dma_supported = dma_direct_supported, .get_required_mask = dma_direct_get_required_mask, + .alloc_pages = dma_direct_alloc_pages, + .free_pages = dma_direct_free_pages, }; static void gart_iommu_shutdown(void) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5141d49a046b..00a5b49248e3 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1120,6 +1120,8 @@ static unsigned long iommu_dma_get_merge_boundary(struct device *dev) static const struct dma_map_ops iommu_dma_ops = { .alloc = iommu_dma_alloc, .free = iommu_dma_free, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, .mmap = iommu_dma_mmap, .get_sgtable = iommu_dma_get_sgtable, .map_page = iommu_dma_map_page, diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index ed5c57e96e8b..2c426dbdf17f 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3712,6 +3712,8 @@ static const struct dma_map_ops intel_dma_ops = { .dma_supported = dma_direct_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, .get_required_mask = intel_get_required_mask, }; @@ -3965,6 +3967,8 @@ static const struct dma_map_ops bounce_dma_ops = { .sync_sg_for_device = bounce_sync_sg_for_device, .map_resource = bounce_map_resource, .unmap_resource = bounce_unmap_resource, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, .dma_supported = dma_direct_supported, }; diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index ba16b7f8f806..8cf0b9c8bdf7 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1024,6 +1024,8 @@ static const struct dma_map_ops ccio_ops = { .map_sg = ccio_map_sg, .unmap_sg = ccio_unmap_sg, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; #ifdef CONFIG_PROC_FS diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 959bda193b96..6fcde7980358 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1076,6 +1076,8 @@ static const struct dma_map_ops sba_ops = { .map_sg = sba_map_sg, .unmap_sg = sba_unmap_sg, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 39a0f2e0847c..030a225624b0 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -578,4 +578,6 @@ const struct dma_map_ops xen_swiotlb_dma_ops = { .dma_supported = xen_swiotlb_dma_supported, .mmap = dma_common_mmap, .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 83f797e0cb78..38ed3b55034d 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -115,6 +115,11 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs); +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir); int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 233bb8dcbe02..4b9b1d64f5ec 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -27,11 +27,6 @@ * buffered to improve performance. */ #define DMA_ATTR_WRITE_COMBINE (1UL << 2) -/* - * DMA_ATTR_NON_CONSISTENT: Lets the platform to choose to return either - * consistent or non-consistent memory as it sees fit. - */ -#define DMA_ATTR_NON_CONSISTENT (1UL << 3) /* * DMA_ATTR_NO_KERNEL_MAPPING: Lets the platform to avoid creating a kernel * virtual mapping for the allocated buffer. @@ -74,6 +69,11 @@ struct dma_map_ops { void (*free)(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); @@ -376,17 +376,14 @@ static inline unsigned long dma_get_merge_boundary(struct device *dev) } #endif /* CONFIG_HAS_DMA */ -static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) -{ - return dma_alloc_attrs(dev, size, dma_handle, gfp, - DMA_ATTR_NON_CONSISTENT); -} -static inline void dma_free_noncoherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) -{ - dma_free_attrs(dev, size, vaddr, dma_handle, DMA_ATTR_NON_CONSISTENT); -} +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir); +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) @@ -512,7 +509,10 @@ static inline void dma_sync_sgtable_for_device(struct device *dev, extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); - +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); +void dma_common_free_pages(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); struct page **dma_common_find_pages(void *cpu_addr); void *dma_common_contiguous_remap(struct page *page, size_t size, pgprot_t prot, const void *caller); diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h index 0888656369a4..e61283e06576 100644 --- a/include/linux/dma-noncoherent.h +++ b/include/linux/dma-noncoherent.h @@ -31,9 +31,6 @@ static __always_inline bool dma_alloc_need_uncached(struct device *dev, return false; if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) return false; - if (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT)) - return false; return true; } diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 750659f7447c..121a9c1969dd 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Copyright (C) 2018 Christoph Hellwig. + * Copyright (C) 2018-2020 Christoph Hellwig. * * DMA operations that map physical memory directly without using an IOMMU. */ @@ -292,6 +292,56 @@ void dma_direct_free(struct device *dev, size_t size, dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size); } +struct page *dma_direct_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + struct page *page; + void *ret; + + if (dma_should_alloc_from_pool(dev, gfp, 0)) { + page = dma_alloc_from_pool(dev, size, &ret, gfp, + dma_coherent_ok); + if (!page) + return NULL; + goto done; + } + + page = __dma_direct_alloc_pages(dev, size, gfp); + if (!page) + return NULL; + ret = page_address(page); + if (force_dma_unencrypted(dev)) { + if (set_memory_decrypted((unsigned long)ret, + 1 << get_order(size))) + goto out_free_pages; + } + memset(ret, 0, size); +done: + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return page; +out_free_pages: + dma_free_contiguous(dev, page, size); + return NULL; +} + +void dma_direct_free_pages(struct device *dev, size_t size, + struct page *page, dma_addr_t dma_addr, + enum dma_data_direction dir) +{ + unsigned int page_order = get_order(size); + void *vaddr = page_address(page); + + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (dma_should_free_from_pool(dev, 0) && + dma_free_from_pool(dev, vaddr, size)) + return; + + if (force_dma_unencrypted(dev)) + set_memory_encrypted((unsigned long)vaddr, 1 << page_order); + + dma_free_contiguous(dev, page, size); +} + #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_SWIOTLB) void dma_direct_sync_sg_for_device(struct device *dev, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 673ea6759c15..06115f59f4ff 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -341,9 +341,7 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs) { if (force_dma_unencrypted(dev)) prot = pgprot_decrypted(prot); - if (dev_is_dma_coherent(dev) || - (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) && - (attrs & DMA_ATTR_NON_CONSISTENT))) + if (dev_is_dma_coherent(dev)) return prot; #ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE if (attrs & DMA_ATTR_WRITE_COMBINE) @@ -472,6 +470,65 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, } EXPORT_SYMBOL(dma_free_attrs); +struct page *dma_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct page *page; + + if (WARN_ON_ONCE(!dev->coherent_dma_mask)) + return NULL; + if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM))) + return NULL; + + size = PAGE_ALIGN(size); + if (dma_alloc_direct(dev, ops)) + page = dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp); + else if (ops->alloc_pages) + page = ops->alloc_pages(dev, size, dma_handle, dir, gfp); + else + return NULL; + + debug_dma_map_page(dev, page, 0, size, dir, *dma_handle); + + return page; +} +EXPORT_SYMBOL_GPL(dma_alloc_pages); + +void dma_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + size = PAGE_ALIGN(size); + debug_dma_unmap_page(dev, dma_handle, size, dir); + + if (dma_alloc_direct(dev, ops)) + dma_direct_free_pages(dev, size, page, dma_handle, dir); + else if (ops->free_pages) + ops->free_pages(dev, size, page, dma_handle, dir); +} +EXPORT_SYMBOL_GPL(dma_free_pages); + +void *dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + struct page *page; + + page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!page) + return NULL; + return page_address(page); +} +EXPORT_SYMBOL_GPL(dma_alloc_noncoherent); + +void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); +} +EXPORT_SYMBOL_GPL(dma_free_noncoherent); + int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index e443c69be429..5828e5e01b79 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -3,6 +3,7 @@ * Helpers for DMA ops implementations. These generally rely on the fact that * the allocated memory contains normal pages in the direct kernel mapping. */ +#include #include /* @@ -49,3 +50,37 @@ int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, return -ENXIO; #endif /* CONFIG_MMU */ } + +struct page *dma_common_alloc_pages(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + struct page *page; + + page = dma_alloc_contiguous(dev, size, gfp); + if (!page) + page = alloc_pages_node(dev_to_node(dev), gfp, get_order(size)); + if (!page) + return NULL; + + *dma_handle = ops->map_page(dev, page, 0, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + if (*dma_handle == DMA_MAPPING_ERROR) { + dma_free_contiguous(dev, page, size); + return NULL; + } + + memset(page_address(page), 0, size); + return page; +} + +void dma_common_free_pages(struct device *dev, size_t size, struct page *page, + dma_addr_t dma_handle, enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (ops->unmap_page) + ops->unmap_page(dev, dma_handle, size, dir, + DMA_ATTR_SKIP_CPU_SYNC); + dma_free_contiguous(dev, page, size); +} diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c index ebe128833af7..6986bf1fd668 100644 --- a/kernel/dma/virt.c +++ b/kernel/dma/virt.c @@ -55,5 +55,7 @@ const struct dma_map_ops dma_virt_ops = { .free = dma_virt_free, .map_page = dma_virt_map_page, .map_sg = dma_virt_map_sg, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, }; EXPORT_SYMBOL(dma_virt_ops); From de7cf917768f438aae6d2f4e9bced3739f15f5b6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:59:45 +0200 Subject: [PATCH 47/63] dma-mapping: add new {alloc,free}_noncoherent dma_map_ops methods This will allow IOMMU drivers to allocate non-contigous memory and return a vmapped virtual address. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 5 +++++ kernel/dma/mapping.c | 33 +++++++++++++++++++++++++++------ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 4b9b1d64f5ec..7c77cd6f3604 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -74,6 +74,11 @@ struct dma_map_ops { gfp_t gfp); void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir); + void* (*alloc_noncoherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); int (*mmap)(struct device *, struct vm_area_struct *, void *, dma_addr_t, size_t, unsigned long attrs); diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 06115f59f4ff..9669550656a0 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -513,19 +513,40 @@ EXPORT_SYMBOL_GPL(dma_free_pages); void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { - struct page *page; + const struct dma_map_ops *ops = get_dma_ops(dev); + void *vaddr; - page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); - if (!page) - return NULL; - return page_address(page); + if (!ops || !ops->alloc_noncoherent) { + struct page *page; + + page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); + if (!page) + return NULL; + return page_address(page); + } + + size = PAGE_ALIGN(size); + vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp); + if (vaddr) + debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir, + *dma_handle); + return vaddr; } EXPORT_SYMBOL_GPL(dma_alloc_noncoherent); void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) { - dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); + const struct dma_map_ops *ops = get_dma_ops(dev); + + if (!ops || !ops->free_noncoherent) { + dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); + return; + } + + size = PAGE_ALIGN(size); + debug_dma_unmap_page(dev, dma_handle, size, dir); + ops->free_noncoherent(dev, size, vaddr, dma_handle, dir); } EXPORT_SYMBOL_GPL(dma_free_noncoherent); From e8d39a903cc680c8c1ab5e528428f182f42b8a35 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 14:00:56 +0200 Subject: [PATCH 48/63] dma-iommu: implement ->alloc_noncoherent Implement the alloc_noncoherent method to provide memory that is neither coherent not contiguous. Signed-off-by: Christoph Hellwig --- drivers/iommu/dma-iommu.c | 41 +++++++++++++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 00a5b49248e3..c12c1dc43d31 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -572,6 +572,7 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * @size: Size of buffer in bytes * @dma_handle: Out argument for allocated DMA handle * @gfp: Allocation flags + * @prot: pgprot_t to use for the remapped mapping * @attrs: DMA attributes for this allocation * * If @size is less than PAGE_SIZE, then a full CPU page will be allocated, @@ -580,14 +581,14 @@ static struct page **__iommu_dma_alloc_pages(struct device *dev, * Return: Mapped virtual address, or NULL on failure. */ static void *iommu_dma_alloc_remap(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) + dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot, + unsigned long attrs) { struct iommu_domain *domain = iommu_get_dma_domain(dev); struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); - pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs); unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; struct page **pages; struct sg_table sgt; @@ -1030,8 +1031,10 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, gfp |= __GFP_ZERO; if (IS_ENABLED(CONFIG_DMA_REMAP) && gfpflags_allow_blocking(gfp) && - !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) - return iommu_dma_alloc_remap(dev, size, handle, gfp, attrs); + !(attrs & DMA_ATTR_FORCE_CONTIGUOUS)) { + return iommu_dma_alloc_remap(dev, size, handle, gfp, + dma_pgprot(dev, PAGE_KERNEL, attrs), attrs); + } if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !gfpflags_allow_blocking(gfp) && !coherent) @@ -1052,6 +1055,34 @@ static void *iommu_dma_alloc(struct device *dev, size_t size, return cpu_addr; } +#ifdef CONFIG_DMA_REMAP +static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size, + dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp) +{ + if (!gfpflags_allow_blocking(gfp)) { + struct page *page; + + page = dma_common_alloc_pages(dev, size, handle, dir, gfp); + if (!page) + return NULL; + return page_address(page); + } + + return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO, + PAGE_KERNEL, 0); +} + +static void iommu_dma_free_noncoherent(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t handle, enum dma_data_direction dir) +{ + __iommu_dma_unmap(dev, handle, size); + __iommu_dma_free(dev, size, cpu_addr); +} +#else +#define iommu_dma_alloc_noncoherent NULL +#define iommu_dma_free_noncoherent NULL +#endif /* CONFIG_DMA_REMAP */ + static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) @@ -1122,6 +1153,8 @@ static const struct dma_map_ops iommu_dma_ops = { .free = iommu_dma_free, .alloc_pages = dma_common_alloc_pages, .free_pages = dma_common_free_pages, + .alloc_noncoherent = iommu_dma_alloc_noncoherent, + .free_noncoherent = iommu_dma_free_noncoherent, .mmap = iommu_dma_mmap, .get_sgtable = iommu_dma_get_sgtable, .map_page = iommu_dma_map_page, From c51a9868d361a508c5340788950ece7959009c51 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Sep 2020 13:48:33 +0200 Subject: [PATCH 49/63] firewire-ohci: use dma_alloc_pages Use dma_alloc_pages to allocate DMAable pages instead of hoping that the architecture either has GFP_DMA32 or not more than 4G of memory. Signed-off-by: Christoph Hellwig --- drivers/firewire/ohci.c | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c index 020cb15a4d8f..9811c40956e5 100644 --- a/drivers/firewire/ohci.c +++ b/drivers/firewire/ohci.c @@ -674,17 +674,16 @@ static void ar_context_link_page(struct ar_context *ctx, unsigned int index) static void ar_context_release(struct ar_context *ctx) { + struct device *dev = ctx->ohci->card.device; unsigned int i; vunmap(ctx->buffer); - for (i = 0; i < AR_BUFFERS; i++) - if (ctx->pages[i]) { - dma_unmap_page(ctx->ohci->card.device, - ar_buffer_bus(ctx, i), - PAGE_SIZE, DMA_FROM_DEVICE); - __free_page(ctx->pages[i]); - } + for (i = 0; i < AR_BUFFERS; i++) { + if (ctx->pages[i]) + dma_free_pages(dev, PAGE_SIZE, ctx->pages[i], + ar_buffer_bus(ctx, i), DMA_FROM_DEVICE); + } } static void ar_context_abort(struct ar_context *ctx, const char *error_msg) @@ -970,6 +969,7 @@ static void ar_context_tasklet(unsigned long data) static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, unsigned int descriptors_offset, u32 regs) { + struct device *dev = ohci->card.device; unsigned int i; dma_addr_t dma_addr; struct page *pages[AR_BUFFERS + AR_WRAPAROUND_PAGES]; @@ -980,17 +980,13 @@ static int ar_context_init(struct ar_context *ctx, struct fw_ohci *ohci, tasklet_init(&ctx->tasklet, ar_context_tasklet, (unsigned long)ctx); for (i = 0; i < AR_BUFFERS; i++) { - ctx->pages[i] = alloc_page(GFP_KERNEL | GFP_DMA32); + ctx->pages[i] = dma_alloc_pages(dev, PAGE_SIZE, &dma_addr, + DMA_FROM_DEVICE, GFP_KERNEL); if (!ctx->pages[i]) goto out_of_memory; - dma_addr = dma_map_page(ohci->card.device, ctx->pages[i], - 0, PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(ohci->card.device, dma_addr)) { - __free_page(ctx->pages[i]); - ctx->pages[i] = NULL; - goto out_of_memory; - } set_page_private(ctx->pages[i], dma_addr); + dma_sync_single_for_device(dev, dma_addr, PAGE_SIZE, + DMA_FROM_DEVICE); } for (i = 0; i < AR_BUFFERS; i++) From 0de327969b61a245e3a47b60009eae73fe513cef Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Wed, 30 Sep 2020 12:28:21 +0200 Subject: [PATCH 50/63] cma: decrease CMA_ALIGNMENT lower limit to 2 On an embedded system with a tiny (1 MiB) CMA area for video memory, and a simple enough video pipeline, we can decrease the CMA_ALIGNMENT by a factor of 2 to avoid wasting memory, as all the allocations for video buffers will be of the exact same size (dictated by the size of the screen). Signed-off-by: Paul Cercueil Signed-off-by: Christoph Hellwig --- kernel/dma/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index c5f717021f56..647996702bc9 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -177,7 +177,7 @@ endchoice config CMA_ALIGNMENT int "Maximum PAGE_SIZE order of alignment for contiguous buffers" - range 4 12 + range 2 12 default 8 help DMA mapping framework by default aligns all buffers to the smallest From 0a0f0d8be76dcd4390ff538e7060fda34db79717 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:31:03 +0200 Subject: [PATCH 51/63] dma-mapping: split Split out all the bits that are purely for dma_map_ops implementations and related code into a new header so that they don't get pulled into all the drivers. That also means the architecture specific is not pulled in by any more, which leads to a missing includes that were pulled in by the x86 or arm versions in a few not overly portable drivers. Signed-off-by: Christoph Hellwig --- MAINTAINERS | 1 + arch/alpha/kernel/pci_iommu.c | 2 +- arch/arc/mm/dma.c | 1 + arch/arm/common/dmabounce.c | 1 + arch/arm/mach-highbank/highbank.c | 2 +- arch/arm/mach-imx/mach-imx27_visstrim_m10.c | 2 +- arch/arm/mach-imx/mach-mx31moboard.c | 2 +- arch/arm/mach-mvebu/coherency.c | 2 +- arch/arm/mm/dma-mapping-nommu.c | 1 + arch/arm/mm/dma-mapping.c | 2 +- arch/arm64/mm/dma-mapping.c | 1 + arch/ia64/hp/common/sba_iommu.c | 2 +- arch/ia64/kernel/dma-mapping.c | 2 +- arch/mips/jazz/jazzdma.c | 1 + arch/mips/mm/dma-noncoherent.c | 1 + arch/parisc/kernel/drivers.c | 1 + arch/powerpc/include/asm/iommu.h | 2 +- arch/powerpc/include/asm/pci.h | 2 +- arch/powerpc/platforms/ps3/system-bus.c | 2 +- arch/powerpc/platforms/pseries/ibmebus.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 2 +- arch/s390/pci/pci_dma.c | 2 +- arch/sh/boards/mach-ap325rxa/setup.c | 1 + arch/sh/boards/mach-ecovec24/setup.c | 1 + arch/sh/boards/mach-kfr2r09/setup.c | 2 +- arch/sh/boards/mach-migor/setup.c | 2 +- arch/sh/boards/mach-se/7724/setup.c | 1 + arch/sh/drivers/pci/fixups-dreamcast.c | 2 +- arch/sparc/kernel/iommu.c | 2 +- arch/sparc/kernel/pci_sun4v.c | 1 + arch/sparc/mm/io-unit.c | 2 +- arch/sparc/mm/iommu.c | 2 +- arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/pci-dma.c | 1 + arch/x86/kernel/setup.c | 2 + arch/x86/xen/pci-swiotlb-xen.c | 2 +- drivers/acpi/arm64/iort.c | 2 +- drivers/acpi/scan.c | 2 +- drivers/base/dd.c | 2 +- drivers/gpu/drm/exynos/exynos_drm_dma.c | 2 +- drivers/gpu/drm/msm/msm_gem.c | 1 + drivers/iommu/dma-iommu.c | 1 + drivers/iommu/intel/iommu.c | 2 +- drivers/misc/mic/bus/mic_bus.c | 1 + drivers/misc/mic/bus/scif_bus.c | 2 +- drivers/misc/mic/bus/scif_bus.h | 2 +- drivers/misc/mic/bus/vop_bus.c | 2 +- drivers/misc/mic/host/mic_boot.c | 1 + drivers/of/device.c | 1 + drivers/parisc/ccio-dma.c | 1 + drivers/parisc/sba_iommu.c | 1 + drivers/pci/xen-pcifront.c | 1 + drivers/remoteproc/remoteproc_core.c | 1 + drivers/remoteproc/remoteproc_virtio.c | 2 +- drivers/vdpa/vdpa_sim/vdpa_sim.c | 2 +- drivers/xen/swiotlb-xen.c | 1 + include/linux/dma-map-ops.h | 158 ++++++++++++++++++ include/linux/dma-mapping.h | 168 +------------------- kernel/dma/coherent.c | 1 + kernel/dma/direct.c | 1 + kernel/dma/dummy.c | 2 +- kernel/dma/mapping.c | 2 +- kernel/dma/ops_helpers.c | 1 + kernel/dma/virt.c | 2 +- 64 files changed, 223 insertions(+), 200 deletions(-) create mode 100644 include/linux/dma-map-ops.h diff --git a/MAINTAINERS b/MAINTAINERS index 190c7fa2ea01..b13fc1794307 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5202,6 +5202,7 @@ T: git git://git.infradead.org/users/hch/dma-mapping.git F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h +F: include/linux/dma-map-ops.h F: include/linux/dma-noncoherent.h F: kernel/dma/ diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 447e0fd0ed38..d84b19aa8e9d 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index e947572a521e..a8c453e98d75 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -3,6 +3,7 @@ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) */ +#include #include #include #include diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index d3e00ea92088..7996c04393d5 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mach-highbank/highbank.c b/arch/arm/mach-highbank/highbank.c index 56bf29523c65..db607955a7e4 100644 --- a/arch/arm/mach-highbank/highbank.c +++ b/arch/arm/mach-highbank/highbank.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c index 3da4c0920198..a329e50928b6 100644 --- a/arch/arm/mach-imx/mach-imx27_visstrim_m10.c +++ b/arch/arm/mach-imx/mach-imx27_visstrim_m10.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-imx/mach-mx31moboard.c b/arch/arm/mach-imx/mach-mx31moboard.c index 96845a4eaf57..7f780ad2d459 100644 --- a/arch/arm/mach-imx/mach-mx31moboard.c +++ b/arch/arm/mach-imx/mach-mx31moboard.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c index 8f8748a0c84f..49e3c8d20c2f 100644 --- a/arch/arm/mach-mvebu/coherency.c +++ b/arch/arm/mach-mvebu/coherency.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 43c6d66b6e73..6bfd2b884505 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 7738b4d23f69..8bf0bc6bc311 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 6c45350e33aa..3afd3bd659d8 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index cafbb848a34e..9148ddbf02e5 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -33,7 +33,7 @@ #include /* hweight64() */ #include #include -#include +#include #include #include diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index f640ed6fe1d5..cd0c166bfbc2 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include +#include #include /* Set this to 1 if there is a HW IOMMU in the system */ diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index f53bc043334c..b8fb42e56da0 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index f34ad1f09799..f4e8404ee049 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -5,6 +5,7 @@ * swiped from i386, and cloned for MIPS by Geert, polished by Ralf. */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index a5f3e50fe976..80fa0650736b 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 5032f1593299..deef7c94d7b6 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 63ed7e3b0ba3..6436f0b41539 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 7bc5f9be3e12..c62aaa29a9d5 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index a6f101c958e8..8c6e509f6967 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 98ed7b09b3fe..b2797cfe4e2b 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 9291023e9469..ebc9a49523aa 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sh/boards/mach-ap325rxa/setup.c b/arch/sh/boards/mach-ap325rxa/setup.c index 665cad452798..bac8a058ebd7 100644 --- a/arch/sh/boards/mach-ap325rxa/setup.c +++ b/arch/sh/boards/mach-ap325rxa/setup.c @@ -13,6 +13,7 @@ #include +#include #include #include #include diff --git a/arch/sh/boards/mach-ecovec24/setup.c b/arch/sh/boards/mach-ecovec24/setup.c index dd427bac5cde..bab91a99124e 100644 --- a/arch/sh/boards/mach-ecovec24/setup.c +++ b/arch/sh/boards/mach-ecovec24/setup.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/boards/mach-kfr2r09/setup.c b/arch/sh/boards/mach-kfr2r09/setup.c index 96538ba3aa32..eeb5ce341efd 100644 --- a/arch/sh/boards/mach-kfr2r09/setup.c +++ b/arch/sh/boards/mach-kfr2r09/setup.c @@ -14,7 +14,6 @@ #include #include -#include #include #include #include @@ -33,6 +32,7 @@ #include #include #include +#include #include diff --git a/arch/sh/boards/mach-migor/setup.c b/arch/sh/boards/mach-migor/setup.c index 9ed369dad62d..6703a2122c0d 100644 --- a/arch/sh/boards/mach-migor/setup.c +++ b/arch/sh/boards/mach-migor/setup.c @@ -5,7 +5,7 @@ * Copyright (C) 2008 Magnus Damm */ #include -#include +#include #include #include #include diff --git a/arch/sh/boards/mach-se/7724/setup.c b/arch/sh/boards/mach-se/7724/setup.c index 32f5dd944889..8d6541ba0186 100644 --- a/arch/sh/boards/mach-se/7724/setup.c +++ b/arch/sh/boards/mach-se/7724/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/drivers/pci/fixups-dreamcast.c b/arch/sh/drivers/pci/fixups-dreamcast.c index 7be8694c0d13..41e4daee8f04 100644 --- a/arch/sh/drivers/pci/fixups-dreamcast.c +++ b/arch/sh/drivers/pci/fixups-dreamcast.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index c3e4e2df26a8..a034f571d869 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 6b92dd51c002..9de57e88f7a1 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/mm/io-unit.c b/arch/sparc/mm/io-unit.c index 430a47a1b6ae..bf3e6d2fe5d9 100644 --- a/arch/sparc/mm/io-unit.c +++ b/arch/sparc/mm/io-unit.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/mm/iommu.c b/arch/sparc/mm/iommu.c index 3a388b1c5d4b..0c0342e5b10d 100644 --- a/arch/sparc/mm/iommu.c +++ b/arch/sparc/mm/iommu.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index c96dcaa572eb..9ac696487b13 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 5dcedad21dff..4892dd043d41 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 3511736fbc74..9286fa9d575e 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include @@ -19,6 +20,7 @@ #include #include #include +#include #include diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index 33293ce01d8d..19ae3e4fe4e9 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -2,7 +2,7 @@ /* Glue code to lib/swiotlb-xen.c */ -#include +#include #include #include diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index de18c07ca02c..6446b2572f07 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #define IORT_TYPE_MASK(type) (1 << (type)) #define IORT_MSI_TYPE (1 << ACPI_IORT_NODE_ITS_GROUP) diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 2142f1554761..e0b7d7a605b5 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/base/dd.c b/drivers/base/dd.c index 857b0a928e8d..b3d43ace5c2b 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index 58b89ec11b0e..78b8f3403c30 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -5,7 +5,7 @@ // Author: Andrzej Hajda #include -#include +#include #include #include diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index b2f49152b4d4..6787ab0d0e43 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -4,6 +4,7 @@ * Author: Rob Clark */ +#include #include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index c12c1dc43d31..d2e3f2622815 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 2c426dbdf17f..bd3470142b06 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index ed9a8351c3bf..a08cb29692a8 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -9,6 +9,7 @@ * This implementation is very similar to the the virtio bus driver * implementation @ drivers/virtio/virtio.c */ +#include #include #include #include diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c index ae84109649d0..ad7c3604f151 100644 --- a/drivers/misc/mic/bus/scif_bus.c +++ b/drivers/misc/mic/bus/scif_bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "scif_bus.h" diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h index 642cd43bcabc..4981eb56f879 100644 --- a/drivers/misc/mic/bus/scif_bus.h +++ b/drivers/misc/mic/bus/scif_bus.h @@ -12,7 +12,7 @@ * Everything a scif driver needs to work with any particular scif * hardware abstraction layer. */ -#include +#include #include #include "../common/mic_dev.h" diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c index 3c865534868a..6935ddca1bd5 100644 --- a/drivers/misc/mic/bus/vop_bus.c +++ b/drivers/misc/mic/bus/vop_bus.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include "vop_bus.h" diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index fb5b3989753d..8cb85b8b3e19 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include "../bus/scif_bus.h" diff --git a/drivers/of/device.c b/drivers/of/device.c index 6e3ae7ebc33e..655dee422563 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -6,6 +6,7 @@ #include #include #include /* for bus_dma_region */ +#include #include #include #include diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 8cf0b9c8bdf7..b5f9ee81a46c 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 6fcde7980358..dce4cdf786cd 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c index c0e85be598c1..c6fe0cfec0f6 100644 --- a/drivers/pci/xen-pcifront.c +++ b/drivers/pci/xen-pcifront.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/remoteproc/remoteproc_core.c b/drivers/remoteproc/remoteproc_core.c index 8157dd491d28..dab2c0f5caf0 100644 --- a/drivers/remoteproc/remoteproc_core.c +++ b/drivers/remoteproc/remoteproc_core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include /* XXX: pokes into bus_dma_range */ #include diff --git a/drivers/remoteproc/remoteproc_virtio.c b/drivers/remoteproc/remoteproc_virtio.c index dfd3808c34fd..0cc617f76068 100644 --- a/drivers/remoteproc/remoteproc_virtio.c +++ b/drivers/remoteproc/remoteproc_virtio.c @@ -9,7 +9,7 @@ * Brian Swetland */ -#include +#include #include #include #include diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 62d640327145..2629911c29bb 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 030a225624b0..71ff4bf38073 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -27,6 +27,7 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h new file mode 100644 index 000000000000..4b4ba5bdcf6a --- /dev/null +++ b/include/linux/dma-map-ops.h @@ -0,0 +1,158 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This header is for implementations of dma_map_ops and related code. + * It should not be included in drivers just using the DMA API. + */ +#ifndef _LINUX_DMA_MAP_OPS_H +#define _LINUX_DMA_MAP_OPS_H + +#include + +struct dma_map_ops { + void *(*alloc)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp, + unsigned long attrs); + void (*free)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs); + struct page *(*alloc_pages)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + void *(*alloc_noncoherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, enum dma_data_direction dir, + gfp_t gfp); + void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, enum dma_data_direction dir); + int (*mmap)(struct device *, struct vm_area_struct *, + void *, dma_addr_t, size_t, unsigned long attrs); + + int (*get_sgtable)(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); + + dma_addr_t (*map_page)(struct device *dev, struct page *page, + unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + /* + * map_sg returns 0 on error and a value > 0 on success. + * It should never return a value < 0. + */ + int (*map_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + void (*unmap_sg)(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, unsigned long attrs); + dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir, + unsigned long attrs); + void (*sync_single_for_cpu)(struct device *dev, dma_addr_t dma_handle, + size_t size, enum dma_data_direction dir); + void (*sync_single_for_device)(struct device *dev, + dma_addr_t dma_handle, size_t size, + enum dma_data_direction dir); + void (*sync_sg_for_cpu)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, + int nents, enum dma_data_direction dir); + void (*cache_sync)(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); + int (*dma_supported)(struct device *dev, u64 mask); + u64 (*get_required_mask)(struct device *dev); + size_t (*max_mapping_size)(struct device *dev); + unsigned long (*get_merge_boundary)(struct device *dev); +}; + +#ifdef CONFIG_DMA_OPS +#include + +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev->dma_ops) + return dev->dma_ops; + return get_arch_dma_ops(dev->bus); +} + +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ + dev->dma_ops = dma_ops; +} +#else /* CONFIG_DMA_OPS */ +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + return NULL; +} +static inline void set_dma_ops(struct device *dev, + const struct dma_map_ops *dma_ops) +{ +} +#endif /* CONFIG_DMA_OPS */ + +#ifdef CONFIG_DMA_DECLARE_COHERENT +int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, + dma_addr_t device_addr, size_t size); +int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret); +int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); +int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret); + +void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, + dma_addr_t *dma_handle); +int dma_release_from_global_coherent(int order, void *vaddr); +int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, + size_t size, int *ret); + +#else +static inline int dma_declare_coherent_memory(struct device *dev, + phys_addr_t phys_addr, dma_addr_t device_addr, size_t size) +{ + return -ENOSYS; +} +#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) +#define dma_release_from_dev_coherent(dev, order, vaddr) (0) +#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) + +static inline void *dma_alloc_from_global_coherent(struct device *dev, + ssize_t size, dma_addr_t *dma_handle) +{ + return NULL; +} +static inline int dma_release_from_global_coherent(int order, void *vaddr) +{ + return 0; +} +static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, + void *cpu_addr, size_t size, int *ret) +{ + return 0; +} +#endif /* CONFIG_DMA_DECLARE_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS +void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *iommu, bool coherent); +#else +static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, + u64 size, const struct iommu_ops *iommu, bool coherent) +{ +} +#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ + +#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS +void arch_teardown_dma_ops(struct device *dev); +#else +static inline void arch_teardown_dma_ops(struct device *dev) +{ +} +#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ + +extern const struct dma_map_ops dma_dummy_ops; + +#endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 7c77cd6f3604..9591cd482d7c 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -62,72 +62,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -struct dma_map_ops { - void* (*alloc)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs); - void (*free)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs); - struct page *(*alloc_pages)(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, - gfp_t gfp); - void (*free_pages)(struct device *dev, size_t size, struct page *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir); - void* (*alloc_noncoherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, enum dma_data_direction dir, - gfp_t gfp); - void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, enum dma_data_direction dir); - int (*mmap)(struct device *, struct vm_area_struct *, - void *, dma_addr_t, size_t, - unsigned long attrs); - - int (*get_sgtable)(struct device *dev, struct sg_table *sgt, void *, - dma_addr_t, size_t, unsigned long attrs); - - dma_addr_t (*map_page)(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_page)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - /* - * map_sg returns 0 on error and a value > 0 on success. - * It should never return a value < 0. - */ - int (*map_sg)(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_sg)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir, - unsigned long attrs); - dma_addr_t (*map_resource)(struct device *dev, phys_addr_t phys_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*unmap_resource)(struct device *dev, dma_addr_t dma_handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs); - void (*sync_single_for_cpu)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_single_for_device)(struct device *dev, - dma_addr_t dma_handle, size_t size, - enum dma_data_direction dir); - void (*sync_sg_for_cpu)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - void (*sync_sg_for_device)(struct device *dev, - struct scatterlist *sg, int nents, - enum dma_data_direction dir); - int (*dma_supported)(struct device *dev, u64 mask); - u64 (*get_required_mask)(struct device *dev); - size_t (*max_mapping_size)(struct device *dev); - unsigned long (*get_merge_boundary)(struct device *dev); -}; - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a @@ -140,79 +74,9 @@ struct dma_map_ops { */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) -extern const struct dma_map_ops dma_virt_ops; -extern const struct dma_map_ops dma_dummy_ops; - #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) -#ifdef CONFIG_DMA_DECLARE_COHERENT -/* - * These three functions are only for dma allocator. - * Don't use them in device drivers. - */ -int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle, void **ret); -int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr); - -int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, size_t size, int *ret); - -void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, dma_addr_t *dma_handle); -int dma_release_from_global_coherent(int order, void *vaddr); -int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *cpu_addr, - size_t size, int *ret); - -#else -#define dma_alloc_from_dev_coherent(dev, size, handle, ret) (0) -#define dma_release_from_dev_coherent(dev, order, vaddr) (0) -#define dma_mmap_from_dev_coherent(dev, vma, vaddr, order, ret) (0) - -static inline void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size, - dma_addr_t *dma_handle) -{ - return NULL; -} - -static inline int dma_release_from_global_coherent(int order, void *vaddr) -{ - return 0; -} - -static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, - void *cpu_addr, size_t size, - int *ret) -{ - return 0; -} -#endif /* CONFIG_DMA_DECLARE_COHERENT */ - #ifdef CONFIG_HAS_DMA -#include - -#ifdef CONFIG_DMA_OPS -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - if (dev->dma_ops) - return dev->dma_ops; - return get_arch_dma_ops(dev->bus); -} - -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ - dev->dma_ops = dma_ops; -} -#else /* CONFIG_DMA_OPS */ -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) -{ - return NULL; -} -static inline void set_dma_ops(struct device *dev, - const struct dma_map_ops *dma_ops) -{ -} -#endif /* CONFIG_DMA_OPS */ - static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); @@ -595,24 +459,6 @@ static inline bool dma_addressing_limited(struct device *dev) dma_get_required_mask(dev); } -#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *iommu, bool coherent); -#else -static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, - u64 size, const struct iommu_ops *iommu, bool coherent) -{ -} -#endif /* CONFIG_ARCH_HAS_SETUP_DMA_OPS */ - -#ifdef CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS -void arch_teardown_dma_ops(struct device *dev); -#else -static inline void arch_teardown_dma_ops(struct device *dev) -{ -} -#endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ - static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) @@ -672,18 +518,6 @@ static inline int dma_get_cache_alignment(void) return 1; } -#ifdef CONFIG_DMA_DECLARE_COHERENT -int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size); -#else -static inline int -dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, - dma_addr_t device_addr, size_t size) -{ - return -ENOSYS; -} -#endif /* CONFIG_DMA_DECLARE_COHERENT */ - static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { @@ -741,4 +575,6 @@ static inline int dma_mmap_wc(struct device *dev, int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start, dma_addr_t dma_start, u64 size); +extern const struct dma_map_ops dma_virt_ops; + #endif /* _LINUX_DMA_MAPPING_H */ diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c index c0685196fb6d..5b5b6c7ec7f2 100644 --- a/kernel/dma/coherent.c +++ b/kernel/dma/coherent.c @@ -8,6 +8,7 @@ #include #include #include +#include struct dma_coherent_mem { void *virt_base; diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 121a9c1969dd..8cf5689a8c40 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c index 6974b1bd7d0b..eacd4c5b10bf 100644 --- a/kernel/dma/dummy.c +++ b/kernel/dma/dummy.c @@ -2,7 +2,7 @@ /* * Dummy DMA ops that always fail. */ -#include +#include static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 9669550656a0..2e13e6d3903f 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -8,7 +8,7 @@ #include /* for max_pfn */ #include #include -#include +#include #include #include #include diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 5828e5e01b79..60d7b6cdfd8d 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -4,6 +4,7 @@ * the allocated memory contains normal pages in the direct kernel mapping. */ #include +#include #include /* diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c index 6986bf1fd668..59d32317dd57 100644 --- a/kernel/dma/virt.c +++ b/kernel/dma/virt.c @@ -4,7 +4,7 @@ */ #include #include -#include +#include #include static void *dma_virt_alloc(struct device *dev, size_t size, From 8df4051232152a0520ab3035c2d96f33083c2d6a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:29:47 +0200 Subject: [PATCH 52/63] dma-contiguous: remove dma_declare_contiguous dma_declare_contiguous is a trivial wrapper around dma_contiguous_reserve_area and just has a single caller. Signed-off-by: Christoph Hellwig --- arch/arm/mach-davinci/devices-da8xx.c | 16 +++++++++----- include/linux/dma-contiguous.h | 32 --------------------------- 2 files changed, 10 insertions(+), 38 deletions(-) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index feb206bdf6e1..2e2853582b45 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -884,6 +884,7 @@ early_param("rproc_mem", early_rproc_mem); void __init da8xx_rproc_reserve_cma(void) { + struct cma *cma; int ret; if (!rproc_base || !rproc_size) { @@ -897,13 +898,16 @@ void __init da8xx_rproc_reserve_cma(void) pr_info("%s: reserving 0x%lx @ 0x%lx...\n", __func__, rproc_size, (unsigned long)rproc_base); - ret = dma_declare_contiguous(&da8xx_dsp.dev, rproc_size, rproc_base, 0); - if (ret) - pr_err("%s: dma_declare_contiguous failed %d\n", __func__, ret); - else - rproc_mem_inited = true; + ret = dma_contiguous_reserve_area(rproc_size, rproc_base, 0, &cma, + true); + if (ret) { + pr_err("%s: dma_contiguous_reserve_area failed %d\n", + __func__, ret); + return; + } + dev_set_cma_area(&da8xx_dsp.dev, cma); + rproc_mem_inited = true; } - #else void __init da8xx_rproc_reserve_cma(void) diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index fe55e004f1f4..62fd55d07235 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -83,31 +83,6 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, phys_addr_t limit, struct cma **res_cma, bool fixed); -/** - * dma_declare_contiguous() - reserve area for contiguous memory handling - * for particular device - * @dev: Pointer to device structure. - * @size: Size of the reserved memory. - * @base: Start address of the reserved memory (optional, 0 for any). - * @limit: End address of the reserved memory (optional, 0 for any). - * - * This function reserves memory for specified device. It should be - * called by board specific code when early allocator (memblock or bootmem) - * is still activate. - */ - -static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - struct cma *cma; - int ret; - ret = dma_contiguous_reserve_area(size, base, limit, &cma, true); - if (ret == 0) - dev_set_cma_area(dev, cma); - - return ret; -} - struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn); bool dma_release_from_contiguous(struct device *dev, struct page *pages, @@ -135,13 +110,6 @@ static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base return -ENOSYS; } -static inline -int dma_declare_contiguous(struct device *dev, phys_addr_t size, - phys_addr_t base, phys_addr_t limit) -{ - return -ENOSYS; -} - static inline struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order, bool no_warn) From 5af638931eb374aa0894d8343cee72f50307ef20 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:03 +0200 Subject: [PATCH 53/63] dma-contiguous: remove dev_set_cma_area dev_set_cma_area contains a trivial assignment. It has just three callers that all have a non-NULL device and depend on CONFIG_DMA_CMA, so remove the wrapper. Signed-off-by: Christoph Hellwig --- arch/arm/mach-davinci/devices-da8xx.c | 2 +- include/linux/dma-contiguous.h | 8 -------- kernel/dma/contiguous.c | 4 ++-- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 2e2853582b45..1207eabe8d1c 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -905,7 +905,7 @@ void __init da8xx_rproc_reserve_cma(void) __func__, ret); return; } - dev_set_cma_area(&da8xx_dsp.dev, cma); + da8xx_dsp.dev.cma_area = cma; rproc_mem_inited = true; } #else diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 62fd55d07235..41ec08d81bc3 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -66,12 +66,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return dma_contiguous_default_area; } -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) -{ - if (dev) - dev->cma_area = cma; -} - static inline void dma_contiguous_set_default(struct cma *cma) { dma_contiguous_default_area = cma; @@ -97,8 +91,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return NULL; } -static inline void dev_set_cma_area(struct device *dev, struct cma *cma) { } - static inline void dma_contiguous_set_default(struct cma *cma) { } static inline void dma_contiguous_reserve(phys_addr_t limit) { } diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index f4c150810fd2..95adcee972e8 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -359,14 +359,14 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size) static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, rmem->priv); + dev->cma_area = rmem->priv; return 0; } static void rmem_cma_device_release(struct reserved_mem *rmem, struct device *dev) { - dev_set_cma_area(dev, NULL); + dev->cma_area = NULL; } static const struct reserved_mem_ops rmem_cma_ops = { From 580a0cc9c3f662e0b10136bc8af1e672e472806f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:40 +0200 Subject: [PATCH 54/63] dma-contiguous: remove dma_contiguous_set_default dma_contiguous_set_default contains a trivial assignment, and has a single caller that is compiled if CONFIG_CMA_DMA is enabled. Signed-off-by: Christoph Hellwig --- include/linux/dma-contiguous.h | 7 ------- kernel/dma/contiguous.c | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 41ec08d81bc3..f9ce1ee58d41 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -66,11 +66,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return dma_contiguous_default_area; } -static inline void dma_contiguous_set_default(struct cma *cma) -{ - dma_contiguous_default_area = cma; -} - void dma_contiguous_reserve(phys_addr_t addr_limit); int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, @@ -91,8 +86,6 @@ static inline struct cma *dev_get_cma_area(struct device *dev) return NULL; } -static inline void dma_contiguous_set_default(struct cma *cma) { } - static inline void dma_contiguous_reserve(phys_addr_t limit) { } static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 95adcee972e8..bf05ec2256e1 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -407,7 +407,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem) dma_contiguous_early_fixup(rmem->base, rmem->size); if (default_cma) - dma_contiguous_set_default(cma); + dma_contiguous_default_area = cma; rmem->ops = &rmem_cma_ops; rmem->priv = cma; From 0b1abd1fb7efafc25231c54a67c6fbb3d3127efd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:56:52 +0200 Subject: [PATCH 55/63] dma-mapping: merge into Merge dma-contiguous.h into dma-map-ops.h, after removing the comment describing the contiguous allocator into kernel/dma/contigous.c. Signed-off-by: Christoph Hellwig --- .../admin-guide/kernel-parameters.txt | 2 +- arch/arm/mach-davinci/devices-da8xx.c | 2 +- arch/arm/mach-shmobile/setup-rcar-gen2.c | 2 +- arch/arm/mm/dma-mapping.c | 1 - arch/arm/mm/init.c | 2 +- arch/arm64/mm/init.c | 3 +- arch/csky/kernel/setup.c | 2 +- arch/csky/mm/dma-mapping.c | 3 +- arch/microblaze/mm/init.c | 2 +- arch/mips/kernel/setup.c | 2 +- arch/mips/mm/dma-noncoherent.c | 1 - arch/s390/kernel/setup.c | 2 +- arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/kernel/setup.c | 2 +- arch/xtensa/kernel/pci-dma.c | 2 +- arch/xtensa/mm/init.c | 2 +- drivers/dma-buf/heaps/cma_heap.c | 2 +- drivers/iommu/amd/iommu.c | 3 +- drivers/iommu/dma-iommu.c | 1 - drivers/iommu/intel/iommu.c | 2 +- drivers/media/platform/exynos4-is/fimc-is.c | 1 - include/linux/dma-contiguous.h | 135 ------------------ include/linux/dma-map-ops.h | 65 +++++++++ kernel/dma/Kconfig | 2 +- kernel/dma/contiguous.c | 30 +++- kernel/dma/direct.c | 1 - kernel/dma/ops_helpers.c | 1 - kernel/dma/pool.c | 2 +- 28 files changed, 112 insertions(+), 164 deletions(-) delete mode 100644 include/linux/dma-contiguous.h diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index e464cf0b5025..7657e00e83ca 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -597,7 +597,7 @@ placement constraint by the physical address range of memory allocations. A value of 0 disables CMA altogether. For more information, see - include/linux/dma-contiguous.h + kernel/dma/contiguous.c cma_pernuma=nn[MG] [ARM64,KNL] diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 1207eabe8d1c..bb368938fc49 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-shmobile/setup-rcar-gen2.c b/arch/arm/mach-shmobile/setup-rcar-gen2.c index c42ff8c314c8..e00f5b3b9293 100644 --- a/arch/arm/mach-shmobile/setup-rcar-gen2.c +++ b/arch/arm/mach-shmobile/setup-rcar-gen2.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8bf0bc6bc311..154c24cec94c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 000c1b48e973..ab1d1931a352 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f1c75957ff3c..1b591ddb28b0 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -21,8 +21,7 @@ #include #include #include -#include -#include +#include #include #include #include diff --git a/arch/csky/kernel/setup.c b/arch/csky/kernel/setup.c index 0481f4e34538..e4cab16056d6 100644 --- a/arch/csky/kernel/setup.c +++ b/arch/csky/kernel/setup.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 8f6571ae27c8..3d9ff26c4bb4 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -2,8 +2,7 @@ // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. #include -#include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 3344d4a1fe89..7659a8b86580 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -7,7 +7,7 @@ * for more details. */ -#include +#include #include #include #include diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index bf5f5acab0a8..464bfd3957ae 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index f4e8404ee049..22d99ccc70c8 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index c2c1b4e723ea..151092565a27 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index fed67eafcacc..e0c380b3ec14 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -11,7 +11,6 @@ #include #include #include -#include extern int iommu_merge; extern int panic_on_overflow; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 9286fa9d575e..e8155e85bd8f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 17c4384f8495..790dbe022a07 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -11,7 +11,7 @@ * Joe Taylor */ -#include +#include #include #include #include diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index a05b306cf371..8079007842ac 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/dma-buf/heaps/cma_heap.c b/drivers/dma-buf/heaps/cma_heap.c index 626cf7fd033a..e55384dc115b 100644 --- a/drivers/dma-buf/heaps/cma_heap.c +++ b/drivers/dma-buf/heaps/cma_heap.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 10e4200d3552..5396eb8d730b 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d2e3f2622815..22c221bba13e 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index bd3470142b06..0c5b4500ae83 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/platform/exynos4-is/fimc-is.c b/drivers/media/platform/exynos4-is/fimc-is.c index a474014f0a0f..32f5982afa1a 100644 --- a/drivers/media/platform/exynos4-is/fimc-is.c +++ b/drivers/media/platform/exynos4-is/fimc-is.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h deleted file mode 100644 index f9ce1ee58d41..000000000000 --- a/include/linux/dma-contiguous.h +++ /dev/null @@ -1,135 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef __LINUX_CMA_H -#define __LINUX_CMA_H - -/* - * Contiguous Memory Allocator for DMA mapping framework - * Copyright (c) 2010-2011 by Samsung Electronics. - * Written by: - * Marek Szyprowski - * Michal Nazarewicz - */ - -/* - * Contiguous Memory Allocator - * - * The Contiguous Memory Allocator (CMA) makes it possible to - * allocate big contiguous chunks of memory after the system has - * booted. - * - * Why is it needed? - * - * Various devices on embedded systems have no scatter-getter and/or - * IO map support and require contiguous blocks of memory to - * operate. They include devices such as cameras, hardware video - * coders, etc. - * - * Such devices often require big memory buffers (a full HD frame - * is, for instance, more then 2 mega pixels large, i.e. more than 6 - * MB of memory), which makes mechanisms such as kmalloc() or - * alloc_page() ineffective. - * - * At the same time, a solution where a big memory region is - * reserved for a device is suboptimal since often more memory is - * reserved then strictly required and, moreover, the memory is - * inaccessible to page system even if device drivers don't use it. - * - * CMA tries to solve this issue by operating on memory regions - * where only movable pages can be allocated from. This way, kernel - * can use the memory for pagecache and when device driver requests - * it, allocated pages can be migrated. - * - * Driver usage - * - * CMA should not be used by the device drivers directly. It is - * only a helper framework for dma-mapping subsystem. - * - * For more information, see kernel-docs in kernel/dma/contiguous.c - */ - -#ifdef __KERNEL__ - -#include -#include - -struct cma; -struct page; - -#ifdef CONFIG_DMA_CMA - -extern struct cma *dma_contiguous_default_area; - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - if (dev && dev->cma_area) - return dev->cma_area; - return dma_contiguous_default_area; -} - -void dma_contiguous_reserve(phys_addr_t addr_limit); - -int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed); - -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn); -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count); -struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); -void dma_free_contiguous(struct device *dev, struct page *page, size_t size); - -#else - -static inline struct cma *dev_get_cma_area(struct device *dev) -{ - return NULL; -} - -static inline void dma_contiguous_reserve(phys_addr_t limit) { } - -static inline int dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, - phys_addr_t limit, struct cma **res_cma, - bool fixed) -{ - return -ENOSYS; -} - -static inline -struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order, bool no_warn) -{ - return NULL; -} - -static inline -bool dma_release_from_contiguous(struct device *dev, struct page *pages, - int count) -{ - return false; -} - -/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ -static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, - gfp_t gfp) -{ - return NULL; -} - -static inline void dma_free_contiguous(struct device *dev, struct page *page, - size_t size) -{ - __free_pages(page, get_order(size)); -} - -#endif - -#ifdef CONFIG_DMA_PERNUMA_CMA -void dma_pernuma_cma_reserve(void); -#else -static inline void dma_pernuma_cma_reserve(void) { } -#endif - -#endif - -#endif diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 4b4ba5bdcf6a..474fc81bd492 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -8,6 +8,8 @@ #include +struct cma; + struct dma_map_ops { void *(*alloc)(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -94,6 +96,69 @@ static inline void set_dma_ops(struct device *dev, } #endif /* CONFIG_DMA_OPS */ +#ifdef CONFIG_DMA_CMA +extern struct cma *dma_contiguous_default_area; + +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + if (dev && dev->cma_area) + return dev->cma_area; + return dma_contiguous_default_area; +} + +void dma_contiguous_reserve(phys_addr_t addr_limit); +int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, + phys_addr_t limit, struct cma **res_cma, bool fixed); + +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, + unsigned int order, bool no_warn); +bool dma_release_from_contiguous(struct device *dev, struct page *pages, + int count); +struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); +void dma_free_contiguous(struct device *dev, struct page *page, size_t size); +#else /* CONFIG_DMA_CMA */ +static inline struct cma *dev_get_cma_area(struct device *dev) +{ + return NULL; +} +static inline void dma_contiguous_reserve(phys_addr_t limit) +{ +} +static inline int dma_contiguous_reserve_area(phys_addr_t size, + phys_addr_t base, phys_addr_t limit, struct cma **res_cma, + bool fixed) +{ + return -ENOSYS; +} +static inline struct page *dma_alloc_from_contiguous(struct device *dev, + size_t count, unsigned int order, bool no_warn) +{ + return NULL; +} +static inline bool dma_release_from_contiguous(struct device *dev, + struct page *pages, int count) +{ + return false; +} +/* Use fallback alloc() and free() when CONFIG_DMA_CMA=n */ +static inline struct page *dma_alloc_contiguous(struct device *dev, size_t size, + gfp_t gfp) +{ + return NULL; +} +static inline void dma_free_contiguous(struct device *dev, struct page *page, + size_t size) +{ + __free_pages(page, get_order(size)); +} +#endif /* CONFIG_DMA_CMA*/ + +#ifdef CONFIG_DMA_PERNUMA_CMA +void dma_pernuma_cma_reserve(void); +#else +static inline void dma_pernuma_cma_reserve(void) { } +#endif /* CONFIG_DMA_PERNUMA_CMA */ + #ifdef CONFIG_DMA_DECLARE_COHERENT int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr, dma_addr_t device_addr, size_t size); diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig index 647996702bc9..c99de4a21458 100644 --- a/kernel/dma/Kconfig +++ b/kernel/dma/Kconfig @@ -118,7 +118,7 @@ config DMA_CMA You can disable CMA by specifying "cma=0" on the kernel's command line. - For more information see . + For more information see . If unsure, say "n". if DMA_CMA diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index bf05ec2256e1..6bfb763fff6f 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -5,6 +5,34 @@ * Written by: * Marek Szyprowski * Michal Nazarewicz + * + * Contiguous Memory Allocator + * + * The Contiguous Memory Allocator (CMA) makes it possible to + * allocate big contiguous chunks of memory after the system has + * booted. + * + * Why is it needed? + * + * Various devices on embedded systems have no scatter-getter and/or + * IO map support and require contiguous blocks of memory to + * operate. They include devices such as cameras, hardware video + * coders, etc. + * + * Such devices often require big memory buffers (a full HD frame + * is, for instance, more then 2 mega pixels large, i.e. more than 6 + * MB of memory), which makes mechanisms such as kmalloc() or + * alloc_page() ineffective. + * + * At the same time, a solution where a big memory region is + * reserved for a device is suboptimal since often more memory is + * reserved then strictly required and, moreover, the memory is + * inaccessible to page system even if device drivers don't use it. + * + * CMA tries to solve this issue by operating on memory regions + * where only movable pages can be allocated from. This way, kernel + * can use the memory for pagecache and when device driver requests + * it, allocated pages can be migrated. */ #define pr_fmt(fmt) "cma: " fmt @@ -21,7 +49,7 @@ #include #include #include -#include +#include #include #ifdef CONFIG_CMA_SIZE_MBYTES diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8cf5689a8c40..87697c86f0b8 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index 60d7b6cdfd8d..bc80fd2648bc 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -3,7 +3,6 @@ * Helpers for DMA ops implementations. These generally rely on the fact that * the allocated memory contains normal pages in the direct kernel mapping. */ -#include #include #include diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index fe11643ff9cc..c9fb5c3d8bd0 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #include #include From 5db5d93089880c3cc9e83ca8bba68a5502e92dfe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 11:04:43 +0200 Subject: [PATCH 56/63] dma-mapping: remove Just provide a weak default definition of dma_contiguous_early_fixup and let arm override it. Signed-off-by: Christoph Hellwig --- arch/arm/include/asm/dma-contiguous.h | 15 --------------- arch/arm/mm/dma-mapping.c | 1 - include/asm-generic/Kbuild | 1 - include/asm-generic/dma-contiguous.h | 10 ---------- include/linux/dma-map-ops.h | 2 ++ kernel/dma/contiguous.c | 6 +++++- 6 files changed, 7 insertions(+), 28 deletions(-) delete mode 100644 arch/arm/include/asm/dma-contiguous.h delete mode 100644 include/asm-generic/dma-contiguous.h diff --git a/arch/arm/include/asm/dma-contiguous.h b/arch/arm/include/asm/dma-contiguous.h deleted file mode 100644 index d785187a6f8a..000000000000 --- a/arch/arm/include/asm/dma-contiguous.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef ASMARM_DMA_CONTIGUOUS_H -#define ASMARM_DMA_CONTIGUOUS_H - -#ifdef __KERNEL__ -#ifdef CONFIG_DMA_CMA - -#include - -void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); - -#endif -#endif - -#endif diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 154c24cec94c..911fc6ea2607 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include "dma.h" diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild index 74b0612601dd..62ebdc731ee2 100644 --- a/include/asm-generic/Kbuild +++ b/include/asm-generic/Kbuild @@ -16,7 +16,6 @@ mandatory-y += current.h mandatory-y += delay.h mandatory-y += device.h mandatory-y += div64.h -mandatory-y += dma-contiguous.h mandatory-y += dma-mapping.h mandatory-y += dma.h mandatory-y += emergency-restart.h diff --git a/include/asm-generic/dma-contiguous.h b/include/asm-generic/dma-contiguous.h deleted file mode 100644 index f24b0f9a4f05..000000000000 --- a/include/asm-generic/dma-contiguous.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_GENERIC_DMA_CONTIGUOUS_H -#define _ASM_GENERIC_DMA_CONTIGUOUS_H - -#include - -static inline void -dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) { } - -#endif diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 474fc81bd492..7912f5d00ed9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -116,6 +116,8 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp); void dma_free_contiguous(struct device *dev, struct page *page, size_t size); + +void dma_contiguous_early_fixup(phys_addr_t base, unsigned long size); #else /* CONFIG_DMA_CMA */ static inline struct cma *dev_get_cma_area(struct device *dev) { diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 6bfb763fff6f..a2ee330a3749 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -44,7 +44,6 @@ #endif #include -#include #include #include @@ -212,6 +211,11 @@ void __init dma_contiguous_reserve(phys_addr_t limit) } } +void __weak +dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) +{ +} + /** * dma_contiguous_reserve_area() - reserve custom contiguous area * @size: Size of the reserved area (in bytes), From a1fd09e8e6ae35228ecc7c1e4bfff1fd725f78a0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 11 Sep 2020 10:12:44 +0200 Subject: [PATCH 57/63] dma-mapping: move dma-debug.h to kernel/dma/ Most of dma-debug.h is not required by anything outside of kernel/dma. Move the four declarations needed by dma-mappin.h or dma-ops providers into dma-mapping.h and dma-map-ops.h, and move the remainder of the file to kernel/dma/debug.h. Signed-off-by: Christoph Hellwig --- arch/arm/include/asm/dma-iommu.h | 1 - arch/arm/include/asm/dma-mapping.h | 1 - arch/microblaze/kernel/dma.c | 1 - arch/sh/drivers/pci/pci.c | 1 - arch/x86/include/asm/dma-mapping.h | 1 - arch/x86/kernel/pci-dma.c | 1 - drivers/pci/pci-driver.c | 1 + include/linux/dma-map-ops.h | 12 +++++ include/linux/dma-mapping.h | 16 ++++++- kernel/dma/debug.c | 5 +-- .../linux/dma-debug.h => kernel/dma/debug.h | 44 ++----------------- kernel/dma/mapping.c | 1 + mm/memory.c | 1 - 13 files changed, 34 insertions(+), 52 deletions(-) rename include/linux/dma-debug.h => kernel/dma/debug.h (81%) diff --git a/arch/arm/include/asm/dma-iommu.h b/arch/arm/include/asm/dma-iommu.h index 86405cc81385..fe9ef6f79e9c 100644 --- a/arch/arm/include/asm/dma-iommu.h +++ b/arch/arm/include/asm/dma-iommu.h @@ -6,7 +6,6 @@ #include #include -#include #include struct dma_iommu_mapping { diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 0a1a536368c3..77082246a5e1 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -6,7 +6,6 @@ #include #include -#include #include #include diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index d7bebd04247b..a564863db06e 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c index 6ab0b7377f66..a3903304f33f 100644 --- a/arch/sh/drivers/pci/pci.c +++ b/arch/sh/drivers/pci/pci.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index e0c380b3ec14..bb1654fe0ce7 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -8,7 +8,6 @@ */ #include -#include #include #include diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 4892dd043d41..de234e7a8962 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include #include diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 449466f71040..d1b7169c0684 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "pci.h" #include "pcie/portdrv.h" diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 7912f5d00ed9..9891def42da7 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -220,6 +220,18 @@ static inline void arch_teardown_dma_ops(struct device *dev) } #endif /* CONFIG_ARCH_HAS_TEARDOWN_DMA_OPS */ +#ifdef CONFIG_DMA_API_DEBUG +void dma_debug_add_bus(struct bus_type *bus); +void debug_dma_dump_mappings(struct device *dev); +#else +static inline void dma_debug_add_bus(struct bus_type *bus) +{ +} +static inline void debug_dma_dump_mappings(struct device *dev) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ + extern const struct dma_map_ops dma_dummy_ops; #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9591cd482d7c..3f029afdc9dc 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -76,6 +75,21 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +#ifdef CONFIG_DMA_API_DEBUG +void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); +void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len); +#else +static inline void debug_dma_mapping_error(struct device *dev, + dma_addr_t dma_addr) +{ +} +static inline void debug_dma_map_single(struct device *dev, const void *addr, + unsigned long len) +{ +} +#endif /* CONFIG_DMA_API_DEBUG */ + #ifdef CONFIG_HAS_DMA static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index 4211800d9f3e..14de1271463f 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -9,10 +9,9 @@ #include #include -#include +#include #include #include -#include #include #include #include @@ -24,8 +23,8 @@ #include #include #include - #include +#include "debug.h" #define HASH_SIZE 16384ULL #define HASH_FN_SHIFT 13 diff --git a/include/linux/dma-debug.h b/kernel/dma/debug.h similarity index 81% rename from include/linux/dma-debug.h rename to kernel/dma/debug.h index 7b3b04ba60f3..83643b3010b2 100644 --- a/include/linux/dma-debug.h +++ b/kernel/dma/debug.h @@ -5,28 +5,14 @@ * Author: Joerg Roedel */ -#ifndef __DMA_DEBUG_H -#define __DMA_DEBUG_H - -#include - -struct device; -struct scatterlist; -struct bus_type; +#ifndef _KERNEL_DMA_DEBUG_H +#define _KERNEL_DMA_DEBUG_H #ifdef CONFIG_DMA_API_DEBUG - -extern void dma_debug_add_bus(struct bus_type *bus); - -extern void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len); - extern void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr); -extern void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); - extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction); @@ -64,31 +50,13 @@ extern void debug_dma_sync_sg_for_cpu(struct device *dev, extern void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, int direction); - -extern void debug_dma_dump_mappings(struct device *dev); - #else /* CONFIG_DMA_API_DEBUG */ - -static inline void dma_debug_add_bus(struct bus_type *bus) -{ -} - -static inline void debug_dma_map_single(struct device *dev, const void *addr, - unsigned long len) -{ -} - static inline void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, size_t size, int direction, dma_addr_t dma_addr) { } -static inline void debug_dma_mapping_error(struct device *dev, - dma_addr_t dma_addr) -{ -} - static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, size_t size, int direction) { @@ -150,11 +118,5 @@ static inline void debug_dma_sync_sg_for_device(struct device *dev, int nelems, int direction) { } - -static inline void debug_dma_dump_mappings(struct device *dev) -{ -} - #endif /* CONFIG_DMA_API_DEBUG */ - -#endif /* __DMA_DEBUG_H */ +#endif /* _KERNEL_DMA_DEBUG_H */ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 2e13e6d3903f..335ba183e095 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -14,6 +14,7 @@ #include #include #include +#include "debug.h" /* * Managed DMA API diff --git a/mm/memory.c b/mm/memory.c index f3eb55975902..7e4e8b81a738 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,7 +65,6 @@ #include #include #include -#include #include #include #include From 19c65c3d30bb5a97170e425979d2e44ab2096c7d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:34:22 +0200 Subject: [PATCH 58/63] dma-mapping: move large parts of to kernel/dma Most of the dma_direct symbols should only be used by direct.c and mapping.c, so move them to kernel/dma. In fact more of dma-direct.h should eventually move, but that will require more coordination with other subsystems. Signed-off-by: Christoph Hellwig --- include/linux/dma-direct.h | 106 --------------------------------- kernel/dma/direct.c | 2 +- kernel/dma/direct.h | 119 +++++++++++++++++++++++++++++++++++++ kernel/dma/mapping.c | 2 +- 4 files changed, 121 insertions(+), 108 deletions(-) create mode 100644 kernel/dma/direct.h diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index 38ed3b55034d..a2d6640c42c0 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -120,114 +120,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, void dma_direct_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_addr, enum dma_data_direction dir); -int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); -bool dma_direct_can_mmap(struct device *dev); -int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs); int dma_direct_supported(struct device *dev, u64 mask); -bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs); dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr, size_t size, enum dma_data_direction dir, unsigned long attrs); -size_t dma_direct_max_mapping_size(struct device *dev); -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ - defined(CONFIG_SWIOTLB) -void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs); -void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir); -#else -static inline void dma_direct_unmap_sg(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir, - unsigned long attrs) -{ -} -static inline void dma_direct_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ -} -#endif - -static inline void dma_direct_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); - - if (!dev_is_dma_coherent(dev)) - arch_sync_dma_for_device(paddr, size, dir); -} - -static inline void dma_direct_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - phys_addr_t paddr = dma_to_phys(dev, addr); - - if (!dev_is_dma_coherent(dev)) { - arch_sync_dma_for_cpu(paddr, size, dir); - arch_sync_dma_for_cpu_all(); - } - - if (unlikely(is_swiotlb_buffer(paddr))) - swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); - - if (dir == DMA_FROM_DEVICE) - arch_dma_mark_clean(paddr, size); -} - -static inline dma_addr_t dma_direct_map_page(struct device *dev, - struct page *page, unsigned long offset, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = page_to_phys(page) + offset; - dma_addr_t dma_addr = phys_to_dma(dev, phys); - - if (unlikely(swiotlb_force == SWIOTLB_FORCE)) - return swiotlb_map(dev, phys, size, dir, attrs); - - if (unlikely(!dma_capable(dev, dma_addr, size, true))) { - if (swiotlb_force != SWIOTLB_NO_FORCE) - return swiotlb_map(dev, phys, size, dir, attrs); - - dev_WARN_ONCE(dev, 1, - "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", - &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); - return DMA_MAPPING_ERROR; - } - - if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(phys, size, dir); - return dma_addr; -} - -static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - phys_addr_t phys = dma_to_phys(dev, addr); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_direct_sync_single_for_cpu(dev, addr, size, dir); - - if (unlikely(is_swiotlb_buffer(phys))) - swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); -} #endif /* _LINUX_DMA_DIRECT_H */ diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 87697c86f0b8..bf9f77623022 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -7,13 +7,13 @@ #include /* for max_pfn */ #include #include -#include #include #include #include #include #include #include +#include "direct.h" /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h new file mode 100644 index 000000000000..b98615578737 --- /dev/null +++ b/kernel/dma/direct.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2018 Christoph Hellwig. + * + * DMA operations that map physical memory directly without using an IOMMU. + */ +#ifndef _KERNEL_DMA_DIRECT_H +#define _KERNEL_DMA_DIRECT_H + +#include + +int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_can_mmap(struct device *dev); +int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs); +bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr); +int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, + enum dma_data_direction dir, unsigned long attrs); +size_t dma_direct_max_mapping_size(struct device *dev); + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_sync_sg_for_device(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \ + defined(CONFIG_SWIOTLB) +void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl, + int nents, enum dma_data_direction dir, unsigned long attrs); +void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir); +#else +static inline void dma_direct_unmap_sg(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +} +static inline void dma_direct_sync_sg_for_cpu(struct device *dev, + struct scatterlist *sgl, int nents, enum dma_data_direction dir) +{ +} +#endif + +static inline void dma_direct_sync_single_for_device(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE); + + if (!dev_is_dma_coherent(dev)) + arch_sync_dma_for_device(paddr, size, dir); +} + +static inline void dma_direct_sync_single_for_cpu(struct device *dev, + dma_addr_t addr, size_t size, enum dma_data_direction dir) +{ + phys_addr_t paddr = dma_to_phys(dev, addr); + + if (!dev_is_dma_coherent(dev)) { + arch_sync_dma_for_cpu(paddr, size, dir); + arch_sync_dma_for_cpu_all(); + } + + if (unlikely(is_swiotlb_buffer(paddr))) + swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU); + + if (dir == DMA_FROM_DEVICE) + arch_dma_mark_clean(paddr, size); +} + +static inline dma_addr_t dma_direct_map_page(struct device *dev, + struct page *page, unsigned long offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = page_to_phys(page) + offset; + dma_addr_t dma_addr = phys_to_dma(dev, phys); + + if (unlikely(swiotlb_force == SWIOTLB_FORCE)) + return swiotlb_map(dev, phys, size, dir, attrs); + + if (unlikely(!dma_capable(dev, dma_addr, size, true))) { + if (swiotlb_force != SWIOTLB_NO_FORCE) + return swiotlb_map(dev, phys, size, dir, attrs); + + dev_WARN_ONCE(dev, 1, + "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n", + &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit); + return DMA_MAPPING_ERROR; + } + + if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + arch_sync_dma_for_device(phys, size, dir); + return dma_addr; +} + +static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + phys_addr_t phys = dma_to_phys(dev, addr); + + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) + dma_direct_sync_single_for_cpu(dev, addr, size, dir); + + if (unlikely(is_swiotlb_buffer(phys))) + swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs); +} +#endif /* _KERNEL_DMA_DIRECT_H */ diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 335ba183e095..51bb8fa8eb89 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -7,7 +7,6 @@ */ #include /* for max_pfn */ #include -#include #include #include #include @@ -15,6 +14,7 @@ #include #include #include "debug.h" +#include "direct.h" /* * Managed DMA API From 9f4df96b8781e40d0cb0e32eb3d1f6d87375adf9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Sep 2020 15:36:11 +0200 Subject: [PATCH 59/63] dma-mapping: merge into Move more nitty gritty DMA implementation details into the common internal header. Signed-off-by: Christoph Hellwig --- MAINTAINERS | 1 - arch/arc/mm/dma.c | 1 - arch/arm/mm/dma-mapping.c | 1 - arch/arm/xen/mm.c | 2 +- arch/arm64/mm/dma-mapping.c | 1 - arch/c6x/mm/dma-coherent.c | 2 +- arch/csky/mm/dma-mapping.c | 1 - arch/hexagon/kernel/dma.c | 2 +- arch/ia64/mm/init.c | 2 +- arch/m68k/kernel/dma.c | 2 +- arch/microblaze/kernel/dma.c | 2 +- arch/microblaze/mm/consistent.c | 2 +- arch/mips/jazz/jazzdma.c | 1 - arch/mips/mm/dma-noncoherent.c | 1 - arch/nds32/kernel/dma.c | 2 +- arch/openrisc/kernel/dma.c | 2 +- arch/parisc/kernel/pci-dma.c | 2 +- arch/powerpc/mm/dma-noncoherent.c | 2 +- arch/sh/kernel/dma-coherent.c | 2 +- arch/sparc/kernel/ioport.c | 2 +- arch/xtensa/kernel/pci-dma.c | 1 - drivers/iommu/dma-iommu.c | 1 - drivers/xen/swiotlb-xen.c | 3 +- include/linux/dma-direct.h | 2 +- include/linux/dma-map-ops.h | 102 ++++++++++++++++++++++++++++ include/linux/dma-noncoherent.h | 109 ------------------------------ kernel/dma/ops_helpers.c | 1 - kernel/dma/pool.c | 1 - kernel/dma/swiotlb.c | 2 +- 29 files changed, 118 insertions(+), 137 deletions(-) delete mode 100644 include/linux/dma-noncoherent.h diff --git a/MAINTAINERS b/MAINTAINERS index b13fc1794307..c0dbe6e9de65 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5203,7 +5203,6 @@ F: include/asm-generic/dma-mapping.h F: include/linux/dma-direct.h F: include/linux/dma-mapping.h F: include/linux/dma-map-ops.h -F: include/linux/dma-noncoherent.h F: kernel/dma/ DMA-BUF HEAPS FRAMEWORK diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index a8c453e98d75..517988e60cfc 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -4,7 +4,6 @@ */ #include -#include #include #include diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 911fc6ea2607..c4b8df2ad328 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index 396797ffe2b1..5c80088db13b 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 3afd3bd659d8..93e87b287556 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index a5909091cb14..03df07a831fc 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/csky/mm/dma-mapping.c b/arch/csky/mm/dma-mapping.c index 3d9ff26c4bb4..c3a775a7e8f9 100644 --- a/arch/csky/mm/dma-mapping.c +++ b/arch/csky/mm/dma-mapping.c @@ -3,7 +3,6 @@ #include #include -#include #include #include #include diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index 25f388d9cfcc..00b9a81075dd 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -5,7 +5,7 @@ * Copyright (c) 2010-2012, The Linux Foundation. All rights reserved. */ -#include +#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 02e5aa08294e..ccba04d12671 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index b1ca3522eccc..1c1b875fadc1 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -6,7 +6,7 @@ #undef DEBUG -#include +#include #include #include #include diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index a564863db06e..04d091ade417 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/microblaze/mm/consistent.c b/arch/microblaze/mm/consistent.c index e09b66e43cb6..81dffe43b18c 100644 --- a/arch/microblaze/mm/consistent.c +++ b/arch/microblaze/mm/consistent.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index b8fb42e56da0..461457b28982 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 22d99ccc70c8..38d3d9143b47 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -6,7 +6,6 @@ */ #include #include -#include #include #include diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index 69d762182d49..2ac8e6c82a61 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,7 +3,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 345727638d52..1b16d97e7da7 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -13,7 +13,7 @@ * DMA mapping callbacks... */ -#include +#include #include #include diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index ce38c0b91581..36610a5c029f 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include /* for DMA_CHUNK_SIZE */ diff --git a/arch/powerpc/mm/dma-noncoherent.c b/arch/powerpc/mm/dma-noncoherent.c index 5ab4f868e919..30260b5d146d 100644 --- a/arch/powerpc/mm/dma-noncoherent.c +++ b/arch/powerpc/mm/dma-noncoherent.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/kernel/dma-coherent.c b/arch/sh/kernel/dma-coherent.c index cd46a9825e3c..6a44c0e7ba40 100644 --- a/arch/sh/kernel/dma-coherent.c +++ b/arch/sh/kernel/dma-coherent.c @@ -3,7 +3,7 @@ * Copyright (C) 2004 - 2007 Paul Mundt */ #include -#include +#include #include #include diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index d6874c9b639f..8e1d72a16759 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 790dbe022a07..94955caa4488 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -12,7 +12,6 @@ */ #include -#include #include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 22c221bba13e..3a00fb64477b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c index 71ff4bf38073..71ce1b7a23d1 100644 --- a/drivers/xen/swiotlb-xen.c +++ b/drivers/xen/swiotlb-xen.c @@ -27,9 +27,8 @@ #define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt #include -#include #include -#include +#include #include #include #include diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index a2d6640c42c0..18aade195884 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -7,7 +7,7 @@ #define _LINUX_DMA_DIRECT_H 1 #include -#include +#include #include /* for min_low_pfn */ #include #include diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 9891def42da7..33c6e24707a9 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -7,6 +7,7 @@ #define _LINUX_DMA_MAP_OPS_H #include +#include struct cma; @@ -202,6 +203,107 @@ static inline int dma_mmap_from_global_coherent(struct vm_area_struct *vma, } #endif /* CONFIG_DMA_DECLARE_COHERENT */ +#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H +#include +#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ + defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return dev->dma_coherent; +} +#else +static inline bool dev_is_dma_coherent(struct device *dev) +{ + return true; +} +#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ + +/* + * Check if an allocation needs to be marked uncached to be coherent. + */ +static __always_inline bool dma_alloc_need_uncached(struct device *dev, + unsigned long attrs) +{ + if (dev_is_dma_coherent(dev)) + return false; + if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) + return false; + return true; +} + +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs); +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t dma_addr, unsigned long attrs); + +#ifdef CONFIG_MMU +/* + * Page protection so that devices that can't snoop CPU caches can use the + * memory coherently. We default to pgprot_noncached which is usually used + * for ioremap as a safe bet, but architectures can override this with less + * strict semantics if possible. + */ +#ifndef pgprot_dmacoherent +#define pgprot_dmacoherent(prot) pgprot_noncached(prot) +#endif + +pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); +#else +static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) +{ + return prot; /* no protection bits supported without page tables */ +} +#endif /* CONFIG_MMU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir); +#else +static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ +} +#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ + +#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL +void arch_sync_dma_for_cpu_all(void); +#else +static inline void arch_sync_dma_for_cpu_all(void) +{ +} +#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ + +#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT +void arch_dma_prep_coherent(struct page *page, size_t size); +#else +static inline void arch_dma_prep_coherent(struct page *page, size_t size) +{ +} +#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ + +#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN +void arch_dma_mark_clean(phys_addr_t paddr, size_t size); +#else +static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ +} +#endif /* ARCH_HAS_DMA_MARK_CLEAN */ + +void *arch_dma_set_uncached(void *addr, size_t size); +void arch_dma_clear_uncached(void *addr, size_t size); + #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent); diff --git a/include/linux/dma-noncoherent.h b/include/linux/dma-noncoherent.h deleted file mode 100644 index e61283e06576..000000000000 --- a/include/linux/dma-noncoherent.h +++ /dev/null @@ -1,109 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_DMA_NONCOHERENT_H -#define _LINUX_DMA_NONCOHERENT_H 1 - -#include -#include - -#ifdef CONFIG_ARCH_HAS_DMA_COHERENCE_H -#include -#elif defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ - defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return dev->dma_coherent; -} -#else -static inline bool dev_is_dma_coherent(struct device *dev) -{ - return true; -} -#endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ - -/* - * Check if an allocation needs to be marked uncached to be coherent. - */ -static __always_inline bool dma_alloc_need_uncached(struct device *dev, - unsigned long attrs) -{ - if (dev_is_dma_coherent(dev)) - return false; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return false; - return true; -} - -void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs); -void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs); - -#ifdef CONFIG_MMU -/* - * Page protection so that devices that can't snoop CPU caches can use the - * memory coherently. We default to pgprot_noncached which is usually used - * for ioremap as a safe bet, but architectures can override this with less - * strict semantics if possible. - */ -#ifndef pgprot_dmacoherent -#define pgprot_dmacoherent(prot) pgprot_noncached(prot) -#endif - -pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs); -#else -static inline pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, - unsigned long attrs) -{ - return prot; /* no protection bits supported without page tables */ -} -#endif /* CONFIG_MMU */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE -void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_DEVICE */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir); -#else -static inline void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) -{ -} -#endif /* ARCH_HAS_SYNC_DMA_FOR_CPU */ - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL -void arch_sync_dma_for_cpu_all(void); -#else -static inline void arch_sync_dma_for_cpu_all(void) -{ -} -#endif /* CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL */ - -#ifdef CONFIG_ARCH_HAS_DMA_PREP_COHERENT -void arch_dma_prep_coherent(struct page *page, size_t size); -#else -static inline void arch_dma_prep_coherent(struct page *page, size_t size) -{ -} -#endif /* CONFIG_ARCH_HAS_DMA_PREP_COHERENT */ - -#ifdef CONFIG_ARCH_HAS_DMA_MARK_CLEAN -void arch_dma_mark_clean(phys_addr_t paddr, size_t size); -#else -static inline void arch_dma_mark_clean(phys_addr_t paddr, size_t size) -{ -} -#endif /* ARCH_HAS_DMA_MARK_CLEAN */ - -void *arch_dma_set_uncached(void *addr, size_t size); -void arch_dma_clear_uncached(void *addr, size_t size); - -#endif /* _LINUX_DMA_NONCOHERENT_H */ diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c index bc80fd2648bc..910ae69cae77 100644 --- a/kernel/dma/ops_helpers.c +++ b/kernel/dma/ops_helpers.c @@ -4,7 +4,6 @@ * the allocated memory contains normal pages in the direct kernel mapping. */ #include -#include /* * Create scatter-list for the already allocated DMA buffer. diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c index c9fb5c3d8bd0..d4637f72239b 100644 --- a/kernel/dma/pool.c +++ b/kernel/dma/pool.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 4ea72d145cd2..2be1e8b34ae3 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include From 08a89c28304ae74e4c7422f784359e41a37e3e7c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 26 Sep 2020 16:39:36 +0200 Subject: [PATCH 60/63] dma-direct check for highmem pages in dma_direct_alloc_pages Check for highmem pages from CMA, just like in the dma_direct_alloc path. Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index bf9f77623022..f5ecadd4e1c1 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -309,6 +309,17 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, page = __dma_direct_alloc_pages(dev, size, gfp); if (!page) return NULL; + if (PageHighMem(page)) { + /* + * Depending on the cma= arguments and per-arch setup + * dma_alloc_contiguous could return highmem pages. + * Without remapping there is no way to return them here, + * so log an error and fail. + */ + dev_info(dev, "Rejecting highmem page from CMA.\n"); + goto out_free_pages; + } + ret = page_address(page); if (force_dma_unencrypted(dev)) { if (set_memory_decrypted((unsigned long)ret, From 5b138c534fda57c2ebc1e6de72578aa1d70788a6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Oct 2020 11:06:09 +0200 Subject: [PATCH 61/63] dma-direct: factor out a dma_direct_alloc_from_pool helper This ensures dma_direct_alloc_pages will use the right gfp mask, as well as keeping the code for that common between the two allocators. Signed-off-by: Christoph Hellwig --- kernel/dma/direct.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index f5ecadd4e1c1..eddcb70251c9 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -147,6 +147,22 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, return page; } +static void *dma_direct_alloc_from_pool(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp) +{ + struct page *page; + u64 phys_mask; + void *ret; + + gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, + &phys_mask); + page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok); + if (!page) + return NULL; + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + return ret; +} + void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { @@ -163,17 +179,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; - if (dma_should_alloc_from_pool(dev, gfp, attrs)) { - u64 phys_mask; - - gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask, - &phys_mask); - page = dma_alloc_from_pool(dev, size, &ret, gfp, - dma_coherent_ok); - if (!page) - return NULL; - goto done; - } + if (dma_should_alloc_from_pool(dev, gfp, attrs)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); @@ -298,13 +305,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, struct page *page; void *ret; - if (dma_should_alloc_from_pool(dev, gfp, 0)) { - page = dma_alloc_from_pool(dev, size, &ret, gfp, - dma_coherent_ok); - if (!page) - return NULL; - goto done; - } + if (dma_should_alloc_from_pool(dev, gfp, 0)) + return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp); if (!page) @@ -327,7 +329,6 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, goto out_free_pages; } memset(ret, 0, size); -done: *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); return page; out_free_pages: From 849facea92fa68d9292f9b06d7c4ee9e7a06b8dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Oct 2020 11:04:08 +0200 Subject: [PATCH 62/63] dma-direct: simplify the DMA_ATTR_NO_KERNEL_MAPPING handling Use and entirely separate code path for the DMA_ATTR_NO_KERNEL_MAPPING path. This avoids any confusion about the ret type, and avoids lots of attr checks and helpers that can be significantly simplified now. It also ensures that common handling is applied to architetures still using the arch alloc/free hooks. Signed-off-by: Christoph Hellwig --- include/linux/dma-map-ops.h | 13 ----- kernel/dma/direct.c | 108 ++++++++++++++---------------------- 2 files changed, 43 insertions(+), 78 deletions(-) diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index 33c6e24707a9..8029f7e04145 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -219,19 +219,6 @@ static inline bool dev_is_dma_coherent(struct device *dev) } #endif /* CONFIG_ARCH_HAS_DMA_COHERENCE_H */ -/* - * Check if an allocation needs to be marked uncached to be coherent. - */ -static __always_inline bool dma_alloc_need_uncached(struct device *dev, - unsigned long attrs) -{ - if (dev_is_dma_coherent(dev)) - return false; - if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) - return false; - return true; -} - void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index eddcb70251c9..b92d08e65999 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -75,39 +75,6 @@ static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit); } -/* - * Decrypting memory is allowed to block, so if this device requires - * unencrypted memory it must come from atomic pools. - */ -static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp, - unsigned long attrs) -{ - if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return false; - if (gfpflags_allow_blocking(gfp)) - return false; - if (force_dma_unencrypted(dev)) - return true; - if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return false; - if (dma_alloc_need_uncached(dev, attrs)) - return true; - return false; -} - -static inline bool dma_should_free_from_pool(struct device *dev, - unsigned long attrs) -{ - if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL)) - return true; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) - return false; - if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP)) - return true; - return false; -} - static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size, gfp_t gfp) { @@ -170,16 +137,36 @@ void *dma_direct_alloc(struct device *dev, size_t size, void *ret; int err; - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) - return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); - size = PAGE_ALIGN(size); if (attrs & DMA_ATTR_NO_WARN) gfp |= __GFP_NOWARN; - if (dma_should_alloc_from_pool(dev, gfp, attrs)) + if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && + !force_dma_unencrypted(dev)) { + page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO); + if (!page) + return NULL; + /* remove any dirty cache lines on the kernel alias */ + if (!PageHighMem(page)) + arch_dma_prep_coherent(page, size); + *dma_handle = phys_to_dma_direct(dev, page_to_phys(page)); + /* return the page pointer as the opaque cookie */ + return page; + } + + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev)) + return arch_dma_alloc(dev, size, dma_handle, gfp, attrs); + + /* + * Remapping or decrypting memory may block. If either is required and + * we can't block, allocate the memory from the atomic pools. + */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + !gfpflags_allow_blocking(gfp) && + (force_dma_unencrypted(dev) || + (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && !dev_is_dma_coherent(dev)))) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); /* we always manually zero the memory once we are done */ @@ -187,18 +174,8 @@ void *dma_direct_alloc(struct device *dev, size_t size, if (!page) return NULL; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && - !force_dma_unencrypted(dev)) { - /* remove any dirty cache lines on the kernel alias */ - if (!PageHighMem(page)) - arch_dma_prep_coherent(page, size); - /* return the page pointer as the opaque cookie */ - ret = page; - goto done; - } - if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) || + !dev_is_dma_coherent(dev)) || (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) { /* remove any dirty cache lines on the kernel alias */ arch_dma_prep_coherent(page, size); @@ -241,7 +218,7 @@ void *dma_direct_alloc(struct device *dev, size_t size, memset(ret, 0, size); if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - dma_alloc_need_uncached(dev, attrs)) { + !dev_is_dma_coherent(dev)) { arch_dma_prep_coherent(page, size); ret = arch_dma_set_uncached(ret, size); if (IS_ERR(ret)) @@ -269,18 +246,6 @@ void dma_direct_free(struct device *dev, size_t size, { unsigned int page_order = get_order(size); - if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && - !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && - dma_alloc_need_uncached(dev, attrs)) { - arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); - return; - } - - /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ - if (dma_should_free_from_pool(dev, attrs) && - dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) - return; - if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) && !force_dma_unencrypted(dev)) { /* cpu_addr is a struct page cookie, not a kernel address */ @@ -288,6 +253,18 @@ void dma_direct_free(struct device *dev, size_t size, return; } + if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) && + !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) && + !dev_is_dma_coherent(dev)) { + arch_dma_free(dev, size, cpu_addr, dma_addr, attrs); + return; + } + + /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size))) + return; + if (force_dma_unencrypted(dev)) set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); @@ -305,7 +282,8 @@ struct page *dma_direct_alloc_pages(struct device *dev, size_t size, struct page *page; void *ret; - if (dma_should_alloc_from_pool(dev, gfp, 0)) + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && + force_dma_unencrypted(dev) && !gfpflags_allow_blocking(gfp)) return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp); page = __dma_direct_alloc_pages(dev, size, gfp); @@ -344,7 +322,7 @@ void dma_direct_free_pages(struct device *dev, size_t size, void *vaddr = page_address(page); /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ - if (dma_should_free_from_pool(dev, 0) && + if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) && dma_free_from_pool(dev, vaddr, size)) return; From 2a410d09417b5344ab1f3cf001ac73a1daf8dcce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 13 Oct 2020 13:27:00 +0200 Subject: [PATCH 63/63] ARM/ixp4xx: add a missing include of dma-map-ops.h Compilation of ixp4xx_defconfig fails with: arch/arm/mach-ixp4xx/common.c: In function 'ixp4xx_platform_notify_remove': arch/arm/mach-ixp4xx/common.c:291:3: error: implicit declaration of function 'dmabounce_unregister_dev' [-Werror=implicit-function-declaration] 291 | dmabounce_unregister_dev(dev); | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/arm/mach-ixp4xx/common.c: In function 'ixp4xx_platform_notify': arch/arm/mach-ixp4xx/common.c:307:3: error: implicit declaration of function 'dmabounce_register_dev' [-Werror=implicit-function-declaration] 307 | dmabounce_register_dev(dev, 2048, 4096, ixp4xx_needs_bounce); Add a missing include that is needed because of the header reshuffle. Fixes: 0a0f0d8be76d ("dma-mapping: split ") Reported-by: Guenter Roeck Signed-off-by: Christoph Hellwig --- arch/arm/mach-ixp4xx/common.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c index 184262d660ba..000f672a94c9 100644 --- a/arch/arm/mach-ixp4xx/common.c +++ b/arch/arm/mach-ixp4xx/common.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include