summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
author Barry Song <baohua@kernel.org> 2026-03-01 06:13:16 +0800
committer Marek Szyprowski <m.szyprowski@samsung.com> 2026-03-13 23:47:31 +0100
commit d7eafe655b741dfc241d5b920f6d2cea45b568d9 (patch)
tree 8185e14e3eb80484ccd4700c2756c33f62dedb1d /kernel
parent cf875c4b6863fd64054e1c3550c349eac09c4f35 (diff)
dma-mapping: Separate DMA sync issuing and completion waiting
Currently, arch_sync_dma_for_cpu and arch_sync_dma_for_device always wait for the completion of each DMA buffer. That is, issuing the DMA sync and waiting for completion is done in a single API call.

For scatter-gather lists with multiple entries, this means issuing and waiting is repeated for each entry, which can hurt performance. Architectures like ARM64 may be able to issue all DMA sync operations for all entries first and then wait for completion together.

To address this, arch_sync_dma_for_* now batches DMA operations and performs a flush afterward. On ARM64, the flush is implemented with a dsb instruction in arch_sync_dma_flush(). On other architectures, arch_sync_dma_flush() is currently a nop.

Cc: Leon Romanovsky <leon@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Ada Couprie Diaz <ada.coupriediaz@arm.com>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: Oleksandr Tyshchenko <oleksandr_tyshchenko@epam.com>
Cc: Tangquan Zheng <zhengtangquan@oppo.com>
Reviewed-by: Juergen Gross <jgross@suse.com> # drivers/xen/swiotlb-xen.c
Tested-by: Xueyuan Chen <xueyuan.chen21@gmail.com>
Signed-off-by: Barry Song <baohua@kernel.org>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Link: https://lore.kernel.org/r/20260228221316.59934-1-21cnbao@gmail.com
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/dma/Kconfig 3
-rw-r--r-- kernel/dma/direct.c 6
-rw-r--r-- kernel/dma/direct.h 9
-rw-r--r-- kernel/dma/swiotlb.c 7
4 files changed, 21 insertions, 4 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 159900736f25..bfef21b4a9ae 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -72,6 +72,9 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
+config ARCH_HAS_BATCHED_DMA_SYNC
+ bool
+
#
# Select this option if the architecture assumes DMA devices are coherent
# by default.
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 8f43a930716d..c7666e5d5e7c 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -406,6 +406,8 @@ void dma_direct_sync_sg_for_device(struct device *dev,
arch_sync_dma_for_device(paddr, sg->length,
dir);
}
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
}
#endif
@@ -427,8 +429,10 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
swiotlb_sync_single_for_cpu(dev, paddr, sg->length, dir);
}
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
+ }
}
/*
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
index f476c63b668c..f925a7e8b000 100644
--- a/kernel/dma/direct.h
+++ b/kernel/dma/direct.h
@@ -60,8 +60,10 @@ static inline void dma_direct_sync_single_for_device(struct device *dev,
swiotlb_sync_single_for_device(dev, paddr, size, dir);
- if (!dev_is_dma_coherent(dev))
+ if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_device(paddr, size, dir);
+ arch_sync_dma_flush();
+ }
}
static inline void dma_direct_sync_single_for_cpu(struct device *dev,
@@ -71,6 +73,7 @@ static inline void dma_direct_sync_single_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev)) {
arch_sync_dma_for_cpu(paddr, size, dir);
+ arch_sync_dma_flush();
arch_sync_dma_for_cpu_all();
}
@@ -106,8 +109,10 @@ static inline dma_addr_t dma_direct_map_phys(struct device *dev,
}
if (!dev_is_dma_coherent(dev) &&
- !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO)))
+ !(attrs & (DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_MMIO))) {
arch_sync_dma_for_device(phys, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
err_overflow:
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index d8e6f1d889d5..1105db1689d5 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -867,6 +867,9 @@ static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size
if (orig_addr == INVALID_PHYS_ADDR)
return;
+ if (dir == DMA_FROM_DEVICE && !dev_is_dma_coherent(dev))
+ arch_sync_dma_flush();
+
/*
* It's valid for tlb_offset to be negative. This can happen when the
* "offset" returned by swiotlb_align_offset() is non-zero, and the
@@ -1595,8 +1598,10 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
return DMA_MAPPING_ERROR;
}
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
arch_sync_dma_for_device(swiotlb_addr, size, dir);
+ arch_sync_dma_flush();
+ }
return dma_addr;
}