Background

While developing a driver for a heterogeneous system recently, I ran into many problems and points of confusion, so I wanted to understand how Linux memory allocation works.

Kernel: Linux 4.9

Device Ops Initialization

Memory allocations such as DMA generally try to go through the dev->ops.xxxx path, so this section traces where dev.ops.xxx comes from.

Taking ARM as an example:

of_platform_bus_create
|--- of_platform_device_create_pdata					// create the device from the device node
	|--- of_dma_configure								// defaults: dma_base = 0, size = 4G
		|--- of_iommu_configure							// get iommu_ops from the "iommus" property
		|--- arch_setup_dma_ops
			|--- arm_setup_iommu_dma_ops				// returns true when an IOMMU is present
				|--- arm_iommu_create_mapping			// create a mapping
					|--- iommu_domain_alloc
					__arm_iommu_attach_device
					|--- iommu_attach_device
					|--- dev->archdata.mapping			// initialized here
				true:  arm_get_iommu_dma_map_ops		// with an IOMMU, returns iommu_ops (dma-mapping.c)
				false: arm_get_dma_map_ops				// without an IOMMU, returns arm_dma_ops (dma-mapping.c)
				set_dma_ops								// sets dev->archdata.dma_ops = ops

From the initialization flow above we can conclude:

  • if the device_node defines iommus:
    • dev->archdata.dma_ops = iommu_ops
    • dev->archdata.mapping = mapping
  • else:
    • dev->archdata.dma_ops = arm_dma_ops

mapping: the mapping bookkeeping, used for IOVA allocation.

domain: analogous to an MMU page table; it manages the mapping of the whole address space. The Allwinner IOMMU currently supports only one domain, and correspondingly only one mapping.

As you can see, the main thing initialization does is set up each device's archdata.
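
So on the allocation side, picking the ops boils down to a NULL check on archdata. A condensed sketch of the dispatch, matching the get_dma_ops expansion shown in the dma_alloc_coherent call chain later in this article (the real kernel splits this across a helper):

static inline struct dma_map_ops *get_dma_ops(struct device *dev)
{
	/* set earlier by set_dma_ops() during of_platform device creation */
	if (dev && dev->archdata.dma_ops)
		return dev->archdata.dma_ops;
	/* fallback when the device has no IOMMU-specific ops */
	return &arm_dma_ops;
}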

Reserved Memory

Code location: drivers/of/of_reserved_mem.c

Maximum number of reserved regions:

#define MAX_RESERVED_REGIONS    16
static struct reserved_mem reserved_mem[MAX_RESERVED_REGIONS];
static int reserved_mem_count;

In other words, the reserved-memory node can carve out at most 16 regions; to reserve more, this macro must be enlarged.

Reserved Memory Initialization

Function: fdt_init_reserved_mem

ARM32:

start_kernel
	setup_arch
		arm_memblock_init/arm64_memblock_init
			early_init_fdt_scan_reserved_mem
				early_init_dt_reserve_memory_arch (reserve the regions in the FDT memory reserve map of initial_boot_params)
				of_scan_flat_dt (scan the flattened device tree)
					__fdt_scan_reserved_mem
						return immediately if the node is not under 'reserved-memory'
						__reserved_mem_reserve_reg
							early_init_dt_reserve_memory_arch (reserve each region)
				fdt_init_reserved_mem

__reserved_mem_reserve_reg

Location: drivers/of/fdt.c

Pseudocode:

static int __init __reserved_mem_reserve_reg(unsigned long node,
                                             const char *uname)
{
    int t_len = (dt_root_addr_cells + dt_root_size_cells) * sizeof(__be32);
    phys_addr_t base, size;
    int len;
    const __be32 *prop;
    int first = 1;
    bool nomap;

    prop = of_get_flat_dt_prop(node, "reg", &len);
    if (!prop)
        return -ENOENT;

    if (len && len % t_len != 0)
        return -EINVAL;

    nomap = of_get_flat_dt_prop(node, "no-map", NULL) != NULL;

    while (len >= t_len) {
        /* read base and size from the "reg" property */
        base = dt_mem_next_cell(dt_root_addr_cells, &prop);
        size = dt_mem_next_cell(dt_root_size_cells, &prop);

        /* reserve the region via early_init_dt_reserve_memory_arch */
        if (size &&
            early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
            pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n",
                uname, &base, (unsigned long)size / SZ_1M);
        else
            pr_info("Reserved memory: failed to reserve memory for node '%s': base %pa, size %ld MiB\n",
                uname, &base, (unsigned long)size / SZ_1M);

        len -= t_len;
        if (first) {
            /* record the first region so it can be initialized again later */
            fdt_reserved_mem_save_node(node, uname, base, size);
            first = 0;
        }
    }
    return 0;
}

int __init __weak early_init_dt_reserve_memory_arch(phys_addr_t base,
                                       phys_addr_t size, bool nomap)
{
    /* "no-map" removes the region from memblock entirely, so it is invisible to Linux */
    if (nomap)
        return memblock_remove(base, size);
    return memblock_reserve(base, size);
}
void __init fdt_reserved_mem_save_node(unsigned long node, const char *uname,
                                       phys_addr_t base, phys_addr_t size)
{
    struct reserved_mem *rmem = &reserved_mem[reserved_mem_count];

    /* this is where the MAX_RESERVED_REGIONS limit bites */
    if (reserved_mem_count == ARRAY_SIZE(reserved_mem)) {
        pr_err("Reserved memory: not enough space for all defined regions.\n");
        return;
    }

    rmem->fdt_node = node;
    rmem->name = uname;
    rmem->base = base;
    rmem->size = size;
    reserved_mem_count++;
}

fdt_init_reserved_mem

Pseudocode:

void __init fdt_init_reserved_mem(void)
{
    int i;

    /* check for overlapping reserved regions */
    __rmem_check_for_overlap();

    for (i = 0; i < reserved_mem_count; i++) {
        struct reserved_mem *rmem = &reserved_mem[i];
        unsigned long node = rmem->fdt_node;
        int len;
        const __be32 *prop;
        int err = 0;

        prop = of_get_flat_dt_prop(node, "phandle", &len);
        if (!prop)
            prop = of_get_flat_dt_prop(node, "linux,phandle", &len);
        if (prop)
            rmem->phandle = of_read_number(prop, len/4);

        /* dynamic regions ("size" but no "reg") are allocated here */
        if (rmem->size == 0)
            err = __reserved_mem_alloc_size(node, rmem->name,
                         &rmem->base, &rmem->size);

        /* initialize this region via its compatible's initfn */
        if (err == 0)
            __reserved_mem_init_node(rmem);
    }
}

/*
 * Calls the initfn registered for the node's compatible string
 * (provided the node sets compatible = "xxxxxx").
 * Init functions are registered with RESERVEDMEM_OF_DECLARE.
 * 4.9 contains the following declarations:
 *      ion_of.c:249:RESERVEDMEM_OF_DECLARE(ion, "ion-region", rmem_ion_setup);
 *      bman_ccsr.c:133:RESERVEDMEM_OF_DECLARE(bman_fbpr, "fsl,bman-fbpr", bman_fbpr);
 *      qman_ccsr.c:419:RESERVEDMEM_OF_DECLARE(qman_fqd, "fsl,qman-fqd", qman_fqd);
 *      qman_ccsr.c:430:RESERVEDMEM_OF_DECLARE(qman_pfdr, "fsl,qman-pfdr", qman_pfdr);
 *      dma-contiguous.c:279:RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
 *      dma-coherent.c:337:RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
 *
 * The one in dma-coherent.c is what we end up calling.
 */
static int __init __reserved_mem_init_node(struct reserved_mem *rmem)
{
    extern const struct of_device_id __reservedmem_of_table[];
    const struct of_device_id *i;

    /* the linker script places __rmem_of_table_sentinel right after __reservedmem_of_table */
    for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) {
        reservedmem_of_init_fn initfn = i->data;
        const char *compat = i->compatible;

        if (!of_flat_dt_is_compatible(rmem->fdt_node, compat))
            continue;

        if (initfn(rmem) == 0) {
            pr_info("initialized node %s, compatible id %s\n",
                rmem->name, compat);
            return 0;
        }
    }
    return -ENOENT;
}

// __reserved_mem_init_node -> dma-coherent.c:rmem_dma_setup(rmem)
static int __init rmem_dma_setup(struct reserved_mem *rmem)
{
    unsigned long node = rmem->fdt_node;

    if (of_get_flat_dt_prop(node, "reusable", NULL))
        return -EINVAL;

#ifdef CONFIG_ARM
    if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
        pr_err("Reserved memory: regions without no-map are not yet supported\n");
        return -EINVAL;
    }
#endif

    rmem->ops = &rmem_dma_ops;
    pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
        &rmem->base, (unsigned long)rmem->size / SZ_1M);
    return 0;
}
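
Out-of-tree code can hook a region the same way dma-coherent.c does. A minimal sketch, assuming a hypothetical compatible string "vendor,my-pool" (the name and init function here are made up for illustration):

#include <linux/of_reserved_mem.h>

/* called from fdt_init_reserved_mem() for reserved-memory nodes that
 * declare compatible = "vendor,my-pool"; base/size are already filled in */
static int __init my_pool_setup(struct reserved_mem *rmem)
{
	pr_info("my-pool at %pa, size %pa\n", &rmem->base, &rmem->size);
	return 0;
}
RESERVEDMEM_OF_DECLARE(my_pool, "vendor,my-pool", my_pool_setup);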

Binding Reserved Memory to a Device

This assigns the idx-th node of the device's memory-region property to the device:

of_reserved_mem_device_init_by_idx(struct device *dev, struct device_node *np, int idx)
	rmem = __find_rmem(target);
	rmem->ops->device_init(rmem, dev);
		rmem_dma_device_init
			dma_init_coherent_memory (sets up the address mapping, virtual address bookkeeping, etc.)
				dma_mem->virt_base = memremap(phys_addr, size, MEMREMAP_WC);
				                   |- ioremap(phys_addr, size);
				dma_mem->device_base = phys_addr;
				dma_mem->pfn_base = PFN_DOWN(phys_addr);
			dma_assign_coherent_memory
				dev->dma_mem = mem;
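
Putting this together, a driver probe would typically look like the hypothetical sketch below (binding entry 0 of memory-region; error handling kept minimal):

#include <linux/of_reserved_mem.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/sizes.h>

static int my_probe(struct platform_device *pdev)
{
	struct device *dev = &pdev->dev;
	dma_addr_t dma;
	void *cpu;
	int ret;

	/* installs dev->dma_mem via rmem->ops->device_init() */
	ret = of_reserved_mem_device_init_by_idx(dev, dev->of_node, 0);
	if (ret)
		return ret;

	/* now satisfied from the reserved pool via dma_alloc_from_coherent() */
	cpu = dma_alloc_coherent(dev, SZ_4K, &dma, GFP_KERNEL);
	if (!cpu) {
		of_reserved_mem_device_release(dev);
		return -ENOMEM;
	}

	return 0;
}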

memremap Flow

memremap(phys_addr, size, MEMREMAP_WC);
|--- ioremap_wc(offset, size);
	 |---- __arm_ioremap_caller(res_cookie, size, MT_DEVICE_WC, __builtin_return_address(0))
	 	  |---- __arm_ioremap_pfn_caller(pfn, offset, size, MT_DEVICE_WC, NULL)

__arm_ioremap_pfn_caller performs the actual mapping; pseudocode:

static void __iomem * __arm_ioremap_pfn_caller(unsigned long pfn,
    unsigned long offset, size_t size, unsigned int mtype, void *caller)
{
    const struct mem_type *type;
    int err;
    unsigned long addr;
    struct vm_struct *area;
    phys_addr_t paddr = __pfn_to_phys(pfn);

    type = get_mem_type(mtype);
    if (!type)
        return NULL;

    /*
     * Page align the mapping size, taking account of any offset.
     */
    size = PAGE_ALIGN(offset + size);

    /*
     * Try to reuse one of the static mappings where possible (details omitted).
     */
    if (size && !(sizeof(phys_addr_t) == 4 && pfn >= 0x100000)) {
        struct static_vm *svm;

        svm = find_static_vm_paddr(paddr, size, mtype);
        if (svm) {
            addr = (unsigned long)svm->vm.addr;
            addr += paddr - svm->vm.phys_addr;
            return (void __iomem *) (offset + addr);
        }
    }

    /*
     * Don't allow RAM to be mapped with mismatched attributes.
     * pfn_valid() checks whether the memblock.memory region the address
     * belongs to has the MEMBLOCK_NOMAP flag set.
     */
    if (WARN_ON(pfn_valid(pfn) && mtype != MT_MEMORY_RW))
        return NULL;

    /*
     * Find a free range of kernel virtual addresses in the vmalloc area.
     */
    area = get_vm_area_caller(size, VM_IOREMAP, caller);
    if (!area)
        return NULL;
    addr = (unsigned long)area->addr;
    area->phys_addr = paddr;

#if !defined(CONFIG_SMP) && !defined(CONFIG_ARM_LPAE)
    if (DOMAIN_IO == 0 &&
        (((cpu_architecture() >= CPU_ARCH_ARMv6) && (get_cr() & CR_XP)) ||
           cpu_is_xsc3()) && pfn >= 0x100000 &&
           !((paddr | size | addr) & ~SUPERSECTION_MASK)) {
        area->flags |= VM_ARM_SECTION_MAPPING;
        err = remap_area_supersections(addr, pfn, size, type);
    } else if (!((paddr | size | addr) & ~PMD_MASK)) {
        area->flags |= VM_ARM_SECTION_MAPPING;
        err = remap_area_sections(addr, pfn, size, type);
    } else
#endif
        /*
         * Wire the virtual range to the physical range in the page tables.
         */
        err = ioremap_page_range(addr, addr + size, paddr,
                        __pgprot(type->prot_pte));

    if (err) {
        vunmap((void *)addr);
        return NULL;
    }

    flush_cache_vmap(addr, addr + size);
    return (void __iomem *) (offset + addr);
}

DMA-Mapping Interfaces

dma_alloc_coherent

I was curious about the call path of dma_alloc_coherent, so it is analyzed separately here.

Call chain:

dma_alloc_coherent(dev, size, dma_handle, flag)
|--- dma_alloc_attrs(dev, size, dma_handle, flag, 0)
	|--- dma_alloc_from_coherent(dev, size, dma_handle, &cpu_addr)  -> Path 1: the per-device route; on success, return directly
	|--- ops = get_dma_ops(dev)		// fetch dev->archdata.dma_ops, assigned at device creation based on the presence of an IOMMU
		|--- if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops;	// with an IOMMU
			 else return &arm_dma_ops;											// without an IOMMU
	|--- cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
		|--- arm_dma_alloc(dev, size, dma_handle, flag, attrs);
			|--- __dma_alloc(dev, size, handle, gfp, prot, false, attrs, NULL);

Path 1

Function: dma_alloc_from_coherent

Pseudocode:

int dma_alloc_from_coherent(struct device *dev, ssize_t size,
                            dma_addr_t *dma_handle, void **ret)
{
    struct dma_coherent_mem *mem;
    int order = get_order(size);
    unsigned long flags;
    int pageno;
    int dma_memory_map;

    if (!dev)
        return 0;

    mem = dev->dma_mem;                 /* the per-device memory reserved via the device tree */
    if (!mem)
        return 0;

    *ret = NULL;
    spin_lock_irqsave(&mem->spinlock, flags);
    if (unlikely(size > (mem->size << PAGE_SHIFT)))
        goto err;

    /* find a free region in the bitmap */
    pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
    if (unlikely(pageno < 0))
        goto err;

    /*
     * Compute the bus address and the virtual address; the virtual address
     * was already mapped from the vmalloc area earlier by
     * memremap/__arm_ioremap_pfn_caller.
     */
    *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
    *ret = mem->virt_base + (pageno << PAGE_SHIFT);
    dma_memory_map = (mem->flags & DMA_MEMORY_MAP);
    spin_unlock_irqrestore(&mem->spinlock, flags);
    if (dma_memory_map)
        memset(*ret, 0, size);
    else
        memset_io(*ret, 0, size);

    return 1;

err:
    spin_unlock_irqrestore(&mem->spinlock, flags);
    /*
     * In the case where the allocation can not be satisfied from the
     * per-device area, try to fall back to generic memory if the
     * constraints allow it.
     */
    return mem->flags & DMA_MEMORY_EXCLUSIVE;
}

Coherent DMA

No manual cache flushing is needed; cache coherency is guaranteed by the hardware. Usually the buffer is marked non-cacheable in the page tables, and the CPU reads and writes DRAM directly. This suits buffers that are written rarely and read often; frequently updating the data in DRAM is relatively inefficient.

dma_alloc_coherent

dma_alloc_coherent(dev, size, dma, flag)

  1. dev: the requesting device (determines whether dma_ops or iommu_ops is used)
  2. size: allocation size
  3. dma: the returned I/O (bus) address
  4. flag: allocation flags, as for kmalloc
  5. return value: a usable kernel virtual address

Recommended for allocations of at least one PAGE_SIZE; for small buffers, prefer the dma_pool interface.

dma_free_coherent

dma_free_coherent(dev, size, addr, dma)

  1. dev: the requesting device
  2. size: allocation size
  3. addr: the virtual address returned by the alloc
  4. dma: the I/O address returned by the alloc
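
A minimal pairing of the two calls (dev and the surrounding error handling are assumed to come from the enclosing driver):

dma_addr_t dma;
void *vaddr;

vaddr = dma_alloc_coherent(dev, SZ_64K, &dma, GFP_KERNEL);
if (!vaddr)
	return -ENOMEM;

/* hand 'dma' to the hardware; the CPU accesses the buffer via 'vaddr' */

dma_free_coherent(dev, SZ_64K, vaddr, dma);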

dma_pool_create

dma_pool_create(name, dev, size, align, boundary)

  1. name: pool name
  2. dev: the device that will use the memory
  3. size: size of the blocks handed out by the pool
  4. align: alignment, must be a power of two
  5. boundary: boundary; no allocation will cross it
  6. return value: a struct dma_pool * handle

dma_pool_alloc

dma_pool_alloc(pool, flags, dma)

  1. pool: the handle obtained above
  2. flags: GFP_* flags
  3. dma: the returned I/O address
  4. return value: virtual address

dma_pool_free

dma_pool_free(pool, vaddr, dma)

Parameters as above; vaddr is the address returned by dma_pool_alloc.

dma_pool_destroy

dma_pool_destroy(pool)
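
A sketch of the full pool life cycle (the block size and pool name are made up):

struct dma_pool *pool;
dma_addr_t dma;
void *desc;

/* 32-byte blocks, 32-byte aligned, no boundary constraint */
pool = dma_pool_create("my-desc", dev, 32, 32, 0);
if (!pool)
	return -ENOMEM;

desc = dma_pool_alloc(pool, GFP_KERNEL, &dma);
if (!desc) {
	dma_pool_destroy(pool);
	return -ENOMEM;
}

/* ... program the hardware with 'dma', fill 'desc' via the CPU ... */

dma_pool_free(pool, desc, dma);
dma_pool_destroy(pool);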

Streaming DMA

Cache maintenance is usually required.

Streaming DMA mapping functions may be used in interrupt context.

The usual pattern: map before the transfer, unmap after it completes. The map/unmap code performs the cache-coherency operations, so calling the map/unmap interfaces is normally enough; there is no need to flush the cache by hand.

Streaming DMA offers two kinds of mapping: one maps a single memory region, the other maps a scatterlist.

Interfaces for mapping a single region:

  1. dma_{map,unmap}_single: maps by virtual address; cannot map high memory
  2. dma_{map,unmap}_page: maps by page, which lifts the high-memory restriction above

The return value of these mapping functions must be checked with dma_mapping_error(); see the sketch under dma_map/unmap_single below.

Interfaces for mapping a scatterlist:

  1. dma_{map,unmap}_sg: maps a scatterlist, possibly merging several consecutive sglist entries into one

Synchronization interfaces:

  1. dma_sync_{single,sg}_for_cpu: after the CPU receives data written by DMA (invalidates the cache)
  2. dma_sync_{single,sg}_for_device: after the CPU has produced data, hand the buffer back to the device (cleans the cache)
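
For a buffer that stays mapped across several transfers, ownership is passed back and forth with the sync calls instead of re-mapping each time; a sketch assuming an existing dma/len mapping in the DMA_FROM_DEVICE direction:

/* device has finished writing: give the buffer to the CPU (invalidate) */
dma_sync_single_for_cpu(dev, dma, len, DMA_FROM_DEVICE);
/* ... CPU reads the freshly transferred data ... */

/* hand the buffer back before the next transfer */
dma_sync_single_for_device(dev, dma, len, DMA_FROM_DEVICE);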

Transfer Directions

  1. DMA_BIDIRECTIONAL: use when the direction is unknown; coherent mappings are implicitly DMA_BIDIRECTIONAL
  2. DMA_TO_DEVICE: data flows from memory to the device (DMA reads from memory, so clean the cache to write dirty lines back to DRAM)
  3. DMA_FROM_DEVICE: data flows from the device to memory (DMA writes to memory, so invalidate the cache lines covering the buffer)
  4. DMA_NONE: for debugging; the direction may be initialized to this value

dma_map/unmap_single

dma_map_single(dev, addr, size, dir)

  1. dev: the device
  2. addr: virtual address
  3. size: size
  4. dir: direction
  5. return value: the I/O (bus) address

dma_unmap_single(dev, dma, size, direction)
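A typical transmit-path sketch, including the mandatory dma_mapping_error() check (buf and len are assumed to come from the caller):

dma_addr_t dma;

dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
	return -ENOMEM;

/* ... start the transfer and wait for it to complete ... */

dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);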

dma_map/unmap_page

dma_map_page(dev, page, offset, size, direction)

Parameters are essentially the same as dma_map_single, but the mapping is page-based.

dma_unmap_page(dev, dma, size, direction);

dma_map/unmap_sg

dma_map_sg(dev, sglist, nents, direction);

dma_unmap_sg(dev, sglist, nents, direction);

  1. dev: the device
  2. sglist: pointer to the scatterlist
  3. nents: number of entries in sglist
  4. direction: direction
  5. return value: the number of sg entries actually mapped; zero means failure

To obtain the bus address and length of each mapped segment, use sg_dma_address and sg_dma_len:

int i, count = dma_map_sg(dev, sglist, nents, direction);
struct scatterlist *sg;
for_each_sg(sglist, sg, count, i) {
        hw_address[i] = sg_dma_address(sg);
        hw_len[i] = sg_dma_len(sg);
}

With scatterlists, several discontiguous regions can be combined into a single mapping.

DMA-Buf Interfaces

dma-buf exists to solve buffer sharing between drivers; in essence it is the combination of a buffer with a file.

Mainly based on the article at https://blog.csdn.net/hexiaolong2009/article/details/102596772.

Typically:

  1. exporter: provides and allocates the buffer
  2. importer: kernel-space consumer
  3. user: user-space consumer

Implementing a dma-buf

  1. Implement a dma_buf_ops: map_dma_buf/unmap_dma_buf, map, mmap, release
  2. Define an export info with DEFINE_DMA_BUF_EXPORT_INFO(info) and fill it in:
    1. info.ops = &ops;
    2. info.size = xxx;
    3. info.flags = O_CLOEXEC;
    4. info.priv = xxx;
  3. Create the dma-buf from the info with dma_buf_export: struct dma_buf *dmabuf = dma_buf_export(&info);

kmap/vmap Interfaces

Used mainly to map and access the buffer from the kernel:

  1. dma_buf_kmap/dma_buf_kunmap: map one page at a time; may sleep
  2. dma_buf_kmap_atomic/dma_buf_kunmap_atomic: map one page at a time; will not sleep
  3. dma_buf_vmap/dma_buf_vunmap: map multiple pages at once
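
A minimal kernel-side access sketch using the vmap pair (in 4.9, dma_buf_vmap takes only the dma_buf, and dma_buf_vunmap additionally takes the returned address):

void *vaddr = dma_buf_vmap(dmabuf);
if (vaddr) {
	/* the whole buffer is now linearly addressable by the kernel */
	pr_info("first bytes: %16ph\n", vaddr);
	dma_buf_vunmap(dmabuf, vaddr);
}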

attach Interfaces

Gives the DMA device access to the physical memory.

Binding APIs

  1. dma_buf_attach
    1. establishes a connection between a dma-buf and a device; the connection is stored in a newly created dma_buf_attachment object
    2. corresponds to the attach callback in dma_buf_ops
  2. dma_buf_map_attachment: mainly generates the sg_table and synchronizes the cache

dma_buf_attach must be called before dma_buf_map_attachment.

The corresponding reverse operations:

  1. dma_buf_unmap_attachment
  2. dma_buf_detach

Driver Demo

#include <linux/dma-buf.h>
#include <linux/module.h>
#include <linux/slab.h>

struct dma_buf *dmabuf_exported;
EXPORT_SYMBOL(dmabuf_exported);

static int exporter_attach(struct dma_buf *dmabuf, struct device *dev,
			struct dma_buf_attachment *attachment)
{
	pr_info("dmabuf attach device: %s\n", dev_name(dev));
	return 0;
}

static void exporter_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attachment)
{
	pr_info("dmabuf detach device: %s\n", dev_name(attachment->dev));
}

static struct sg_table *exporter_map_dma_buf(struct dma_buf_attachment *attachment,
					 enum dma_data_direction dir)
{
	void *vaddr = attachment->dmabuf->priv;
	struct sg_table *table;

	table = kmalloc(sizeof(*table), GFP_KERNEL);
	if (!table)
		return NULL;

	if (sg_alloc_table(table, 1, GFP_KERNEL)) {
		kfree(table);
		return NULL;
	}

	sg_dma_len(table->sgl) = PAGE_SIZE;
	sg_dma_address(table->sgl) = dma_map_single(NULL, vaddr, PAGE_SIZE, dir);

	return table;
}

static void exporter_unmap_dma_buf(struct dma_buf_attachment *attachment,
			       struct sg_table *table,
			       enum dma_data_direction dir)
{
	dma_unmap_single(NULL, sg_dma_address(table->sgl), PAGE_SIZE, dir);
	sg_free_table(table);
	kfree(table);
}

...

static const struct dma_buf_ops exp_dmabuf_ops = {
	.attach = exporter_attach,
	.detach = exporter_detach,
	.map_dma_buf = exporter_map_dma_buf,
	.unmap_dma_buf = exporter_unmap_dma_buf,
	...
};

static struct dma_buf *exporter_alloc_page(void)
{
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
	struct dma_buf *dmabuf;
	void *vaddr;

	vaddr = kzalloc(PAGE_SIZE, GFP_KERNEL);
	if (!vaddr)
		return ERR_PTR(-ENOMEM);

	exp_info.ops = &exp_dmabuf_ops;
	exp_info.size = PAGE_SIZE;
	exp_info.flags = O_CLOEXEC;
	exp_info.priv = vaddr;

	dmabuf = dma_buf_export(&exp_info);
	if (IS_ERR(dmabuf)) {
		kfree(vaddr);
		return dmabuf;
	}

	sprintf(vaddr, "hello world!");

	return dmabuf;
}

static int __init exporter_init(void)
{
	dmabuf_exported = exporter_alloc_page();
	return 0; 
}

module_init(exporter_init);

Importer Demo

struct dma_buf_attachment *attachment;
struct sg_table *table;
struct device *dev;
unsigned int reg_addr, reg_size;

dev = kzalloc(sizeof(*dev), GFP_KERNEL);
dev_set_name(dev, "importer");

attachment = dma_buf_attach(dmabuf, dev);
table = dma_buf_map_attachment(attachment, DMA_BIDIRECTIONAL);

reg_addr = sg_dma_address(table->sgl);
reg_size = sg_dma_len(table->sgl);
pr_info("reg_addr = 0x%08x, reg_size = 0x%08x\n", reg_addr, reg_size);

dma_buf_unmap_attachment(attachment, table, DMA_BIDIRECTIONAL);
dma_buf_detach(dmabuf, attachment);

mmap

A user-space mmap on the dma-buf fd ends up calling the corresponding exp_dmabuf_ops->mmap.

In kernel space, dma_buf_mmap() can be called to reuse the dma-buf's mmap implementation directly, indirectly implementing the device driver's mmap file operation.

Driver demo:

#include <linux/dma-buf.h>
#include <linux/module.h>
#include <linux/miscdevice.h>
#include <linux/slab.h>
#include <linux/uaccess.h>

struct dma_buf *dmabuf_exported;
EXPORT_SYMBOL(dmabuf_exported);

static int exporter_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
	void *vaddr = dmabuf->priv;

	return remap_pfn_range(vma, vma->vm_start, virt_to_pfn(vaddr),
				PAGE_SIZE, vma->vm_page_prot);
}

...

static const struct dma_buf_ops exp_dmabuf_ops = {
	...
	.mmap = exporter_mmap,
};

...

static long exporter_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	int fd = dma_buf_fd(dmabuf_exported, O_CLOEXEC);

	if (copy_to_user((int __user *)arg, &fd, sizeof(fd)))
		return -EFAULT;

	return 0;
}
 
static struct file_operations exporter_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= exporter_ioctl,
};
 
static struct miscdevice mdev = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = "exporter",
	.fops = &exporter_fops,
};
 
static int __init exporter_init(void)
{
	...
	misc_register(&mdev);
    ...
}

static void __exit exporter_exit(void)
{
    ...
	misc_deregister(&mdev);
    ...
}

module_init(exporter_init);
module_exit(exporter_exit);

User-space program:

int main(int argc, char *argv[])
{
	int fd;
	int dmabuf_fd = 0;

	fd = open("/dev/exporter", O_RDONLY);
	ioctl(fd, 0, &dmabuf_fd);
	close(fd);

	char *str = mmap(NULL, 4096, PROT_READ, MAP_SHARED, dmabuf_fd, 0);
	printf("read from dmabuf mmap: %s\n", str);

	return 0;
}

Below, the driver uses the dma_buf_mmap() kernel API to simplify the implementation of its mmap file operation.

Compared with the driver demo above, this adds an mmap file operation and drops the ioctl.

...
static int exporter_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
	void *vaddr = dmabuf->priv;

	return remap_pfn_range(vma, vma->vm_start, virt_to_pfn(vaddr),
				PAGE_SIZE, vma->vm_page_prot);
}

...

static const struct dma_buf_ops exp_dmabuf_ops = {
	...
	.mmap = exporter_mmap,		// kept: dma_buf_mmap() calls back into this
};

...

static int exporter_misc_mmap(struct file *file, struct vm_area_struct *vma)
{
	return dma_buf_mmap(dmabuf_exported, vma, 0);
}

static struct file_operations exporter_fops = {
	.owner	= THIS_MODULE,
	.mmap	= exporter_misc_mmap,	// added
};
 
...

User-program demo:

int main(int argc, char *argv[])
{
	int fd;

	fd = open("/dev/exporter", O_RDONLY);

	char *str = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
	printf("read from /dev/exporter mmap: %s\n", str);

	close(fd);

	return 0;
}

file

A dma-buf is essentially a buffer combined with a file, and that file is one that has already been opened.

From the moment dma_buf_export() is called, this file is already open.

Moreover, it is an anonymous file, so an application cannot obtain its fd via fd = open("name"); it has to rely on the exporter driver's ioctl interface, which obtains the fd through dma_buf_fd().

Kernel APIs convert between a dma-buf and an fd:

  1. fd = dma_buf_fd(dmabuf, flags);
  2. dmabuf = dma_buf_get(fd);
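
On the importer side the conversion typically looks like this sketch (fd is assumed to have been received from the exporter, e.g. via its ioctl):

struct dma_buf *dmabuf;

dmabuf = dma_buf_get(fd);	/* takes a file reference */
if (IS_ERR(dmabuf))
	return PTR_ERR(dmabuf);

/* ... dma_buf_attach() / dma_buf_map_attachment() as shown above ... */

dma_buf_put(dmabuf);		/* drop the reference */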

get/put

Like any file, it carries a reference count (f_count). When a dma-buf is created with dma_buf_export(), the count is initialized to 1; when it drops to 0, the release callback in dma_buf_ops is triggered automatically and the dma-buf object is freed.

The usual kernel helpers for manipulating a file's reference count are fget() and fput(); dma-buf wraps them as follows:

Function        Effect
get_dma_buf     increment the refcount
dma_buf_get     increment the refcount, converting an fd into a dma_buf pointer
dma_buf_put     decrement the refcount
dma_buf_fd      refcount unchanged; only creates an fd

Why an fd is needed

  1. It lets applications access the buffer directly from user space (via mmap).
  2. It lets the buffer circulate among driver modules without copying.
  3. It reduces coupling between drivers.
