在开始之前,先看一下SSD202的内存使用范围

硬件上SSD202内置128MB内存,其中有一部分预留给MMA,MMAP以及CMA

具体的大小设置在bootargs 中

bootargs = "wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1

cma=8M

LX_MEM=0x7f00000 mma_heap=mma_heap_name0,miu=0,sz=0x1000000

mma_memblock_remove=1 highres=off mmap_reserved=fb,miu=0,sz=0x300000

max_start_off=0x7C00000,max_end_off=0x7F00000";

即最大可使用内存0x7f00000=127MB,CMA占用8M,MMA使用16M,MMAP使用3M,剩下就是kernel可使用范围

预留内存相关启动日志如下:

[ 0.000000] LXmem is 0x7f00000 PHYS_OFFSET is 0x20000000

[ 0.000000] Add mem start 0x20000000 size 0x7f00000!!!!

[ 0.000000]

[ 0.000000] LX_MEM = 0x20000000, 0x7f00000 (16*7=112+15=127MB)

[ 0.000000] LX_MEM2 = 0x0, 0x0

[ 0.000000] LX_MEM3 = 0x0, 0x0

[ 0.000000] EMAC_LEN= 0x0

[ 0.000000] DRAM_LEN= 0x0

----mmap_reserved=fb,miu=0,sz=0x300000=3M

[ 0.000000] deal_with_reserved_mmap memblock_reserve success mmap_reserved_config[0].reserved_start=

[ 0.000000] 0x27c00000 == 0x27f00000 - 3M(0x00300000)

[ 0.000000]

---mma_heap=mma_heap_name0,miu=0,sz=0x1000000=16M

[ 0.000000] deal_with_reserve_mma_heap memblock_reserve success mma_config[0].reserved_start=

[ 0.000000] 0x26c00000 == 0x27c00000 - 16M(0x1000000)

---cma size = 8M

[ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M

[ 0.000000] Memory policy: Data cache writealloc

[ 0.000000] percpu: Embedded 13 pages/cpu @c62bc000 s21208 r8192 d23848 u53248

[ 0.000000] Built 1 zonelists in Zone order, mobility grouping on. Total pages: 28162

[ 0.000000] Kernel command line: wt_board=WT2022 console=ttyS0,115200 rootfstype=squashfs,ubifs rootwait=1 cma=8M LX_MEM=0x7f00000 mma_heap=mma_heap_na 。。。。。)

[ 0.000000] PID hash table entries: 512 (order: -1, 2048 bytes)

[ 0.000000] Dentry cache hash table entries: 16384 (order: 4, 65536 bytes)

[ 0.000000] Inode-cache hash table entries: 8192 (order: 3, 32768 bytes)

[ 0.000000] Memory: 96736K/113664Kavailable (2467K kernel code, 222K rwdata, 1212K rodata, 164K init, 174K bss, 8736K reserved, 8192K cma-reserved)

从内存最顶端往下预留,最开始为3MB的MMAP,然后是16M的MMA,最后是8M的CMA区域,留给kernel的可用区域在0x26400000以下

接着看一下SSD202 kernel的虚拟映射表:

[    0.000000] Virtual kernel memory layout:
[    0.000000]     vector  : 0xffff0000 - 0xffff1000   (   4 kB)
[    0.000000]     fixmap  : 0xffc00000 - 0xfff00000   (3072 kB)
[    0.000000]     vmalloc : 0xc8000000 - 0xff800000   ( 888 MB)
[    0.000000]     lowmem  : 0xc0000000 - 0xc7f00000   ( 127 MB)
[    0.000000]     modules : 0xbf800000 - 0xc0000000   (   8 MB)
[    0.000000]       .text : 0xc0008000 - 0xc02710a8   (2469 kB)
[    0.000000]       .init : 0xc03c3000 - 0xc03ec000   ( 164 kB)
[    0.000000]       .data : 0xc03ec000 - 0xc0423bd8   ( 223 kB)
[    0.000000]        .bss : 0xc0425000 - 0xc04509e0   ( 175 kB)

vector为中断向量映射区,位于内存最高端区域

fixmap为固定映射区,即虚拟地址固定,主要的kernel初始化阶段使用,比如console,dtb等以及热补丁应用

vmalloc,虚拟内存申请的地址范围,用于给vmalloc/ioremap动态分配内存

lowmem是线性映射区,1:1映射到物理地址

vmalloc区域和lowmem区域之间有一个1MB的hole,可以防止vmalloc越界

.text、.init、.data、.bss都属于lowmem区域,也即ZONE_NORMAL;

vector、fixmap、vmalloc属于ZONE_HIGHMEM区域。

modules为内核模块(ko)的加载区,位于lowmem之下、紧邻用户空间上方(0xbf800000 - 0xc0000000),仍属于内核空间

以上预留的MMA,MMAP,CMA等空间都在lowmem区

关于CMA

Contiguous Memory Allocator, CMA,连续内存分配器,用于分配连续的大块内存

CMA分配器,会Reserve一片物理内存区域:

  1. 设备驱动不用时,内存管理系统将该区域用于分配和管理可移动类型页面;
  2. 设备驱动使用时,用于连续内存分配,此时已经分配的页面需要进行迁移;
  3. CMA并不进行内存管理,CMA area的内存最终还是要并入伙伴系统进行管理
  4. cma_alloc用来从指定的CMA area上分配count个连续的page frame,按照align对齐

此外,CMA分配器还可以与DMA子系统集成在一起,使用DMA的设备驱动程序无需使用单独的CMA API

在SSD202中,cma相关的日志如下:

------USB HOST Controller 使用

[ 0.000000] cma: Reserved 8 MiB at 0x26400000 = 0x26c00000 - 8M

[ 1.371962] Sstar-ehci-2 soc:Sstar-ehci-2: EHCI Host Controller

[ 1.377889] Sstar-ehci-2 soc:Sstar-ehci-2: new USB bus registered, assigned bus number 1

[ 1.385990] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 1.386037] cma: cma_alloc(): returned c63bd840

[ 1.386052] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 1.386068] cma: cma_alloc(): returned c63bd860

[ 1.386083] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 1.386099] cma: cma_alloc(): returned c63bd880

------DMA 使用

[ 1.748775] MSYS: DMEM request: [BDMA]:0x00000840

[ 1.753324] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 1.753367] cma: cma_alloc(): returned c63bd900

[ 1.753382] MSYS: DMEM request: [BDMA]:0x00000840 success, CPU phy:@0x26448000, virt:@0xC6448000

[ 6.942669] MSYS: DMEM request: [emac0_buff]:0x00000812

[ 6.947755] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 6.947946] cma: cma_alloc(): returned c63bd920

------ETH PHY 使用

[ 6.947962] MSYS: DMEM request: [emac0_buff]:0x00000812 success, CPU phy:@0x26449000, virt:@0xC6449000

[ 7.902325] >> [sdmmc] ms_sdmmc_probe

[ 7.906510] cma: cma_alloc(cma c0435ef0, count 1, align 0)

[ 7.906630] cma: cma_alloc(): returned c63bd940

关于测试cma代码,借用宋老师的测试用例

/* 

 * kernel module helper for testing CMA 

 * 

 * Licensed under GPLv2 or later. 

 */

#define DEBUG

#include <linux/module.h>

#include <linux/device.h>

#include <linux/fs.h>

#include <linux/miscdevice.h>

#include <linux/dma-mapping.h>

/* Number of allocation slots; slot i holds an (i + 1) MB buffer. */
#define CMA_NUM 10

/* struct device of the misc device; used as the DMA allocation context. */
static struct device *cma_dev;

/* Bus/physical addresses returned by dma_alloc_coherent(). */
static dma_addr_t dma_phys[CMA_NUM];

/* Kernel virtual addresses of the buffers; NULL marks a free slot. */
static void *dma_virt[CMA_NUM];

/* Any read request frees coherent memory, eg.
 *   cat /dev/cma_test
 *
 * Releases the lowest-index slot that still holds an allocation and
 * clears it, so repeated reads hand the buffers back one at a time.
 * Always reports EOF (returns 0) to the reader.
 */
static ssize_t

cma_test_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)

{

    int idx;

    for (idx = 0; idx < CMA_NUM; idx++)
    {
        if (!dma_virt[idx])
            continue;

        /* Slot idx was filled with an (idx + 1) MB allocation by
         * cma_test_write(); free exactly that size. */
        dma_free_coherent(cma_dev, (idx + 1) * SZ_1M, dma_virt[idx], dma_phys[idx]);

        _dev_info(cma_dev, "free virt: %p phys: %p\n", dma_virt[idx], (void *)dma_phys[idx]);

        dma_virt[idx] = NULL;
        break;
    }

    return 0;
}

/*
 * Any write request allocs coherent memory, eg.
 *   echo 0 > /dev/cma_test
 *
 * The first free slot i receives an (i + 1) MB coherent allocation, so
 * successive writes request 1M, 2M, 3M, ... until the CMA area runs out.
 * Every page of the buffer is written once to force the pages to be
 * really backed (and to exercise CMA page migration).
 *
 * Returns @count on success (including "all slots already in use"), or
 * -ENOMEM when the allocation fails.  The original version stored
 * -ENOMEM into a local `ret` that was never returned, so userspace saw
 * a successful write even when CMA was exhausted; the error is now
 * propagated.
 */
static ssize_t

cma_test_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos)

{

    int i;

    unsigned int ctc = 0;

    for (i = 0; i < CMA_NUM; i++)
    {

        if (!dma_virt[i])
        {

            dma_virt[i] = dma_alloc_coherent(cma_dev, (i + 1) * SZ_1M, &dma_phys[i], GFP_KERNEL);

            if (dma_virt[i])
            {

                void *p;

                /* touch every page in the allocated memory */

                for (p = dma_virt[i]; p < dma_virt[i] + (i + 1) * SZ_1M; p += PAGE_SIZE)
                    *(u32 *)p = ctc++;

                _dev_info(cma_dev, "[%d] alloc virt: %p phys: %p\n", i, dma_virt[i], (void *)dma_phys[i]);
            }
            else
            {

                dev_err(cma_dev, "[%d] no mem in CMA area\n", i);

                return -ENOMEM; /* report the failure instead of swallowing it */
            }
            break;
        }
    }

    return count;
}

/* read frees one held buffer, write allocates a new one; see the
 * handlers above for the exact slot semantics. */
static const struct file_operations cma_test_fops = {

    .owner = THIS_MODULE,

    .read = cma_test_read,

    .write = cma_test_write,

};

/* Registered as /dev/cma_test (see the devtmpfs log further down).
 * NOTE(review): .minor is left at 0 rather than MISC_DYNAMIC_MINOR;
 * the boot log shows registration succeeds, but confirm minor 0 is
 * intentional on this kernel. */
static struct miscdevice cma_test_misc = {

    .name = "cma_test",

    .fops = &cma_test_fops,

};

/* Module entry point: register the misc device, grab its struct device
 * for the DMA API, and reset the allocation slot table. */
static int __init cma_test_init(void)

{

    int rc;
    int slot;

    rc = misc_register(&cma_test_misc);
    if (unlikely(rc))
    {
        pr_err("failed to register cma test misc device!\n");
        return rc;
    }

    /* The misc core created a struct device for us; use it as the
     * DMA allocation context. */
    cma_dev = cma_test_misc.this_device;

    /* All-ones mask: no coherent DMA addressing restriction. */
    cma_dev->coherent_dma_mask = ~0;

    /* Statics start zeroed anyway; clear explicitly for readability. */
    for (slot = 0; slot < CMA_NUM; slot++)
        dma_virt[slot] = 0;

    _dev_info(cma_dev, "registered.\n");

    return rc;
}

/* Entry point invoked at insmod time. */
module_init(cma_test_init);

/* Module teardown: unregister /dev/cma_test.
 * NOTE(review): buffers still held in dma_virt[] are not freed here;
 * read the device until all slots are released before rmmod — confirm
 * this is the intended usage. */
static void __exit cma_test_exit(void)

{

    misc_deregister(&cma_test_misc);
}

module_exit(cma_test_exit);

MODULE_LICENSE("GPL");

MODULE_AUTHOR("Barry Song <[email protected]>");

MODULE_DESCRIPTION("kernel module to help the test of CMA");

/* NOTE(review): MODULE_ALIAS strings are usually identifiers without
 * spaces; "CMA test" may not act as a usable alias — confirm. */
MODULE_ALIAS("CMA test");

insmod cma-test.ko 加载模块

root@wireless-tag:/# insmod cma-test
[ 3971.556944] devtmpfs: create node [cma_test] dev-name [(null)]
[ 3971.562783] misc cma_test: registered.

echo 1 > /dev/cma_test 开始分配空间,第N次运行分配N(MB)

[ 3992.269833] cma: cma_alloc(cma c0435ef0, count 256, align 4)
[ 3992.269986] cma: cma_alloc(): returned c63be800
[ 3992.270497] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000
[ 3993.008489] random: fast init done

cat /dev/cma_test 释放空间,每运行一次释放最早分配且尚未释放的那一块空间

[ 4022.896707] cma: cma_release(page c63be800)
[ 4022.896887] misc cma_test: free virt: c64c0000 phys: 264c0000

根据内存分配关系,cma物理区域为0x26400000到26c00000,日志显示物理地址从264c0000开始增长,刚好在cma区

因为一共8M空间,超过3次后,空间将不够

root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4187.761083] misc cma_test: [0] alloc virt: c64c0000 phys: 264c0000    --1M
root@wireless-tag:/#
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4188.574042] misc cma_test: [1] alloc virt: c65c0000 phys: 265c0000    --2M
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4189.444204] misc cma_test: [2] alloc virt: c67c0000 phys: 267c0000    --3M
root@wireless-tag:/# echo 1 > /dev/cma_test
[ 4190.766380] misc cma_test: [3] no mem in CMA area

最后介绍一下fixmap映射,关于详细fixmap可以参考 Fix-Mapped Addresses

在此以dtb加载为例进行介绍

由于使用openwrt系统,有以下几个点比较特别:

1. kernel,dtb,rootfs是打包在一起的,形成一个固件

2. dtb在打包时带有特殊标记,以便于在启动过程中自动查找dtb在固件(内存)中的位置

最后在dtb加载过程在arch/arm/kernel/devtree.c  setup_machine_fdt

/**
 * setup_machine_fdt - Machine setup when an dtb was passed to the kernel
 * @dt_phys: physical address of dt blob
 *
 * If a dtb was passed to the kernel in r2, then use it to choose the
 * correct machine_desc and to setup the system.
 */
const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys)
{
	const struct machine_desc *mdesc, *mdesc_best = NULL;
	void *virt_p = NULL;

#if defined(CONFIG_ARCH_MULTIPLATFORM) || defined(CONFIG_ARM_SINGLE_ARMV7M)
	/* Fallback machine_desc used when no board-specific entry matches
	 * the device tree's "compatible" list. */
	DT_MACHINE_START(GENERIC_DT, "Generic DT based system")
		.l2c_aux_val = 0x0,
		.l2c_aux_mask = ~0x0,
	MACHINE_END

	mdesc_best = &__mach_desc_GENERIC_DT;
#endif

	/* The blob sits in lowmem, so the linear phys->virt translation is
	 * enough to reach it this early. */
	virt_p = phys_to_virt(dt_phys);
	early_print("to check atags dtb phys %p, virt %p\n", (void*)dt_phys, virt_p);
	if (!dt_phys || !early_init_dt_verify(virt_p))
	{
#ifdef CONFIG_SS_BUILTIN_DTB
		/* No usable external blob: fall back to the DTB linked into the
		 * kernel image, merging ATAGs into it when r2 held an ATAG list. */
		if(early_init_dt_verify(builtin_dtb_start))
		{
			extern int early_atags_to_fdt(void *atag_list, void *fdt, int total_space);
			extern u32 builtin_dtb_size;
			
			//early_print("early_init_dt_verify() pass...\n");
			if((!dt_phys ) || (!early_atags_to_fdt(virt_p, builtin_dtb_start, builtin_dtb_size)))
			{
				early_print("early_atags_to_fdt() success\n");
			}


		}
		else
#endif
		{
			return NULL;
		}
	}

	mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach);

	if (!mdesc) {
		const char *prop;
		int size;
		unsigned long dt_root;

		early_print("\nError: unrecognized/unsupported "
			    "device tree compatible list:\n[ ");

		/* Dump every string in the root "compatible" property; the
		 * strings are NUL-separated, hence the strlen()+1 stride. */
		dt_root = of_get_flat_dt_root();
		prop = of_get_flat_dt_prop(dt_root, "compatible", &size);
		while (size > 0) {
			early_print("'%s' ", prop);
			size -= strlen(prop) + 1;
			prop += strlen(prop) + 1;
		}
		early_print("]\n\n");

		dump_machine_table(); /* does not return */
	}

	/* We really don't want to do this, but sometimes firmware provides buggy data */
	if (mdesc->dt_fixup)
		mdesc->dt_fixup();

	/* Parse chosen/memory/... nodes (command line, memblock ranges). */
	early_init_dt_scan_nodes();

	/* Change machine number to match the mdesc we're using */
	__machine_arch_type = mdesc->nr;

	return mdesc;
}

fixmap初始化在arch/arm/mm/mmu.c中,执行过程为 setup_arch --> early_fixmap_init

/*
 * Install the initial fixmap PTE table.
 *
 * Runs from setup_arch() before the normal paging machinery is up: it
 * hooks bm_pte (the early boot PTE table defined elsewhere in this
 * file) into the pmd covering FIXADDR_TOP, making the early fixmap
 * slots (early console, DTB access, early ioremap) usable.
 */
void __init early_fixmap_init(void)
{
	pmd_t *pmd;

	/*
	 * The early fixmap range spans multiple pmds, for which
	 * we are not prepared:
	 */
	BUILD_BUG_ON((__fix_to_virt(__end_of_early_ioremap_region) >> PMD_SHIFT)
		     != FIXADDR_TOP >> PMD_SHIFT);

	pmd = fixmap_pmd(FIXADDR_TOP);
	pmd_populate_kernel(&init_mm, pmd, bm_pte);

	/* Until paging is fully initialized, PTE lookups go through the
	 * fixmap-based accessor. */
	pte_offset_fixmap = pte_offset_early_fixmap;
}

 

kernel加载初始化阶段的页面建立的linux/arch/arm/kernel/head.S中,有3级页表,PGD–>PMD–>PTE

/*
 * Setup the initial page tables.  We only setup the barest
 * amount which are required to get the kernel running, which
 * generally means mapping in the kernel code.
 *
 * All mappings here are 1MB sections; fine-grained PTEs are built
 * later (paging_init / early_fixmap_init).
 *
 * r8 = phys_offset, r9 = cpuid, r10 = procinfo
 *
 * Returns:
 *  r0, r3, r5-r7 corrupted
 *  r4 = physical page table address
 */
__create_page_tables:
	pgtbl	r4, r8				@ page table address

	/*
	 * Clear the swapper page table
	 */
	mov	r0, r4
	mov	r3, #0
	add	r6, r0, #PG_DIR_SIZE
1:	str	r3, [r0], #4
	str	r3, [r0], #4
	str	r3, [r0], #4
	str	r3, [r0], #4
	teq	r0, r6
	bne	1b

#ifdef CONFIG_ARM_LPAE
	/*
	 * Build the PGD table (first level) to point to the PMD table. A PGD
	 * entry is 64-bit wide.
	 */
	mov	r0, r4
	add	r3, r4, #0x1000			@ first PMD table address
	orr	r3, r3, #3			@ PGD block type
	mov	r6, #4				@ PTRS_PER_PGD
	mov	r7, #1 << (55 - 32)		@ L_PGD_SWAPPER
1:
#ifdef CONFIG_CPU_ENDIAN_BE8
	str	r7, [r0], #4			@ set top PGD entry bits
	str	r3, [r0], #4			@ set bottom PGD entry bits
#else
	str	r3, [r0], #4			@ set bottom PGD entry bits
	str	r7, [r0], #4			@ set top PGD entry bits
#endif
	add	r3, r3, #0x1000			@ next PMD table
	subs	r6, r6, #1
	bne	1b

	add	r4, r4, #0x1000			@ point to the PMD tables
#ifdef CONFIG_CPU_ENDIAN_BE8
	add	r4, r4, #4			@ we only write the bottom word
#endif
#endif

	@ r7 := section-mapping attribute bits for normal memory
	ldr	r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags

	/*
	 * Create identity mapping to cater for __enable_mmu.
	 * This identity mapping will be removed by paging_init().
	 */
	adr	r0, __turn_mmu_on_loc
	ldmia	r0, {r3, r5, r6}
	sub	r0, r0, r3			@ virt->phys offset
	add	r5, r5, r0			@ phys __turn_mmu_on
	add	r6, r6, r0			@ phys __turn_mmu_on_end
	mov	r5, r5, lsr #SECTION_SHIFT
	mov	r6, r6, lsr #SECTION_SHIFT

1:	orr	r3, r7, r5, lsl #SECTION_SHIFT	@ flags + kernel base
	str	r3, [r4, r5, lsl #PMD_ORDER]	@ identity mapping
	cmp	r5, r6
	addlo	r5, r5, #1			@ next section
	blo	1b

	/*
	 * Map our RAM from the start to the end of the kernel .bss section.
	 */
	add	r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER)
	ldr	r6, =(_end - 1)
	orr	r3, r8, r7
	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1:	str	r3, [r0], #1 << PMD_ORDER
	add	r3, r3, #1 << SECTION_SHIFT
	cmp	r0, r6
	bls	1b

#ifdef CONFIG_XIP_KERNEL
	/*
	 * Map the kernel image separately as it is not located in RAM.
	 */
#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR)
	mov	r3, pc
	mov	r3, r3, lsr #SECTION_SHIFT
	orr	r3, r7, r3, lsl #SECTION_SHIFT
	add	r0, r4,  #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER)
	str	r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]!
	ldr	r6, =(_edata_loc - 1)
	add	r0, r0, #1 << PMD_ORDER
	add	r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER)
1:	cmp	r0, r6
	add	r3, r3, #1 << SECTION_SHIFT
	strls	r3, [r0], #1 << PMD_ORDER
	bls	1b
#endif

	/*
	 * Then map boot params address in r2 if specified.
	 * We map 2 sections in case the ATAGs/DTB crosses a section boundary.
	 */
	mov	r0, r2, lsr #SECTION_SHIFT
	movs	r0, r0, lsl #SECTION_SHIFT
	subne	r3, r0, r8
	addne	r3, r3, #PAGE_OFFSET
	addne	r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER)
	orrne	r6, r7, r0
	strne	r6, [r3], #1 << PMD_ORDER
	addne	r6, r6, #1 << SECTION_SHIFT
	strne	r6, [r3]

#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8)
	sub	r4, r4, #4			@ Fixup page table pointer
						@ for 64-bit descriptors
#endif

#ifdef CONFIG_DEBUG_LL
#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING)
	/*
	 * Map in IO space for serial debugging.
	 * This allows debug messages to be output
	 * via a serial console before paging_init.
	 */
	addruart r7, r3, r0

	mov	r3, r3, lsr #SECTION_SHIFT
	mov	r3, r3, lsl #PMD_ORDER

	add	r0, r4, r3
	mov	r3, r7, lsr #SECTION_SHIFT
	ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
	orr	r3, r7, r3, lsl #SECTION_SHIFT
#ifdef CONFIG_ARM_LPAE
	mov	r7, #1 << (54 - 32)		@ XN
#ifdef CONFIG_CPU_ENDIAN_BE8
	str	r7, [r0], #4
	str	r3, [r0], #4
#else
	str	r3, [r0], #4
	str	r7, [r0], #4
#endif
#else
	orr	r3, r3, #PMD_SECT_XN
	str	r3, [r0], #4
#endif

#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */
	/* we don't need any serial debugging mappings */
	ldr	r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags
#endif

#if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS)
	/*
	 * If we're using the NetWinder or CATS, we also need to map
	 * in the 16550-type serial port for the debug messages
	 */
	add	r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER)
	orr	r3, r7, #0x7c000000
	str	r3, [r0]
#endif
#ifdef CONFIG_ARCH_RPC
	/*
	 * Map in screen at 0x02000000 & SCREEN2_BASE
	 * Similar reasons here - for debug.  This is
	 * only for Acorn RiscPC architectures.
	 */
	add	r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER)
	orr	r3, r7, #0x02000000
	str	r3, [r0]
	add	r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER)
	str	r3, [r0]
#endif
#endif
#ifdef CONFIG_ARM_LPAE
	sub	r4, r4, #0x1000		@ point to the PGD table
#endif
	ret	lr
ENDPROC(__create_page_tables)

关于swapper_pg_dir

/*

* swapper_pg_dir is the virtual address of the initial page table.

* We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must

* make sure that KERNEL_RAM_VADDR is correctly set. Currently, we expect

* the least significant 16 bits to be 0x8000, but we could probably

* relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000.

*/

#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET)

#if (KERNEL_RAM_VADDR & 0xffff) != 0x8000

#error KERNEL_RAM_VADDR must start at 0xXXXX8000

更多推荐

Linux虚拟内存映射分析以及CMA测试 - 以SSD202为例