asmlinkage void __init start_kernel(void) { char * command_line; extern struct kernel_param __start___param[], __stop___param[]; smp_setup_processor_id(); /* * Need to run as early as possible, to initialize the * lockdep hash: */ unwind_init(); lockdep_init(); cgroup_init_early(); local_irq_disable(); early_boot_irqs_off(); early_init_irq_lock_class(); /* * Interrupts are still disabled. Do necessary setups, then * enable them */ lock_kernel(); tick_init(); boot_cpu_init(); page_address_init(); printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); setup_command_line(command_line); unwind_setup(); setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ /* * Set up the scheduler prior starting any interrupts (such as the * timer interrupt). Full topology setup happens at smp_init() * time - but meanwhile we still have a functioning scheduler. */ sched_init(); /* * Disable preemption - early bootup scheduling is extremely * fragile until we cpu_idle() for the first time. */ preempt_disable(); build_all_zonelists(); page_alloc_init(); printk(KERN_NOTICE "Kernel command line: %s\n", boot_command_line); parse_early_param(); parse_args("Booting kernel", static_command_line, __start___param, __stop___param - __start___param, &unknown_bootoption); if (!irqs_disabled()) { printk(KERN_WARNING "start_kernel(): bug: interrupts were " "enabled *very* early, fixing it\n"); local_irq_disable(); } sort_main_extable(); trap_init(); rcu_init(); init_IRQ(); pidhash_init(); init_timers(); hrtimers_init(); softirq_init(); timekeeping_init(); time_init(); profile_init(); if (!irqs_disabled()) printk("start_kernel(): bug: interrupts were enabled early\n"); early_boot_irqs_on(); local_irq_enable(); /* * HACK ALERT! This is early. We're enabling the console before * we've done PCI setups etc, and console_init() must be aware of * this. But we do want output early, in case something goes wrong. */ console_init(); if (panic_later) panic(panic_later, panic_param); lockdep_info(); /* * Need to run this when irqs are enabled, because it wants * to self-test [hard/soft]-irqs on/off lock inversion bugs * too: */ locking_selftest(); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start && !initrd_below_start_ok && initrd_start < min_low_pfn << PAGE_SHIFT) { printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " "disabling it.\n",initrd_start,min_low_pfn << PAGE_SHIFT); initrd_start = 0; } #endif vfs_caches_init_early(); cpuset_init_early(); mem_init(); kmem_cache_init(); setup_per_cpu_pageset(); numa_policy_init(); if (late_time_init) late_time_init(); calibrate_delay(); pidmap_init(); pgtable_cache_init(); prio_tree_init(); anon_vma_init(); #ifdef CONFIG_X86 if (efi_enabled) efi_enter_virtual_mode(); #endif fork_init(num_physpages); proc_caches_init(); buffer_init(); unnamed_dev_init(); key_init(); security_init(); vfs_caches_init(num_physpages); radix_tree_init(); signals_init(); /* rootfs populating might need page-writeback */ page_writeback_init(); #ifdef CONFIG_PROC_FS proc_root_init(); #endif cgroup_init(); cpuset_init(); taskstats_init_early(); delayacct_init(); check_bugs(); acpi_early_init(); /* before LAPIC and SMP init */ /* Do the rest non-__init'ed, we're now alive */ rest_init(); }
2、build_all_zonelist用来建立管理结点以及其他内存区域所需的数据结构。其中主要工作由__build_all_zonelist来完成。
/* return values int ....just for stop_machine_run() */ static int __build_all_zonelists(void *dummy) { int nid;//for_each_online_node遍历了系统中所有活动的结点 for_each_online_node(nid) { //NODE_DATA宏,用于通过结点编号来查询与一个NUMA结点相关的pgdata_t实例 pg_data_t *pgdat = NODE_DATA(nid); build_zonelists(pgdat); build_zonelist_cache(pgdat); } return 0; } void build_all_zonelists(void) { set_zonelist_order(); if (system_state == SYSTEM_BOOTING) { __build_all_zonelists(NULL); cpuset_init_current_mems_allowed(); } else { /* we have to stop all cpus to guarantee there is no user of zonelist */ stop_machine_run(__build_all_zonelists, NULL, NR_CPUS); /* cpuset refresh routine should be here */ } vm_total_pages = nr_free_pagecache_pages(); /* * Disable grouping by mobility if the number of pages in the * system is too low to allow the mechanism to work. It would be * more accurate, but expensive to check per-zone. This check is * made on memory-hotadd so a system can start with mobility * disabled and enable it later */ if (vm_total_pages < (pageblock_nr_pages * MIGRATE_TYPES)) page_group_by_mobility_disabled = 1; else page_group_by_mobility_disabled = 0; printk("Built %i zonelists in %s order, mobility grouping %s. " "Total pages: %ld\n", num_online_nodes(), zonelist_order_name[current_zonelist_order], page_group_by_mobility_disabled ? "off" : "on", vm_total_pages); #ifdef CONFIG_NUMA printk("Policy zone: %s\n", zone_names[policy_zone]); #endif }
3、NODE_DATA,用来根据结点编号,查询一个NUMA结点相关的pgdata_t实例
在UMA系统上,NODE_DATA返回contig_page_data的地址。
绝大多数的实现是:
<mmzone.h>
#define NODE_DATA(nid) (&contig_page_data)