在我们内核里添加调试信息时,很多时候需要记录一段时间内有哪些进程曾经活动过。
自然就需要有一个集合来完成此功能。
用数组来实现集合比较麻烦,而且查找效率较低。因此借鉴内核本身的PID HASH结构,
用hash链表外加一个list结构,来实现一个集合。hash链表用来提高按pid查找的速度,list结构则把
所有元素串在一起,便于遍历整个集合。
修改方案:
1. kernel/pid.c
#if 1 static __cacheline_aligned_in_smp DEFINE_SPINLOCK(test_pidmap_lock); static struct hlist_head *test_pid_hash; struct test_pid_data { int magic; long load; char comm[TASK_COMM_LEN]; }; struct test_pid { int nr; struct list_head list; struct test_pid_data data; struct hlist_node pid_chain; struct rcu_head rcu; }; #define test_pid_hashfn(nr) hash_long((unsigned long)nr, pidhash_shift) struct list_head test_pid_list; static void init_test_pid_hash_list(void) { int pidhash_size,i; pidhash_size = 1 << pidhash_shift; test_pid_hash = alloc_bootmem(pidhash_size*sizeof(*(test_pid_hash))); if (!test_pid_hash) panic("Could not alloc test_pidhash!\n"); for (i = 0; i < pidhash_size; i++) INIT_HLIST_HEAD(&test_pid_hash[i]); INIT_LIST_HEAD(&test_pid_list); } static struct test_pid * fastcall test_find_pid(int nr) { struct hlist_node *elem; struct test_pid *pid; hlist_for_each_entry_rcu(pid, elem, &test_pid_hash[test_pid_hashfn(nr)], pid_chain) { if (pid->nr == nr) return pid; } return NULL; } static struct test_pid * fastcall test_alloc_pid(int nr) { struct test_pid *pid; pid = (struct test_pid *)kzalloc(sizeof(struct test_pid),GFP_KERNEL); if(NULL==pid){ return NULL; } pid->nr = nr; spin_lock(&test_pidmap_lock); hlist_add_head_rcu(&pid->pid_chain, &test_pid_hash[test_pid_hashfn(pid->nr)]); list_add_rcu(&pid->list,&test_pid_list); spin_unlock(&test_pidmap_lock); return pid; } typedef void (*test_fn)(struct test_pid * pid); void test_traverse_pid_list(test_fn callback) { struct list_head *p; struct test_pid *pid; rcu_read_lock(); list_for_each_rcu(p, &test_pid_list) { pid = list_entry(p, struct test_pid, list); callback(pid); } rcu_read_unlock(); } /* * If not exit,create it and add to set then assign with data */ int test_set_pid_data(int nr,struct test_pid_data* test_data) { struct test_pid *pid; pid = test_find_pid(nr); if(NULL==pid){ pid = test_alloc_pid(nr); if(NULL==pid) return -ENOMEM; } memcpy(&pid->data,test_data,sizeof(struct test_pid_data)); return 0; } struct 
test_pid_data* test_get_pid_data(int nr) { struct test_pid *pid; pid = test_find_pid(nr); if(NULL==pid) return NULL; return &pid->data; } static void test_safe_free_pid(struct rcu_head *rp) { struct test_pid *pid = container_of(rp, struct test_pid, rcu); kfree(pid); } void test_release_pid(struct test_pid *pid) { spin_lock(&test_pidmap_lock); hlist_del_rcu(&pid->pid_chain); list_del_rcu(&pid->list); spin_unlock(&test_pidmap_lock); call_rcu(&pid->rcu,test_safe_free_pid); } void test_remove_pid_all(void) { test_traverse_pid_list(test_release_pid); } #endif void __init pidhash_init(void) { #if 1 init_test_pid_hash_list(); #endif }
2. 下面是该集合的一个使用场景:记录某段时间内,smp下,各进程消耗虚拟内存的大小。
用户通过系统调用通知开始和结束记录
mm/swap.c
#if 1 struct test_pid_data { int magic; long load; char comm[TASK_COMM_LEN]; }; struct test_pid { int nr; struct list_head list; struct test_pid_data data; struct hlist_node pid_chain; struct rcu_head rcu; }; typedef void (*test_fn)(struct test_pid* pid); extern int test_set_pid_data(int nr,struct test_pid_data* test_data); extern struct test_pid_data* test_get_pid_data(int nr); extern void test_traverse_pid_list(test_fn callback); extern void test_remove_pid_all(void); static atomic_t test_flag = ATOMIC_INIT(0); void test_print_pid_data(struct test_pid *pid) { printk("(pid:%d,name:%s) mmap len:%ld", pid->nr,pid->data.comm,(unsigned long)pid->data.load); if(pid->data.magic) printk(",NO ALLOC!"); printk("\n"); } void dump_pid_result(void) { test_traverse_pid_list(test_print_pid_data); } static void update_process_mmap_len(long delta) { int pid = 0; struct test_pid_data data; struct test_pid_data* datap; if(0 == atomic_read(&test_flag)) return; pid = (int)current->pid; rcu_read_lock(); datap = test_get_pid_data(pid); if(datap == NULL){ strcpy(data.comm,current->comm); data.load = delta; data.magic = 0; test_set_pid_data(pid,&data); }else datap->load +=delta; rcu_read_unlock(); } void update_process_mmap_flag(unsigned long flag) { int pid = 0; struct test_pid_data data; struct test_pid_data* datap; if(0 == atomic_read(&test_flag)) return; if(!flag) return; pid = (int)current->pid; rcu_read_lock(); datap = test_get_pid_data(pid); if(datap == NULL){ strcpy(data.comm,current->comm); data.load = 0; data.magic = 0; test_set_pid_data(pid,&data); } datap->magic = 1; rcu_read_unlock(); } asmlinkage void sys_notify_record(int data) { int flag = (int)data; if(1 == flag){ printk("start record\n"); atomic_set(&test_flag,1); smp_wmb(); } else if(0 == flag){ atomic_set(&test_flag,0); smp_wmb(); printk("end record\n"); dump_pid_result(); test_remove_pid_all(); } } void test_start_mmap(void) { printk("start record\n"); sys_notify_record(1); } void test_end_mmap(void) { printk("end 
record\n"); sys_notify_record(0); //dump_pid_result(); } #endif #define ACCT_THRESHOLD max(16, NR_CPUS * 2) static DEFINE_PER_CPU(long, committed_space) = 0; void vm_acct_memory(long pages) { long *local; preempt_disable(); local = &__get_cpu_var(committed_space); *local += pages; if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) { #if 1 update_process_mmap_len(*local); #endif atomic_add(*local, &vm_committed_space); *local = 0; } preempt_enable(); } mm/mmap.c #if 1 extern void dump_pid_result(void); extern void test_remove_pid_all(void); #endif int __vm_enough_memory(long pages, int cap_sys_admin) { if (atomic_read(&vm_committed_space) < (long)allowed) return 0; #if 1 dump_pid_result(); test_remove_pid_all(); #endif error: vm_unacct_memory(pages); include/asm-x86_64/unistd.h #define __NR_unify_syscall 280 __SYSCALL(__NR_unify_syscall, sys_unify_syscall) #define __NR_notify_record_syscall 281 __SYSCALL(__NR_notify_record_syscall, sys_notify_record) //#define __NR_syscall_max __NR_unify_syscall #define __NR_syscall_max __NR_notify_record_syscall
测试:
用户态malloc一块10000000字节的内存,然后fork
通过syscall(281,1)启动记录,syscall(281,0)关闭记录并打印结果,最后清除记录
pid自带的信息我们用struct test_pid_data来表示,用户可根据情况自行定义结构类型。
上面例子中magic是用来指示该进程是否曾经用MAP_NOALLOC分配过内存。
(pid:338,name:fork_malloc) mmap len:5004
(pid:337,name:fork_malloc) mmap len:5004
(pid:336,name:fork_malloc) mmap len:5004
(pid:335,name:fork_malloc) mmap len:5004
(pid:334,name:fork_malloc) mmap len:5021