这段时间,断断续续的忙了一阵,因为父亲的去世也不情愿的休息了20来天,一点也没有办法。回来后重新开始学习android的启动流程。对android系统级别的学习,阅读代码成为了唯一的办法,不像应用程序开发来得那么明了快捷。之前花了好多时间才对android的binder驱动做了一定的了解,最近几天从android的启动画面,分析到了init这个内核最先启动的一个进程。参考内容包括老罗的android之旅和邓凡平老师的深入理解android卷,本人只是对学习做一定的总结,帮助自己进一步理解。
一 . init.c中的main函数(路径:system/core/init/init.c)
先给出main的源码,然后对个别关键函数进行分析
int main(int argc, char **argv) { int fd_count = 0; struct pollfd ufds[4]; char *tmpdev; char* debuggable; char tmp[32]; int property_set_fd_init = 0; int signal_fd_init = 0; int keychord_fd_init = 0; if (!strcmp(basename(argv[0]), "ueventd")) return ueventd_main(argc, argv); /* clear the umask */ umask(0); /* Get the basic filesystem setup we need put * together in the initramdisk on / and then we'll * let the rc file figure out the rest. */ mkdir("/dev", 0755); mkdir("/proc", 0755); mkdir("/sys", 0755); mount("tmpfs", "/dev", "tmpfs", 0, "mode=0755"); mkdir("/dev/pts", 0755); mkdir("/dev/socket", 0755); mount("devpts", "/dev/pts", "devpts", 0, NULL); mount("proc", "/proc", "proc", 0, NULL); mount("sysfs", "/sys", "sysfs", 0, NULL); /* We must have some place other than / to create the * device nodes for kmsg and null, otherwise we won't * be able to remount / read-only later on. * Now that tmpfs is mounted on /dev, we can actually * talk to the outside world. */ open_devnull_stdio(); log_init(); INFO("reading config file\n"); init_parse_config_file("/init.rc"); /* pull the kernel commandline and ramdisk properties file in */ import_kernel_cmdline(0); get_hardware_name(hardware, &revision); snprintf(tmp, sizeof(tmp), "/init.%s.rc", hardware);//和平台硬件hardware有关系 init_parse_config_file(tmp); action_for_each_trigger("early-init", action_add_queue_tail); queue_builtin_action(wait_for_coldboot_done_action, "wait_for_coldboot_done"); queue_builtin_action(property_init_action, "property_init"); queue_builtin_action(keychord_init_action, "keychord_init"); queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数 queue_builtin_action(set_init_properties_action, "set_init_properties"); /* execute all the boot actions to get us started */ action_for_each_trigger("init", action_add_queue_tail); action_for_each_trigger("early-fs", action_add_queue_tail); action_for_each_trigger("fs", action_add_queue_tail); action_for_each_trigger("post-fs", 
action_add_queue_tail); queue_builtin_action(property_service_init_action, "property_service_init"); queue_builtin_action(signal_init_action, "signal_init"); queue_builtin_action(check_startup_action, "check_startup"); /* execute all the boot actions to get us started */ action_for_each_trigger("early-boot", action_add_queue_tail); action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中 /* run all property triggers based on current state of the properties */ queue_builtin_action(queue_property_triggers_action, "queue_propety_triggers"); #if BOOTCHART queue_builtin_action(bootchart_init_action, "bootchart_init"); #endif for(;;) { int nr, i, timeout = -1; execute_one_command(); //检查action_queue列表是否为空,执行action restart_processes();//检查是否有进程需要重启 if (!property_set_fd_init && get_property_set_fd() > 0) { ufds[fd_count].fd = get_property_set_fd(); ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; property_set_fd_init = 1; } if (!signal_fd_init && get_signal_fd() > 0) { ufds[fd_count].fd = get_signal_fd(); ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; signal_fd_init = 1; } if (!keychord_fd_init && get_keychord_fd() > 0) { ufds[fd_count].fd = get_keychord_fd(); ufds[fd_count].events = POLLIN; ufds[fd_count].revents = 0; fd_count++; keychord_fd_init = 1; } if (process_needs_restart) { timeout = (process_needs_restart - gettime()) * 1000; if (timeout < 0) timeout = 0; } if (!action_queue_empty() || cur_action) timeout = 0; #if BOOTCHART if (bootchart_count > 0) { if (timeout < 0 || timeout > BOOTCHART_POLLING_MS) timeout = BOOTCHART_POLLING_MS; if (bootchart_step() < 0 || --bootchart_count == 0) { bootchart_finish(); bootchart_count = 0; } } #endif nr = poll(ufds, fd_count, timeout); if (nr <= 0) continue; for (i = 0; i < fd_count; i++) { if (ufds[i].revents == POLLIN) { if (ufds[i].fd == get_property_set_fd()) handle_property_set_fd(); else if (ufds[i].fd == get_keychord_fd()) handle_keychord(); 
else if (ufds[i].fd == get_signal_fd()) handle_signal(); } } } return 0; }
init作为用户空间第一个启动的进程,需要完成很多的任务。分以下部分内容来分析
1. ueventd进程
if (!strcmp(basename(argv[0]), "ueventd"))
return ueventd_main(argc, argv);
这段代码取出argv[0]并求其basename,比如/sbin/ueventd的basename为ueventd。android系统第一次启动时进程名为init,所以ueventd_main不会执行;它真正执行是在init启动service ueventd /sbin/ueventd的时候:init先fork出一个子进程,再execve启动/sbin/ueventd,而/sbin/ueventd实际上是指向init的符号链接,也就是说ueventd进程执行起来后运行的代码还是init.c中的main,因此不同的进程名执行的是相同的main函数。ueventd_main函数的主要功能:在Linux系统中现在都使用uevent机制来管理设备的热插拔事件,给用户空间权利来完成一些设备文件节点的创建。这种机制是建立在socket的通信机制上,用户空间和内核驱动进行交互,详细的机制没有去了解过。是linux2.6的版本中常用的机制。比如驱动出现device_create等时,会向用户空间报告一个uevent事件,用户空间由ueventd进程解析后去创建设备节点。
2.init.rc的解析
INFO("reading config file\n");
init_parse_config_file("/init.rc");
init.rc是一个配置文件,内部有许多的语言规则,所有语言会在init_parse_config_file中进行解析。调用流程如下:init_parse_config_file—>read_file—>parse_config.
parse_config源码如下:
static void parse_config(const char *fn, char *s)//s为init.rc中字符串的内容 { struct parse_state state; char *args[INIT_PARSER_MAXARGS]; int nargs; nargs = 0; state.filename = fn; state.line = 1; state.ptr = s; state.nexttoken = 0; state.parse_line = parse_line_no_op; for (;;) { switch (next_token(&state)) { case T_EOF: //文件的结尾 state.parse_line(&state, 0, 0); return; case T_NEWLINE://新的一行 if (nargs) { int kw = lookup_keyword(args[0]); //读取init.rc返回关键字例如service,返回K_service if (kw_is(kw, SECTION)) { //查看关键字是否为SECTION,只有service和on满足 state.parse_line(&state, 0, 0); parse_new_section(&state, kw, nargs, args); } else { state.parse_line(&state, nargs, args);//on 和service两个段下面的内容 } nargs = 0; } break; case T_TEXT://文本内容 if (nargs < INIT_PARSER_MAXARGS) { args[nargs++] = state.text; } break; } } } int init_parse_config_file(const char *fn) { char *data; data = read_file(fn, 0); if (!data) return -1; parse_config(fn, data); DUMP(); return 0; }
这个函数中可以看到在for的无限循环中,主要对init.rc的内容进行解析,一行一行地读取,每读完一行、遇到换行时,使用lookup_keyword分析已经读取的这一行的第一个参数,部分代码如下:
case 's': if (!strcmp(s, "ervice")) return K_service; if (!strcmp(s, "etenv")) return K_setenv; if (!strcmp(s, "etkey")) return K_setkey; if (!strcmp(s, "etprop")) return K_setprop; if (!strcmp(s, "etrlimit")) return K_setrlimit; if (!strcmp(s, "ocket")) return K_socket; if (!strcmp(s, "tart")) return K_start; if (!strcmp(s, "top")) return K_stop; if (!strcmp(s, "ymlink")) return K_symlink; if (!strcmp(s, "ysclktz")) return K_sysclktz;
该函数主要对每一行第一个单词的首字符做case,然后再用strcmp比较该单词余下的部分,这些命令都是按init.rc的格式要求来进行的。比如常用的service和on等经过lookup_keyword后返回K_service和K_on。随后使用kw_is(kw, SECTION)判断返回的kw是不是属于SECTION类型,在init.rc中只有service和on满足该类型,这样就会对on和service所在的段进行解析,我们这里首先分析service,以init.rc中的service zygote为例
service zygote /system/bin/app_process -Xzygote /system/bin --zygote --start-system-server class main socket zygote stream 666 onrestart write /sys/android_power/request_state wake onrestart write /sys/power/state on onrestart restart media onrestart restart netd
当解析到这段代码时,执行parse_service
static void *parse_service(struct parse_state *state, int nargs, char **args) { struct service *svc; if (nargs < 3) { parse_error(state, "services must have a name and a program\n"); return 0; } if (!valid_name(args[1])) { parse_error(state, "invalid service name '%s'\n", args[1]); return 0; } svc = service_find_by_name(args[1]);//查找服务是否已经存在 if (svc) { parse_error(state, "ignored duplicate definition of service '%s'\n", args[1]); return 0; } nargs -= 2; svc = calloc(1, sizeof(*svc) + sizeof(char*) * nargs); if (!svc) { parse_error(state, "out of memory\n"); return 0; } svc->name = args[1]; //sevice的名字 svc->classname = "default"; //svc的类名默认是default memcpy(svc->args, args + 2, sizeof(char*) * nargs);//首个参数放的是可执行文件 svc->args[nargs] = 0; svc->nargs = nargs;//参数个数 svc->onrestart.name = "onrestart"; list_init(&svc->onrestart.commands); list_add_tail(&service_list, &svc->slist); return svc; }
在这里args[1]就是zygote,系统会先查找是否已经存在该服务,然后构建一个service svc,进行相关的填充,包括服务名,服务所属的类别名字,以及服务启动带入的参数个数(要减去service和服务名zygote),最后将这个svc加入到service_list全局链表中。随后所做的是对Service的下面几行Option进行解析,比如class,socket,onrestart等等。使用的是parse_line_service函数,如下:
static void parse_line_service(struct parse_state *state, int nargs, char **args) { struct service *svc = state->context; struct command *cmd; int i, kw, kw_nargs; if (nargs == 0) { return; } svc->ioprio_class = IoSchedClass_NONE; kw = lookup_keyword(args[0]); switch (kw) { case K_capability: break; case K_class: if (nargs != 2) { parse_error(state, "class option requires a classname\n"); } else { svc->classname = args[1];//比如main,core类 } break; case K_console: svc->flags |= SVC_CONSOLE; break; case K_disabled: svc->flags |= SVC_DISABLED; ...... case K_onrestart: nargs--; args++; kw = lookup_keyword(args[0]); if (!kw_is(kw, COMMAND)) { parse_error(state, "invalid command '%s'\n", args[0]); break; } kw_nargs = kw_nargs(kw); if (nargs < kw_nargs) { parse_error(state, "%s requires %d %s\n", args[0], kw_nargs - 1, kw_nargs > 2 ? "arguments" : "argument"); break; } cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); cmd->func = kw_func(kw); cmd->nargs = nargs; memcpy(cmd->args, args, sizeof(char*) * nargs); list_add_tail(&svc->onrestart.commands, &cmd->clist); break; ....... }
这里以class这个keyword为例,会把当前svc的类名由默认的default改为main;socket和onrestart等option的处理与此类似。
到此为止整个service都解析完成 ,开始下一个section的内容。但是zygote这个服务进程的启动还没有开始,将在下面分析。
下面分析on字段的内容,以on boot这个section作为例子进行分析
on boot ifup lo hostname localhost domainname localdomain .... # Set this property so surfaceflinger is not started by system_init setprop system_init.startsurfaceflinger 0 class_start core class_start main
和前面的分析类似,case中进入K_on选项执行函数parse_action
static void *parse_action(struct parse_state *state, int nargs, char **args) { struct action *act; if (nargs < 2) { parse_error(state, "actions must have a trigger\n"); return 0; } if (nargs > 2) { parse_error(state, "actions may not have extra parameters\n"); return 0; } act = calloc(1, sizeof(*act)); act->name = args[1]; //action的名字如boot,init等 list_init(&act->commands); list_add_tail(&action_list, &act->alist); /* XXX add to hash */ return act; }
在这里可以看到一个action结构体类似于service,这个action的名字为boot,最后会将这个action加入到全局链表action_list中。
随后执行parse_line_action函数,对on字段所在的option进行解析,代码如下:
static void parse_line_action(struct parse_state* state, int nargs, char **args) //action所在的行 { struct command *cmd; struct action *act = state->context;//on boot启动 int (*func)(int nargs, char **args); int kw, n; if (nargs == 0) { return; } kw = lookup_keyword(args[0]);//命令的参数个数 if (!kw_is(kw, COMMAND)) { parse_error(state, "invalid command '%s'\n", args[0]); return; } n = kw_nargs(kw); if (nargs < n) { parse_error(state, "%s requires %d %s\n", args[0], n - 1, n > 2 ? "arguments" : "argument"); return; } cmd = malloc(sizeof(*cmd) + sizeof(char*) * nargs); cmd->func = kw_func(kw); cmd->nargs = nargs; memcpy(cmd->args, args, sizeof(char*) * nargs); list_add_tail(&act->commands, &cmd->clist); //
这里以class_start main为例,该关键字为 KEYWORD(class_start, COMMAND, 1, do_class_start),填充一个command结构体,包括这个cmd的执行函数,如class_start对应的func为do_class_start,KEYWORD中声明的参数个数为1(注意cmd->nargs保存的是这一行的单词总数,包含命令名本身)。同时将这个cmd添加到该action的commands链表中。本文中将会出现2个cmd。
至此,on和service两个section已经举例子分析完成。
3 下面继续分析main函数中的queue_builtin_action和action_for_each_trigger。
queue_builtin_action(console_init_action, "console_init");//第二个开机画面显示函数
该函数实现将console_init这个action添加到action_queue全局链表中。
action_for_each_trigger("boot", action_add_queue_tail);//把boot这个action添加到action_queue链表中
/*
 * Call `func` on every action in action_list whose name equals `trigger`.
 *
 * trigger: action name to match (compared with strcmp).
 * func:    callback invoked once per matching action, in list order.
 */
void action_for_each_trigger(const char *trigger,
                             void (*func)(struct action *act))
{
    struct listnode *pos;

    list_for_each(pos, &action_list) {
        struct action *candidate = node_to_item(pos, struct action, alist);

        if (strcmp(candidate->name, trigger) != 0)
            continue;

        func(candidate);
    }
}
在该函数中,首先遍历action_list链表,找到action,看是否有名字叫boot的trigger存在,我们知道刚才在解析init.rc中的on boot时,将boot这个作为action的name加入到了action_list中去,所以可以找到这个boot的action。成功匹配后调用action_add_queue_tail,将这个action再次加入到action_queue中,等待着执行。
4 for(;;)循环中执行execute_one_command
/*
 * Run the next pending command, at most one per call.
 *
 * While the current action still has commands left, advance to its next
 * command.  Otherwise take the next action off the head of the action queue
 * and start with its first command.  Returns without doing anything when
 * the queue is empty or no command is found.
 */
void execute_one_command(void)
{
    int in_progress;
    int result;

    /* True only while we are in the middle of an action's command list. */
    in_progress = cur_action && cur_command &&
                  !is_last_command(cur_action, cur_command);

    if (in_progress) {
        cur_command = get_next_command(cur_action, cur_command);
    } else {
        cur_action = action_remove_queue_head();
        cur_command = NULL;
        if (cur_action == NULL)
            return;
        INFO("processing action %p (%s)\n", cur_action, cur_action->name);
        cur_command = get_first_command(cur_action);
    }

    if (cur_command == NULL)
        return;

    /* Invoke the command's handler, e.g. the one registered for class_start. */
    result = cur_command->func(cur_command->nargs, cur_command->args);
    INFO("command '%s' r=%d\n", cur_command->args[0], result);
}
使用action_remove_queue_head获取action_queue链表中的action后,移除该节点,使用get_first_command获得在action中的命令,比如这里出现的boot和console_init这两个action。针对console_init启动console_init_action这个函数。如果是boot则会对boot这个action所具有的commands链表进行cmd的获取,class_start的func指针函数为do_class_start:
/*
 * Handler for the "class_start <class>" command.
 *
 * Passes args[1] (the class name) to service_for_each_class() together with
 * service_start_if_not_disabled.  Always returns 0.
 */
int do_class_start(int nargs, char **args)
{
    char *class_name = args[1];

    /* Starting a class does not start services
     * which are explicitly disabled.  They must
     * be started individually.
     */
    service_for_each_class(class_name, service_start_if_not_disabled);

    return 0;
}
可以看到提取了命令行的第二个参数,如main,core等。在service_for_each_class中遍历service_list查找属于该类的service,如我们前面提到的zygote,查找到后执行service_start_if_not_disabled——>service_start,至此我们进入了启动service的代码
void service_start(struct service *svc, const char *dynamic_args) { struct stat s; pid_t pid; int needs_console; int n; /* starting a service removes it from the disabled * state and immediately takes it out of the restarting * state if it was in there */ svc->flags &= (~(SVC_DISABLED|SVC_RESTARTING)); svc->time_started = 0;//服务的启动时间设为0 /* running processes require no additional work -- if * they're in the process of exiting, we've ensured * that they will immediately restart on exit, unless * they are ONESHOT */ if (svc->flags & SVC_RUNNING) { return; } needs_console = (svc->flags & SVC_CONSOLE) ? 1 : 0; if (needs_console && (!have_console)) { ERROR("service '%s' requires console\n", svc->name); svc->flags |= SVC_DISABLED; return; } if (stat(svc->args[0], &s) != 0) { //通过文件名获取文件信息保存到s的buf中 ERROR("cannot find '%s', disabling '%s'\n", svc->args[0], svc->name); svc->flags |= SVC_DISABLED; return; } if ((!(svc->flags & SVC_ONESHOT)) && dynamic_args) { ERROR("service '%s' must be one-shot to use dynamic args, disabling\n", svc->args[0]); svc->flags |= SVC_DISABLED; return; } NOTICE("starting '%s'\n", svc->name); pid = fork();//创建子进程 if (pid == 0) { //子进程 struct socketinfo *si; struct svcenvinfo *ei; char tmp[32]; int fd, sz; if (properties_inited()) { get_property_workspace(&fd, &sz); sprintf(tmp, "%d,%d", dup(fd), sz); add_environment("ANDROID_PROPERTY_WORKSPACE", tmp); } for (ei = svc->envvars; ei; ei = ei->next) add_environment(ei->name, ei->value); for (si = svc->sockets; si; si = si->next) { int socket_type = ( !strcmp(si->type, "stream") ? SOCK_STREAM : (!strcmp(si->type, "dgram") ? 
SOCK_DGRAM : SOCK_SEQPACKET)); int s = create_socket(si->name, socket_type, si->perm, si->uid, si->gid);//创建套接字 if (s >= 0) { publish_socket(si->name, s); } } if (svc->ioprio_class != IoSchedClass_NONE) { if (android_set_ioprio(getpid(), svc->ioprio_class, svc->ioprio_pri)) { ERROR("Failed to set pid %d ioprio = %d,%d: %s\n", getpid(), svc->ioprio_class, svc->ioprio_pri, strerror(errno)); } } if (needs_console) { setsid(); open_console(); } else { zap_stdio(); } #if 0 for (n = 0; svc->args[n]; n++) { INFO("args[%d] = '%s'\n", n, svc->args[n]); } for (n = 0; ENV[n]; n++) { INFO("env[%d] = '%s'\n", n, ENV[n]); } #endif setpgid(0, getpid()); /* as requested, set our gid, supplemental gids, and uid */ if (svc->gid) { setgid(svc->gid); } if (svc->nr_supp_gids) { setgroups(svc->nr_supp_gids, svc->supp_gids); } if (svc->uid) { setuid(svc->uid); } if (!dynamic_args) { if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0) { ERROR("cannot execve('%s'): %s\n", svc->args[0], strerror(errno));//执行服务的可执行文件 } } else { char *arg_ptrs[INIT_PARSER_MAXARGS+1]; int arg_idx = svc->nargs; char *tmp = strdup(dynamic_args); char *next = tmp; char *bword; /* Copy the static arguments */ memcpy(arg_ptrs, svc->args, (svc->nargs * sizeof(char *))); while((bword = strsep(&next, " "))) { arg_ptrs[arg_idx++] = bword; if (arg_idx == INIT_PARSER_MAXARGS) break; } arg_ptrs[arg_idx] = '\0'; execve(svc->args[0], (char**) arg_ptrs, (char**) ENV); } _exit(127); } if (pid < 0) { ERROR("failed to start '%s'\n", svc->name); svc->pid = 0; return; } svc->time_started = gettime(); svc->pid = pid; svc->flags |= SVC_RUNNING; if (properties_inited()) notify_service_state(svc->name, "running"); }
分析这段代码,主要内容:
a.检查当前service如zygote的flag即SVC_RUNNING(服务运行中),SVC_DISABLED等
b.fork一个子进程,子进程中会建立一个socket用于通信,同时使用if (execve(svc->args[0], (char**) svc->args, (char**) ENV) < 0)执行zygote对应的可执行文件,至此service zygote真正的启动。
到这里为止,对android系统的init启动有了清晰的了解,对init如何启动adbd,zygote等service有了一定的了解,以及对init.rc有了清晰的认识。init中还有部分内容等着后续几天做一定的学习。
补充:service进程的重启在restart_processes中进行,它会重启flag为SVC_RESTARTING的服务。这部分进程的重启其实在init中由handle_signal来管理,一旦出现service崩溃,poll函数会接收到相关文件变化的信息,执行handle_signal中的wait_for_one_process:
static int wait_for_one_process(int block) { pid_t pid; int status; struct service *svc; struct socketinfo *si; time_t now; struct listnode *node; struct command *cmd; while ( (pid = waitpid(-1, &status, block ? 0 : WNOHANG)) == -1 && errno == EINTR ); if (pid <= 0) return -1; INFO("waitpid returned pid %d, status = %08x\n", pid, status); svc = service_find_by_pid(pid); if (!svc) { ERROR("untracked pid %d exited\n", pid); return 0; } ..... svc->flags |= SVC_RESTARTING; /* Execute all onrestart commands for this service. */ list_for_each(node, &svc->onrestart.commands) { cmd = node_to_item(node, struct command, clist); cmd->func(cmd->nargs, cmd->args); } notify_service_state(svc->name, "restarting"); return 0; }
该函数使用waitpid,找到退出的子进程的进程号pid,然后查找到该service,对service中的onrestart这个commands进行操作,如restart media等。同时将service的flag设置为SVC_RESTARTING,这样就结合前面讲到的restart_processes重新启动该服务进程。