Linux虚拟文件系统之文件打开（sys_open()）

现在的位置: 首页 > 综合 > 正文

Linux虚拟文件系统之文件打开（sys_open()）

2013年12月08日 ⁄ 综合 ⁄ 共 7238字 ⁄ 字号小中大 ⁄ 评论关闭

在文件读写之前，我们必须先打开文件。从应用程序的角度来看，这是通过标准库的open函数完成的，该函数返回一个文件描述符。在内核中，这一工作由系统调用sys_open()完成。

/*
 * sys_open - entry point of the open(2) system call.
 *
 * Optionally forces O_LARGEFILE into the flags, then hands the request
 * to do_sys_open() with AT_FDCWD as the directory descriptor and returns
 * whatever do_sys_open() returns.
 */
SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
{
	long ret;

	/*
	 * When force_o_largefile() says so, O_LARGEFILE is set no matter
	 * what user space passed in.
	 * NOTE(review): presumably this is the case when the machine word
	 * is not 32 bits wide - confirm against force_o_largefile().
	 */
	flags |= (force_o_largefile()) ? O_LARGEFILE : 0;

	/* All of the actual work happens in do_sys_open(). */
	ret = do_sys_open(AT_FDCWD, filename, flags, mode);

	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(3, ret, filename, flags, mode);
	return ret;
}

实际工作由do_sys_open()完成：

/*
 * do_sys_open - common implementation behind the open system call.
 *
 * @dfd:      directory file descriptor, passed through to do_filp_open()
 * @filename: user-space pointer to the path name
 * @flags:    open flags, used both for the descriptor allocation and
 *            for the open itself
 * @mode:     mode passed through to do_filp_open()
 *
 * Returns the new file descriptor on success, or a negative error code
 * taken from getname(), get_unused_fd_flags() or do_filp_open().
 */
long do_sys_open(int dfd, const char __user *filename, int flags, int mode)
{
	struct file *f;
	int fd;
	/* Copy the path name in from the process address space. */
	char *tmp = getname(filename);

	if (IS_ERR(tmp)) {
		fd = PTR_ERR(tmp);
		return fd;
	}

	/*
	 * Reserve an unused file descriptor first.  The descriptor is the
	 * integer later bound to the struct file by fd_install() below.
	 */
	fd = get_unused_fd_flags(flags);
	if (fd < 0)
		goto out;

	/* With a descriptor in hand, do the actual open. */
	f = do_filp_open(dfd, tmp, flags, mode, 0);
	if (IS_ERR(f)) {
		/* Open failed: give the descriptor back, report the error. */
		put_unused_fd(fd);
		fd = PTR_ERR(f);
		goto out;
	}

	/* Success: send the open notification, then publish the file. */
	fsnotify_open(f->f_path.dentry);
	fd_install(fd, f);

out:
	/* The kernel copy of the name is released on every path past getname(). */
	putname(tmp);
	return fd;
}


打开文件的主体实现：do_filp_open()
/*
 * do_filp_open - resolve @pathname and return an opened struct file.
 *
 * @dfd:       directory file descriptor handed to the path lookup
 * @pathname:  path to open
 * @open_flag: open flags as passed in by the caller
 * @mode:      creation mode, stored in the open intent
 * @acc_mode:  requested access mask; if 0 it is derived from the flags
 *
 * Returns the struct file on success, or an ERR_PTR()-encoded negative
 * errno on failure.
 *
 * Without O_CREAT this is a plain lookup followed by the common "ok"
 * tail.  With O_CREAT the parent directory is looked up first, the last
 * component is looked up under the parent's i_mutex, and the file is
 * either created (negative dentry) or opened; a trailing symlink is
 * followed by hand via the "do_link" section, which loops back to
 * "do_last".
 *
 * Note that the low bits of the passed in "open_flag"
 * are not the same as in the local variable "flag". See
 * open_to_namei_flags() for more details.
 */
struct file *do_filp_open(int dfd, const char *pathname,
		int open_flag, int mode, int acc_mode)
{
	struct file *filp;
	struct nameidata nd;
	int error;
	struct path path;
	struct dentry *dir;
	int count = 0;
	int will_write;
	  /* Convert open_flag to the namei form; the low bits differ
	     (see the note above and open_to_namei_flags()) */
	int flag = open_to_namei_flags(open_flag);
	/* No explicit access mask given: derive one from the flags */
	if (!acc_mode)
		acc_mode = MAY_OPEN | ACC_MODE(flag);

	/* O_TRUNC implies we need access checks for write permissions */
	
	/* so request MAY_WRITE whenever O_TRUNC is set */
	if (flag & O_TRUNC)
		acc_mode |= MAY_WRITE;

	/* Allow the LSM permission hook to distinguish append 
	   access from general write access. */
	   /* O_APPEND adds MAY_APPEND to the access mask */
	if (flag & O_APPEND)
		acc_mode |= MAY_APPEND;

	/*
	 * The simplest case - just a plain lookup.
	 */
	  /* i.e. the caller is not asking to create the file */
	if (!(flag & O_CREAT)) {
		/* Look the path up with open intent, filling in nd.  On
		success nd is used directly by the common tail at "ok";
		on failure the error is returned as-is.
		NOTE(review): presumably nd.path then names the target
		itself - confirm against path_lookup_open(). */
		error = path_lookup_open(dfd, pathname, lookup_flags(flag),
					 &nd, flag);
		if (error)
			return ERR_PTR(error);
		goto ok;/* skip the creation logic below */
	}

	/*
	 * Create - we need to know the parent.
	 */
	 /* From here on O_CREAT is set */
	/* path_init() sets the lookup up and path_walk() performs it;
	LOOKUP_PARENT asks for the parent of the last component */
	error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
	if (error)
		return ERR_PTR(error);
	error = path_walk(pathname, &nd);
	if (error) {
		if (nd.root.mnt)
			path_put(&nd.root);
		return ERR_PTR(error);
	}
	if (unlikely(!audit_dummy_context()))
		/* hand the parent dentry to audit when the audit context
		is not the dummy one */
		audit_inode(pathname, nd.path.dentry);

	/*
	 * We have the parent and last component. First of all, check
	 * that we are not asked to creat(2) an obvious directory - that
	 * will not do.
	 */
	error = -EISDIR;
	/* Reject a last component that is not a normal name, or that has
	characters following it in the path string */
	if (nd.last_type != LAST_NORM || nd.last.name[nd.last.len])
		goto exit_parent;

	error = -ENFILE;
	 /* Get an empty struct file; NULL is reported as -ENFILE */
	filp = get_empty_filp();
	if (filp == NULL)
		goto exit_parent;
	/* Fill in the open intent carried by the nameidata */
	nd.intent.open.file = filp;
	nd.intent.open.flags = flag;
	nd.intent.open.create_mode = mode;
	dir = nd.path.dentry;
	nd.flags &= ~LOOKUP_PARENT;
	nd.flags |= LOOKUP_CREATE | LOOKUP_OPEN;
	if (flag & O_EXCL)
		nd.flags |= LOOKUP_EXCL;
	mutex_lock(&dir->d_inode->i_mutex);
	/* The walk above stopped at the parent directory.  Look up the
	last component (nd.last) in that directory, with the parent's
	i_mutex held, to get the dentry of the target itself; the mount
	is the parent's */
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;
	/* path now describes the target (or holds an error pointer) */
do_last:
	error = PTR_ERR(path.dentry);
	if (IS_ERR(path.dentry)) {
		mutex_unlock(&dir->d_inode->i_mutex);
		goto exit;
	}

	/* The intent's file slot may hold an error pointer by now;
	if so, report that error */
	if (IS_ERR(nd.intent.open.file)) {
		error = PTR_ERR(nd.intent.open.file);
		goto exit_mutex_unlock;
	}

	/* Negative dentry, just create the file */
	/* (a dentry without an inode: the file does not exist yet) */
	if (!path.dentry->d_inode) {
		/*
		 * This write is needed to ensure that a
		 * ro->rw transition does not occur between
		 * the time when the file is created and when
		 * a permanent write count is taken through
		 * the 'struct file' in nameidata_to_filp().
		 */
		 /* Creation needs write access to the mount */
		error = mnt_want_write(nd.path.mnt);
		if (error)
			goto exit_mutex_unlock;
		/* Create the file using the namei-form flags and mode.
		NOTE(review): the error path below goes to "exit" without
		unlocking i_mutex or putting path here - presumably
		__open_namei_create() has already done both; confirm. */
		error = __open_namei_create(&nd, &path, flag, mode);
		if (error) {
			mnt_drop_write(nd.path.mnt);
			goto exit;
		}
		/* Turn the nameidata into the struct file to return */
		filp = nameidata_to_filp(&nd, open_flag);
		if (IS_ERR(filp))
			ima_counts_put(&nd.path,
				       acc_mode & (MAY_READ | MAY_WRITE |
						   MAY_EXEC));
		/* Drop the temporary mount write access taken above */
		mnt_drop_write(nd.path.mnt);
		if (nd.root.mnt)
			path_put(&nd.root);
		return filp;
	}

	/*
	 * It already exists.
	 */
	  /* The parent's i_mutex is not needed any further */
	mutex_unlock(&dir->d_inode->i_mutex);
	/* hand the target dentry to audit */
	audit_inode(pathname, path.dentry);

	error = -EEXIST;
	if (flag & O_EXCL)
		goto exit_dput;
	/* Let __follow_mount() update path in place.  If it reports a
	change and O_NOFOLLOW is set, fail with -ELOOP.
	NOTE(review): presumably this descends through anything mounted
	on path down to the topmost mount's root - confirm. */
	if (__follow_mount(&path)) {
		error = -ELOOP;
		if (flag & O_NOFOLLOW)
			goto exit_dput;
	}

	error = -ENOENT;
	if (!path.dentry->d_inode)
		goto exit_dput;
	if (path.dentry->d_inode->i_op->follow_link)
		goto do_link;/* inode has ->follow_link: handle it as a link */
	/* Move the resolved path into nd */
	path_to_nameidata(&path, &nd);
	error = -EISDIR;
	/* A directory cannot be opened on this (O_CREAT) path */
	if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
		goto exit;
	/* From here on nd describes the final target */
ok:
	/*
	 * Consider:
	 * 1. may_open() truncates a file
	 * 2. a rw->ro mount transition occurs
	 * 3. nameidata_to_filp() fails due to
	 *    the ro mount.
	 * That would be inconsistent, and should
	 * be avoided. Taking this mnt write here
	 * ensures that (2) can not occur.
	 */
	will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
	if (will_write) {
		error = mnt_want_write(nd.path.mnt);
		if (error)
			goto exit;
	}
	/* Permission checks; per the note above may_open() may also
	truncate the file */
	error = may_open(&nd.path, acc_mode, flag);
	if (error) {
		if (will_write)
			mnt_drop_write(nd.path.mnt);
		goto exit;
	}
	/* Turn the nameidata into the struct file to return */
	filp = nameidata_to_filp(&nd, open_flag);
	if (IS_ERR(filp))
		ima_counts_put(&nd.path,
			       acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
	/*
	 * It is now safe to drop the mnt write
	 * because the filp has had a write taken
	 * on its behalf.
	 */
	if (will_write)
		/* drop the temporary mount write access */
		mnt_drop_write(nd.path.mnt);
	if (nd.root.mnt)
		/* drop the reference held on nd.root */
		path_put(&nd.root);
	return filp;

	/* Error exits.  The labels fall through, so each entry point
	performs its own cleanup plus everything below it. */
exit_mutex_unlock:
	mutex_unlock(&dir->d_inode->i_mutex);
exit_dput:
	path_put_conditional(&path, &nd);
exit:
	if (!IS_ERR(nd.intent.open.file))
		release_open_intent(&nd);
exit_parent:
	if (nd.root.mnt)
		path_put(&nd.root);
	path_put(&nd.path);
	return ERR_PTR(error);
/* The target has ->follow_link: resolve the link by hand, unless
O_NOFOLLOW forbids it */
do_link:
	error = -ELOOP;
	if (flag & O_NOFOLLOW)
		goto exit_dput;/* following links is not allowed: fail */
	/*
	 * This is subtle. Instead of calling do_follow_link() we do the
	 * thing by hands. The reason is that this way we have zero link_count
	 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
	 * After that we have the parent and last component, i.e.
	 * we are in the same situation as after the first path_walk().
	 * Well, almost - if the last component is normal we get its copy
	 * stored in nd->last.name and we will have to putname() it when we
	 * are done. Procfs-like symlinks just set LAST_BIND.
	 */
	 /* What follows resolves the link target's dentry by hand
          */
          /* Ask the walk to stop at the parent again */
	nd.flags |= LOOKUP_PARENT;
	/* Security hook: may this link be followed? */
	error = security_inode_follow_link(path.dentry, &nd);
	if (error)
		goto exit_dput;
	/* Follow the link; the result of the walk ends up in nd.
	NOTE(review): the error path below does not path_put(&nd.path)
	the way "exit_parent" does - presumably __do_follow_link() has
	already released it on failure; confirm. */
	error = __do_follow_link(&path, &nd);
	if (error) {
		/* Does someone understand code flow here? Or it is only
		 * me so stupid? Anathema to whoever designed this non-sense
		 * with "intent.open".
		 */
		release_open_intent(&nd);
		if (nd.root.mnt)
			path_put(&nd.root);
		return ERR_PTR(error);
	}
	nd.flags &= ~LOOKUP_PARENT;
	/* Examine what kind of last component the link resolved to */
	if (nd.last_type == LAST_BIND)
		goto ok;
	error = -EISDIR;
	if (nd.last_type != LAST_NORM)
		goto exit;
	/* Same trailing-characters check as before; nd.last.name is a
	copy here (see above) and must be freed on every way out */
	if (nd.last.name[nd.last.len]) {
		__putname(nd.last.name);
		goto exit;
	}
	error = -ELOOP;
	/* Loop guard: fail once count has already reached 32 */
	if (count++==32) {
		__putname(nd.last.name);
		goto exit;
	}
	dir = nd.path.dentry;
	mutex_lock(&dir->d_inode->i_mutex);
	/* Look up the new last component in its parent and retry */
	path.dentry = lookup_hash(&nd);
	path.mnt = nd.path.mnt;
	__putname(nd.last.name);
	goto do_last;
}

在内核中要打开一个文件，首先要找到这个文件。在VFS中，查找过程由do_path_lookup或者path_lookup_open函数来完成（文件路径查找的相关代码在前面已经分析过）。这两个函数把用户传进来的、以字符串表示的文件路径解析成对应的dentry结构，结果保存在nameidata中；随后内核建立好相应的inode和file结构，并由do_sys_open()通过fd_install()把file结构与文件描述符关联起来，再将该描述符返回给用户。此后，用户就通过文件描述符来访问这些数据结构。

【上篇】Google Sparse Hash
【下篇】ORACLE数据库表空间迁移–关于数据文件

作者: apo1119sostar

该日志由 apo1119sostar 于10年前发表在综合分类下，最后更新于 2013年12月08日.
转载请注明: Linux虚拟文件系统之文件打开（sys_open()） | 学步园 +复制链接

抱歉!评论已关闭.

学步园

Linux虚拟文件系统之文件打开（sys_open()）

作者: apo1119sostar

书签

最新文章New

本站推荐

返回首页