2021SC@SDUSC
上一条博文已经讲过,除了本身的Ext2之外,Linux支持多种文件系统(Ext4,FAT等),用户可以通过一个文件系统界面(系统调用)来操作不同的文件系统。从用户的角度看,我们并不需要关注文件系统的类型以及怎么进行具体的操作,这就是虚拟文件系统(VFS)。VFS为用户提供了一组系统调用,如读写文件read()、write()以及移动文件指针lseek()等。
那么,内核与不同的文件系统的接口是怎么实现的呢?
这就要提到内核源码include/linux目录下fs.h文件中的file_operations数据结构。
/*
 * Table of file operations supplied by each concrete filesystem.
 *
 * Apart from `owner` and `mmap_supported_flags`, every member is a
 * function pointer. As the surrounding article explains, each filesystem
 * provides its own instance, e.g. `write` points at that filesystem's
 * routine for writing a file.
 *
 * Member order is kept exactly as quoted; only layout was repaired
 * (preprocessor directives on their own lines, terminating semicolon).
 */
struct file_operations {
	struct module *owner;
	loff_t (*llseek) (struct file *, loff_t, int);
	ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
	ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
	ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
	ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
	int (*iopoll)(struct kiocb *kiocb, bool spin);
	int (*iterate) (struct file *, struct dir_context *);
	int (*iterate_shared) (struct file *, struct dir_context *);
	__poll_t (*poll) (struct file *, struct poll_table_struct *);
	long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
	long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
	int (*mmap) (struct file *, struct vm_area_struct *);
	unsigned long mmap_supported_flags;
	int (*open) (struct inode *, struct file *);
	int (*flush) (struct file *, fl_owner_t id);
	int (*release) (struct inode *, struct file *);
	int (*fsync) (struct file *, loff_t, loff_t, int datasync);
	int (*fasync) (int, struct file *, int);
	int (*lock) (struct file *, int, struct file_lock *);
	ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
	unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
	int (*check_flags)(int);
	int (*flock) (struct file *, int, struct file_lock *);
	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
	int (*setlease)(struct file *, long, struct file_lock **, void **);
	long (*fallocate)(struct file *file, int mode, loff_t offset, loff_t len);
	void (*show_fdinfo)(struct seq_file *m, struct file *f);
	/* Only present when the kernel is configured without CONFIG_MMU. */
#ifndef CONFIG_MMU
	unsigned (*mmap_capabilities)(struct file *);
#endif
	ssize_t (*copy_file_range)(struct file *, loff_t, struct file *, loff_t, size_t, unsigned int);
	loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
				   struct file *file_out, loff_t pos_out,
				   loff_t len, unsigned int remap_flags);
	int (*fadvise)(struct file *, loff_t, loff_t, int);
};
不难发现,该结构体内几乎全都是函数指针,具体到每一个文件系统,均需实现自己的file_operations。比如,write就指向该文件系统的用来实现写文件操作的函数。
每个进程通过open()与具体的文件建立起一个读写联系。在代表进程的task_struct数据结构中有两个指针fs和files,分别指向代表文件系统信息的fs_struct结构体和代表已打开文件信息的files_struct结构体。
在fs_struct结构体中:
/*
 * Filesystem context of a process.
 *
 * Per the accompanying article text: root, pwd and altroot are the
 * directory entries for the process's root directory, current directory
 * and alternate root; because those directories need not live on the same
 * filesystem, rootmnt, pwdmnt and altrootmnt point at the corresponding
 * vfsmount structures.
 */
struct fs_struct {
	atomic_t count;
	rwlock_t lock;
	int umask;

	/* Directory entries. */
	struct dentry *root;
	struct dentry *pwd;
	struct dentry *altroot;

	/* Mount each of the directories above belongs to. */
	struct vfsmount *rootmnt;
	struct vfsmount *pwdmnt;
	struct vfsmount *altrootmnt;
};
root,pwd,altroot这三个指针指向结构体dentry,而dentry记录的是目录项,所以root代表的是进程的根目录,pwd代表的是进程当前的目录, altroot代表的是替换根目录。实际上,这三个目录不一定安装在同一个文件系统中,所以用rootmnt,pwdmnt和altrootmnt这三个指针指向对应的vfsmount数据结构。
在files_struct结构体中:
/*
 * Open-file information of a process. The trailing fd_array holds
 * NR_OPEN_DEFAULT pointers to struct file.
 */
struct files_struct {
	/* Members that are mostly read. */
	atomic_t count;
	bool resize_in_progress;
	wait_queue_head_t resize_wait;

	struct fdtable __rcu *fdt;
	struct fdtable fdtab;

	/* Members that are written; kept on a separate cache line in SMP. */
	spinlock_t file_lock ____cacheline_aligned_in_smp;
	unsigned int next_fd;
	unsigned long close_on_exec_init[1];
	unsigned long open_fds_init[1];
	unsigned long full_fds_bits_init[1];
	struct file __rcu *fd_array[NR_OPEN_DEFAULT];
};
files_struct结构体的主要内容就是指向file结构体的指针数组fd_array,每个指针所指向的file结构体保存的是进程已经打开的一个文件的相关信息。
在file结构体中:
struct file { union { struct llist_node fu_llist; struct rcu_head fu_rcuhead; } f_u; struct path f_path; struct inode *f_inode; /* cached value */ const struct file_operations *f_op; /* * Protects f_ep, f_flags. * Must not be taken from IRQ context. */ spinlock_t f_lock; enum rw_hint f_write_hint; atomic_long_t f_count; unsigned int f_flags; fmode_t f_mode; struct mutex f_pos_lock; loff_t f_pos; struct fown_struct f_owner; const struct cred *f_cred; struct file_ra_state f_ra; u64 f_version; #ifdef CONFIG_SECURITY void *f_security; #endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct hlist_head *f_ep; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; errseq_t f_wb_err; errseq_t f_sb_err; /* for syncfs */ }
f_op指针指向该文件所属文件系统的file_operations结构体,通过file可以知道如何对该文件进行操作。f_path中的dentry指针(即f_path.dentry,旧版内核中为f_dentry)指向该文件的dentry数据结构,记录了该文件的目录项。
Linux通过VFS提供一组约定的数据结构(dentry、inode等),它们在进程通过open()与某一个文件建立联系之后被初始化。虽然不同文件系统的实现细节不同,但是它们都会有实现类似功能的具体数据结构。