-- TOC --
这是一个很简陋的字符设备驱动程序:
$ cat mychar.c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/cdev.h>
MODULE_LICENSE("GPL");
/* Major number of this driver; set by mychar_init() from the dev_t that alloc_chrdev_region() returns. */
unsigned int major = 0;
/* The single character device this module registers. */
struct cdev mychar;
/* Forward declaration: the initialized definition comes after the handlers, so mychar_open() can compare against its address. */
struct file_operations mychar_fop;
/*
 * open handler of the mychar device.
 *
 * Logs the major/minor numbers read from the inode, reports whether the
 * file's f_op pointer is this driver's mychar_fop, and prints the raw
 * inode and file pointers. Always returns 0.
 */
int mychar_open(struct inode *inode, struct file *fp)
{
	unsigned int major_nr = imajor(inode);
	unsigned int minor_nr = iminor(inode);

	printk(KERN_INFO"[mychar] open called, major %u minor %u.\n",
	       major_nr, minor_nr);

	if (fp->f_op == &mychar_fop) {
		printk(KERN_INFO"[mychar] yes! fp->f_op == &mychar_fop\n");
	}

	/* %px prints the real (unhashed) kernel address; for testing only */
	printk(KERN_INFO"[mychar] inode = %px\n", inode);
	printk(KERN_INFO"[mychar] fp = %px\n", fp);

	return 0;
}
/*
 * release handler of the mychar device.
 *
 * Logs that it was called and prints the raw inode and file pointers.
 * Always returns 0.
 */
int mychar_release(struct inode *inode, struct file *fp)
{
	printk(KERN_INFO"[mychar] release called.\n");

	/* %px prints the real (unhashed) kernel address; for testing only */
	printk(KERN_INFO"[mychar] inode = %px\n", inode);
	printk(KERN_INFO"[mychar] fp = %px\n", fp);

	return 0;
}
/*
 * File operations of the mychar device. Only the owner, open and release
 * members are set; every other member is left zero-initialized.
 */
struct file_operations mychar_fop = {
.owner = THIS_MODULE,
.open = mychar_open,
.release = mychar_release,
};
static void mychar_exit(void) {
cdev_del(&mychar);
unregister_chrdev_region(MKDEV(major,0), 1);
printk(KERN_INFO"[mychar] exit.\n");
}
/*
 * Module entry point: reserve a char device number region and register
 * the cdev.
 *
 * Returns 0 on success, otherwise the non-zero error code reported by
 * alloc_chrdev_region() or cdev_add().
 */
static int __init mychar_init(void)
{
	dev_t dev;
	int rn;

	/* get a major number: one minor, starting at minor 0 */
	rn = alloc_chrdev_region(&dev, 0, 1, "mychar");
	if (rn) {
		printk(KERN_WARNING"[mychar] can't get major number, err %d.\n", rn);
		return rn;
	}

	major = MAJOR(dev);
	/* major is unsigned int, so it is printed with %u (as in mychar_open) */
	printk(KERN_INFO"[mychar] major is %u, 1 minor with 0.\n", major);

	/* init and add a cdev */
	cdev_init(&mychar, &mychar_fop);
	rn = cdev_add(&mychar, MKDEV(major, 0), 1);
	if (rn) {
		printk(KERN_WARNING"[mychar] cdev_add err %d.\n", rn);
		/* release what was set up so far; mychar_exit() is not __exit, so calling it here is allowed */
		mychar_exit();
		return rn;
	}

	return 0;
}
/* Register mychar_init as the module's init function and mychar_exit as its exit function. */
module_init(mychar_init);
module_exit(mychar_exit);
mychar_init
首先以动态的方式获取一个major number,调用alloc_chrdev_region
,返回0表示成功!
内核接口调用成功,返回0,失败,返回一个负数,因此只要if条件满足,就是失败。从函数名可以推测,char device和block device,属于不同的region,major number也是独立分配的。用cat /proc/devices
,的确可以在char和block中,找到相同的major编号。
alloc_chrdev_region
函数的第1个参数是输出参数,用来接收分配到的dev_t,第2个参数表示minor number的开始数字,一般都是0,即minor从0开始,不浪费,第3个参数表示一共的minor数量。此例minor从0开始,只有1个minor编号。为了简单,代码中表示minor的这两个数字,都直接写死了,没有使用macro。
One major, one driver, but there can be more than one device in a single driver! For example, /dev/zero and /dev/null both belong to major 1.
与dev_t
类型匹配的三个helper macro是MAJOR
,MINOR
和MKDEV
,上面代码只有MINOR没有使用,因为就是0这一个。这几个macro定义在include/linux/kdev_t.h
文件中:
/* Number of low-order bits of a device number that hold the minor number. */
#define MINORBITS 20
/* Mask that selects the low MINORBITS bits. */
#define MINORMASK ((1U << MINORBITS) - 1)
/* Extract the major number: everything above the minor field. */
#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
/* Extract the minor number: the low MINORBITS bits. */
#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
/* Combine a major number and a minor number into one device number. */
#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi))
从inode中取出major和minor,分别使用
imajor
和iminor
,为了保持兼容,别自己去解析。
dev_t
的定义在include/linux/types.h
,另一个数据类型fmode_t
也定义在此文件中:
include/linux/types.h:typedef u32 __kernel_dev_t;
include/linux/types.h:typedef __kernel_dev_t dev_t;
include/linux/types.h:typedef unsigned int __bitwise fmode_t;
__bitwise只是一个annotation,阅读代码时提示这个类型的使用方式,编译的时候它就没有了,这个macro是一个空符号!类似的还有__user,提示指向user space。
所以,全局变量major要定义成unsigned,我看kernel中的一些代码,也是用这个定义,估计长期来看用32bit来分割major和minor不会改变,使用上面的几个macro,可以保证如果MINORBITS发生变化后,代码可以兼容。不建议自己去取32bit中的这两个部分。
另一个全局变量是struct cdev mychar
,它是cdev结构体,表示一个char device。代码在得到major之后,用cdev_init
接口初始化mychar,接着用cdev_add
将此char设备加入kernel,此时这个char设备就被激活,kernel就能看到这个设备了。
先关注mychar_fop
这个结构体变量,为了简单,这个例子仅仅定义了3个成员,.owner
指向当前这个模块,THIS_MODULE
是指向当前这个模块的地址,我估计是在调用insmod命令的时候,内核就分配了这个地址,在执行驱动init的过程中使用。.open
和.release
成员分别对应mychar的两个函数。
cdev_init
接口的第1个参数,是struct cdev mychar
结构体地址,第2个参数是mychar_fop
结构体地址,这个函数在fs/char_dev.c
文件中:
/**
* cdev_init() - initialize a cdev structure
* @cdev: the structure to initialize
* @fops: the file_operations for this device
*
* Initializes @cdev, remembering @fops, making it ready to add to the
* system with cdev_add().
*/
void cdev_init(struct cdev *cdev, const struct file_operations *fops)
{
/* Start from an all-zero structure. */
memset(cdev, 0, sizeof *cdev);
/* Make cdev->list an empty list head; nothing is linked into it here. */
INIT_LIST_HEAD(&cdev->list);
/* Initialize the embedded kobject with the ktype_cdev_default type. */
kobject_init(&cdev->kobj, &ktype_cdev_default);
/* Remember the driver's file operations. */
cdev->ops = fops;
}
将cdev结构体清零做初始化,初始化它内部的list头(INIT_LIST_HEAD只是把这个list置为空,此时并没有把代表这个char device的变量加入内核的任何list)和内嵌的kobject,最后做cdev->ops = fops
赋值!
cdev_add
接口第1个参数也是mychar指针,第2个参数是dev_t,第3个参数count表示从这个dev_t开始、对应此设备的连续minor编号的个数,一般都是1。
There are a couple of important things to keep in mind when using cdev_add. The first is that this call can fail. If it returns a negative error code, your device has not been added to the system. It almost always succeeds, however, and that brings up the other point: as soon as cdev_add returns, your device is “live” and its operations can be called by the kernel. You should not call cdev_add until your driver is completely ready to handle operations on the device.
mychar_exit
函数对所有资源进行释放,由于在init中也要调用它,因此没有使用__exit token。cdev_del
接口只有一个参数,mychar指针。unregister_chrdev_region
接口注销driver的major number,第1个参数是dev_t first,第2个参数是count,跟alloc_chrdev_region对应。
其实char_dev.c中还有一个cdev_alloc接口,没有使用是因为这个接口内有内存申请,但是我却无法确认
cdev_del
接口是否能够将其释放!
用insmod命令加载此drivers后,还需要用mknod命令创建对应的设备文件节点。习惯上我们都把设备文件放在/dev
路径下面,但是其实放在任何位置都可以。设备文件节点与驱动的对应关系,我觉得应该就是设备类型,以及major和minor这两个数字。创建设备节点需要major和minor,通过cat /proc/devices | grep mychar
可以查询到major,minor不用查,是自己在代码中定义的。
建好节点后,我们就可以对mychar这个驱动执行open和close系统调用,查看其运行情况,我用Python代码直接操作这个设备文件节点,这样比较简单:
$ pwd
/home/xinlin/test/mychar
$ ls mychar.c
mychar.c
$ cat Makefile
obj-m := mychar.o
$ touch mychar.c
$ make -C ~/sources/linux-5.14.14/ M=`pwd` modules
make: Entering directory '/home/xinlin/sources/linux-5.14.14'
CC [M] /home/xinlin/test/mychar/mychar.o
MODPOST /home/xinlin/test/mychar/Module.symvers
LD [M] /home/xinlin/test/mychar/mychar.ko
make: Leaving directory '/home/xinlin/sources/linux-5.14.14'
$ sudo insmod mychar.ko
$ cat /proc/devices | grep mychar # get major number
237 mychar
$ sudo mknod -m 666 AA c 237 0 # must be correct major and minor
$ ls -l AA
crw-rw-rw- 1 root root 237, 0 12月 3 14:31 AA
$ python3 -q
>>> f = open('AA')
>>> g = open('AA')
>>> f.close()
>>> g.close()
>>> exit()
$ dmesg | tail -n 15
[626764.646083] [mychar] major is 237, 1 minor with 0.
[626836.440115] [mychar] open called, major 237 minor 0.
[626836.440161] [mychar] yes! fp->f_op == &mychar_fop
[626836.440164] [mychar] inode = ffff918856905d20
[626836.440165] [mychar] fp = ffff91884debf500
[626841.439979] [mychar] open called, major 237 minor 0.
[626841.439984] [mychar] yes! fp->f_op == &mychar_fop
[626841.439984] [mychar] inode = ffff918856905d20
[626841.439985] [mychar] fp = ffff91884a6e6500
[626843.927790] [mychar] release called.
[626843.927792] [mychar] inode = ffff918856905d20
[626843.927793] [mychar] fp = ffff91884debf500
[626845.769069] [mychar] release called.
[626845.769071] [mychar] inode = ffff918856905d20
[626845.769072] [mychar] fp = ffff91884a6e6500
从dmesg命令的打印,我们可以分析一下。驱动init代码在内核中只是注册了一个cdev结构体,file operations也是挂在这个cdev下面,但当open被调用的时候,file结构体中的f_op指针与驱动自定义的mychar_fop的地址相等,这说明,内核在open系统调用的时候,创建了一个file结构体,并将已经注册在内核的cdev->ops(即&mychar_fop)赋值给了fp->f_op。另外,上面的打印,inode值都是一样的,inode对应某个具体的文件,它是不变的,但是每次open,都会新建一个file结构体,看上面打印,两次open调用fp的值不一样!当close系统调用时,release函数被调用。
printk
用%px
打印真实的内核指针地址,注意只能在测试的时候使用!
如果我们创建一个major和minor不正确的设备节点:
$ sudo mknod -m 666 BB c 237 1
$ python3 -c 'import os;print(os.getcwd());f=open("BB");f.close()'
/home/xinlin/test/mychar
Traceback (most recent call last):
File "<string>", line 1, in <module>
OSError: [Errno 6] No such device or address: 'BB'
node文件是存在了,但是当open的时候,内核一定是发现major和minor没有对应的驱动,因此返回错误!此时,驱动的open没有被调用,dmesg没有新增信息。
kernel还会在invoke驱动之前,检查权限是否足够。
经过以上分析,整个流程就清楚了!
In include/linux/fs.h
:
/*
 * Table of operations for an open file. Apart from owner and
 * mmap_supported_flags, every member is a function pointer. The mychar
 * driver in this article sets only owner, open and release.
 */
struct file_operations {
struct module *owner;
loff_t (*llseek) (struct file *, loff_t, int);
ssize_t (*read) (struct file *, char __user *, size_t, loff_t *);
ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iopoll)(struct kiocb *kiocb, bool spin);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
/* Plain data member, not a callback. */
unsigned long mmap_supported_flags;
int (*open) (struct inode *, struct file *);
int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*fasync) (int, struct file *, int);
int (*lock) (struct file *, int, struct file_lock *);
ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int);
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **, void **);
long (*fallocate)(struct file *file, int mode, loff_t offset,
loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
/* Only present when the kernel is built without CONFIG_MMU. */
#ifndef CONFIG_MMU
unsigned (*mmap_capabilities)(struct file *);
#endif
ssize_t (*copy_file_range)(struct file *, loff_t, struct file *,
loff_t, size_t, unsigned int);
loff_t (*remap_file_range)(struct file *file_in, loff_t pos_in,
struct file *file_out, loff_t pos_out,
loff_t len, unsigned int remap_flags);
int (*fadvise)(struct file *, loff_t, loff_t, int);
} __randomize_layout;
这个结构体内,除了owner和mmap_supported_flags这两个成员,其余全部都是函数指针,即所谓operations。这些操作,全部都是通过系统调用进行调用。Linux内核看到的device都是文件,因此叫做file operation,这些指针是否赋值,都属于接口的一部分,有的成员即使为NULL,对应的系统调用也可以被调用。对应本文,open系统调用会执行mychar_open,close系统调用可能会执行mychar_release。
In include/linux/fs.h
:
/*
 * Kernel-side object for an open file. The mychar open and release
 * handlers in this article each receive a pointer to one of these, and
 * mychar_open() compares its f_op member against &mychar_fop.
 */
struct file {
union {
struct llist_node fu_llist;
struct rcu_head fu_rcuhead;
} f_u;
struct path f_path;
struct inode *f_inode; /* cached value */
/* Operations table used for this open file. */
const struct file_operations *f_op;
/*
* Protects f_ep, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
enum rw_hint f_write_hint;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
u64 f_version;
#ifdef CONFIG_SECURITY
void *f_security;
#endif
/* needed for tty driver, and maybe others */
void *private_data;
#ifdef CONFIG_EPOLL
/* Used by fs/eventpoll.c to link all the hooks to this file */
struct hlist_head *f_ep;
#endif /* #ifdef CONFIG_EPOLL */
struct address_space *f_mapping;
errseq_t f_wb_err;
errseq_t f_sb_err; /* for syncfs */
} __randomize_layout
__attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
kernel的代码也不是全都非常整洁呀...不要有代码洁癖!
这个file是kernel中的,与user space的FILE不一样,相互之间也不可见!
每个open系统调用,都会在内核中创建一个file结构体。
f_mode
表示这个文件是以什么模式打开的(FMODE_READ、FMODE_WRITE等bit)。注意它不是我们用ls命令可以看到的文件权限bit,权限bit保存在inode的i_mode中。
f_pos
,当前position:
The current reading or writing position. loff_t is a 64-bit value on all platforms ( long long in gcc terminology). The driver can read this value if it needs to know the current position in the file but should not normally change it; read and write should update a position using the pointer they receive as the last argument instead of acting on filp->f_pos directly. The one exception to this rule is in the llseek method, the purpose of which is to change the file position.
private_data
这个指针用的比较多,open之后,驱动可以在这个地方放一些自定义的数据,在后续其它调用中,可以直接从这个指针位置取出这些数据,但是要注意这里的数据空间要驱动来释放。
f_count
的作用需要说一下(应该就是这个count,没有别的count呀),并不是每次close系统调用,都会触发驱动的release操作。比如fork之后,已经open的file,在新进程中还是open的,只是这个file的count增加了1。因此,只有最后一个close,即count为0的时候,才会触发驱动的release。
本文链接:https://cs.pynote.net/sf/linux/dd/202112031/
-- EOF --
-- MORE --