Netfilter源码分析（3）

2020-05-27 16:46:03

表的注册中涉及到的重要函数

表注册函数中，主要涉及到的重要函数有：
translate_table
replace_table
list_named_find
list_prepend

1、translate_table
/*
 * translate_table() - validate a freshly loaded rule blob and finish
 * setting up its ipt_table_info.
 *
 * name:         table name, handed on to check_entry()
 * valid_hooks:  bitmask; bit h set means hook h belongs to this table and
 *               must end up with a hook entry and an underflow
 * newinfo:      table info whose entries[] area holds the rules
 * size:         byte size of the rule area (the iteration bound)
 * number:       number of rules the caller claims the area contains
 * hook_entries: per-hook offsets (relative to newinfo->entries) of the
 *               first rule of each hook
 * underflows:   per-hook offsets (relative to newinfo->entries) of each
 *               hook's underflow rule
 *
 * Returns 0 on success, -EINVAL when the rule count or a required
 * hook/underflow offset does not match, -ELOOP when mark_source_chains()
 * rejects the table, or the first non-zero value returned by one of the
 * per-entry callbacks.
 */

static int
translate_table(const char *name,
		unsigned int valid_hooks,
		struct ipt_table_info *newinfo,
		unsigned int size,
		unsigned int number,
		const unsigned int *hook_entries,
		const unsigned int *underflows)
{
	unsigned int hook;
	unsigned int cpu;
	unsigned int counted = 0;	/* entries seen by the first pass */
	unsigned int checked = 0;	/* entries seen by check_entry() */
	int err;

	newinfo->size = size;
	newinfo->number = number;

	/* Start every hook at an impossible offset so that hooks never
	   matched by the first pass can be detected below. */
	hook = 0;
	while (hook < NF_IP_NUMHOOKS) {
		newinfo->hook_entry[hook] = 0xFFFFFFFF;
		newinfo->underflow[hook] = 0xFFFFFFFF;
		hook++;
	}

	duprintf("translate_table: size %u\n", newinfo->size);

	/* First pass: per-entry offset/size checks.  The callback also
	   records hook entry/underflow offsets and counts the entries. */
	err = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry_size_and_hooks,
				newinfo,
				newinfo->entries,
				newinfo->entries + size,
				hook_entries, underflows, &counted);
	if (err)
		return err;

	/* The blob must contain exactly as many entries as announced. */
	if (counted != number) {
		duprintf("translate_table: %u not %u entries\n",
			 counted, number);
		return -EINVAL;
	}

	/* Every hook this table claims must have been assigned both a
	   hook entry and an underflow during the first pass. */
	for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
		if (valid_hooks & (1 << hook)) {
			if (newinfo->hook_entry[hook] == 0xFFFFFFFF) {
				duprintf("Invalid hook entry %u %u\n",
					 hook, hook_entries[hook]);
				return -EINVAL;
			}
			if (newinfo->underflow[hook] == 0xFFFFFFFF) {
				duprintf("Invalid underflow %u %u\n",
					 hook, underflows[hook]);
				return -EINVAL;
			}
		}
	}

	/* A zero result from mark_source_chains() is reported as a loop. */
	if (!mark_source_chains(newinfo, valid_hooks))
		return -ELOOP;

	/* Second pass: full per-entry validation via check_entry(). */
	err = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				check_entry, name, size, &checked);
	if (err) {
		/* Hand the number of entries processed so far to
		   cleanup_entry() (presumably so it undoes only those --
		   confirm against cleanup_entry), then report the error. */
		IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
				  cleanup_entry, &checked);
		return err;
	}

	/* Replicate the validated rules into the slot of every other CPU;
	   slot 0 already holds them. */
	for (cpu = 1; cpu < smp_num_cpus; cpu++) {
		memcpy(newinfo->entries + SMP_ALIGN(newinfo->size)*cpu,
		       newinfo->entries,
		       SMP_ALIGN(newinfo->size));
	}

	return 0;
}

函数的核心处理，是调用了IPT_ENTRY_ITERATE，我在《iptables源码分析》中已提过，这个宏用来遍历每一个规则，然后
调用其第三个参数（函数指针）进行处理，前两个参数分别表示规则的起始位置和规则总大小，后面的参数则视情况而定。
再来看一次：
/*
 * IPT_ENTRY_ITERATE - walk the variable-length entries packed in a blob.
 *
 * entries: start of the blob; each entry is addressed as entries + offset
 * size:    iteration bound; the walk continues while offset < size
 * fn:      callback invoked as fn(entry, args...) for every entry
 * args:    optional extra arguments forwarded unchanged to fn
 *
 * The offset starts at 0 and advances by the current entry's next_offset
 * field after each callback.  fn returns 0 to continue iteration; the
 * first non-zero return stops the walk.  The macro is a GNU statement
 * expression whose value is that non-zero result, or 0 if every call
 * returned 0 (or the blob is empty).
 *
 * Note: `entries` and `size` are evaluated on every iteration, so pass
 * expressions without side effects.
 */
#define IPT_ENTRY_ITERATE(entries, size, fn, args...) \
({ \
unsigned int __i; \
int __ret = 0; \
struct ipt_entry *__entry; \
\
for (__i = 0; __i < (size); __i += __entry->next_offset) { \
__entry = (void *)(entries) + __i; \
\
__ret = fn(__entry , ## args); \
if (__ret != 0) \
break; \
} \
__ret; \
})

对应地，函数中第一次对该宏的调用为：
ret = IPT_ENTRY_ITERATE(newinfo->entries, newinfo->size,
check_entry_size_and_hooks,
newinfo,
newinfo->entries,
newinfo->entries + size,
hook_entries, underflows, &i);
遍历到每一项规则后，就调用check_entry_size_and_hooks继续处理。

/*
 * check_entry_size_and_hooks() - per-entry sanity check used as an
 * IPT_ENTRY_ITERATE callback.
 *
 * e:            entry being examined
 * newinfo:      table info whose hook_entry[]/underflow[] are filled in
 * base:         start of the rule blob (offsets are relative to it)
 * limit:        one past the end of the rule blob
 * hook_entries: per-hook offsets announced for the first rule of a hook
 * underflows:   per-hook offsets announced for the underflow rule
 * i:            running entry counter, incremented for each accepted entry
 *
 * Returns 0 when the entry is acceptable, -EINVAL otherwise.
 */
static inline int
check_entry_size_and_hooks(struct ipt_entry *e,
			   struct ipt_table_info *newinfo,
			   unsigned char *base,
			   unsigned char *limit,
			   const unsigned int *hook_entries,
			   const unsigned int *underflows,
			   unsigned int *i)
{
	unsigned int h;

	/* The entry must be correctly aligned for struct ipt_entry, and its
	   fixed-size header must end before the end of the blob. */
	if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
	    || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
		duprintf("Bad offset %p\n", e);
		return -EINVAL;
	}

	/* next_offset must cover at least the entry header plus a target
	   header, and must not reach past the end of the blob.  Without
	   the upper bound a malformed final entry could claim to extend
	   beyond `limit`.  The header check above guarantees e < limit,
	   so the subtraction cannot go negative, and comparing against the
	   remaining byte count avoids pointer overflow. */
	if (e->next_offset
	    < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)
	    || e->next_offset
	    > (unsigned long)(limit - (unsigned char *)e)) {
		duprintf("checking: element %p size %u\n",
			 e, e->next_offset);
		return -EINVAL;
	}

	/* Record the hook entry / underflow offsets that land exactly on
	   this entry. */
	for (h = 0; h < NF_IP_NUMHOOKS; h++) {
		if ((unsigned char *)e - base == hook_entries[h])
			newinfo->hook_entry[h] = hook_entries[h];
		if ((unsigned char *)e - base == underflows[h])
			newinfo->underflow[h] = underflows[h];
	}

	/* FIXME: underflows must be unconditional, standard verdicts
	   < 0 (not IPT_RETURN). --RR */

	/* Clear counters and comefrom */
	e->counters = ((struct ipt_counters) { 0, 0 });
	e->comefrom = 0;

	/* One more entry accepted. */
	(*i)++;
	return 0;
}

2、replace_table
前面说过，表中以struct ipt_table_info *private;表示实际数据区。但是在初始化赋值的时候，被设为
NULL，而表的初始变量都以模版的形式，放在struct ipt_replace *table;中。
注册函数一开始，就声明了：
struct ipt_table_info *newinfo;
然后对其分配了空间，将模块中的初值拷贝了进来。所以replace_table要做的工作，主要就是把newinfo中的
值传递给table结构中的private成员。

其函数原型如下：

/*
 * replace_table() - swap a table's private rule data for newinfo.
 *
 * table:        table whose ->private pointer is replaced
 * num_counters: entry count the caller believes the current table has
 * newinfo:      replacement table info
 * error:        set to -EAGAIN when the swap is refused; left untouched
 *               on success
 *
 * The check and the swap both happen under table->lock taken with
 * write_lock_bh().  Returns the previous table info on success, or NULL
 * when num_counters does not match the current table's entry count.
 */
static struct ipt_table_info *
replace_table(struct ipt_table *table,
	      unsigned int num_counters,
	      struct ipt_table_info *newinfo,
	      int *error)
{
	struct ipt_table_info *previous;

#ifdef CONFIG_NETFILTER_DEBUG
	{
		unsigned int cpu = 0;

		/* Debug builds only: stamp a marker value into comefrom of
		   the first entry in every CPU's copy of the rules. */
		while (cpu < smp_num_cpus) {
			struct ipt_entry *first =
				(void *)newinfo->entries
				+ TABLE_OFFSET(newinfo, cpu);

			first->comefrom = 0xdead57ac;
			cpu++;
		}
	}
#endif

	/* Do the substitution. */
	write_lock_bh(&table->lock);

	/* Check inside lock: is the old number correct? */
	previous = table->private;
	if (num_counters != previous->number) {
		duprintf("num_counters != table->private->number (%u/%u)\n",
			 num_counters, previous->number);
		write_unlock_bh(&table->lock);
		*error = -EAGAIN;
		return NULL;
	}

	/* Carry initial_entries over from the old info, then install the
	   new info. */
	newinfo->initial_entries = previous->initial_entries;
	table->private = newinfo;
	write_unlock_bh(&table->lock);

	return previous;
}

3、list_named_find

在注册函数中，调用
/* Don't autoload: we'd eat our tail... */
if (list_named_find(&ipt_tables, table->name)) {
ret = -EEXIST;
goto free_unlock;
}
来检查当前表是否已被注册过了。可见，第一个参数为链表首部，第二个参数为当前表名。
其原型如下：
/*
 * Find this named element in the list.
 *
 * Expands to LIST_FIND over `head`, using __list_cmp_name as the
 * comparison function and `name` as its extra argument.  The result is a
 * void * to the first element whose name compares equal, or NULL when no
 * element matches.
 */
#define list_named_find(head, name) \
LIST_FIND(head, __list_cmp_name, void *, name)

/*
 * Return pointer to first true entry, if any, or NULL.  A macro
 * required to allow inlining of cmpfn.
 *
 * head:  list head; the walk starts at head->next and stops when it gets
 *        back to head, so the head itself is never passed to cmpfn
 * cmpfn: predicate called as cmpfn((const type)node, args...); a non-zero
 *        result selects that node and ends the walk
 * type:  pointer type the nodes are cast to, and the type of the result
 * args:  optional extra arguments forwarded to cmpfn
 *
 * When the walk wraps around to head without a match, the cursor is set
 * to NULL and the loop is left via `break`, so the value of the statement
 * expression is NULL.
 *
 * `head` is evaluated several times; pass an expression without side
 * effects.  ASSERT_READ_LOCK is defined elsewhere -- presumably it checks
 * that the caller holds the list's read lock (confirm).
 */
#define LIST_FIND(head, cmpfn, type, args...) \
({ \
const struct list_head *__i = (head); \
\
ASSERT_READ_LOCK(head); \
do { \
__i = __i->next; \
if (__i == (head)) { \
__i = NULL; \
break; \
} \
} while (!cmpfn((const type)__i , ## args)); \
(type)__i; \
})

前面提过，表是一个双向链表，在宏当中，以while进行循环，以__i = __i->next;
进行遍历，然后调用比较函数进行比较，传递过来的比较函数是__list_cmp_name。

比较函数很简单：
/*
 * Name predicate for LIST_FIND: returns 1 when the NUL-terminated string
 * stored sizeof(struct list_head) bytes into element `i` equals `name`,
 * 0 otherwise.  This relies on each element starting with a struct
 * list_head immediately followed by its name.
 */
static inline int __list_cmp_name(const void *i, const char *name)
{
	const char *element_name = (const char *)i + sizeof(struct list_head);

	return strcmp(name, element_name) == 0;
}

4、list_prepend
当所有的初始化工作结束，就调用list_prepend来构建链表了。
/*
 * Prepend: pass `new` to list_add() with `head` as the insertion point.
 * `new` is taken as void * and used as a struct list_head *.
 * ASSERT_WRITE_LOCK is defined elsewhere -- presumably it checks that the
 * caller holds the list's write lock rather than taking it (confirm).
 */
static inline void
list_prepend(struct list_head *head, void *new)
{
	struct list_head *node = new;

	ASSERT_WRITE_LOCK(head);
	list_add(node, head);
}
list_add就是一个构建双向链表的过程：
/*
 * Insert `new` directly after `head`, i.e. between head and the node
 * that currently follows it.
 */
static __inline__ void list_add(struct list_head *new, struct list_head *head)
{
	struct list_head *following = head->next;

	__list_add(new, head, following);
}

/*
 * Link `new` between two nodes: afterwards prev->next and next->prev both
 * point at `new`, and `new` points back at both neighbours.
 */
static __inline__ void __list_add(struct list_head * new,
				  struct list_head * prev,
				  struct list_head * next)
{
	/* Forward link into the successor, then the successor's back link. */
	new->next = next;
	next->prev = new;

	/* Back link into the predecessor, then the predecessor's forward link. */
	new->prev = prev;
	prev->next = new;
}

文章来源CU社区：[原创]Netfilter源码分析-我来抛砖，望能引玉

分享好友

分享这个小栈给你的朋友们，一起进步吧。

内核源码

创建时间：2020-05-18 13:36:55

内核源码精华帖内容汇总

展开

订阅须知

• 所有用户可根据关注领域订阅专区或所有专区

• 付费订阅：虚拟交易，一经交易不退款；若特殊情况，可3日内客服咨询

• 专区发布评论属默认订阅所评论专区（除付费小栈外）

技术专家

查看更多

飘絮絮絮丶
专家