前面说到,对于filter表来说,所有的一切,要靠ipt_do_table函数来进行包配备,前面提过,过滤规则分为三部份:标准mathc,扩展match,target。可以预想一想,ipt_do_table就是要针对这三部份来过滤,来看一下该函数:
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff **pskb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
struct ipt_table *table,
void *userdata)
{
static const char nulldevname[IFNAMSIZ] = { 0 };
u_int16_t offset;
struct iphdr *ip;
void *protohdr;
u_int16_t datalen;
int hotdrop = 0;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
void *table_base;
struct ipt_entry *e, *back;
/* Initialization */
ip = (*pskb)->nh.iph;
protohdr = (u_int32_t *)ip + ip->ihl;
datalen = (*pskb)->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
* if it was a normal packet. All other fragments are treated
* normally, except that they will NEVER match rules that ask
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
offset = ntohs(ip->frag_off) & IP_OFFSET;
read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
table_base = (void *)table->private->entries
+ TABLE_OFFSET(table->private,
cpu_number_map(smp_processor_id()));
e = get_entry(table_base, table->private->hook_entry[hook]);
#ifdef CONFIG_NETFILTER_DEBUG
/* Check noone else using our table */
if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
&& ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
smp_processor_id(),
table->name,
&((struct ipt_entry *)table_base)->comefrom,
((struct ipt_entry *)table_base)->comefrom);
}
((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
#endif
/* For return from builtin chain */
back = get_entry(table_base, table->private->underflow[hook]);
do {
IP_NF_ASSERT(e);
IP_NF_ASSERT(back);
(*pskb)->nfcache |= e->nfcache;
if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
struct ipt_entry_target *t;
if (IPT_MATCH_ITERATE(e, do_match,
*pskb, in, out,
offset, protohdr,
datalen, &hotdrop) != 0)
goto no_match;
ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
/* Standard target? */
if (!t->u.kernel.target->target) {
int v;
v = ((struct ipt_standard_target *)t)->verdict;
if (v < 0) {
/* Pop from stack? */
if (v != IPT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
e = back;
back = get_entry(table_base,
back->comefrom);
continue;
}
if (table_base + v
!= (void *)e + e->next_offset) {
/* Save old back ptr in next entry */
struct ipt_entry *next
= (void *)e + e->next_offset;
next->comefrom
= (void *)back - table_base;
/* set back pointer to next entry */
back = next;
}
e = get_entry(table_base, v);
} else {
/* Targets which reenter must return
abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
((struct ipt_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
verdict = t->u.kernel.target->target(pskb,
hook,
in, out,
t->data,
userdata);
#ifdef CONFIG_NETFILTER_DEBUG
if (((struct ipt_entry *)table_base)->comefrom
!= 0xeeeeeeec
&& verdict == IPT_CONTINUE) {
printk("Target %s reentered!\n",
t->u.kernel.target->name);
verdict = NF_DROP;
}
((struct ipt_entry *)table_base)->comefrom
= 0x57acc001;
#endif
/* Target might have changed stuff. */
ip = (*pskb)->nh.iph;
protohdr = (u_int32_t *)ip + ip->ihl;
datalen = (*pskb)->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
else
/* Verdict */
break;
}
} else {
no_match:
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
#ifdef CONFIG_NETFILTER_DEBUG
((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
#endif
read_unlock_bh(&table->lock);
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
if (hotdrop)
return NF_DROP;
else return verdict;
#endif
}
再来一句句看这个函数吧:
先是把该取的值取出来:
ip = (*pskb)->nh.iph; /*获取IP头*/
protohdr = (u_int32_t *)ip + ip->ihl; /*跳过IP头,搞不明白,为什么不用( u_int8_t * )ip + ip->ihl << 2^o^*/
datalen = (*pskb)->len - ip->ihl * 4; /*指向数据区*/
indev = in ? in->name : nulldevname; /*取得输入设备名*/
outdev = out ? out->name : nulldevname; /*取得输出设备名*/
offset = ntohs(ip->frag_off) & IP_OFFSET; /*设置分片包的偏移*/
read_lock_bh(&table->lock); /*设置互斥锁*/
IP_NF_ASSERT(table->valid_hooks & (1 << hook)); /*检验HOOK,debug用的*/
/*获取当前表的当前CPU的规则入口*/
table_base = (void *)table->private->entries
+ TABLE_OFFSET(table->private,
cpu_number_map(smp_processor_id()));
/*获得当前表的当前Hook的规则的起始偏移量*/
e = get_entry(table_base, table->private->hook_entry[hook]);
/*获得当前表的当前Hook的规则的上限偏移量*/
back = get_entry(table_base, table->private->underflow[hook]);
然后进行规则的匹配,是在一个do...while中实现的:do {
(*pskb)->nfcache |= e->nfcache;
/*如果IP包匹配,就断续匹配下去,否则就跳到下一条规则*/
if (ip_packet_match(ip, indev, outdev, &e->ip, offset))
{
}
else
{
no_match:
e = (void *)e + e->next_offset;
}
}while (!hotdrop);
标准的match匹配,即struct ipt_ip这部份,是通过调用函数ip_packet_match来实现的;
当 ip_packet_match匹配,则继续匹配下去,否则就跳到下一条规则(e = (void *)e + e->next_offset;)
ip_packet_match放到一边,把来看看后面的情况:如果标准的match匹配了,则:
接着匹配扩展match
if (IPT_MATCH_ITERATE(e, do_match,
*pskb, in, out,
offset, protohdr,
datalen, &hotdrop) != 0)
goto no_match;
IPT_MATCH_ITERATE这个宏已经遇到很多次了,它的作用是遍历扩展的match,而实际执行的功能函数是
do_match
OK,如果不匹配,则goto no_match;执行下条规则,否则:
/*这个宏用来分别处理字节计数器和分组计数器这两个计数器*/
ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
/*获取规则的target的偏移地址*/
t = ipt_get_target(e);
然后接着匹备target:
if (!t->u.kernel.target->target) {
int v;
v = ((struct ipt_standard_target *)t)->verdict;
if (v < 0) {
/* Pop from stack? */
if (v != IPT_RETURN) {
verdict = (unsigned)(-v) - 1;
break;
}
e = back;
back = get_entry(table_base,
back->comefrom);
continue;
}
if (table_base + v
!= (void *)e + e->next_offset) {
/* Save old back ptr in next entry */
struct ipt_entry *next
= (void *)e + e->next_offset;
next->comefrom
= (void *)back - table_base;
/* set back pointer to next entry */
back = next;
}
e = get_entry(table_base, v);
} else {
/* Targets which reenter must return
abs. verdicts */
#ifdef CONFIG_NETFILTER_DEBUG
((struct ipt_entry *)table_base)->comefrom
= 0xeeeeeeec;
#endif
verdict = t->u.kernel.target->target(pskb,
hook,
in, out,
t->data,
userdata);
#ifdef CONFIG_NETFILTER_DEBUG
if (((struct ipt_entry *)table_base)->comefrom
!= 0xeeeeeeec
&& verdict == IPT_CONTINUE) {
printk("Target %s reentered!\n",
t->u.kernel.target->name);
verdict = NF_DROP;
}
((struct ipt_entry *)table_base)->comefrom
= 0x57acc001;
#endif
/* Target might have changed stuff. */
ip = (*pskb)->nh.iph;
protohdr = (u_int32_t *)ip + ip->ihl;
datalen = (*pskb)->len - ip->ihl * 4;
if (verdict == IPT_CONTINUE)
e = (void *)e + e->next_offset;
else
/* Verdict */
break;
}
因为到目前为止,我们谈match/target的数据结构,只接触到用户态,对于内核态的,几乎没有怎么接触,所以要把它说清楚,可不是一件容易的事情。(或者先分析用户态添加规则在内核中是如何实现的,了解了match和target的组织再来分析代码,更容易些)。不过,反正无论如何,还是一步步地来,
先来看看标准match的匹配部份,即ip_packet_match函数
文章来源CU社区:[原创]Netfilter源码分析-我来抛砖,望能引玉