如下fib_triestat文件的内容。
# cat /proc/net/fib_triestat Basic info: size of leaf: 48 bytes, size of tnode: 40 bytes. Id 10: Aver depth: 1.66 Max depth: 2 Leaves: 3 Prefixes: 3 Internal nodes: 2 2: 2 Pointers: 8 Null ptrs: 4 Total size: 1 kB Counters: --------- gets = 2 backtracks = 0 semantic match passed = 0 semantic match miss = 0 null node hit= 0 skipped node resize = 0
如下命名空间初始化函数fib_proc_init中注册了处理函数fib_triestat_seq_show。
int __net_init fib_proc_init(struct net *net) { if (!proc_create_net_single("fib_triestat", 0444, net->proc_net, fib_triestat_seq_show, NULL)) goto out2;
基本信息的输出,叶子节点的大小由宏LEAF_SIZE得到;中间节点的大小由宏TNODE_SIZE(0)得到,稍后介绍。
static int fib_triestat_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; unsigned int h; seq_printf(seq, "Basic info: size of leaf:" " %zd bytes, size of tnode: %zd bytes.\n", LEAF_SIZE, TNODE_SIZE(0));
之后,遍历哈希桶,以及其中的每个哈希链表,对于每个路由表显示相应的table、stats、usage等信息。
rcu_read_lock(); for (h = 0; h < FIB_TABLE_HASHSZ; h++) { struct hlist_head *head = &net->ipv4.fib_table_hash[h]; struct fib_table *tb; hlist_for_each_entry_rcu(tb, head, tb_hlist) { struct trie *t = (struct trie *) tb->tb_data; struct trie_stat stat; if (!t) continue; fib_table_print(seq, tb); trie_collect_stats(t, &stat); trie_show_stats(seq, &stat); #ifdef CONFIG_IP_FIB_TRIE_STATS trie_show_usage(seq, t->stats); #endif }
叶子节点的大小为LEAF_SIZE,即宏TNODE_SIZE(1);中间节点的大小为宏TNODE_SIZE(0),两者相差了一个key_vector结构的大小。这里LEAF_SIZE为48,中间节点大小TNODE_SIZE(0)为40,结构key_vector的大小为8字节。
struct key_vector { t_key key; unsigned char pos; /* 2log(KEYLENGTH) bits needed */ unsigned char bits; /* 2log(KEYLENGTH) bits needed */ unsigned char slen; union { /* This list pointer if valid if (pos | bits) == 0 (LEAF) */ struct hlist_head leaf; /* This array is valid if (pos | bits) > 0 (TNODE) */ struct key_vector __rcu *tnode[0]; }; }; struct tnode { struct rcu_head rcu; t_key empty_children; /* KEYLENGTH bits needed */ t_key full_children; /* KEYLENGTH bits needed */ struct key_vector __rcu *parent; struct key_vector kv[1]; #define tn_bits kv[0].bits }; #define TNODE_SIZE(n) offsetof(struct tnode, kv[0].tnode[n]) #define LEAF_SIZE TNODE_SIZE(1)
对于本地和主路由表,打印文字Local:或者Main:字符串,而对于其它路由表,打印其ID号。
static void fib_table_print(struct seq_file *seq, struct fib_table *tb) { if (tb->tb_id == RT_TABLE_LOCAL) seq_puts(seq, "Local:\n"); else if (tb->tb_id == RT_TABLE_MAIN) seq_puts(seq, "Main:\n"); else seq_printf(seq, "Id %d:\n", tb->tb_id); }
遍历路由表的trie树,统计叶子节点的数量,并计算总的深度totdepth,即所有叶子结点的深度之和。并且,计算叶子节点的最大深度maxdepth。最后,通过叶子结点的fa_alias链表,计算前缀的数量。
另外统计所有中间节点的数量,以及分别统计处理不同比特位宽的节点的数量,并且,统计所有中间节点的empty_children节点的数量之和。
static void trie_collect_stats(struct trie *t, struct trie_stat *s) { struct key_vector *n; struct fib_trie_iter iter; memset(s, 0, sizeof(*s)); rcu_read_lock(); for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) { if (IS_LEAF(n)) { struct fib_alias *fa; s->leaves++; s->totdepth += iter.depth; if (iter.depth > s->maxdepth) s->maxdepth = iter.depth; hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) ++s->prefixes; } else { s->tnodes++; if (n->bits < MAX_STAT_DEPTH) s->nodesizes[n->bits]++; s->nullpointers += tn_info(n)->empty_children; }
对于路由表trie的第一个节点,由函数fib_trie_get_first获取,参数t取值为路由表结构fib_table的子成员tb_data,其kv结构的成员tnode[0]即为第一个节点。此节点如果为叶子节点,将iter的深度depth设置为0,否则,对于中间节点将深度设置为1。
对于叶子,iter的tnode为其父节点的值;而对于中间节点,iter的tnode即为节点自身的值。
static struct key_vector *fib_trie_get_first(struct fib_trie_iter *iter, struct trie *t) { struct key_vector *n, *pn; if (!t) return NULL; pn = t->kv; n = rcu_dereference(pn->tnode[0]); if (!n) return NULL; if (IS_TNODE(n)) { iter->tnode = n; iter->index = 0; iter->depth = 1; } else { iter->tnode = pn; iter->index = 0; iter->depth = 0; } return n;
获取下一个节点由函数fib_trie_get_next实现。首先,取得当前遍历的节点索引值赋值给cindex,并将当前遍历节点赋值于pn。下一个节点不存在的条件为IS_TRIE为真,即当前节点的前缀长度已经达到32(KEYLENGTH长度)。
#define IS_TRIE(n) ((n)->pos >= KEYLENGTH)
如果子节点索引值小于pn的子节点数量(child_length),获取其下一个子节点,如果新节点为叶子节点,保存新的索引值,下次遍历将由此继续。反之,对于中间节点,移动到trie的下一深度,下一次将遍历此中间节点的第一个子节点(pn的孙节点)。
如果当前索引值超过或者等于pn节点的子节点数量,表明遍历完成。找到pn节点的父节点,通过父节点找到其下一个子节点(pn节点的兄弟节点)。
static struct key_vector *fib_trie_get_next(struct fib_trie_iter *iter) { unsigned long cindex = iter->index; struct key_vector *pn = iter->tnode; t_key pkey; while (!IS_TRIE(pn)) { while (cindex < child_length(pn)) { struct key_vector *n = get_child_rcu(pn, cindex++); if (!n) continue; if (IS_LEAF(n)) { iter->tnode = pn; iter->index = cindex; } else { /* push down one level */ iter->tnode = n; iter->index = 0; ++iter->depth; } return n; } /* Current node exhausted, pop back up */ pkey = pn->key; pn = node_parent_rcu(pn); cindex = get_index(pkey, pn) + 1; --iter->depth; } /* record root node so further searches know we are done */ iter->tnode = pn; iter->index = 0; return NULL;
首先,根据总的深度值,和叶子结点数量,计算平均深度值(avdepth)。函数trie_show_stats中,还显示最大深度,叶子节点数量,前缀数量。并且计算叶子结点、前缀、中间节点占用的空间大小等信息(bytes)。
static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) { unsigned int i, max, pointers, bytes, avdepth; if (stat->leaves) avdepth = stat->totdepth*100 / stat->leaves; else avdepth = 0; seq_printf(seq, "\tAver depth: %u.%02d\n", avdepth / 100, avdepth % 100); seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth); seq_printf(seq, "\tLeaves: %u\n", stat->leaves); bytes = LEAF_SIZE * stat->leaves; seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes); bytes += sizeof(struct fib_alias) * stat->prefixes; seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes); bytes += TNODE_SIZE(0) * stat->tnodes;
最后,显示每个位宽段对应的节点数量,并计算中间节点相连所使用的指针的数量,以及其占用空间,与以上计算的空间bytes相加,得到路由trie占用的总空间。
max = MAX_STAT_DEPTH; while (max > 0 && stat->nodesizes[max-1] == 0) max--; pointers = 0; for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; } seq_putc(seq, '\n'); seq_printf(seq, "\tPointers: %u\n", pointers); bytes += sizeof(struct key_vector *) * pointers; seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers); seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
遍历每处理器统计变量结构trie_use_stats,求总和,并进行显示。gets对应于路由查询操作的次数(fib_table_lookup);backtrack对应于路由trie向下查找过程中,没有名字从而向树根回退的次数。semantic_match_passed对应于查询成功的计数。
semantic_match_miss对应于路由未找到的计数;null_node_hit对应路由trie树查找过程中遇到的空节点;resize_node_skipped对应于resize函数执行过程中空间未改变的节点的计数(inflate或halve)。
static void trie_show_usage(struct seq_file *seq, const struct trie_use_stats __percpu *stats) { struct trie_use_stats s = { 0 }; /* loop through all of the CPUs and gather up the stats */ for_each_possible_cpu(cpu) { const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu); s.gets += pcpu->gets; s.backtrack += pcpu->backtrack; s.semantic_match_passed += pcpu->semantic_match_passed; s.semantic_match_miss += pcpu->semantic_match_miss; s.null_node_hit += pcpu->null_node_hit; s.resize_node_skipped += pcpu->resize_node_skipped; } seq_printf(seq, "\nCounters:\n---------\n"); seq_printf(seq, "gets = %u\n", s.gets); seq_printf(seq, "backtracks = %u\n", s.backtrack); seq_printf(seq, "semantic match passed = %u\n", s.semantic_match_passed); seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss); seq_printf(seq, "null node hit= %u\n", s.null_node_hit); seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
内核版本 5.10