VPP节点统计信息

节点的统计位于dispatch_node函数中。节点处理函数（node->function）运行之后，其返回值为处理的报文数量n（n_vectors），随后通过函数vlib_node_runtime_update_stats来更新节点的相关统计信息。

static_always_inline u64
dispatch_node (vlib_main_t * vm, vlib_node_runtime_t * node,vlib_node_type_t type, vlib_node_state_t dispatch_state,vlib_frame_t * frame, u64 last_time_stamp)
{if (PREDICT_TRUE (vm->dispatch_wrapper_fn == 0))n = node->function (vm, node, frame);elsen = vm->dispatch_wrapper_fn (vm, node, frame);t = clib_cpu_time_now ();v = vlib_node_runtime_update_stats (vm, node,/* n_calls */ 1,/* n_vectors */ n,/* n_clocks */ t - last_time_stamp);

三种统计维度：调用次数、报文数量和时间，对应以上的参数n_calls=1，n_vectors=n和n_clocks=t - last_time_stamp。将node中的三个记录统计信息的成员分别累加上对应的值。

always_inline u32
vlib_node_runtime_update_stats (vlib_main_t * vm, vlib_node_runtime_t * node,uword n_calls, uword n_vectors, uword n_clocks)
{u32 ca0, ca1, v0, v1, cl0, cl1, r;cl0 = cl1 = node->clocks_since_last_overflow;ca0 = ca1 = node->calls_since_last_overflow;v0 = v1 = node->vectors_since_last_overflow;ca1 = ca0 + n_calls;v1 = v0 + n_vectors;cl1 = cl0 + n_clocks;node->calls_since_last_overflow = ca1;node->clocks_since_last_overflow = cl1;node->vectors_since_last_overflow = v1;

max_clock和max_clock_n分别记录单次调用所花费的最大时钟周期数，以及该次调用中处理的报文数量。

  node->max_clock_n = node->max_clock > n_clocks ? node->max_clock_n : n_vectors;node->max_clock = node->max_clock > n_clocks ? node->max_clock : n_clocks;

如果累加统计值之后，三者之中有某一个数值出现回绕（32bit溢出overflow），则先将三个计数恢复为累加之前的值，再调用同步函数vlib_node_runtime_sync_stats，将统计信息由节点结构vlib_node_runtime_t同步到对应的vlib_node_t中，即由运行时节点结构同步到主节点结构中，参见下文的函数vlib_node_runtime_sync_stats。

  r = vlib_node_runtime_update_main_loop_vector_stats (vm, node, n_vectors);if (PREDICT_FALSE (ca1 < ca0 || v1 < v0 || cl1 < cl0)) {node->calls_since_last_overflow = ca0;node->clocks_since_last_overflow = cl0;node->vectors_since_last_overflow = v0;vlib_node_runtime_sync_stats (vm, node, n_calls, n_vectors, n_clocks);}return r;

统计信息同步

根据节点索引，取得主节点结构。

/*
 * Sync one runtime's statistics into its main node.
 *
 * The main node is looked up in vm by r->node_index; folding the counters,
 * together with the n_calls / n_vectors / n_clocks deltas passed in, is
 * delegated to vlib_node_runtime_sync_stats_node.
 */
void
vlib_node_runtime_sync_stats (vlib_main_t *vm, vlib_node_runtime_t *r,
                              uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_runtime_sync_stats_node (vlib_get_node (vm, r->node_index), r,
                                     n_calls, n_vectors, n_clocks);
}

将统计信息由32位的*_since_last_overflow累加到主节点的统计结构中，并且，清零运行节点结构中的统计值*_since_last_overflow。

/*
 * Sync up runtime (32 bit counters) and main node stats (64 bit counters).
 *
 * For calls, vectors and clocks, the main node total grows by the caller's
 * delta plus whatever the runtime has accumulated, and the runtime counter
 * is then reset to zero. The max_clock / max_clock_n pair is copied from the
 * runtime as-is.
 */
void
vlib_node_runtime_sync_stats_node (vlib_node_t *n, vlib_node_runtime_t *r,
                                   uword n_calls, uword n_vectors,
                                   uword n_clocks)
{
  /* Calls. */
  n->stats_total.calls += n_calls + r->calls_since_last_overflow;
  r->calls_since_last_overflow = 0;

  /* Vectors. */
  n->stats_total.vectors += n_vectors + r->vectors_since_last_overflow;
  r->vectors_since_last_overflow = 0;

  /* Clocks. */
  n->stats_total.clocks += n_clocks + r->clocks_since_last_overflow;
  r->clocks_since_last_overflow = 0;

  /* Maxima are overwritten, not accumulated. */
  n->stats_total.max_clock = r->max_clock;
  n->stats_total.max_clock_n = r->max_clock_n;
}

主循环统计更新

每次主循环执行，都会将计数main_loop_count递增。

always_inline void
vlib_increment_main_loop_counter (vlib_main_t * vm)
{      vm->main_loop_count++;...
}static_always_inline void
vlib_main_or_worker_loop (vlib_main_t * vm, int is_main)
{vlib_increment_main_loop_counter (vm);

PROCESS节点统计

以上dispatch_node处理的是VLIB_NODE_TYPE_PRE_INPUT/INPUT/INTERNAL三种类型的节点，对于VLIB_NODE_TYPE_PROCESS类型节点，由函数vlib_process_update_stats进行统计更新。其核心为以上介绍的函数vlib_node_runtime_update_stats。

/*
 * Update statistics for a process node.
 *
 * Forwards the n_calls / n_vectors / n_clocks deltas to
 * vlib_node_runtime_update_stats, using the node runtime that is a member of
 * the process structure p. The value returned by that call is not used.
 */
always_inline void
vlib_process_update_stats (vlib_main_t * vm, vlib_process_t * p,
                           uword n_calls, uword n_vectors, uword n_clocks)
{
  vlib_node_runtime_t *rt = &p->node_runtime;

  vlib_node_runtime_update_stats (vm, rt, n_calls, n_vectors, n_clocks);
}

显示节点统计信息

注册显示节点信息命令：show runtime，可以显示指定节点的统计信息，或者显示所有节点的统计信息。

/* CLI registration: binds the "show runtime" path to show_node_runtime. */
VLIB_CLI_COMMAND (show_node_runtime_command, static) = {
  .path = "show runtime",
  .function = show_node_runtime,
  .short_help = "Show packet processing runtime",
  .is_mp_safe = 1,
};

在显示节点统计信息之前，需要先收集节点信息，如下函数vlib_node_sync_stats。首先，根据节点类型获取到节点对应的运行节点结构rt。其次，由函数vlib_node_runtime_sync_stats将未同步的统计信息同步到节点主结构中。

void
vlib_node_sync_stats (vlib_main_t * vm, vlib_node_t * n)
{vlib_node_runtime_t *rt;if (n->type == VLIB_NODE_TYPE_PROCESS) {/* Nothing to do for PROCESS nodes except in main thread */if (vm != vlib_get_first_main ()) return;vlib_process_t *p = vlib_get_process_from_node (vm, n);n->stats_total.suspends += p->n_suspends;p->n_suspends = 0;rt = &p->node_runtime;} elsert = vec_elt_at_index (vm->node_main.nodes_by_type[n->type], n->runtime_index);vlib_node_runtime_sync_stats (vm, rt, 0, 0, 0);

最后，将运行时下一个frame结构中统计的vectors值，同步到主节点结构中。

  /* Sync up runtime next frame vector counters with main node structure. */{vlib_next_frame_t *nf;for (i = 0; i < rt->n_next_nodes; i++) {nf = vlib_node_runtime_get_next_frame (vm, rt, i);vec_elt (n->n_vectors_by_next_node, i) += nf->vectors_since_last_overflow;nf->vectors_since_last_overflow = 0;

格式化统计信息函数，如下format_vlib_node_stats。如果节点结构为空，仅显示标题信息。

static u8 *
format_vlib_node_stats (u8 * s, va_list * va)
{vlib_main_t *vm = va_arg (*va, vlib_main_t *);vlib_node_t *n = va_arg (*va, vlib_node_t *);int max = va_arg (*va, int);f64 v, x, maxc, maxcn;u8 *ns, *misc_info = 0;u64 c, p, l, d;if (!n) {if (max)s = format (s, "%=30s%=17s%=16s%=16s%=16s%=16s","Name", "Max Node Clocks", "Vectors at Max","Max Clocks", "Avg Clocks", "Avg Vectors/Call");elses = format (s, "%=30s%=12s%=16s%=16s%=16s%=16s%=16s","Name", "State", "Calls", "Vectors", "Suspends","Clocks", "Vectors/Call");return s;}indent = format_get_indent (s);

由节点中获得时钟周期、调用数量、报文数量和suspends数量的值。计算处理一个报文使用的最长的时钟周期maxcn。

  l = n->stats_total.clocks - n->stats_last_clear.clocks;c = n->stats_total.calls - n->stats_last_clear.calls;p = n->stats_total.vectors - n->stats_last_clear.vectors;d = n->stats_total.suspends - n->stats_last_clear.suspends;maxc = (f64) n->stats_total.max_clock;maxn = n->stats_total.max_clock_n;if (n->stats_total.max_clock_n)maxcn = (f64) n->stats_total.max_clock / (f64) maxn;elsemaxcn = 0.0;

计算每个报文、每次调用、每个suspend所花费的时钟周期，优先级依次降低。计算每次调用calls，处理的报文数量vectors，结果为v。

  /* Clocks per packet, per call or per suspend. */x = 0;if (p > 0)x = (f64) l / (f64) p;else if (c > 0)x = (f64) l / (f64) c;else if (d > 0)x = (f64) l / (f64) d;if (c > 0)v = (double) p / (double) c;elsev = 0;

输出节点的统计信息，以及以上的计算值。

  ns = n->name;if (max)s = format (s, "%-30v%=17.2e%=16d%=16.2e%=16.2e%=16.2e",ns, maxc, maxn, maxcn, x, v);elses = format (s, "%-30v%=12U%16Ld%16Ld%16Ld%16.2e%16.2f", ns,format_vlib_node_state, vm, n, c, p, d, x, v);if (ns != n->name)vec_free (ns);return s;

函数show_node_runtime处理命令show runtime。
如果在命令行指定了节点名称，如：show runtime ip4-input，调用以上介绍的统计信息收集函数和显示函数处理。

static clib_error_t *
show_node_runtime (vlib_main_t * vm, unformat_input_t * input, vlib_cli_command_t * cmd)
{vlib_node_main_t *nm = &vm->node_main;vlib_node_t *n, ***node_dups = 0;f64 time_now, *internal_node_vector_rates = 0;u32 node_index;time_now = vlib_time_now (vm);if (unformat (input, "%U", unformat_vlib_node, vm, &node_index)) {n = vlib_get_node (vm, node_index);vlib_node_sync_stats (vm, n);vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, 0, 0);vlib_cli_output (vm, "%U\n", format_vlib_node_stats, vm, n, 0);} else {

如下显示信息：

vpp# show runtime ip4-input
             Name                 State         Calls          Vectors        Suspends         Clocks       Vectors/Call
ip4-input                         active          0               0               0            0.00e0            0.00

否则，对于未指定节点名称的情况，有以下处理流程。默认按照brief简洁方式显示，还可指定verbose、max或者summary方式。

      vlib_node_t **nodes;f64 dt;u64 n_input, n_output, n_drop, n_punt;u64 n_clocks, l, v, c, d;int brief = 1, summary = 0, max = 0;vlib_main_t **stat_vms = 0, *stat_vm;/* Suppress nodes with zero calls since last clear */if (unformat (input, "brief") || unformat (input, "b"))    brief = 1;if (unformat (input, "verbose") || unformat (input, "v"))  brief = 0;if (unformat (input, "max") || unformat (input, "m"))      max = 1;if (unformat (input, "summary") || unformat (input, "sum") || unformat (input, "su"))summary = 1;

遍历所有的线程，生成线程vlib_main_t结构向量。

    for (i = 0; i < vlib_get_n_threads (); i++) {stat_vm = vlib_get_main_by_index (i);if (stat_vm)vec_add1 (stat_vms, stat_vm);}/* Barrier sync across stats scraping.* Otherwise, the counts will be grossly inaccurate.*/vlib_worker_thread_barrier_sync (vm);

遍历每个线程的vlib_main结构，并遍历其中的每个节点，同步每个节点的统计信息。完成之后，将该线程的节点指针向量复制一份（vec_dup），添加到node_dups向量。

    for (j = 0; j < vec_len (stat_vms); j++) {stat_vm = stat_vms[j];nm = &stat_vm->node_main;for (i = 0; i < vec_len (nm->nodes); i++) {n = nm->nodes[i];vlib_node_sync_stats (stat_vm, n);}nodes = vec_dup (nm->nodes);vec_add1 (node_dups, nodes);vec_add1 (internal_node_vector_rates, vlib_internal_node_vector_rate (stat_vm));}vlib_worker_thread_barrier_release (vm);

遍历每个线程的vlib_main结构，以及其中的每个主节点结构，计算全部节点的n_output/n_drop/n_punt/n_input的统计总量。

    for (j = 0; j < vec_len (stat_vms); j++) {stat_vm = stat_vms[j];nodes = node_dups[j];vec_sort_with_function (nodes, node_cmp);n_input = n_output = n_drop = n_punt = n_clocks = 0;for (i = 0; i < vec_len (nodes); i++) {n = nodes[i];v = n->stats_total.vectors - n->stats_last_clear.vectors;switch (n->type) {default: continue;case VLIB_NODE_TYPE_INTERNAL:n_output += (n->flags & VLIB_NODE_FLAG_IS_OUTPUT) ? v : 0;n_drop += (n->flags & VLIB_NODE_FLAG_IS_DROP) ? v : 0;n_punt += (n->flags & VLIB_NODE_FLAG_IS_PUNT) ? v : 0;if (n->flags & VLIB_NODE_FLAG_IS_HANDOFF)  n_input += v;break;case VLIB_NODE_TYPE_INPUT:n_input += v;break;}}

输出当前线程的id、线程名称和cpu索引等信息，以及该线程处理的全部节点的总量信息。最后，如果没有指定summary参数，输出每个节点的统计信息。

      if (vlib_get_n_threads () > 1) {vlib_worker_thread_t *w = vlib_worker_threads + j;if (j > 0) vlib_cli_output (vm, "---------------");if (w->cpu_id > -1)vlib_cli_output (vm, "Thread %d %s (lcore %u)", j, w->name, w->cpu_id);elsevlib_cli_output (vm, "Thread %d %s", j, w->name);}dt = time_now - nm->time_last_runtime_stats_clear;vlib_cli_output (vm, "Time %.1f, %f sec internal node vector rate %.2f loops/sec %.2f\n""  vector rates in %.4e, out %.4e, drop %.4e, punt %.4e",dt, vlib_stats_get_segment_update_rate (), internal_node_vector_rates[j], stat_vm->loops_per_second,(f64) n_input / dt, (f64) n_output / dt, (f64) n_drop / dt, (f64) n_punt / dt);if (summary == 0) {vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, 0, max);for (i = 0; i < vec_len (nodes); i++) {c = nodes[i]->stats_total.calls - nodes[i]->stats_last_clear.calls;d = nodes[i]->stats_total.suspends - nodes[i]->stats_last_clear.suspends;if (c || d || !brief) {vlib_cli_output (vm, "%U", format_vlib_node_stats, stat_vm, nodes[i], max);

如下，输出线程2的统计信息：

---------------
Thread 2 vpp_wk_1 (lcore 3)
Time 13238.4, 10 sec internal node vector rate 0.00 loops/sec 14040836.44
  vector rates in 5.0524e5, out 5.0524e5, drop 0.0000e0, punt 0.0000e0
             Name                 State         Calls          Vectors        Suspends         Clocks       Vectors/Call
dpdk-input                       polling      180198867720      6688500986               0          4.59e3             .04
ethernet-input                   active          544505661      6688500986               0          4.32e1           12.28
...