diff --git a/README.md b/README.md
index 58340622d0a138fc7a4f98d776ae4c4eefbaeb0c..d54e3848f5e133cc59976924a9032f002a40a1ef 100644
--- a/README.md
+++ b/README.md
@@ -128,6 +128,7 @@ pair of .c and .py files, and some are directories of files.
 - tools/[memleak](tools/memleak.py): Display outstanding memory allocations to find memory leaks. [Examples](tools/memleak_example.txt).
 - tools/[mountsnoop](tools/mountsnoop.py): Trace mount and umount syscalls system-wide. [Examples](tools/mountsnoop_example.txt).
 - tools/[mysqld_qslower](tools/mysqld_qslower.py): Trace MySQL server queries slower than a threshold. [Examples](tools/mysqld_qslower_example.txt).
+- tools/[netqtop](tools/netqtop.py) tools/[netqtop.c](tools/netqtop.c): Trace and display packet distribution on NIC queues. [Examples](tools/netqtop_example.txt).
 - tools/[nfsslower](tools/nfsslower.py): Trace slow NFS operations. [Examples](tools/nfsslower_example.txt).
 - tools/[nfsdist](tools/nfsdist.py): Summarize NFS operation latency distribution as a histogram. [Examples](tools/nfsdist_example.txt).
 - tools/[offcputime](tools/offcputime.py): Summarize off-CPU time by kernel stack trace. [Examples](tools/offcputime_example.txt).
diff --git a/man/man8/netqtop.8 b/man/man8/netqtop.8
new file mode 100644
index 0000000000000000000000000000000000000000..bfa34d11f3e4fe3ddc41b55bce275b91ba11dcca
--- /dev/null
+++ b/man/man8/netqtop.8
@@ -0,0 +1,56 @@
+.TH netqtop 8 "2020-07-30" "USER COMMANDS"
+.SH NAME
+netqtop \- Summarize PPS, BPS, average packet size and packet counts grouped by packet size
+on each queue of a network interface.
+.SH SYNOPSIS
+.B netqtop [\-n nic] [\-i interval] [\-t]
+.SH DESCRIPTION
+netqtop collects statistics of both transmitted and received packets on each queue of
+a specified network interface, to help developers check whether its traffic load is balanced.
+The result is displayed as a table with columns for PPS, BPS, average size and
+packet counts in the ranges [0, 64), [64, 512), [512, 2K), [2K, 16K) and [16K, 64K) bytes.
+The table is printed every INTERVAL seconds (default 1).
+
+The tool uses the net:net_dev_start_xmit and net:netif_receive_skb kernel tracepoints.
+Since it uses tracepoints, the tool only works on Linux 4.7+.
+
+netqtop introduces significant overhead when network traffic is heavy. See the OVERHEAD
+section below.
+
+.SH REQUIREMENTS
+CONFIG_BPF and bcc.
+.SH OPTIONS
+.TP
+\-n NIC
+Specify the network interface card.
+.TP
+\-i INTERVAL
+Print results every INTERVAL seconds.
+The default value is 1.
+.TP
+\-t
+Also print BPS and PPS of each queue.
+.SH EXAMPLES
+.TP
+Account statistics of eth0 and print the result every 2 seconds:
+#
+.B netqtop -n eth0 -i 2
+.SH OVERHEAD
+In performance tests, netqtop introduced an overhead of up to a 30% drop in PPS
+when the printing interval was set to 1 second, so be mindful of potential packet
+drops when using this tool.
+
+It also increases ping-pong latency by about 1 usec.
+.SH SOURCE
+This is from bcc.
+.IP
+https://github.com/iovisor/bcc
+.PP
+Also look in the bcc distribution for a netqtop_example.txt file containing
+example usage, output and commentary for this tool.
+.SH OS
+Linux
+.SH STABILITY
+Unstable - in development
+.SH AUTHOR
+Yolandajn
diff --git a/tools/netqtop.c b/tools/netqtop.c
new file mode 100644
index 0000000000000000000000000000000000000000..52605ddab44b1863d7970f7930d4ca918817119f
--- /dev/null
+++ b/tools/netqtop.c
@@ -0,0 +1,113 @@
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#if IFNAMSIZ != 16
+#error "IFNAMSIZ != 16 is not supported"
+#endif
+#define MAX_QUEUE_NUM 1024
+
+/**
+ * This union is used to store the name of the specified interface
+ * and to read it back as two different data types (a string or two u64s).
+ */
+union name_buf{
+    char name[IFNAMSIZ];
+    struct {
+        u64 hi;
+        u64 lo;
+    } name_int;
+};
+
+/* data retrieved in tracepoints */
+struct queue_data{
+    u64 total_pkt_len;
+    u32 num_pkt;
+    u32 size_64B;
+    u32 size_512B;
+    u32 size_2K;
+    u32 size_16K;
+    u32 size_64K;
+};
+
+/* array of length 1 holding the name of the traced device */
+BPF_ARRAY(name_map, union name_buf, 1);
+/* per-queue tables for transmitted & received packets */
+BPF_HASH(tx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+BPF_HASH(rx_q, u16, struct queue_data, MAX_QUEUE_NUM);
+
+static inline int name_filter(struct sk_buff* skb){
+    /* get device name from skb */
+    union name_buf real_devname;
+    struct net_device *dev;
+    bpf_probe_read(&dev, sizeof(skb->dev), ((char *)skb + offsetof(struct sk_buff, dev)));
+    bpf_probe_read(&real_devname, IFNAMSIZ, dev->name);
+
+    int key = 0;
+    union name_buf *leaf = name_map.lookup(&key);
+    if(!leaf){
+        return 0;
+    }
+    if((leaf->name_int).hi != real_devname.name_int.hi ||
+       (leaf->name_int).lo != real_devname.name_int.lo){
+        return 0;
+    }
+
+    return 1;
+}
+
+static void update_data(struct queue_data *data, u64 len){
+    data->total_pkt_len += len;
+    data->num_pkt++;
+    /* bucket the packet by size */
+    if(len < 64){
+        data->size_64B++;
+    }
+    else if(len < 512){
+        data->size_512B++;
+    }
+    else if(len < 2048){
+        data->size_2K++;
+    }
+    else if(len < 16384){
+        data->size_16K++;
+    }
+    else if(len < 65536){
+        data->size_64K++;
+    }
+}
+
+TRACEPOINT_PROBE(net, net_dev_start_xmit){
+    /* read device name */
+    struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+    if(!name_filter(skb)){
+        return 0;
+    }
+
+    /* update table */
+    u16 qid = skb->queue_mapping;
+    struct queue_data newdata;
+    __builtin_memset(&newdata, 0, sizeof(newdata));
+    struct queue_data *data = tx_q.lookup_or_try_init(&qid, &newdata);
+    if(!data){
+        return 0;
+    }
+    update_data(data, skb->len);
+
+    return 0;
+}
+
+TRACEPOINT_PROBE(net, netif_receive_skb){
+    struct sk_buff* skb = (struct sk_buff*)args->skbaddr;
+    if(!name_filter(skb)){
+        return 0;
+    }
+
+    u16 qid = skb->queue_mapping;
+    struct queue_data newdata;
+    __builtin_memset(&newdata, 0, sizeof(newdata));
+    struct queue_data *data = rx_q.lookup_or_try_init(&qid, &newdata);
+    if(!data){
+        return 0;
+    }
+    update_data(data, skb->len);
+
+    return 0;
+}
diff --git a/tools/netqtop.py b/tools/netqtop.py
new file mode 100755
index 0000000000000000000000000000000000000000..e2823ac6eddcc8254e19339ecefd6745ca4e4470
--- /dev/null
+++ b/tools/netqtop.py
@@ -0,0 +1,218 @@
+#!/usr/bin/python
+
+from __future__ import print_function
+from bcc import BPF
+from ctypes import *
+import argparse
+import os
+from time import sleep, time, localtime, asctime
+
+# pre defines -------------------------------
+ROOT_PATH = "/sys/class/net"
+IFNAMSIZ = 16
+COL_WIDTH = 10
+MAX_QUEUE_NUM = 1024
+EBPF_FILE = "netqtop.c"
+
+# structure for network interface name array
+class Devname(Structure):
+    _fields_ = [
+        ('name', c_char * IFNAMSIZ)
+    ]
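+
+# Devname mirrors union name_buf in netqtop.c: the requested interface name is
+# written into the one-element BPF array 'name_map' (see the "start tracing"
+# section below) so the BPF program can filter skbs by device.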
+
+################## printer for results ###################
+def to_str(num):
+    if num > 1000000:
+        return str(round(num / (1024 * 1024.0), 2)) + 'M'
+    elif num > 1000:
+        return str(round(num / 1024.0, 2)) + 'K'
+    else:
+        if isinstance(num, float):
+            return str(round(num, 2))
+        else:
+            return str(num)
+
+def print_table(table, qnum):
+    global print_interval
+
+    # ---- print headers ----------------
+    headers = [
+        "QueueID",
+        "avg_size",
+        "[0, 64)",
+        "[64, 512)",
+        "[512, 2K)",
+        "[2K, 16K)",
+        "[16K, 64K)"
+    ]
+    if args.throughput:
+        headers.append("BPS")
+        headers.append("PPS")
+
+    for hd in headers:
+        print(hd.center(COL_WIDTH), end=" ")
+    print()
+
+    # ------- calculate totals --------------
+    qids = []
+    tBPS = 0
+    tPPS = 0
+    tAVG = 0
+    tGroup = [0, 0, 0, 0, 0]
+    tpkt = 0
+    tlen = 0
+    for k, v in table.items():
+        qids += [k.value]
+        tlen += v.total_pkt_len
+        tpkt += v.num_pkt
+        tGroup[0] += v.size_64B
+        tGroup[1] += v.size_512B
+        tGroup[2] += v.size_2K
+        tGroup[3] += v.size_16K
+        tGroup[4] += v.size_64K
+    tBPS = tlen / print_interval
+    tPPS = tpkt / print_interval
+    if tpkt != 0:
+        tAVG = tlen // tpkt
+
+    # -------- print per-queue rows --------------
+    for k in range(qnum):
+        if k in qids:
+            item = table[c_ushort(k)]
+            data = [
+                k,
+                item.total_pkt_len,
+                item.num_pkt,
+                item.size_64B,
+                item.size_512B,
+                item.size_2K,
+                item.size_16K,
+                item.size_64K
+            ]
+        else:
+            data = [k, 0, 0, 0, 0, 0, 0, 0]
+
+        # print a line per queue
+        avg = 0
+        if data[2] != 0:
+            avg = data[1] // data[2]
+        print("%5d %11s %10s %10s %10s %10s %10s" % (
+            data[0],
+            to_str(avg),
+            to_str(data[3]),
+            to_str(data[4]),
+            to_str(data[5]),
+            to_str(data[6]),
+            to_str(data[7])
+        ), end="")
+        if args.throughput:
+            BPS = data[1] / print_interval
+            PPS = data[2] / print_interval
+            print(" %10s %10s" % (
+                to_str(BPS),
+                to_str(PPS)
+            ))
+        else:
+            print()
+
+    # ------- print totals --------------
+    print(" Total %10s %10s %10s %10s %10s %10s" % (
+        to_str(tAVG),
+        to_str(tGroup[0]),
+        to_str(tGroup[1]),
+        to_str(tGroup[2]),
+        to_str(tGroup[3]),
+        to_str(tGroup[4])
+    ), end="")
+
+    if args.throughput:
+        print(" %10s %10s" % (
+            to_str(tBPS),
+            to_str(tPPS)
+        ))
+    else:
+        print()
+
+
+def print_result(b):
+    # --------- print tx queues ---------------
+    print(asctime(localtime(time())))
+    print("TX")
+    table = b['tx_q']
+    print_table(table, tx_num)
+    b['tx_q'].clear()
+
+    # --------- print rx queues ---------------
+    print("")
+    print("RX")
+    table = b['rx_q']
+    print_table(table, rx_num)
+    b['rx_q'].clear()
+    if args.throughput:
+        print("-" * 95)
+    else:
+        print("-" * 76)
+
+############## specify network interface #################
+parser = argparse.ArgumentParser(
+    description="Summarize PPS, BPS, average size and packet counts on each queue of a network interface")
+parser.add_argument("--name", "-n", type=str, default="",
+    help="name of the network interface to trace")
+parser.add_argument("--interval", "-i", type=float, default=1,
+    help="output interval, in seconds (default 1)")
+parser.add_argument("--throughput", "-t", action="store_true",
+    help="also print BPS and PPS of each queue")
+parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS)
+args = parser.parse_args()
+
+if args.ebpf:
+    with open(EBPF_FILE) as fileobj:
+        progtxt = fileobj.read()
+    print(progtxt)
+    exit()
+
+if args.name == "":
+    print("Please specify a network interface.")
+    exit()
+else:
+    dev_name = args.name
+
+if len(dev_name) > IFNAMSIZ - 1:
+    print("NIC name too long")
+    exit()
+
+print_interval = args.interval + 0.0
+if print_interval <= 0:
+    print("print interval must be positive")
+    exit()
+
+################ get number of queues #####################
+tx_num = 0
+rx_num = 0
+path = ROOT_PATH + "/" + dev_name + "/queues"
+if not os.path.exists(path):
+    print("Net interface " + dev_name + " does not exist.")
+    exit()
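+
+# The number of TX/RX queues is inferred from sysfs: every tx-<n>/rx-<n>
+# entry under /sys/class/net/<nic>/queues corresponds to one queue.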
+queues = os.listdir(path)
+for s in queues:
+    if s.startswith('rx-'):
+        rx_num += 1
+    if s.startswith('tx-'):
+        tx_num += 1
+
+if tx_num > MAX_QUEUE_NUM or rx_num > MAX_QUEUE_NUM:
+    print("number of queues over %d is not supported." % MAX_QUEUE_NUM)
+    exit()
+
+################## start tracing ##################
+b = BPF(src_file=EBPF_FILE)
+# --------- pass the interface name to the BPF program --------
+devname_map = b['name_map']
+_name = Devname()
+_name.name = dev_name.encode()
+devname_map[0] = _name
+
+while True:
+    try:
+        sleep(print_interval)
+        print_result(b)
+    except KeyboardInterrupt:
+        exit()
diff --git a/tools/netqtop_example.txt b/tools/netqtop_example.txt
new file mode 100644
index 0000000000000000000000000000000000000000..443cfb715f69624fe6e91a7724a882ef1748fe3b
--- /dev/null
+++ b/tools/netqtop_example.txt
@@ -0,0 +1,190 @@
+Demonstrations of netqtop.
+
+
+netqtop traces the kernel functions performing packet transmit (xmit_one)
+and packet receive (__netif_receive_skb_core) on the data link layer. The tool
+not only traces every packet on the specified network interface, but also accounts
+PPS, BPS and the average packet size, as well as packet counts categorized by
+size range, for the transmit and receive directions respectively. Results are printed
+as tables, which can be used to understand how the traffic load is distributed
+across the queues of the interface of interest and whether it is balanced.
+Overall totals are provided at the bottom.
+
+For example, suppose you want to know the current traffic on lo, printing results
+every second:
+# ./netqtop.py -n lo -i 1
+Thu Sep 10 11:28:39 2020
+TX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          88          0          9          0          0          0
+ Total         88          0          9          0          0          0
+
+RX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          74          4          5          0          0          0
+ Total         74          4          5          0          0          0
+----------------------------------------------------------------------------
+Thu Sep 10 11:28:40 2020
+TX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         233          0          3          1          0          0
+ Total        233          0          3          1          0          0
+
+RX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         219          2          1          1          0          0
+ Total        219          2          1          1          0          0
+----------------------------------------------------------------------------
+
+Or you can just use the default 1-second interval:
+# ./netqtop.py -n lo
+Thu Sep 10 11:27:45 2020
+TX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          92          0          7          0          0          0
+ Total         92          0          7          0          0          0
+
+RX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0          78          3          4          0          0          0
+ Total         78          3          4          0          0          0
+----------------------------------------------------------------------------
+Thu Sep 10 11:27:46 2020
+TX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         179          0          5          1          0          0
+ Total        179          0          5          1          0          0
+
+RX
+ QueueID    avg_size    [0, 64)  [64, 512)  [512, 2K)  [2K, 16K)  [16K, 64K)
+    0         165          3          2          1          0          0
+ Total        165          3          2          1          0          0
+----------------------------------------------------------------------------
+
+This NIC only has 1 queue.
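+
+The number of queues is discovered automatically: each rx-<n>/tx-<n> entry under
+/sys/class/net/<NIC>/queues corresponds to one queue, so you can check the count
+yourself (the interface name below is just an example):
+# ls /sys/class/net/eth0/queues
+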
+If you want the tool to print results after a longer interval, specify seconds with -i: +# ./netqtop.py -n lo -i 3 +Thu Sep 10 11:31:26 2020 +TX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) + 0 85 0 11 0 0 0 + Total 85 0 11 0 0 0 + +RX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) + 0 71 5 6 0 0 0 + Total 71 5 6 0 0 0 +---------------------------------------------------------------------------- +Thu Sep 10 11:31:29 2020 +TX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) + 0 153 0 7 1 0 0 + Total 153 0 7 1 0 0 + +RX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) + 0 139 4 3 1 0 0 + Total 139 4 3 1 0 0 +---------------------------------------------------------------------------- + +To see PPS and BPS of each queue, use -t: +# ./netqtop.py -n lo -i 1 -t +Thu Sep 10 11:37:02 2020 +TX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 114 0 10 0 0 0 1.11K 10.0 + Total 114 0 10 0 0 0 1.11K 10.0 + +RX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 100 4 6 0 0 0 1000.0 10.0 + Total 100 4 6 0 0 0 1000.0 10.0 +----------------------------------------------------------------------------------------------- +Thu Sep 10 11:37:03 2020 +TX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 271 0 3 1 0 0 1.06K 4.0 + Total 271 0 3 1 0 0 1.06K 4.0 + +RX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 257 2 1 1 0 0 1.0K 4.0 + Total 257 2 1 1 0 0 1.0K 4.0 +----------------------------------------------------------------------------------------------- + +When filtering multi-queue NICs, you do not need to specify the number of queues, +the tool calculates it for you: +# ./netqtop.py -n eth0 -t +Thu Sep 10 11:39:21 2020 +TX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 0 0 0 0 0 0 0.0 0.0 + 1 0 0 0 0 0 0 0.0 0.0 + 2 0 0 0 0 0 0 0.0 0.0 + 3 0 0 0 0 0 0 0.0 0.0 + 4 0 0 0 0 0 0 0.0 0.0 + 5 0 0 0 0 0 0 0.0 0.0 + 6 0 0 0 0 0 0 0.0 0.0 + 7 0 0 0 0 0 0 0.0 0.0 + 8 54 2 0 0 0 0 108.0 2.0 + 9 161 0 9 0 0 0 1.42K 9.0 + 10 0 0 0 0 0 0 0.0 0.0 + 11 0 0 0 0 0 0 0.0 0.0 + 12 0 0 0 0 0 0 0.0 0.0 + 13 0 0 0 0 0 0 0.0 0.0 + 14 0 0 0 0 0 0 0.0 0.0 + 15 0 0 0 0 0 0 0.0 0.0 + 16 0 0 0 0 0 0 0.0 0.0 + 17 0 0 0 0 0 0 0.0 0.0 + 18 0 0 0 0 0 0 0.0 0.0 + 19 0 0 0 0 0 0 0.0 0.0 + 20 0 0 0 0 0 0 0.0 0.0 + 21 0 0 0 0 0 0 0.0 0.0 + 22 0 0 0 0 0 0 0.0 0.0 + 23 0 0 0 0 0 0 0.0 0.0 + 24 0 0 0 0 0 0 0.0 0.0 + 25 0 0 0 0 0 0 0.0 0.0 + 26 0 0 0 0 0 0 0.0 0.0 + 27 0 0 0 0 0 0 0.0 0.0 + 28 0 0 0 0 0 0 0.0 0.0 + 29 0 0 0 0 0 0 0.0 0.0 + 30 0 0 0 0 0 0 0.0 0.0 + 31 0 0 0 0 0 0 0.0 0.0 + Total 141 2 9 0 0 0 1.52K 11.0 + +RX + QueueID avg_size [0, 64) [64, 512) [512, 2K) [2K, 16K) [16K, 64K) BPS PPS + 0 127 3 9 0 0 0 1.5K 12.0 + 1 0 0 0 0 0 0 0.0 0.0 + 2 0 0 0 0 0 0 0.0 0.0 + 3 0 0 0 0 0 0 0.0 0.0 + 4 0 0 0 0 0 0 0.0 0.0 + 5 0 0 0 0 0 0 0.0 0.0 + 6 0 0 0 0 0 0 0.0 0.0 + 7 0 0 0 0 0 0 0.0 0.0 + 8 0 0 0 0 0 0 0.0 0.0 + 9 0 0 0 0 0 0 0.0 0.0 + 10 0 0 0 0 0 0 0.0 0.0 + 11 0 0 0 0 0 0 0.0 0.0 + 12 0 0 0 0 0 0 0.0 0.0 + 13 0 0 0 0 0 0 0.0 0.0 + 14 0 0 0 0 0 0 0.0 0.0 + 15 0 0 0 0 0 0 0.0 0.0 + 16 0 0 0 0 0 0 0.0 0.0 + 17 0 0 0 0 0 0 0.0 0.0 + 18 0 0 0 0 0 0 0.0 0.0 + 19 0 0 0 0 0 0 0.0 0.0 + 20 0 0 0 0 0 0 0.0 0.0 + 21 0 0 0 0 0 0 0.0 0.0 + 22 0 0 0 0 0 0 0.0 0.0 + 23 0 0 0 0 0 0 0.0 0.0 + 24 0 0 0 0 0 0 0.0 0.0 + 25 0 0 0 0 0 0 0.0 0.0 + 26 0 0 0 0 0 0 0.0 0.0 + 27 0 0 0 0 0 0 0.0 0.0 + 28 0 0 0 0 0 0 0.0 0.0 + 29 0 
0 0 0 0 0 0.0 0.0 + 30 0 0 0 0 0 0 0.0 0.0 + 31 0 0 0 0 0 0 0.0 0.0 + Total 127 3 9 0 0 0 1.5K 12.0 +----------------------------------------------------------------------------------------------- \ No newline at end of file