diff --git a/man/man8/runqslower.8 b/man/man8/runqslower.8 index 2feef13fb1eead01adc05100eddaad5736ed572a..17052a1414220e6d8b4cff8c3517b247d6ee8de9 100644 --- a/man/man8/runqslower.8 +++ b/man/man8/runqslower.8 @@ -2,7 +2,7 @@ .SH NAME runqslower \- Trace long process scheduling delays. .SH SYNOPSIS -.B runqslower [\-p PID] [min_us] +.B runqslower [\-p PID] [\-t TID] [min_us] .SH DESCRIPTION This measures the time a task spends waiting on a run queue (or equivalent scheduler data structure) for a turn on-CPU, and shows occurrences of time @@ -38,6 +38,9 @@ Print usage message. \-p PID Only show this PID (filtered in kernel for efficiency). .TP +\-t TID +Only show this TID (filtered in kernel for efficiency). +.TP min_us Minimum scheduling delay in microseconds to output. .SH EXAMPLES diff --git a/tools/runqslower.py b/tools/runqslower.py index b67853301699804ea3977b1f2d2046276e5f3276..8f790602a227a24f73cc9b030a6d858edebd543f 100755 --- a/tools/runqslower.py +++ b/tools/runqslower.py @@ -7,7 +7,7 @@ # This script traces high scheduling delays between tasks being # ready to run and them running on CPU after that. # -# USAGE: runqslower [-p PID] [min_us] +# USAGE: runqslower [-p PID] [-t TID] [min_us] # # REQUIRES: Linux 4.9+ (BPF_PROG_TYPE_PERF_EVENT support). # @@ -42,19 +42,25 @@ from time import strftime examples = """examples: ./runqslower # trace run queue latency higher than 10000 us (default) ./runqslower 1000 # trace run queue latency higher than 1000 us - ./runqslower -p 123 # trace pid 123 only + ./runqslower -p 123 # trace pid 123 + ./runqslower -t 123 # trace tid 123 (use for threads only) """ parser = argparse.ArgumentParser( description="Trace high run queue latency", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=examples) -parser.add_argument("-p", "--pid", type=int, metavar="PID", dest="pid", - help="trace this PID only") parser.add_argument("min_us", nargs="?", default='10000', help="minimum run queue latecy to trace, in ms (default 10000)") parser.add_argument("--ebpf", action="store_true", help=argparse.SUPPRESS) + +thread_group = parser.add_mutually_exclusive_group() +thread_group.add_argument("-p", "--pid", metavar="PID", dest="pid", + help="trace this PID only", type=int) +thread_group.add_argument("-t", "--tid", metavar="TID", dest="tid", + help="trace this TID only", type=int) args = parser.parse_args() + min_us = int(args.min_us) debug = 0 @@ -80,7 +86,7 @@ BPF_PERF_OUTPUT(events); // record enqueue timestamp static int trace_enqueue(u32 tgid, u32 pid) { - if (FILTER_PID || pid == 0) + if (FILTER_PID || FILTER_TGID || pid == 0) return 0; u64 ts = bpf_ktime_get_ns(); start.update(&pid, &ts); @@ -109,13 +115,14 @@ int trace_run(struct pt_regs *ctx, struct task_struct *prev) if (prev->state == TASK_RUNNING) { tgid = prev->tgid; pid = prev->pid; - if (!(FILTER_PID || pid == 0)) { - u64 ts = bpf_ktime_get_ns(); - start.update(&pid, &ts); + u64 ts = bpf_ktime_get_ns(); + if (pid != 0) { + if (!(FILTER_PID) && !(FILTER_TGID)) { + start.update(&pid, &ts); + } } } - tgid = bpf_get_current_pid_tgid() >> 32; pid = bpf_get_current_pid_tgid(); u64 *tsp, delta_us; @@ -167,17 +174,21 @@ RAW_TRACEPOINT_PROBE(sched_switch) // TP_PROTO(bool preempt, struct task_struct *prev, struct task_struct *next) struct task_struct *prev = (struct task_struct *)ctx->args[1]; struct task_struct *next= (struct task_struct *)ctx->args[2]; - u32 pid; + u32 tgid, pid; long state; // ivcsw: treat like an enqueue event and store timestamp bpf_probe_read(&state, sizeof(long), (const void *)&prev->state); if (state == TASK_RUNNING) { + bpf_probe_read(&tgid, sizeof(prev->tgid), &prev->tgid); bpf_probe_read(&pid, sizeof(prev->pid), &prev->pid); - if (!(FILTER_PID || pid == 0)) { - u64 ts = bpf_ktime_get_ns(); - start.update(&pid, &ts); + u64 ts = bpf_ktime_get_ns(); + if (pid != 0) { + if (!(FILTER_PID) && !(FILTER_TGID)) { + start.update(&pid, &ts); + } } + } bpf_probe_read(&pid, sizeof(next->pid), &next->pid); @@ -218,10 +229,17 @@ if min_us == 0: bpf_text = bpf_text.replace('FILTER_US', '0') else: bpf_text = bpf_text.replace('FILTER_US', 'delta_us <= %s' % str(min_us)) -if args.pid: - bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % args.pid) + +if args.tid: + bpf_text = bpf_text.replace('FILTER_PID', 'pid != %s' % args.tid) else: bpf_text = bpf_text.replace('FILTER_PID', '0') + +if args.pid: + bpf_text = bpf_text.replace('FILTER_TGID', 'tgid != %s' % args.pid) +else: + bpf_text = bpf_text.replace('FILTER_TGID', '0') + if debug or args.ebpf: print(bpf_text) if args.ebpf: @@ -240,7 +258,7 @@ if not is_support_raw_tp: b.attach_kprobe(event="finish_task_switch", fn_name="trace_run") print("Tracing run queue latency higher than %d us" % min_us) -print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "PID", "LAT(us)")) +print("%-8s %-16s %-6s %14s" % ("TIME", "COMM", "TID", "LAT(us)")) # read events b["events"].open_perf_buffer(print_event, page_cnt=64) diff --git a/tools/runqslower_example.txt b/tools/runqslower_example.txt index 61825e5201c270ebbd09ba17d5da13186ccfcd79..e64f9451af2e931af81593898b1604e81373079f 100644 --- a/tools/runqslower_example.txt +++ b/tools/runqslower_example.txt @@ -5,8 +5,15 @@ runqslower shows high latency scheduling times between tasks being ready to run and them running on CPU after that. For example: # runqslower -Tracing run queue latency higher than 10000 us -TIME COMM PID LAT(us) + +Note: Showing TID (thread id) in the report column. The smallest +execution unit becomes a TID when using the --pid flag as +in that case the tool reports not only the parent pid but +its children threads as well. + +Tracing run queue latency higher than 10000 us. + +TIME COMM TID LAT(us) 04:16:32 cc1 12924 12739 04:16:32 sh 13640 12118 04:16:32 make 13639 12730 @@ -32,7 +39,7 @@ These delays can be analyzed in depth with "perf sched" tool, see: USAGE message: # ./runqslower -h -usage: runqslower.py [-h] [-p PID] [min_us] +usage: runqslower.py [-h] [-p PID | -t TID] [min_us] Trace high run queue latency @@ -41,9 +48,11 @@ positional arguments: optional arguments: -h, --help show this help message and exit - -p PID, --pid PID trace this PID only + -p PID, --pid PID trace this PID + -t TID, --tid TID trace this TID only examples: ./runqslower # trace run queue latency higher than 10000 us (default) ./runqslower 1000 # trace run queue latency higher than 1000 us - ./runqslower -p 123 # trace pid 123 only + ./runqslower -p 123 # trace pid 123 + ./runqslower -t 123 # trace tid (thread) 123