Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
B
bcc
Manage
Activity
Members
Plan
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Terraform modules
Analyze
Contributor analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CodeLinaro
la
platform
external
bcc
Commits
eb32c158
Commit
eb32c158
authored
6 years ago
by
Brendan Gregg
Committed by
yonghong-song
6 years ago
Browse files
Options
Downloads
Patches
Plain Diff
biosnoop: add -Q for queued time (#2279)
* biosnoop: add -Q for queued time * biotop: fix PID
parent
2d182dc6
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
man/man8/biosnoop.8
+13
-2
13 additions, 2 deletions
man/man8/biosnoop.8
tools/biosnoop.py
+60
-31
60 additions, 31 deletions
tools/biosnoop.py
tools/biosnoop_example.txt
+44
-16
44 additions, 16 deletions
tools/biosnoop_example.txt
tools/biotop.py
+1
-1
1 addition, 1 deletion
tools/biotop.py
with
118 additions
and
50 deletions
man/man8/biosnoop.8
+
13
−
2
View file @
eb32c158
...
...
@@ -2,7 +2,7 @@
.SH NAME
biosnoop \- Trace block device I/O and print details incl. issuing PID.
.SH SYNOPSIS
.B biosnoop
.B biosnoop
[\-hQ]
.SH DESCRIPTION
This tools traces block device I/O (disk I/O), and prints a one-line summary
for each I/O showing various details. These include the latency from the time of
...
...
@@ -22,6 +22,13 @@ which uses an older mechanism
Since this uses BPF, only the root user can use this tool.
.SH REQUIREMENTS
CONFIG_BPF and bcc.
.SH OPTIONS
.TP
\-h
Print usage message.
.TP
\-Q
Include a column showing the time spent quueued in the OS.
.SH EXAMPLES
.TP
Trace all block device I/O and print a summary line per I/O:
...
...
@@ -30,7 +37,7 @@ Trace all block device I/O and print a summary line per I/O:
.SH FIELDS
.TP
TIME(s)
Time of the I/O, in seconds since the first I/O was seen.
Time of the I/O
completion
, in seconds since the first I/O was seen.
.TP
COMM
Cached process name, if present. This usually (but isn't guaranteed) to identify
...
...
@@ -52,6 +59,10 @@ Device sector for the I/O.
BYTES
Size of the I/O, in bytes.
.TP
QUE(ms)
Time the I/O was queued in the OS before being issued to the device,
in milliseconds.
.TP
LAT(ms)
Time for the I/O (latency) from the issue to the device, to its completion,
in milliseconds.
...
...
This diff is collapsed.
Click to expand it.
tools/biosnoop.py
+
60
−
31
View file @
eb32c158
...
...
@@ -2,7 +2,7 @@
# @lint-avoid-python-3-compatibility-imports
#
# biosnoop Trace block device I/O and print details including issuing PID.
# For Linux, uses BCC, eBPF.
#
For Linux, uses BCC, eBPF.
#
# This uses in-kernel eBPF maps to cache process details (PID and comm) by I/O
# request, as well as a starting timestamp for calculating I/O latency.
...
...
@@ -16,13 +16,31 @@
from
__future__
import
print_function
from
bcc
import
BPF
import
re
# load BPF program
b
=
BPF
(
text
=
"""
import
argparse
# arguments
examples
=
"""
examples:
./biosnoop # trace all block I/O
./biosnoop -Q # include OS queued time
"""
parser
=
argparse
.
ArgumentParser
(
description
=
"
Trace block I/O
"
,
formatter_class
=
argparse
.
RawDescriptionHelpFormatter
,
epilog
=
examples
)
parser
.
add_argument
(
"
-Q
"
,
"
--queue
"
,
action
=
"
store_true
"
,
help
=
"
include OS queued time
"
)
parser
.
add_argument
(
"
--ebpf
"
,
action
=
"
store_true
"
,
help
=
argparse
.
SUPPRESS
)
args
=
parser
.
parse_args
()
debug
=
0
# define BPF program
bpf_text
=
"""
#include <uapi/linux/ptrace.h>
#include <linux/blkdev.h>
struct val_t {
u64 ts;
u32 pid;
char name[TASK_COMM_LEN];
};
...
...
@@ -31,6 +49,7 @@ struct data_t {
u32 pid;
u64 rwflag;
u64 delta;
u64 qdelta;
u64 sector;
u64 len;
u64 ts;
...
...
@@ -46,9 +65,13 @@ BPF_PERF_OUTPUT(events);
int trace_pid_start(struct pt_regs *ctx, struct request *req)
{
struct val_t val = {};
u64 ts;
if (bpf_get_current_comm(&val.name, sizeof(val.name)) == 0) {
val.pid = bpf_get_current_pid_tgid();
val.pid = bpf_get_current_pid_tgid() >> 32;
if (##QUEUE##) {
val.ts = bpf_ktime_get_ns();
}
infobyreq.update(&req, &val);
}
return 0;
...
...
@@ -58,18 +81,15 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
int trace_req_start(struct pt_regs *ctx, struct request *req)
{
u64 ts;
ts = bpf_ktime_get_ns();
start.update(&req, &ts);
return 0;
}
// output
int trace_req_completion(struct pt_regs *ctx, struct request *req)
{
u64 *tsp, delta;
u32 *pidp = 0;
u64 *tsp;
struct val_t *valp;
struct data_t data = {};
u64 ts;
...
...
@@ -83,12 +103,16 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
ts = bpf_ktime_get_ns();
data.delta = ts - *tsp;
data.ts = ts / 1000;
data.qdelta = 0;
valp = infobyreq.lookup(&req);
if (valp == 0) {
data.len = req->__data_len;
strcpy(data.name,
"
?
"
);
} else {
if (##QUEUE##) {
data.qdelta = *tsp - valp->ts;
}
data.pid = valp->pid;
data.len = req->__data_len;
data.sector = req->__sector;
...
...
@@ -119,7 +143,18 @@ int trace_req_completion(struct pt_regs *ctx, struct request *req)
return 0;
}
"""
,
debug
=
0
)
"""
if
args
.
queue
:
bpf_text
=
bpf_text
.
replace
(
'
##QUEUE##
'
,
'
1
'
)
else
:
bpf_text
=
bpf_text
.
replace
(
'
##QUEUE##
'
,
'
0
'
)
if
debug
or
args
.
ebpf
:
print
(
bpf_text
)
if
args
.
ebpf
:
exit
()
# initialize BPF
b
=
BPF
(
text
=
bpf_text
)
b
.
attach_kprobe
(
event
=
"
blk_account_io_start
"
,
fn_name
=
"
trace_pid_start
"
)
if
BPF
.
get_kprobe_functions
(
b
'
blk_start_request
'
):
b
.
attach_kprobe
(
event
=
"
blk_start_request
"
,
fn_name
=
"
trace_req_start
"
)
...
...
@@ -128,8 +163,11 @@ b.attach_kprobe(event="blk_account_io_completion",
fn_name
=
"
trace_req_completion
"
)
# header
print
(
"
%-14s %-14s %-6s %-7s %-2s %-9s %-7s %7s
"
%
(
"
TIME(s)
"
,
"
COMM
"
,
"
PID
"
,
"
DISK
"
,
"
T
"
,
"
SECTOR
"
,
"
BYTES
"
,
"
LAT(ms)
"
))
print
(
"
%-11s %-14s %-6s %-7s %-1s %-10s %-7s
"
%
(
"
TIME(s)
"
,
"
COMM
"
,
"
PID
"
,
"
DISK
"
,
"
T
"
,
"
SECTOR
"
,
"
BYTES
"
),
end
=
""
)
if
args
.
queue
:
print
(
"
%7s
"
%
(
"
QUE(ms)
"
),
end
=
""
)
print
(
"
%7s
"
%
"
LAT(ms)
"
)
rwflg
=
""
start_ts
=
0
...
...
@@ -140,33 +178,24 @@ delta = 0
def
print_event
(
cpu
,
data
,
size
):
event
=
b
[
"
events
"
].
event
(
data
)
val
=
-
1
global
start_ts
global
prev_ts
global
delta
if
start_ts
==
0
:
start_ts
=
event
.
ts
if
event
.
rwflag
==
1
:
rwflg
=
"
W
"
if
event
.
rwflag
==
0
:
else
:
rwflg
=
"
R
"
if
not
re
.
match
(
b
'
\?
'
,
event
.
name
):
val
=
event
.
sector
if
start_ts
==
0
:
prev_ts
=
start_ts
if
start_ts
==
1
:
delta
=
float
(
delta
)
+
(
event
.
ts
-
prev_ts
)
delta
=
float
(
event
.
ts
)
-
start_ts
print
(
"
%-1
4.9
f %-14.14s %-6s %-7s %-
2
s %-
9
s %-7s
%7.2f
"
%
(
print
(
"
%-1
1.6
f %-14.14s %-6s %-7s %-
1
s %-
10
s %-7s
"
%
(
delta
/
1000000
,
event
.
name
.
decode
(
'
utf-8
'
,
'
replace
'
),
event
.
pid
,
event
.
disk_name
.
decode
(
'
utf-8
'
,
'
replace
'
),
rwflg
,
val
,
event
.
len
,
float
(
event
.
delta
)
/
1000000
)
)
pr
ev_ts
=
event
.
ts
start_ts
=
1
event
.
disk_name
.
decode
(
'
utf-8
'
,
'
replace
'
),
rwflg
,
event
.
sector
,
event
.
len
)
,
end
=
""
)
if
args
.
queue
:
pr
int
(
"
%7.2f
"
%
(
float
(
event
.
qdelta
)
/
1000000
),
end
=
""
)
print
(
"
%7.2f
"
%
(
float
(
event
.
delta
)
/
1000000
))
# loop with callback to print_event
b
[
"
events
"
].
open_perf_buffer
(
print_event
,
page_cnt
=
64
)
...
...
This diff is collapsed.
Click to expand it.
tools/biosnoop_example.txt
+
44
−
16
View file @
eb32c158
...
...
@@ -5,22 +5,22 @@ biosnoop traces block device I/O (disk I/O), and prints a line of output
per I/O. Example:
# ./biosnoop
TIME(s)
COMM PID DISK T
SECTOR BYTES LAT(ms)
0.000004
001
supervise 1950 xvda1 W
13092560 4096 0.74
0.000178
002
supervise 1950 xvda1 W
13092432 4096 0.61
0.001469
001
supervise 1956 xvda1 W
13092440 4096 1.24
0.001588
002
supervise 1956 xvda1 W
13115128 4096 1.09
1.022346
001
supervise 1950 xvda1 W
13115272 4096 0.98
1.022568
002
supervise 1950 xvda1 W
13188496 4096 0.93
1.023534
000
supervise 1956 xvda1 W
13188520 4096 0.79
1.023585
003
supervise 1956 xvda1 W
13189512 4096 0.60
2.003920
000
xfsaild/md0 456 xvdc W
62901512 8192 0.23
2.003931
001
xfsaild/md0 456 xvdb W
62901513 512 0.25
2.004034
001
xfsaild/md0 456 xvdb W
62901520 8192 0.35
2.004042
000
xfsaild/md0 456 xvdb W
63542016 4096 0.36
2.004204
001
kworker/0:3 26040 xvdb W
41950344 65536 0.34
2.044352
002
supervise 1950 xvda1 W
13192672 4096 0.65
2.044574
000
supervise 1950 xvda1 W
13189072 4096 0.58
TIME(s) COMM PID DISK T SECTOR
BYTES LAT(ms)
0.000004 supervise 1950 xvda1 W 13092560
4096 0.74
0.000178 supervise 1950 xvda1 W 13092432
4096 0.61
0.001469 supervise 1956 xvda1 W 13092440
4096 1.24
0.001588 supervise 1956 xvda1 W 13115128
4096 1.09
1.022346 supervise 1950 xvda1 W 13115272
4096 0.98
1.022568 supervise 1950 xvda1 W 13188496
4096 0.93
1.023534 supervise 1956 xvda1 W 13188520
4096 0.79
1.023585 supervise 1956 xvda1 W 13189512
4096 0.60
2.003920 xfsaild/md0 456 xvdc W 62901512
8192 0.23
2.003931 xfsaild/md0 456 xvdb W 62901513
512 0.25
2.004034 xfsaild/md0 456 xvdb W 62901520
8192 0.35
2.004042 xfsaild/md0 456 xvdb W 63542016
4096 0.36
2.004204 kworker/0:3 26040 xvdb W 41950344
65536 0.34
2.044352 supervise 1950 xvda1 W 13192672
4096 0.65
2.044574 supervise 1950 xvda1 W 13189072
4096 0.58
This includes the PID and comm (process name) that were on-CPU at the time of
issue (which usually means the process responsible).
...
...
@@ -47,3 +47,31 @@ There are 4 write IOPS.
The output of biosnoop identifies the reason: multiple supervise processes are
issuing writes to the xvda1 disk. I can now drill down on supervise using other
tools to understand its file system workload.
The -Q option includes a column to show the time spent queued in the OS:
# biosnoop.py -Q
TIME(s) COMM PID DISK T SECTOR BYTES QUE(ms) LAT(ms)
0.000000 kworker/u72:1 13379 nvme1n1 W 1142400 4096 0.01 0.55
0.000771 sync 22177 nvme1n1 W 41963894 3072 0.11 0.47
5.332998 xfsaild/nvme1n 1061 nvme1n1 W 545728 16384 0.01 0.61
5.333044 xfsaild/nvme1n 1061 nvme1n1 W 2349728 16384 0.02 0.64
5.333065 xfsaild/nvme1n 1061 nvme1n1 W 20971521 512 0.02 0.65
5.333067 xfsaild/nvme1n 1061 nvme1n1 W 20971528 8192 0.00 0.65
[...]
USAGE message:
usage: biosnoop.py [-h] [-Q]
Trace block I/O
optional arguments:
-h, --help show this help message and exit
-Q, --queue include OS queued time
examples:
./biosnoop # trace all block I/O
./biosnoop -Q # include OS queued time
This diff is collapsed.
Click to expand it.
tools/biotop.py
+
1
−
1
View file @
eb32c158
...
...
@@ -93,7 +93,7 @@ int trace_pid_start(struct pt_regs *ctx, struct request *req)
struct who_t who = {};
if (bpf_get_current_comm(&who.name, sizeof(who.name)) == 0) {
who.pid = bpf_get_current_pid_tgid();
who.pid = bpf_get_current_pid_tgid()
>> 32
;
whobyreq.update(&req, &who);
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment