An elegant way to do this is to use SystemTap and write custom probes that collect only the traffic you need, sorted the way you need.
This is an example tap that collects packets and kilobytes transmitted and received, by PID and executable name, in the ‘network’ probe, and collects total packets and kilobytes transmitted, by source IP and port and destination IP and port, in the ‘netfilter’ probe.
The tap can collect ever-increasing totals, or you can set it to zero the counters whenever the probes are read, so that you can more easily measure traffic between two timestamps:
#*NOTE* You, the end user of this script, are responsible for any risks
#associated with using this script.
#Compile string:
# stap -m nettop zabbix.stp
#Information can be found in /proc/systemtap/nettop/*
#Current process statistics are found in network
#Current disk statistics are found in disk
#Current socket statistics are found in netfilter
#clear_on_read sets whether or not the statistics are reset when read;
# the default is 0.
#The network, disk and netfilter files are each limited to 4096 bytes in size
# Packet-length aggregates for transmitted packets, keyed by
# [pid, device name, executable name].
global ifxmit
# Packet-length aggregates for received packets, same key layout as ifxmit.
global ifrecv
# Byte-count aggregates for successful VFS reads, keyed by
# [pid, device name, executable name].
global io_read
# Byte-count aggregates for successful VFS writes, same key layout as io_read.
global io_write
# Scratch array rebuilt on every report read; holds combined event counts
# so the report rows can be sorted.
global merged
# Packet-length aggregates keyed by [saddr, daddr, dport, sport].
global packets
# When non-zero, report reads reset the statistics after printing them.
global clear_on_read
# Runs once at script start-up: statistics accumulate by default
# (they are not cleared when the report files are read).
probe begin {
    clear_on_read = 0
}
# Returns the label used in the COMMAND column for the current context:
# the fixed string "LinuxKernel" when pid() is 0, otherwise execname().
function get_exec()
{
    return (pid() == 0) ? "LinuxKernel" : execname()
}
# Record the length of every transmitted packet against the current
# pid, the network device and the executable label.
probe netdev.transmit
{
    owner = get_exec()
    ifxmit[pid(), dev_name, owner] <<< length
}
# Record the length of every received packet against the current
# pid, the network device and the executable label.
probe netdev.receive
{
    owner = get_exec()
    ifrecv[pid(), dev_name, owner] <<< length
}
# Accumulate packet lengths per (source address, destination address,
# destination port, source port) tuple.
probe netfilter.ipv4.pre_routing
{
    // A statistics aggregate (<<<) is used here so that packets handled
    // in parallel on different processors do not contend with each other.
    packets[saddr, daddr, dport, sport] <<< length
}
# Account the bytes returned by each VFS read to the current
# pid, device and executable label.
probe vfs.read.return
{
    # Skip reads that returned nothing (or an error) and reads whose
    # device name is reported as "N/A".
    if ($return <= 0 || devname == "N/A")
        next

    io_read[pid(), devname, get_exec()] <<< $return
}
# Account the bytes returned by each VFS write to the current
# pid, device and executable label.
probe vfs.write.return
{
    # Skip writes that returned nothing (or an error) and writes whose
    # device name is reported as "N/A".
    if ($return <= 0 || devname == "N/A")
        next

    io_write[pid(), devname, get_exec()] <<< $return
}
# Reading the "clear_on_read" procfs file reports the current value of
# the flag as an unsigned decimal number.
probe procfs("clear_on_read").read {
    $value = sprintf("%u", clear_on_read)
}
# Writing a number to the "clear_on_read" procfs file sets the flag:
# 0 keeps statistics accumulating, any non-zero value makes the report
# files reset their statistics after each read.
probe procfs("clear_on_read").write
{
    # Parse as decimal. The previous base of 2 turned inputs such as "2"
    # or "5" into 0, silently leaving the flag disabled.
    clear_on_read = strtol($value, 10)
}
# Reading the "network" procfs file produces a per-process table of
# packets and kilobytes transmitted/received on each device, ordered by
# total packet count (descending). Output is capped at 4096 bytes.
probe procfs("network").umask(0444).read.maxsize(4096)
{
    # Header row.
    $value = sprintf("%5s %-15s %-7s %7s %7s %7s %7s\n",
                     "PID", "COMMAND", "DEV", "XMIT_PK", "RECV_PK",
                     "XMIT_KB", "RECV_KB")

    # Rebuild the scratch array with received + transmitted packet counts
    # per key; it exists only to drive the sorted iteration below.
    delete merged
    foreach ([p, ifname, cmd] in ifrecv)
        merged[p, ifname, cmd] += @count(ifrecv[p, ifname, cmd])
    foreach ([p, ifname, cmd] in ifxmit)
        merged[p, ifname, cmd] += @count(ifxmit[p, ifname, cmd])

    # One row per key, busiest first.
    foreach ([p, ifname, cmd] in merged-) {
        tx_packets = @count(ifxmit[p, ifname, cmd])
        rx_packets = @count(ifrecv[p, ifname, cmd])

        # Only touch @sum when at least one sample exists for the key.
        tx_kb = 0
        if (tx_packets)
            tx_kb = @sum(ifxmit[p, ifname, cmd]) / 1024
        rx_kb = 0
        if (rx_packets)
            rx_kb = @sum(ifrecv[p, ifname, cmd]) / 1024

        $value .= sprintf("%5u %-15s %-7s %7d %7d %7d %7d\n",
                          p, cmd, ifname, tx_packets, rx_packets,
                          tx_kb, rx_kb)
    }

    # Optionally start a fresh measurement interval.
    if (clear_on_read) {
        delete ifxmit
        delete ifrecv
    }

    delete merged
}
# Reading the "disk" procfs file produces a per-process table of VFS
# read/write call counts and kilobytes per device, ordered by total
# call count (descending). Output is capped at 4096 bytes.
probe procfs("disk").umask(0444).read.maxsize(4096)
{
    # Rebuild the scratch array with read + write counts per key; it
    # exists only to drive the sorted iteration below.
    delete merged
    $value = sprintf("%5s %-15s %-7s %7s %7s %7s %7s\n",
                     "PID", "COMMAND", "DEV", "READS", "WRITES",
                     "READ_KB", "WRITE_KB")
    foreach ([pid, dev, exec] in io_read) {
        merged[pid, dev, exec] += @count(io_read[pid, dev, exec])
    }
    foreach ([pid, dev, exec] in io_write) {
        merged[pid, dev, exec] += @count(io_write[pid, dev, exec])
    }

    # One row per key, busiest first.
    foreach ([pid, dev, exec] in merged-) {
        writes = @count(io_write[pid, dev, exec])
        reads = @count(io_read[pid, dev, exec])
        $value .= sprintf("%5u %-15s %-7s %7d %7d %7d %7d\n",
                          pid, exec, dev, reads, writes,
                          reads ? @sum(io_read[pid, dev, exec]) / 1024 : 0,
                          writes ? @sum(io_write[pid, dev, exec]) / 1024 : 0)
    }

    # Reset the disk statistics when requested. This previously deleted
    # ifxmit/ifrecv, which left the disk counters growing forever and
    # wiped the network counters as a side effect of reading this file.
    if (clear_on_read) {
        delete io_read
        delete io_write
    }

    delete merged
}
# Reading the "netfilter" procfs file produces one row per
# (source address, source port, destination address, destination port)
# tuple with its packet count and kilobytes, iterating the packets
# aggregate in descending order. Output is capped at 4096 bytes.
probe procfs("netfilter").umask(0444).read.maxsize(4096)
{
    # Header row.
    $value = sprintf("%-15s %-7s %-15s %-7s %-7s %-7s \n",
                     "SOURCEIP", "SPORT", "DESTIP", "DPORT", "PACKETS",
                     "TOTAL_KB")

    foreach ([src, dst, dst_port, src_port] in packets-) {
        pkt_count = @count(packets[src, dst, dst_port, src_port])

        # Only touch @sum when at least one sample exists for the key.
        total_kb = 0
        if (pkt_count)
            total_kb = @sum(packets[src, dst, dst_port, src_port]) / 1024

        $value .= sprintf("%-15s %-7d %-15s %-7d %7d %7d\n",
                          src, src_port, dst, dst_port,
                          pkt_count,
                          total_kb)
    }

    # Optionally start a fresh measurement interval.
    if (clear_on_read) {
        delete packets
    }
}
[root@vyos-02 ~]# stap -v -m nettop -DSTP_NO_BUILDID_CHECK z.stp
Pass 1: parsed user script and 488 library scripts using 558732virt/150040res/18744shr/133396data kb, in 340usr/30sys/373real ms.
Pass 2: analyzed script: 16 probes, 53 functions, 13 embeds, 42 globals using 813752virt/406956res/20308shr/388416data kb, in 2850usr/1010sys/3864real ms.
Pass 3: translated to C into "/tmp/stapu6kodh/nettop_src.c" using 813752virt/407084res/20436shr/388416data kb, in 30usr/90sys/111real ms.
Pass 4: compiled C into "nettop.ko" in 5060usr/940sys/5318real ms.
Pass 5: starting run.
(running iperf between two clients and test server, two separate instances)
[root@vyos-02 ~]# cat /proc/systemtap/nettop/network
PID COMMAND DEV XMIT_PK RECV_PK XMIT_KB RECV_KB
0 LinuxKernel ens18 700219 4344365 45132 218385925
17379 iperf3 ens18 884787 168045 57027 8708127
9584 iperf3 ens18 859429 141177 55393 7335426
22 ksoftirqd/2 ens18 7550 63700 486 3232153
1172 tuned ens18 1871 12139 120 614477
745 qemu-ga ens18 638 6031 41 307453
1679 gnome-shell ens18 611 4261 39 218817
1187 containerd ens18 419 2879 27 145864
17397 pgrep ens18 353 2109 22 103289
10272 bash ens18 307 1732 19 88336
10253 sshd ens18 246 809 27 41423
17394 awk ens18 119 740 7 37164
2554 sshd ens18 146 603 18 29990
725 irqbalance ens18 115 605 7 31204
2626 sshd ens18 181 504 21 25745
464 xfsaild/dm-0 ens18 86 570 5 28208
1175 in:imjournal ens18 81 556 5 28232
17372 stapio ens18 46 434 2 22172
13480 kworker/u8:0 ens18 64 348 4 17616
40 kcompactd0 ens18 68 342 4 18789
562 systemd-journal ens18 40 361 2 18002
170 kworker/2:1H ens18 37 326 2 16302
17381 grep ens18 46 301 2 15587
1679 llvmpipe-3 ens18 18 256 1 13337
1679 JS Helper ens18 30 240 1 12194
16611 kworker/u8:1 ens18 29 212 1 10594
17405 bash ens18 40 194 2 9985
197 kworker/2:3 ens18 28 182 1 9212
1679 gmain ens18 26 173 1 8852
17408 cat ens18 19 156 1 8309
2191 pool ens18 20 124 1 6403
17387 bash ens18 16 121 1 6051
718 avahi-daemon ens18 9 128 0 6380
17382 kworker/u8:2 ens18 14 116 0 5855
17384 bash ens18 15 95 0 4807
17383 bash ens18 0 110 0 5638
766 sssd_be ens18 11 78 0 4143
17397 ksmtuned ens18 16 71 1 3697
771 sssd_nss ens18 6 74 0 3774
17408 bash ens18 8 67 0 3498
17381 bash ens18 6 64 0 3254
800 gmain ens18 1 64 0 3231
16781 sleep ens18 7 51 0 2391
17394 ksmtuned ens18 9 47 0 2479
2141 wpa_supplicant ens18 6 40 0 1872
17393 ksmtuned ens18 5 38 0 2014
1679 llvmpipe-2 ens18 2 24 0 1040
1518 dockerd ens18 3 22 0 1136
741 rtkit-daemon ens18 2 15 0 658
2055 gmain ens18 1 15 0 720
2 kthreadd ens18 3 10 0 547
21 migration/2 ens18 1 10 0 556
11 rcu_sched ens18 0 11 0 570
756 ksmtuned ens18 0 5 0 273
2337 sshd ens18 3 0 0 0
16608 sshd ens18 3 0 0 0
2645 bash ens18 0 1 0 0
17410 bash ens18 0 1 0 0
735 chronyd ens18 1 0 0 0
[root@vyos-02 ~]# cat /proc/systemtap/nettop/netfilter
SOURCEIP SPORT DESTIP DPORT PACKETS TOTAL_KB
172.30.2.17 41284 172.30.2.44 5202 1974254 104496392
172.30.2.147 64191 172.30.2.44 5201 1891159 90553850
172.30.2.147 59824 172.30.2.44 22 68 3
172.30.2.147 59945 172.30.2.44 22 67 3
172.30.2.147 60818 172.30.2.44 22 57 3
172.30.2.147 64190 172.30.2.44 5201 18 1
172.30.2.17 41270 172.30.2.44 5202 17 1
172.30.2.240 5353 224.0.0.251 5353 7 1
172.30.2.196 5353 224.0.0.251 5353 7 1
172.30.2.147 63492 172.30.2.44 22 6 0
172.30.2.147 59438 172.30.2.44 22 6 0
172.30.2.210 57621 172.30.2.255 57621 5 0
172.30.2.147 57621 172.30.2.255 57621 5 0
172.30.2.240 0 224.0.0.22 0 4 0
172.30.2.196 0 224.0.0.22 0 3 0
172.30.2.200 5353 224.0.0.251 5353 3 1
172.30.2.93 5678 255.255.255.255 5678 3 0
172.30.2.94 5678 255.255.255.255 5678 3 0
172.30.2.220 5353 224.0.0.251 5353 2 0
172.30.2.147 5353 224.0.0.251 5353 2 0
You should be able to use this data to piece together your puzzle (which process is listening on which port, which traffic you are interested in) and/or modify the tap to suit your needs, if you feel brave enough.
The hard part, judging from your comments, will be making sure you can deploy SystemTap on production servers. My suggestion would be to test things out in pre-prod first, if you have a pre-prod environment and do capacity testing of releases there.