Another oddity. I can pass the Chelsio NICs (function 4) through to HVM guests with no problem:
root@archiso ~ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host noprefixroute
valid_lft forever preferred_lft forever
2: enX0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:16:3e:22:85:36 brd ff:ff:ff:ff:ff:ff
inet 192.168.69.17/24 metric 100 brd 192.168.69.255 scope global dynamic enX0
valid_lft 3385sec preferred_lft 3385sec
inet6 fe80::216:3eff:fe22:8536/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
3: ens5f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:07:43:4b:f7:e0 brd ff:ff:ff:ff:ff:ff
altname enp0s5f0
inet 192.168.1.15/24 metric 100 brd 192.168.1.255 scope global dynamic ens5f0
valid_lft 86191sec preferred_lft 86191sec
inet6 fe80::207:43ff:fe4b:f7e0/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
4: ens5f0d1: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:e8 brd ff:ff:ff:ff:ff:ff
altname enp0s5f0d1
5: ens5f0d2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:f0 brd ff:ff:ff:ff:ff:ff
altname enp0s5f0d2
6: ens5f0d3: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:f8 brd ff:ff:ff:ff:ff:ff
altname enp0s5f0d3
Note that two interfaces got assigned IP addresses: enX0 is a virtualized management LAN, and ens5f0 is the physical NIC connection to my router.
root@archiso ~ # dmesg | grep cxgb4
[ 4.899632] cxgb4 0000:00:05.0: Coming up as MASTER: Initializing adapter
[ 5.596729] cxgb4 0000:00:05.0: Direct firmware load for cxgb4/t5-config.txt failed with error -2
[ 6.533372] cxgb4 0000:00:05.0: Successfully configured using Firmware Configuration File "Firmware Default", version 0x0, computed checksum 0x0
[ 6.766686] cxgb4 0000:00:05.0: Hash filter supported only on T6
[ 6.816682] cxgb4 0000:00:05.0: max_ordird_qp 21 max_ird_adapter 387072
[ 6.856683] cxgb4 0000:00:05.0: Current filter mode/mask 0x632b:0x21
[ 7.010427] cxgb4 0000:00:05.0: 130 MSI-X vectors allocated, nic 32 eoqsets 32 per uld 8 mirrorqsets 32
[ 7.010647] cxgb4 0000:00:05.0: 63.008 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x8 link)
[ 7.061697] cxgb4 0000:00:05.0 eth1: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 7.061938] cxgb4 0000:00:05.0 eth2: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 7.062141] cxgb4 0000:00:05.0 eth3: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 7.062330] cxgb4 0000:00:05.0 eth4: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 7.099978] cxgb4 0000:00:05.0: Chelsio T540-BT rev 1
[ 7.099981] cxgb4 0000:00:05.0: S/N: PT40180192, P/N: 110124450A0
[ 7.099983] cxgb4 0000:00:05.0: Firmware version: 1.27.5.0
[ 7.099984] cxgb4 0000:00:05.0: Bootstrap version: 1.1.0.0
[ 7.099985] cxgb4 0000:00:05.0: TP Microcode version: 0.1.4.9
[ 7.099986] cxgb4 0000:00:05.0: No Expansion ROM loaded
[ 7.099987] cxgb4 0000:00:05.0: Serial Configuration version: 0x1009000
[ 7.099988] cxgb4 0000:00:05.0: VPD version: 0x2
[ 7.099989] cxgb4 0000:00:05.0: Configuration: RNIC MSI-X, Offload capable
[ 11.817237] cxgb4 0000:00:05.0 ens5f0: renamed from eth1
[ 11.854079] cxgb4 0000:00:05.0 ens5f0d2: renamed from eth3
[ 11.922367] cxgb4 0000:00:05.0 ens5f0d1: renamed from eth2
[ 11.977684] cxgb4 0000:00:05.0 ens5f0d3: renamed from eth4
[ 15.401824] cxgb4 0000:00:05.0 ens5f0: link up, 1Gbps, full-duplex, Tx/Rx PAUSE
Now, for comparison, I’ll start the same Xen configuration, but as PV instead of HVM (I’d prefer PVH, but PCI passthrough is not implemented for it…)
root@archiso ~ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host noprefixroute
valid_lft forever preferred_lft forever
2: enX0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:16:3e:22:85:36 brd ff:ff:ff:ff:ff:ff
inet6 fe80::216:3eff:fe22:8536/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
3: enp0s0f4: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:07:43:4b:f7:e0 brd ff:ff:ff:ff:ff:ff
inet 192.168.1.15/24 metric 100 brd 192.168.1.255 scope global dynamic enp0s0f4
valid_lft 86386sec preferred_lft 86386sec
inet6 fe80::207:43ff:fe4b:f7e0/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
4: enp0s0f4d1: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:e8 brd ff:ff:ff:ff:ff:ff
5: enp0s0f4d2: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:f0 brd ff:ff:ff:ff:ff:ff
6: enp0s0f4d3: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc mq state DOWN group default qlen 1000
link/ether 00:07:43:4b:f7:f8 brd ff:ff:ff:ff:ff:ff
Notice there is no IPv4 address on the management interface (enX0). Any attempt to configure it with DHCP fails, and static assignment doesn’t help either, i.e. pings don’t work at all.
root@archiso ~ # dmesg | grep cxgb4
[ 14.720529] cxgb4 0000:00:00.4: Xen PCI mapped GSI32 to IRQ77
[ 14.766826] cxgb4 0000:00:00.4: Coming up as MASTER: Initializing adapter
[ 16.445938] cxgb4 0000:00:00.4: Successfully configured using Firmware Configuration File "/lib/firmware/cxgb4/t5-config.txt", version 0x1425001c, computed checksum 0xd8c8fbd6
[ 16.622529] cxgb4 0000:00:00.4: Hash filter supported only on T6
[ 16.672608] cxgb4 0000:00:00.4: max_ordird_qp 21 max_ird_adapter 387072
[ 16.712537] cxgb4 0000:00:00.4: Current filter mode/mask 0x632b:0x21
[ 16.813999] cxgb4 0000:00:00.4: too many vectors (0x82) for PCI frontend: Increase SH_INFO_MAX_VEC
[ 16.814034] cxgb4 0000:00:00.4: Xen PCI frontend error: -22!
[ 16.815542] cxgb4 0000:00:00.4: enable msix get err ffffffe4
[ 16.815560] cxgb4 0000:00:00.4: Xen PCI frontend error: -28!
[ 16.815684] cxgb4 0000:00:00.4: Disabling MSI-X due to insufficient MSI-X vectors
[ 16.815924] cxgb4 0000:00:00.4: 63.008 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x8 link)
[ 16.880617] cxgb4 0000:00:00.4 eth0: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 16.880877] cxgb4 0000:00:00.4 eth1: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 16.881117] cxgb4 0000:00:00.4 eth2: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 16.881354] cxgb4 0000:00:00.4 eth3: Chelsio T540-BT 100M/1G/10GBASE-BT_XFI
[ 16.919202] cxgb4 0000:00:00.4: Chelsio T540-BT rev 1
[ 16.919205] cxgb4 0000:00:00.4: S/N: PT40180192, P/N: 110124450A0
[ 16.919212] cxgb4 0000:00:00.4: Firmware version: 1.27.5.0
[ 16.919214] cxgb4 0000:00:00.4: Bootstrap version: 1.1.0.0
[ 16.919215] cxgb4 0000:00:00.4: TP Microcode version: 0.1.4.9
[ 16.919216] cxgb4 0000:00:00.4: No Expansion ROM loaded
[ 16.919217] cxgb4 0000:00:00.4: Serial Configuration version: 0x1009000
[ 16.919219] cxgb4 0000:00:00.4: VPD version: 0x2
[ 16.919220] cxgb4 0000:00:00.4: Configuration: RNIC MSI, Offload capable
[ 16.939658] cxgb4 0000:00:00.4 enp0s0f4: renamed from eth0
[ 16.979387] cxgb4 0000:00:00.4 enp0s0f4d3: renamed from eth3
[ 16.999341] cxgb4 0000:00:00.4 enp0s0f4d2: renamed from eth2
[ 17.029394] cxgb4 0000:00:00.4 enp0s0f4d1: renamed from eth1
[ 19.800967] cxgb4 0000:00:00.4 enp0s0f4: link up, 1Gbps, full-duplex, Tx/Rx PAUSE
Notice the bit about insufficient MSI-X vectors and SH_INFO_MAX_VEC. So I checked and… well…
In the HVM passthrough you can see the card gets 130 vectors. On the host it gets even more:
root@vmserver:/opt/xen # dmesg | grep -i msi-x
[ 6.713113] cxgb4 0000:17:00.4: 162 MSI-X vectors allocated, nic 32 eoqsets 32 per uld 16 mirrorqsets 32
[ 6.800866] cxgb4 0000:17:00.4: Configuration: RNIC MSI-X, Offload capable
[ 8.854739] cxgb4 0000:18:00.4: 162 MSI-X vectors allocated, nic 32 eoqsets 32 per uld 16 mirrorqsets 32
[ 8.944282] cxgb4 0000:18:00.4: Configuration: RNIC MSI-X, Offload capable
[ 17.364304] cxgb4vf 0000:17:01.0: eth0: Chelsio VF NIC PCIe MSI-X
[ 17.564546] cxgb4vf 0000:17:01.4: eth1: Chelsio VF NIC PCIe MSI-X
[ 1532.330417] pciback 0000:17:00.4: xen_pciback: error enabling MSI-X for guest 3: err -28!
The last pciback error is consistent, as the SH_INFO_MAX_VEC limit is checked on both the frontend and the backend.
Now for more oddities: if I shut down the PV guest and recreate it without the passthrough, the management interface still gets no IPv4 address:
root@archiso ~ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host noprefixroute
valid_lft forever preferred_lft forever
2: enX0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:16:3e:22:85:36 brd ff:ff:ff:ff:ff:ff
inet6 fe80::216:3eff:fe22:8536/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
And as HVM:
root@archiso ~ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host noprefixroute
valid_lft forever preferred_lft forever
2: enX0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:16:3e:22:85:36 brd ff:ff:ff:ff:ff:ff
inet6 fe80::216:3eff:fe22:8536/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
However, if I reboot the whole server and try again as PV:
root@archiso ~ # ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host noprefixroute
valid_lft forever preferred_lft forever
2: enX0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
link/ether 00:16:3e:22:85:36 brd ff:ff:ff:ff:ff:ff
inet 192.168.69.17/24 metric 100 brd 192.168.69.255 scope global dynamic enX0
valid_lft 3568sec preferred_lft 3568sec
inet6 fe80::216:3eff:fe22:8536/64 scope link proto kernel_ll
valid_lft forever preferred_lft forever
This tells me that something gets really broken in the host network stack along the way.
Now I could rebuild the kernel again, this time with a higher SH_INFO_MAX_VEC (note the source says it should not exceed 128 for some reason), but I’m not entirely sure if I should. I would also need to use the same kernel for both dom0 and domU (not a big problem, but still), or at least have consistent patches between them.
The config:
name = "OVSwitch"
type = "pv" # or hvm
driver_domain=1
memory = 4096
maxmem = 4096
vcpus = 8
kernel = "/mnt/arch/boot/x86_64/vmlinuz-linux"
ramdisk = "/mnt/arch/boot/x86_64/initramfs-linux.img"
extra = "archisobasedir=arch archisodevice=UUID=2024-01-01-16-44-54-00"
disk = [
"file:/opt/xen/isos/archlinux-2024.01.01-x86_64.iso,hdc:cdrom,r",
"phy:/dev/vgroot/ovswitch-root,xvda,w",
]
vif = [
"mac=00:16:3e:22:85:36,bridge=mgmt-lan-br",
]
pci = [
# "0:17:00.4",
# "0:18:00.4",
]
vnc = 1
vnclisten = '192.168.69.1'
vncdisplay = 1