Increasing VFIO VGA Performance

For reference here is my complete qemu launch args.

/usr/local/bin/qemu-system-x86_64 \
  -nographic \
  -machine q35,accel=kvm,usb=off,vmport=off,dump-guest-core=off,kernel-irqchip=on \
  -cpu host,hv_time,hv_vpindex,hv_reset,hv_runtime,hv_crash,hv_synic,hv_stimer,hv_spinlocks=0x1fff,hv_vendor_id=lakeuv283713,kvm=off,l3-cache=on,-hypervisor,migratable=no,+invtsc \
  -drive file=/opt/VM/Windows/ovmf/OVMF_CODE-pure-efi.fd,if=pflash,format=raw,unit=0,readonly=on \
  -drive file=/opt/VM/Windows/ovmf/vars.fd,if=pflash,format=raw,unit=1 \
  -realtime mlock=on \
  -pidfile /var/run/vm.Windows.pid \
  -monitor stdio \
  -runas geoff \
  -enable-kvm \
  -name guest=Windows,debug-threads=on \
  -smp 8,sockets=1,cores=4,threads=2 \
  -m 16384 \
  -mem-prealloc \
  -global ICH9-LPC.disable_s3=1 \
  -global ICH9-LPC.disable_s4=1 \
  -no-user-config \
  -nodefaults \
  -rtc base=localtime,driftfix=slew \
  -global kvm-pit.lost_tick_policy=discard \
  -boot strict=on \
  -no-hpet \
  -netdev tap,script=/opt/VM/bin/ovs-ifup,downscript=/opt/VM/bin/ovs-ifdown,ifname=windows.30,id=hostnet0,vhost=on \
  -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:06:99:25,bus=pcie.0 \
  -soundhw ac97 \
  -device ioh3420,id=root_port1,chassis=0,slot=0,bus=pcie.0 \
  -device vfio-pci,host=45:00.0,id=hostdev1,bus=root_port1,addr=0x00,multifunction=on,romfile=/opt/VM/Windows/1080Ti.rom \
  -device vfio-pci,host=45:00.1,id=hostdev2,bus=root_port1,addr=0x00.1 \
  -drive  id=disk,file=/dev/disk/by-id/md-uuid-18bc7433:b223acbf:a1cbb062:f0b030c9,format=raw,if=none,cache=none,aio=native,discard=unmap,detect-zeroes=unmap,copy-on-read=on \
  -device virtio-scsi-pci,id=scsi \
  -device scsi-hd,drive=disk,bus=scsi.0,rotation_rate=1 \
  -device ivshmem-plain,memdev=ivshmem,bus=pcie.0 \
  -object memory-backend-file,id=ivshmem,share=on,mem-path=/dev/shm/looking-glass,size=128M \
  -spice disable-ticketing,seamless-migration=off,port=5900,addr=192.168.10.50 \
  -device virtio-keyboard-pci,id=input2,bus=pcie.0,addr=0xc

I also run a script that pins the CPU threads to the correct cores and ensures all memory accesses are local to the CPU it is pinned to. For Ryzen this isn’t so critical but for ThreadRipper it is critical if you wish to obtain the same results I have as shown below.

3 Likes