Hello. I just got my first QEMU/KVM Win10 VM set up the other day. Performance seemed good in 3DMark and Superposition, but when I ran an actual game it was quite bad.
System setup:
CPU: Ryzen 1700 - 6c/12t passed through and pinned, 2c/4t reserved for the host, with the IO/emulator threads pinned.
GPU: Host - R9 290, Guest - GTX 1070
RAM: 16GiB total, 11.5GiB for the guest, hugepages enabled (see the check just below)
Disks: several, using virtio-scsi on one controller. No performance issues here.
OS: Host - Manjaro Stable (kernel 5.6.3-2), Guest - Windows 10 LTSC
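One thing I did sanity-check is that the hugepages are actually consumed when the VM boots. This is just the standard procfs/sysfs view (assuming the default 2MiB page size, where 11.5GiB works out to 5888 pages):

# HugePages_Free should drop by roughly 5888 (11.5GiB / 2MiB) while the guest runs
grep Huge /proc/meminfo
cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages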
The game was MechWarrior Online. It ran better on the R9 290 on bare metal than it does on the GTX 1070 inside the VM. Checking Afterburner graphs, I could see that 'no load' was the reported GPU limiter, with usage at only about 20% and the card never reaching boost clocks.
This made me suspect a CPU bottleneck, but the same graphs showed only a few cores bouncing between 40-60% usage and the rest mostly idle. That makes me think I did the pinning or isolation incorrectly, and the guest can't get full CPU performance because the host is contending for its threads, polluting the cache, etc.
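To rule out the obvious, this is how I've been dumping the affinity of every QEMU thread from the host (assuming a single qemu-system-x86_64 process):

# Each vCPU thread should report a single CPU out of 2-7,10-15;
# the emulator and iothread should report 0-1,8-9.
for tid in /proc/$(pidof qemu-system-x86_64)/task/*; do
    taskset -pc "${tid##*/}"
done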
I set the VM up mostly by working down through the Arch wiki article here:
https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF
But maybe I did something wrong in the #Dynamically_isolating_CPUs section.
In fact I know I did, because if I run mprime on 2 threads while the VM is running, those threads do not stay confined to CPUs 0, 1, 8, and 9 like they should. The iothreads do behave, however, which I can see when I run CrystalDiskMark inside the VM. The guest threads also only use CPUs 2-7 and 10-15 and never touch the host cores, so the isolation is half working.
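For reference, this is how I'm watching where the mprime threads land (psr is the CPU a thread last ran on):

# With the shield active these should all stay on 0,1,8,9,
# but for me they wander across all 16 CPUs.
ps -eLo pid,tid,psr,comm | grep '[m]prime'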
This is the cset.sh script, which I've placed in
/etc/libvirt/hooks/qemu.d/win10vm/prepare/begin
and duplicated in
/etc/libvirt/hooks/qemu.d/win10vm/release/end
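For completeness, this is how I checked that both copies are present and have their permissions set (my understanding is that hook dispatchers only execute files with the executable bit set):

ls -l /etc/libvirt/hooks/qemu.d/win10vm/prepare/begin/cset.sh \
      /etc/libvirt/hooks/qemu.d/win10vm/release/end/cset.sh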
cat /etc/libvirt/hooks/qemu.d/win10vm/release/end/cset.sh
#!/usr/bin/env bash
#
# Original author: Rokas Kupstys <[email protected]>
# Heavily modified by: Danny Lin <[email protected]>
#
# This hook uses the `cset` tool to dynamically isolate and unisolate CPUs using
# the kernel's cgroup cpusets feature. While it's not as effective as
# full kernel-level scheduler and timekeeping isolation, it still does wonders
# for VM latency as compared to not isolating CPUs at all. Note that vCPU thread
# affinity is a must for this to work properly.
#
# Original source: https://rokups.github.io/#!pages/gaming-vm-performance.md
#
# Target file locations:
# - $SYSCONFDIR/hooks/qemu.d/vm_name/prepare/begin/cset.sh
# - $SYSCONFDIR/hooks/qemu.d/vm_name/release/end/cset.sh
# $SYSCONFDIR is usually /etc/libvirt.
#
TOTAL_CORES='0-15'
TOTAL_CORES_MASK=FFFF # 0-15, bitmask 0b1111111111111111
HOST_CORES='0-1,8-9' # Cores reserved for host
HOST_CORES_MASK=0303 # 0-1,8-9, bitmask 0b0000001100000011
VIRT_CORES='2-7,10-15' # Cores reserved for virtual machine(s)
VM_NAME="$1"
VM_ACTION="$2/$3"
function shield_vm() {
    cset -m set -c $TOTAL_CORES -s machine.slice
    cset -m shield --kthread on --cpu $VIRT_CORES
}

function unshield_vm() {
    cset -m shield --reset
}
# For convenient manual invocation
if [[ "$VM_NAME" == "shield" ]]; then
shield_vm
exit
elif [[ "$VM_NAME" == "unshield" ]]; then
unshield_vm
exit
fi
if [[ "$VM_ACTION" == "prepare/begin" ]]; then
echo "libvirt-qemu cset: Reserving CPUs $VIRT_CORES for VM $VM_NAME" > /dev/kmsg 2>&1
shield_vm > /dev/kmsg 2>&1
# the kernel's dirty page writeback mechanism uses kthread workers. They introduce
# massive arbitrary latencies when doing disk writes on the host and aren't
# migrated by cset. Restrict the workqueue to use only cpu 0.
echo $HOST_CORES_MASK > /sys/bus/workqueue/devices/writeback/cpumask
echo 0 > /sys/bus/workqueue/devices/writeback/numa
echo "libvirt-qemu cset: Successfully reserved CPUs $VIRT_CORES" > /dev/kmsg 2>&1
elif [[ "$VM_ACTION" == "release/end" ]]; then
echo "libvirt-qemu cset: Releasing CPUs $VIRT_CORES from VM $VM_NAME" > /dev/kmsg 2>&1
unshield_vm > /dev/kmsg 2>&1
# Revert changes made to the writeback workqueue
echo $TOTAL_CORES_MASK > /sys/bus/workqueue/devices/writeback/cpumask
echo 1 > /sys/bus/workqueue/devices/writeback/numa
echo "libvirt-qemu cset: Successfully released CPUs $VIRT_CORES" > /dev/kmsg 2>&1
fi
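When debugging this I've been checking two things after VM start: whether the hook's log lines actually hit the kernel log, and what cset thinks the shield looks like (cset uses the cgroup v1 cpuset hierarchy):

# Hook output goes to /dev/kmsg, so it shows up in dmesg
dmesg | grep 'libvirt-qemu cset'

# List all cpusets and their CPUs; a 'user' set with 2-7,10-15
# and a 'system' set with 0-1,8-9 means the shield is up
cset set --list --recurse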
And here is the VM’s XML config:
cat /etc/libvirt/qemu/win10vm.xml
<!--
WARNING: THIS IS AN AUTO-GENERATED FILE. CHANGES TO IT ARE LIKELY TO BE
OVERWRITTEN AND LOST. Changes to this xml configuration should be made using:
virsh edit win10vm
or other application using the libvirt API.
-->
<domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'>
  <name>win10vm</name>
  <uuid>88629899-fa2d-4313-ad65-264a897774eb</uuid>
  <title>Windows 10 LTSC</title>
  <description>Windows 10 Gaming VM with PCI-E passthrough</description>
  <metadata>
    <libosinfo:libosinfo xmlns:libosinfo="http://libosinfo.org/xmlns/libvirt/domain/1.0">
      <libosinfo:os id="http://microsoft.com/win/10"/>
    </libosinfo:libosinfo>
  </metadata>
  <memory unit='KiB'>11534336</memory>
  <currentMemory unit='KiB'>11534336</currentMemory>
  <vcpu placement='static'>12</vcpu>
  <iothreads>1</iothreads>
  <cputune>
    <vcpupin vcpu='0' cpuset='2'/>
    <vcpupin vcpu='1' cpuset='10'/>
    <vcpupin vcpu='2' cpuset='3'/>
    <vcpupin vcpu='3' cpuset='11'/>
    <vcpupin vcpu='4' cpuset='4'/>
    <vcpupin vcpu='5' cpuset='12'/>
    <vcpupin vcpu='6' cpuset='5'/>
    <vcpupin vcpu='7' cpuset='13'/>
    <vcpupin vcpu='8' cpuset='6'/>
    <vcpupin vcpu='9' cpuset='14'/>
    <vcpupin vcpu='10' cpuset='7'/>
    <vcpupin vcpu='11' cpuset='15'/>
    <emulatorpin cpuset='0-1,8-9'/>
    <iothreadpin iothread='1' cpuset='0-1,8-9'/>
  </cputune>
  <os>
    <type arch='x86_64' machine='pc-q35-4.2'>hvm</type>
    <loader readonly='yes' type='pflash'>/usr/share/ovmf/x64/OVMF_CODE.fd</loader>
    <nvram>/var/lib/libvirt/qemu/nvram/win10vm_VARS.fd</nvram>
    <bootmenu enable='no'/>
  </os>
  <features>
    <acpi/>
    <apic/>
    <hyperv>
      <relaxed state='on'/>
      <vapic state='on'/>
      <spinlocks state='on' retries='8191'/>
      <vendor_id state='on' value='1043'/>
    </hyperv>
    <kvm>
      <hidden state='on'/>
    </kvm>
    <vmport state='off'/>
  </features>
  <cpu mode='host-passthrough' check='none'>
    <topology sockets='1' cores='6' threads='2'/>
    <feature policy='require' name='topoext'/>
  </cpu>
  <clock offset='localtime'>
    <timer name='rtc' tickpolicy='catchup'/>
    <timer name='pit' tickpolicy='delay'/>
    <timer name='hpet' present='no'/>
    <timer name='hypervclock' present='yes'/>
  </clock>
  <on_poweroff>destroy</on_poweroff>
  <on_reboot>restart</on_reboot>
  <on_crash>destroy</on_crash>
  <pm>
    <suspend-to-mem enabled='no'/>
    <suspend-to-disk enabled='no'/>
  </pm>
  <devices>
    <emulator>/usr/bin/qemu-system-x86_64</emulator>
    <disk type='block' device='disk'>
      <driver name='qemu' type='raw' cache='writeback' io='threads' discard='unmap'/>
      <source dev='/dev/disk/by-id/ata-Samsung_SSD_840_EVO_500GB_S1DHNSADC15712J'/>
      <target dev='sda' bus='scsi'/>
      <boot order='2'/>
      <address type='drive' controller='0' bus='0' target='0' unit='0'/>
    </disk>
    <disk type='block' device='disk'>
      <driver name='qemu' type='raw' cache='writeback' io='threads' discard='unmap'/>
      <source dev='/dev/disk/by-id/ata-ST2000DM001-1ER164_W4Z43P9Y'/>
      <target dev='sdd' bus='scsi'/>
      <address type='drive' controller='0' bus='0' target='0' unit='3'/>
    </disk>
    <disk type='block' device='disk'>
      <driver name='qemu' type='raw' cache='writeback' io='threads' discard='unmap'/>
      <source dev='/dev/disk/by-id/ata-ST2000DM008-2FR102_ZFL0XEJD'/>
      <target dev='sde' bus='scsi'/>
      <address type='drive' controller='0' bus='0' target='0' unit='4'/>
    </disk>
    <disk type='block' device='disk'>
      <driver name='qemu' type='raw' cache='writeback' io='threads' discard='unmap'/>
      <source dev='/dev/disk/by-id/nvme-INTEL_SSDPEKNW010T8_BTNH93860ULN1P0B'/>
      <target dev='sdm' bus='scsi'/>
      <address type='drive' controller='0' bus='0' target='0' unit='5'/>
    </disk>
    <controller type='usb' index='0' model='qemu-xhci' ports='15'>
      <address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/>
    </controller>
    <controller type='pci' index='0' model='pcie-root'/>
    <controller type='pci' index='1' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='1' port='0x10'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x0' multifunction='on'/>
    </controller>
    <controller type='pci' index='2' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='2' port='0x11'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x1'/>
    </controller>
    <controller type='pci' index='3' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='3' port='0x12'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x2'/>
    </controller>
    <controller type='pci' index='4' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='4' port='0x13'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x3'/>
    </controller>
    <controller type='pci' index='5' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='5' port='0x14'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x4'/>
    </controller>
    <controller type='pci' index='6' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='6' port='0x15'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x5'/>
    </controller>
    <controller type='pci' index='7' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='7' port='0x16'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x6'/>
    </controller>
    <controller type='pci' index='8' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='8' port='0x17'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x02' function='0x7'/>
    </controller>
    <controller type='pci' index='9' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='9' port='0x18'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0' multifunction='on'/>
    </controller>
    <controller type='pci' index='10' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='10' port='0x19'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x1'/>
    </controller>
    <controller type='pci' index='11' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='11' port='0x8'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/>
    </controller>
    <controller type='pci' index='12' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='12' port='0x9'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/>
    </controller>
    <controller type='pci' index='13' model='pcie-root-port'>
      <model name='pcie-root-port'/>
      <target chassis='13' port='0xa'/>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
    </controller>
    <controller type='pci' index='14' model='pcie-to-pci-bridge'>
      <model name='pcie-pci-bridge'/>
      <address type='pci' domain='0x0000' bus='0x0b' slot='0x00' function='0x0'/>
    </controller>
    <controller type='scsi' index='0' model='virtio-scsi'>
      <driver queues='8' iothread='1'/>
      <address type='pci' domain='0x0000' bus='0x05' slot='0x00' function='0x0'/>
    </controller>
    <controller type='sata' index='0'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/>
    </controller>
    <interface type='network'>
      <mac address='52:54:00:65:92:6a'/>
      <source network='default'/>
      <model type='virtio'/>
      <driver queues='12'/>
      <address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
    </interface>
    <serial type='pty'>
      <target type='isa-serial' port='0'>
        <model name='isa-serial'/>
      </target>
    </serial>
    <console type='pty'>
      <target type='serial' port='0'/>
    </console>
    <input type='mouse' bus='virtio'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x0e' function='0x0'/>
    </input>
    <input type='keyboard' bus='virtio'>
      <address type='pci' domain='0x0000' bus='0x00' slot='0x0f' function='0x0'/>
    </input>
    <input type='mouse' bus='ps2'/>
    <input type='keyboard' bus='ps2'/>
    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x0b' slot='0x00' function='0x0'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x03' slot='0x00' function='0x0'/>
    </hostdev>
    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x0b' slot='0x00' function='0x1'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x0c' slot='0x00' function='0x0'/>
    </hostdev>
    <hostdev mode='subsystem' type='usb' managed='yes'>
      <source>
        <vendor id='0x0951'/>
        <product id='0x16a4'/>
      </source>
      <address type='usb' bus='0' port='1'/>
    </hostdev>
    <hostdev mode='subsystem' type='pci' managed='yes'>
      <source>
        <address domain='0x0000' bus='0x0c' slot='0x00' function='0x3'/>
      </source>
      <address type='pci' domain='0x0000' bus='0x06' slot='0x00' function='0x0'/>
    </hostdev>
    <memballoon model='virtio'>
      <address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/>
    </memballoon>
  </devices>
  <qemu:commandline>
    <qemu:arg value='-object'/>
    <qemu:arg value='input-linux,id=mouse1,evdev=/dev/input/by-id/usb-Logitech_Gaming_Mouse_G402_6D91387E5255-event-mouse'/>
    <qemu:arg value='-object'/>
    <qemu:arg value='input-linux,id=kbd1,evdev=/dev/input/by-id/usb-Logitech_Gaming_Keyboard_G105-event-kbd,grab_all=on,repeat=on'/>
  </qemu:commandline>
</domain>
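In case it helps, the effective pinning can also be queried from libvirt at runtime:

virsh vcpupin win10vm
virsh emulatorpin win10vm
virsh iothreadinfo win10vm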
Let me know if you need any other info.