Navi Reset
What this does is use PowerPlay tables to turn the card off and back on again. (Insert IT Crowd meme here, haha.)
@gnif has done it again. Good work!!
OK, here is the Navi reset. Navi is easier to implement, as the SMU has most of the logic in it.
Update 27-11-2019: Updated patch down below: Navi Reset Kernel Patch
From 69ea42207b544b6e3fa9755022bff09d2ce953d9 Mon Sep 17 00:00:00 2001
From: Geoffrey McRae <[email protected]>
Date: Thu, 12 Sep 2019 03:19:28 +1000
Subject: [PATCH] pci quirk: AMD Navi 10 series vendor specific reset
Signed-off-by: Geoffrey McRae <[email protected]>
---
drivers/pci/quirks.c | 98 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 98 insertions(+)
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 44c4ae1abd00..d94ddb1c6832 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -3825,6 +3825,97 @@ static int delay_250ms_after_flr(struct pci_dev *dev, int probe)
return 0;
}
+/*
+ * AMD Navi 10 series GPUs require a vendor specific reset procedure.
+ * According to AMD a PSP mode 2 reset should be enough however at this
+ * time the details of how to perform this are not available to us.
+ * Instead we can signal the SMU to enter and exit BACO which has the same
+ * desired effect.
+ *
+ * Returns 0 on probe, when no reset is needed and after the BACO sequence;
+ * -ENOMEM (nothing reset, PCI state restored) when BAR5 cannot be mapped.
+ * NOTE(review): each poll stops as soon as C2PMSG_90 reads zero and expired
+ * timeouts go unreported; confirm this against the SMU response protocol.
+ */
+static int reset_amd_navi10(struct pci_dev *dev, int probe)
+{
+	/* register offsets in 32-bit words from the start of BAR5 */
+	const int mmMP0_SMN_C2PMSG_81 = 0x16091;
+	const int mmMP1_SMN_C2PMSG_66 = 0x16282;
+	const int mmMP1_SMN_C2PMSG_82 = 0x16292;
+	const int mmMP1_SMN_C2PMSG_90 = 0x1629a;
+	u32 __iomem *mmio;
+	unsigned int sol, timeout;
+	int ret = 0;
+	u16 cfg;
+
+	/* bus resets still cause navi to flake out */
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET;
+
+	if (probe)
+		return 0;
+
+	/* save the PCI state and enable memory access */
+	pci_save_state(dev);
+	pci_read_config_word(dev, PCI_COMMAND, &cfg);
+	pci_write_config_word(dev, PCI_COMMAND, cfg | PCI_COMMAND_MEMORY);
+
+	mmio = ioremap_nocache(pci_resource_start(dev, 5),
+			       pci_resource_len(dev, 5));
+	if (!mmio) {
+		pci_err(dev, "Navi10: cannot iomap device\n");
+		ret = -ENOMEM;
+		goto restore;
+	}
+
+	/* check the sign of life indicator */
+	sol = readl(mmio + mmMP0_SMN_C2PMSG_81);
+	pci_info(dev, "Navi10: SOL 0x%x\n", sol);
+	if (sol == 0 || sol == 0xffffffff) {
+		pci_info(dev, "Navi10: device doesn't need to be reset\n");
+		goto unmap;
+	}
+
+	pci_info(dev, "Navi10: performing BACO reset\n");
+	/* the SMU might be busy already, wait for it */
+	for (timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+	readl(mmio + mmMP1_SMN_C2PMSG_90);
+
+	/* send PPSMC_MSG_ArmD3 */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x46, mmio + mmMP1_SMN_C2PMSG_66);
+	for (timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* send PPSMC_MSG_EnterBaco with param */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_82);
+	writel(0x18, mmio + mmMP1_SMN_C2PMSG_66);
+	for (timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* wait for the regulators to shut down */
+	msleep(400);
+
+	/* send PPSMC_MSG_ExitBaco */
+	writel(0x00, mmio + mmMP1_SMN_C2PMSG_90);
+	writel(0x19, mmio + mmMP1_SMN_C2PMSG_66);
+	for (timeout = 200; timeout && readl(mmio + mmMP1_SMN_C2PMSG_90) != 0; --timeout)
+		msleep(1);
+
+	/* wait for the regulators to start up again */
+	msleep(400);
+
+unmap:
+	iounmap(mmio);
+restore:
+	/* restore the PCI state and command register */
+	pci_restore_state(dev);
+	pci_write_config_word(dev, PCI_COMMAND, cfg);
+	return ret;
+}
+
static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF,
reset_intel_82599_sfp_virtfn },
@@ -3836,6 +3927,13 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = {
{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
reset_chelsio_generic_dev },
+ { PCI_VENDOR_ID_ATI, 0x7310, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x7312, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x7318, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x7319, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x731a, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x731b, reset_amd_navi10 },
+ { PCI_VENDOR_ID_ATI, 0x731f, reset_amd_navi10 },
{ 0 }
};
--
2.20.1