diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index 0b6a2e6e8fbb..7c2b846521f3 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -1,3 +1,12 @@
+What: /sys/bus/cxl/flush
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (WO) If userspace manually unbinds a port, the kernel schedules
+ all descendant memdevs for unbind. Writing '1' to this attribute
+ flushes that work.
+
 What: /sys/bus/cxl/devices/memX/firmware_version
 Date: December, 2020
 KernelVersion: v5.12
@@ -25,6 +34,24 @@ Description:
 identically named field in the Identify Memory Device Output
 Payload in the CXL-2.0 specification.
+What: /sys/bus/cxl/devices/memX/serial
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) 64-bit serial number per the PCIe Device Serial Number
+ capability. Mandatory for CXL devices; see CXL 2.0 8.1.12.2
+ Memory Device PCIe Capabilities and Extended Capabilities.
+
+What: /sys/bus/cxl/devices/memX/numa_node
+Date: January, 2022
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ (RO) If NUMA is enabled and the platform has affinitized the
+ host PCI device for this memory device, emit the CPU node
+ affinity for this device.
+
 What: /sys/bus/cxl/devices/*/devtype
 Date: June, 2021
 KernelVersion: v5.14
@@ -34,6 +61,15 @@ Description:
 the same value communicated in the DEVTYPE environment variable
 for uevents for devices on the "cxl" bus.
+What: /sys/bus/cxl/devices/*/modalias
+Date: December, 2021
+KernelVersion: v5.18
+Contact: linux-cxl@vger.kernel.org
+Description:
+ CXL device objects export the modalias attribute which mirrors
+ the same value communicated in the MODALIAS environment variable
+ for uevents for devices on the "cxl" bus.
+
 What: /sys/bus/cxl/devices/portX/uport
 Date: June, 2021
 KernelVersion: v5.14
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
index 3b8f41395f6b..db476bb170b6 100644
--- a/Documentation/driver-api/cxl/memory-devices.rst
+++ b/Documentation/driver-api/cxl/memory-devices.rst
@@ -14,6 +14,303 @@ that optionally define a device's contribution to an
 interleaved address range across multiple devices underneath a host-bridge
 or interleaved across host-bridges.
+CXL Bus: Theory of Operation
+============================
+Similar to how a RAID driver takes disk objects and assembles them into a new
+logical device, the CXL subsystem is tasked with taking PCIe and ACPI objects
+and assembling them into a CXL.mem decode topology. The need for runtime
+configuration of the CXL.mem topology is also similar to RAID in that
+different environments with the same hardware configuration may decide to
+assemble the topology in contrasting ways. One may choose performance (RAID0),
+striping memory across multiple Host Bridges and endpoints, while another may
+opt for fault tolerance and disable any striping in the CXL.mem topology.
+
+Platform firmware enumerates a menu of interleave options at the "CXL root port"
+(Linux term for the top of the CXL decode topology). From there, PCIe topology
+dictates which endpoints can participate in which Host Bridge decode regimes.
+Each PCIe Switch in the path between the root and an endpoint introduces a point
+at which the interleave can be split.
For example platform firmware may say at a +given range only decodes to 1 one Host Bridge, but that Host Bridge may in turn +interleave cycles across multiple Root Ports. An intervening Switch between a +port and an endpoint may interleave cycles across multiple Downstream Switch +Ports, etc. + +Here is a sample listing of a CXL topology defined by 'cxl_test'. The 'cxl_test' +module generates an emulated CXL topology of 2 Host Bridges each with 2 Root +Ports. Each of those Root Ports are connected to 2-way switches with endpoints +connected to those downstream ports for a total of 8 endpoints:: + + # cxl list -BEMPu -b cxl_test + { + "bus":"root3", + "provider":"cxl_test", + "ports:root3":[ + { + "port":"port5", + "host":"cxl_host_bridge.1", + "ports:port5":[ + { + "port":"port8", + "host":"cxl_switch_uport.1", + "endpoints:port8":[ + { + "endpoint":"endpoint9", + "host":"mem2", + "memdev":{ + "memdev":"mem2", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x1", + "numa_node":1, + "host":"cxl_mem.1" + } + }, + { + "endpoint":"endpoint15", + "host":"mem6", + "memdev":{ + "memdev":"mem6", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x5", + "numa_node":1, + "host":"cxl_mem.5" + } + } + ] + }, + { + "port":"port12", + "host":"cxl_switch_uport.3", + "endpoints:port12":[ + { + "endpoint":"endpoint17", + "host":"mem8", + "memdev":{ + "memdev":"mem8", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x7", + "numa_node":1, + "host":"cxl_mem.7" + } + }, + { + "endpoint":"endpoint13", + "host":"mem4", + "memdev":{ + "memdev":"mem4", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x3", + "numa_node":1, + "host":"cxl_mem.3" + } + } + ] + } + ] + }, + { + "port":"port4", + "host":"cxl_host_bridge.0", + "ports:port4":[ + { + "port":"port6", + "host":"cxl_switch_uport.0", + "endpoints:port6":[ + { + "endpoint":"endpoint7", + "host":"mem1", + "memdev":{ + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + } + }, + { + "endpoint":"endpoint14", + "host":"mem5", + "memdev":{ + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + } + } + ] + }, + { + "port":"port10", + "host":"cxl_switch_uport.2", + "endpoints:port10":[ + { + "endpoint":"endpoint16", + "host":"mem7", + "memdev":{ + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + } + }, + { + "endpoint":"endpoint11", + "host":"mem3", + "memdev":{ + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + } + ] + } + ] + } + ] + } + +In that listing each "root", "port", and "endpoint" object correspond a kernel +'struct cxl_port' object. A 'cxl_port' is a device that can decode CXL.mem to +its descendants. So "root" claims non-PCIe enumerable platform decode ranges and +decodes them to "ports", "ports" decode to "endpoints", and "endpoints" +represent the decode from SPA (System Physical Address) to DPA (Device Physical +Address). + +Continuing the RAID analogy, disks have both topology metadata and on device +metadata that determine RAID set assembly. 
CXL Port topology and CXL Port link +status is metadata for CXL.mem set assembly. The CXL Port topology is enumerated +by the arrival of a CXL.mem device. I.e. unless and until the PCIe core attaches +the cxl_pci driver to a CXL Memory Expander there is no role for CXL Port +objects. Conversely for hot-unplug / removal scenarios, there is no need for +the Linux PCI core to tear down switch-level CXL resources because the endpoint +->remove() event cleans up the port data that was established to support that +Memory Expander. + +The port metadata and potential decode schemes that a give memory device may +participate can be determined via a command like:: + + # cxl list -BDMu -d root -m mem3 + { + "bus":"root3", + "provider":"cxl_test", + "decoders:root3":[ + { + "decoder":"decoder3.1", + "resource":"0x8030000000", + "size":"512.00 MiB (536.87 MB)", + "volatile_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.3", + "resource":"0x8060000000", + "size":"512.00 MiB (536.87 MB)", + "pmem_capable":true, + "nr_targets":2 + }, + { + "decoder":"decoder3.0", + "resource":"0x8020000000", + "size":"256.00 MiB (268.44 MB)", + "volatile_capable":true, + "nr_targets":1 + }, + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ], + "memdevs:root3":[ + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + } + +...which queries the CXL topology to ask "given CXL Memory Expander with a kernel +device name of 'mem3' which platform level decode ranges may this device +participate". A given expander can participate in multiple CXL.mem interleave +sets simultaneously depending on how many decoder resource it has. In this +example mem3 can participate in one or more of a PMEM interleave that spans to +Host Bridges, a PMEM interleave that targets a single Host Bridge, a Volatile +memory interleave that spans 2 Host Bridges, and a Volatile memory interleave +that only targets a single Host Bridge. + +Conversely the memory devices that can participate in a given platform level +decode scheme can be determined via a command like the following:: + + # cxl list -MDu -d 3.2 + [ + { + "memdevs":[ + { + "memdev":"mem1", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0", + "numa_node":0, + "host":"cxl_mem.0" + }, + { + "memdev":"mem5", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x4", + "numa_node":0, + "host":"cxl_mem.4" + }, + { + "memdev":"mem7", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x6", + "numa_node":0, + "host":"cxl_mem.6" + }, + { + "memdev":"mem3", + "pmem_size":"256.00 MiB (268.44 MB)", + "ram_size":"256.00 MiB (268.44 MB)", + "serial":"0x2", + "numa_node":0, + "host":"cxl_mem.2" + } + ] + }, + { + "root decoders":[ + { + "decoder":"decoder3.2", + "resource":"0x8050000000", + "size":"256.00 MiB (268.44 MB)", + "pmem_capable":true, + "nr_targets":1 + } + ] + } + ] + +...where the naming scheme for decoders is "decoder.". + Driver Infrastructure ===================== @@ -28,6 +325,14 @@ CXL Memory Device .. kernel-doc:: drivers/cxl/pci.c :internal: +.. kernel-doc:: drivers/cxl/mem.c + :doc: cxl mem + +CXL Port +-------- +.. kernel-doc:: drivers/cxl/port.c + :doc: cxl port + CXL Core -------- .. kernel-doc:: drivers/cxl/cxl.h @@ -36,10 +341,16 @@ CXL Core .. 
kernel-doc:: drivers/cxl/cxl.h :internal: -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c :doc: cxl core -.. kernel-doc:: drivers/cxl/core/bus.c +.. kernel-doc:: drivers/cxl/core/port.c + :identifiers: + +.. kernel-doc:: drivers/cxl/core/pci.c + :doc: cxl core pci + +.. kernel-doc:: drivers/cxl/core/pci.c :identifiers: .. kernel-doc:: drivers/cxl/core/pmem.c diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig index 67c91378f2dd..b88ab956bb7c 100644 --- a/drivers/cxl/Kconfig +++ b/drivers/cxl/Kconfig @@ -13,25 +13,26 @@ menuconfig CXL_BUS if CXL_BUS -config CXL_MEM - tristate "CXL.mem: Memory Devices" +config CXL_PCI + tristate "PCI manageability" default CXL_BUS help - The CXL.mem protocol allows a device to act as a provider of - "System RAM" and/or "Persistent Memory" that is fully coherent - as if the memory was attached to the typical CPU memory - controller. + The CXL specification defines a "CXL memory device" sub-class in the + PCI "memory controller" base class of devices. Device's identified by + this class code provide support for volatile and / or persistent + memory to be mapped into the system address map (Host-managed Device + Memory (HDM)). - Say 'y/m' to enable a driver that will attach to CXL.mem devices for - configuration and management primarily via the mailbox interface. See - Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification for more - details. + Say 'y/m' to enable a driver that will attach to CXL memory expander + devices enumerated by the memory device class code for configuration + and management primarily via the mailbox interface. See Chapter 2.3 + Type 3 CXL Device in the CXL 2.0 specification for more details. If unsure say 'm'. config CXL_MEM_RAW_COMMANDS bool "RAW Command Interface for Memory Devices" - depends on CXL_MEM + depends on CXL_PCI help Enable CXL RAW command interface. @@ -76,4 +77,25 @@ config CXL_PMEM provisioning the persistent memory capacity of CXL memory expanders. If unsure say 'm'. + +config CXL_MEM + tristate "CXL: Memory Expansion" + depends on CXL_PCI + default CXL_BUS + help + The CXL.mem protocol allows a device to act as a provider of "System + RAM" and/or "Persistent Memory" that is fully coherent as if the + memory were attached to the typical CPU memory controller. This is + known as HDM "Host-managed Device Memory". + + Say 'y/m' to enable a driver that will attach to CXL.mem devices for + memory expansion and control of HDM. See Chapter 9.13 in the CXL 2.0 + specification for a detailed description of HDM. + + If unsure say 'm'. 
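For reference, the Kconfig split above yields a stack of cooperating modules: the shared cxl_core plus cxl_pci, cxl_mem, cxl_port, cxl_acpi, and cxl_pmem. An illustrative .config fragment (not something this patch adds; the '=m' values are just a suggested default) that builds the whole stack as modules would be::

    CONFIG_CXL_BUS=m
    CONFIG_CXL_PCI=m
    CONFIG_CXL_ACPI=m
    CONFIG_CXL_PMEM=m
    CONFIG_CXL_MEM=m
    CONFIG_CXL_PORT=m

Note that the CXL_PORT entry that follows is deliberately promptless; it simply tracks CXL_BUS so the cxl_port driver is built whenever the bus itself is enabled.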
+ +config CXL_PORT + default CXL_BUS + tristate + endif diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile index d1aaabc940f3..ce267ef11d93 100644 --- a/drivers/cxl/Makefile +++ b/drivers/cxl/Makefile @@ -1,9 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CXL_BUS) += core/ -obj-$(CONFIG_CXL_MEM) += cxl_pci.o +obj-$(CONFIG_CXL_PCI) += cxl_pci.o +obj-$(CONFIG_CXL_MEM) += cxl_mem.o obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o +obj-$(CONFIG_CXL_PORT) += cxl_port.o +cxl_mem-y := mem.o cxl_pci-y := pci.o cxl_acpi-y := acpi.o cxl_pmem-y := pmem.o +cxl_port-y := port.o diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 3163167ecc3a..d15a6aec0331 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -6,6 +6,7 @@ #include #include #include +#include "cxlpci.h" #include "cxl.h" /* Encode defined in CXL 2.0 8.2.5.12.7 HDM Decoder Control Register */ @@ -14,7 +15,7 @@ static unsigned long cfmws_to_decoder_flags(int restrictions) { - unsigned long flags = 0; + unsigned long flags = CXL_DECODER_F_ENABLE; if (restrictions & ACPI_CEDT_CFMWS_RESTRICT_TYPE2) flags |= CXL_DECODER_F_TYPE2; @@ -101,16 +102,14 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, for (i = 0; i < CFMWS_INTERLEAVE_WAYS(cfmws); i++) target_map[i] = cfmws->interleave_targets[i]; - cxld = cxl_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); + cxld = cxl_root_decoder_alloc(root_port, CFMWS_INTERLEAVE_WAYS(cfmws)); if (IS_ERR(cxld)) return 0; cxld->flags = cfmws_to_decoder_flags(cfmws->restrictions); cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range){ - .start = cfmws->base_hpa, - .end = cfmws->base_hpa + cfmws->window_size - 1, - }; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(cfmws->base_hpa, + cfmws->window_size); cxld->interleave_ways = CFMWS_INTERLEAVE_WAYS(cfmws); cxld->interleave_granularity = CFMWS_INTERLEAVE_GRANULARITY(cfmws); @@ -120,67 +119,17 @@ static int cxl_parse_cfmws(union acpi_subtable_headers *header, void *arg, else rc = cxl_decoder_autoremove(dev, cxld); if (rc) { - dev_err(dev, "Failed to add decoder for %#llx-%#llx\n", - cfmws->base_hpa, - cfmws->base_hpa + cfmws->window_size - 1); + dev_err(dev, "Failed to add decoder for %pr\n", + &cxld->platform_res); return 0; } - dev_dbg(dev, "add: %s node: %d range %#llx-%#llx\n", - dev_name(&cxld->dev), phys_to_target_node(cxld->range.start), - cfmws->base_hpa, cfmws->base_hpa + cfmws->window_size - 1); + dev_dbg(dev, "add: %s node: %d range %pr\n", dev_name(&cxld->dev), + phys_to_target_node(cxld->platform_res.start), + &cxld->platform_res); return 0; } -__mock int match_add_root_ports(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* TODO walk DVSEC to find component register base */ - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); - if (rc) { - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - -static struct cxl_dport 
*find_dport_by_dev(struct cxl_port *port, struct device *dev) -{ - struct cxl_dport *dport; - - device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) - if (dport->dport == dev) { - device_unlock(&port->dev); - return dport; - } - - device_unlock(&port->dev); - return NULL; -} - __mock struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) { @@ -204,83 +153,35 @@ static int add_host_bridge_uport(struct device *match, void *arg) struct device *host = root_port->dev.parent; struct acpi_device *bridge = to_cxl_host_bridge(host, match); struct acpi_pci_root *pci_root; - struct cxl_walk_context ctx; - int single_port_map[1], rc; - struct cxl_decoder *cxld; struct cxl_dport *dport; struct cxl_port *port; + int rc; if (!bridge) return 0; - dport = find_dport_by_dev(root_port, match); + dport = cxl_find_dport_by_dev(root_port, match); if (!dport) { dev_dbg(host, "host bridge expected and not found\n"); return 0; } + /* + * Note that this lookup already succeeded in + * to_cxl_host_bridge(), so no need to check for failure here + */ + pci_root = acpi_pci_find_root(bridge->handle); + rc = devm_cxl_register_pci_bus(host, match, pci_root->bus); + if (rc) + return rc; + port = devm_cxl_add_port(host, match, dport->component_reg_phys, root_port); if (IS_ERR(port)) return PTR_ERR(port); dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev)); - /* - * Note that this lookup already succeeded in - * to_cxl_host_bridge(), so no need to check for failure here - */ - pci_root = acpi_pci_find_root(bridge->handle); - ctx = (struct cxl_walk_context){ - .dev = host, - .root = pci_root->bus, - .port = port, - }; - pci_walk_bus(pci_root->bus, match_add_root_ports, &ctx); - - if (ctx.count == 0) - return -ENODEV; - if (ctx.error) - return ctx.error; - if (ctx.count > 1) - return 0; - - /* TODO: Scan CHBCR for HDM Decoder resources */ - - /* - * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability - * Structure) single ported host-bridges need not publish a decoder - * capability when a passthrough decode can be assumed, i.e. all - * transactions that the uport sees are claimed and passed to the single - * dport. Disable the range until the first CXL region is enumerated / - * activated. 
- */ - cxld = cxl_decoder_alloc(port, 1); - if (IS_ERR(cxld)) - return PTR_ERR(cxld); - - cxld->interleave_ways = 1; - cxld->interleave_granularity = PAGE_SIZE; - cxld->target_type = CXL_DECODER_EXPANDER; - cxld->range = (struct range) { - .start = 0, - .end = -1, - }; - - device_lock(&port->dev); - dport = list_first_entry(&port->dports, typeof(*dport), list); - device_unlock(&port->dev); - - single_port_map[0] = dport->port_id; - - rc = cxl_decoder_add(cxld, single_port_map); - if (rc) - put_device(&cxld->dev); - else - rc = cxl_decoder_autoremove(host, cxld); - - if (rc == 0) - dev_dbg(host, "add: %s\n", dev_name(&cxld->dev)); - return rc; + return 0; } struct cxl_chbs_context { @@ -309,9 +210,9 @@ static int cxl_get_chbcr(union acpi_subtable_headers *header, void *arg, static int add_host_bridge_dport(struct device *match, void *arg) { - int rc; acpi_status status; unsigned long long uid; + struct cxl_dport *dport; struct cxl_chbs_context ctx; struct cxl_port *root_port = arg; struct device *host = root_port->dev.parent; @@ -340,11 +241,11 @@ static int add_host_bridge_dport(struct device *match, void *arg) return 0; } - rc = cxl_add_dport(root_port, match, uid, ctx.chbcr); - if (rc) { + dport = devm_cxl_add_dport(root_port, match, uid, ctx.chbcr); + if (IS_ERR(dport)) { dev_err(host, "failed to add downstream port: %s\n", dev_name(match)); - return rc; + return PTR_ERR(dport); } dev_dbg(host, "add dport%llu: %s\n", uid, dev_name(match)); return 0; @@ -413,7 +314,8 @@ static int cxl_acpi_probe(struct platform_device *pdev) if (rc < 0) return rc; - return 0; + /* In case PCI is scanned before ACPI re-trigger memdev attach */ + return cxl_bus_rescan(); } static const struct acpi_device_id cxl_acpi_ids[] = { diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile index 40ab50318daf..6d37cd78b151 100644 --- a/drivers/cxl/core/Makefile +++ b/drivers/cxl/core/Makefile @@ -2,8 +2,10 @@ obj-$(CONFIG_CXL_BUS) += cxl_core.o ccflags-y += -I$(srctree)/drivers/cxl -cxl_core-y := bus.o +cxl_core-y := port.o cxl_core-y += pmem.o cxl_core-y += regs.o cxl_core-y += memdev.o cxl_core-y += mbox.o +cxl_core-y += pci.o +cxl_core-y += hdm.o diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c deleted file mode 100644 index 3f9b98ecd18b..000000000000 --- a/drivers/cxl/core/bus.c +++ /dev/null @@ -1,675 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include "core.h" - -/** - * DOC: cxl core - * - * The CXL core provides a set of interfaces that can be consumed by CXL aware - * drivers. The interfaces allow for creation, modification, and destruction of - * regions, memory devices, ports, and decoders. CXL aware drivers must register - * with the CXL core via these interfaces in order to be able to participate in - * cross-device interleave coordination. The CXL core also establishes and - * maintains the bridge to the nvdimm subsystem. - * - * CXL core introduces sysfs hierarchy to control the devices that are - * instantiated by the core. 
- */ - -static DEFINE_IDA(cxl_port_ida); - -static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return sysfs_emit(buf, "%s\n", dev->type->name); -} -static DEVICE_ATTR_RO(devtype); - -static struct attribute *cxl_base_attributes[] = { - &dev_attr_devtype.attr, - NULL, -}; - -struct attribute_group cxl_base_attribute_group = { - .attrs = cxl_base_attributes, -}; - -static ssize_t start_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - return sysfs_emit(buf, "%#llx\n", cxld->range.start); -} -static DEVICE_ATTR_RO(start); - -static ssize_t size_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range)); -} -static DEVICE_ATTR_RO(size); - -#define CXL_DECODER_FLAG_ATTR(name, flag) \ -static ssize_t name##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct cxl_decoder *cxld = to_cxl_decoder(dev); \ - \ - return sysfs_emit(buf, "%s\n", \ - (cxld->flags & (flag)) ? "1" : "0"); \ -} \ -static DEVICE_ATTR_RO(name) - -CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM); -CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM); -CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2); -CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3); -CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK); - -static ssize_t target_type_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - - switch (cxld->target_type) { - case CXL_DECODER_ACCELERATOR: - return sysfs_emit(buf, "accelerator\n"); - case CXL_DECODER_EXPANDER: - return sysfs_emit(buf, "expander\n"); - } - return -ENXIO; -} -static DEVICE_ATTR_RO(target_type); - -static ssize_t target_list_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - ssize_t offset = 0; - int i, rc = 0; - - device_lock(dev); - for (i = 0; i < cxld->interleave_ways; i++) { - struct cxl_dport *dport = cxld->target[i]; - struct cxl_dport *next = NULL; - - if (!dport) - break; - - if (i + 1 < cxld->interleave_ways) - next = cxld->target[i + 1]; - rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, - next ? 
"," : ""); - if (rc < 0) - break; - offset += rc; - } - device_unlock(dev); - - if (rc < 0) - return rc; - - rc = sysfs_emit_at(buf, offset, "\n"); - if (rc < 0) - return rc; - - return offset + rc; -} -static DEVICE_ATTR_RO(target_list); - -static struct attribute *cxl_decoder_base_attrs[] = { - &dev_attr_start.attr, - &dev_attr_size.attr, - &dev_attr_locked.attr, - &dev_attr_target_list.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_base_attribute_group = { - .attrs = cxl_decoder_base_attrs, -}; - -static struct attribute *cxl_decoder_root_attrs[] = { - &dev_attr_cap_pmem.attr, - &dev_attr_cap_ram.attr, - &dev_attr_cap_type2.attr, - &dev_attr_cap_type3.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_root_attribute_group = { - .attrs = cxl_decoder_root_attrs, -}; - -static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { - &cxl_decoder_root_attribute_group, - &cxl_decoder_base_attribute_group, - &cxl_base_attribute_group, - NULL, -}; - -static struct attribute *cxl_decoder_switch_attrs[] = { - &dev_attr_target_type.attr, - NULL, -}; - -static struct attribute_group cxl_decoder_switch_attribute_group = { - .attrs = cxl_decoder_switch_attrs, -}; - -static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { - &cxl_decoder_switch_attribute_group, - &cxl_decoder_base_attribute_group, - &cxl_base_attribute_group, - NULL, -}; - -static void cxl_decoder_release(struct device *dev) -{ - struct cxl_decoder *cxld = to_cxl_decoder(dev); - struct cxl_port *port = to_cxl_port(dev->parent); - - ida_free(&port->decoder_ida, cxld->id); - kfree(cxld); -} - -static const struct device_type cxl_decoder_switch_type = { - .name = "cxl_decoder_switch", - .release = cxl_decoder_release, - .groups = cxl_decoder_switch_attribute_groups, -}; - -static const struct device_type cxl_decoder_root_type = { - .name = "cxl_decoder_root", - .release = cxl_decoder_release, - .groups = cxl_decoder_root_attribute_groups, -}; - -bool is_root_decoder(struct device *dev) -{ - return dev->type == &cxl_decoder_root_type; -} -EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); - -struct cxl_decoder *to_cxl_decoder(struct device *dev) -{ - if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release, - "not a cxl_decoder device\n")) - return NULL; - return container_of(dev, struct cxl_decoder, dev); -} -EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); - -static void cxl_dport_release(struct cxl_dport *dport) -{ - list_del(&dport->list); - put_device(dport->dport); - kfree(dport); -} - -static void cxl_port_release(struct device *dev) -{ - struct cxl_port *port = to_cxl_port(dev); - struct cxl_dport *dport, *_d; - - device_lock(dev); - list_for_each_entry_safe(dport, _d, &port->dports, list) - cxl_dport_release(dport); - device_unlock(dev); - ida_free(&cxl_port_ida, port->id); - kfree(port); -} - -static const struct attribute_group *cxl_port_attribute_groups[] = { - &cxl_base_attribute_group, - NULL, -}; - -static const struct device_type cxl_port_type = { - .name = "cxl_port", - .release = cxl_port_release, - .groups = cxl_port_attribute_groups, -}; - -struct cxl_port *to_cxl_port(struct device *dev) -{ - if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, - "not a cxl_port device\n")) - return NULL; - return container_of(dev, struct cxl_port, dev); -} - -static void unregister_port(void *_port) -{ - struct cxl_port *port = _port; - struct cxl_dport *dport; - - device_lock(&port->dev); - list_for_each_entry(dport, &port->dports, list) { - char 
link_name[CXL_TARGET_STRLEN]; - - if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", - dport->port_id) >= CXL_TARGET_STRLEN) - continue; - sysfs_remove_link(&port->dev.kobj, link_name); - } - device_unlock(&port->dev); - device_unregister(&port->dev); -} - -static void cxl_unlink_uport(void *_port) -{ - struct cxl_port *port = _port; - - sysfs_remove_link(&port->dev.kobj, "uport"); -} - -static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) -{ - int rc; - - rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport"); - if (rc) - return rc; - return devm_add_action_or_reset(host, cxl_unlink_uport, port); -} - -static struct cxl_port *cxl_port_alloc(struct device *uport, - resource_size_t component_reg_phys, - struct cxl_port *parent_port) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - port = kzalloc(sizeof(*port), GFP_KERNEL); - if (!port) - return ERR_PTR(-ENOMEM); - - rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); - if (rc < 0) - goto err; - port->id = rc; - - /* - * The top-level cxl_port "cxl_root" does not have a cxl_port as - * its parent and it does not have any corresponding component - * registers as its decode is described by a fixed platform - * description. - */ - dev = &port->dev; - if (parent_port) - dev->parent = &parent_port->dev; - else - dev->parent = uport; - - port->uport = uport; - port->component_reg_phys = component_reg_phys; - ida_init(&port->decoder_ida); - INIT_LIST_HEAD(&port->dports); - - device_initialize(dev); - device_set_pm_not_required(dev); - dev->bus = &cxl_bus_type; - dev->type = &cxl_port_type; - - return port; - -err: - kfree(port); - return ERR_PTR(rc); -} - -/** - * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy - * @host: host device for devm operations - * @uport: "physical" device implementing this upstream port - * @component_reg_phys: (optional) for configurable cxl_port instances - * @parent_port: next hop up in the CXL memory decode hierarchy - */ -struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, - resource_size_t component_reg_phys, - struct cxl_port *parent_port) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - port = cxl_port_alloc(uport, component_reg_phys, parent_port); - if (IS_ERR(port)) - return port; - - dev = &port->dev; - if (parent_port) - rc = dev_set_name(dev, "port%d", port->id); - else - rc = dev_set_name(dev, "root%d", port->id); - if (rc) - goto err; - - rc = device_add(dev); - if (rc) - goto err; - - rc = devm_add_action_or_reset(host, unregister_port, port); - if (rc) - return ERR_PTR(rc); - - rc = devm_cxl_link_uport(host, port); - if (rc) - return ERR_PTR(rc); - - return port; - -err: - put_device(dev); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); - -static struct cxl_dport *find_dport(struct cxl_port *port, int id) -{ - struct cxl_dport *dport; - - device_lock_assert(&port->dev); - list_for_each_entry (dport, &port->dports, list) - if (dport->port_id == id) - return dport; - return NULL; -} - -static int add_dport(struct cxl_port *port, struct cxl_dport *new) -{ - struct cxl_dport *dup; - - device_lock(&port->dev); - dup = find_dport(port, new->port_id); - if (dup) - dev_err(&port->dev, - "unable to add dport%d-%s non-unique port id (%s)\n", - new->port_id, dev_name(new->dport), - dev_name(dup->dport)); - else - list_add_tail(&new->list, &port->dports); - device_unlock(&port->dev); - - return dup ? 
-EEXIST : 0; -} - -/** - * cxl_add_dport - append downstream port data to a cxl_port - * @port: the cxl_port that references this dport - * @dport_dev: firmware or PCI device representing the dport - * @port_id: identifier for this dport in a decoder's target list - * @component_reg_phys: optional location of CXL component registers - * - * Note that all allocations and links are undone by cxl_port deletion - * and release. - */ -int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id, - resource_size_t component_reg_phys) -{ - char link_name[CXL_TARGET_STRLEN]; - struct cxl_dport *dport; - int rc; - - if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= - CXL_TARGET_STRLEN) - return -EINVAL; - - dport = kzalloc(sizeof(*dport), GFP_KERNEL); - if (!dport) - return -ENOMEM; - - INIT_LIST_HEAD(&dport->list); - dport->dport = get_device(dport_dev); - dport->port_id = port_id; - dport->component_reg_phys = component_reg_phys; - dport->port = port; - - rc = add_dport(port, dport); - if (rc) - goto err; - - rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); - if (rc) - goto err; - - return 0; -err: - cxl_dport_release(dport); - return rc; -} -EXPORT_SYMBOL_NS_GPL(cxl_add_dport, CXL); - -static int decoder_populate_targets(struct cxl_decoder *cxld, - struct cxl_port *port, int *target_map) -{ - int rc = 0, i; - - if (!target_map) - return 0; - - device_lock(&port->dev); - if (list_empty(&port->dports)) { - rc = -EINVAL; - goto out_unlock; - } - - for (i = 0; i < cxld->nr_targets; i++) { - struct cxl_dport *dport = find_dport(port, target_map[i]); - - if (!dport) { - rc = -ENXIO; - goto out_unlock; - } - cxld->target[i] = dport; - } - -out_unlock: - device_unlock(&port->dev); - - return rc; -} - -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets) -{ - struct cxl_decoder *cxld; - struct device *dev; - int rc = 0; - - if (nr_targets > CXL_DECODER_MAX_INTERLEAVE || nr_targets < 1) - return ERR_PTR(-EINVAL); - - cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); - if (!cxld) - return ERR_PTR(-ENOMEM); - - rc = ida_alloc(&port->decoder_ida, GFP_KERNEL); - if (rc < 0) - goto err; - - cxld->id = rc; - cxld->nr_targets = nr_targets; - dev = &cxld->dev; - device_initialize(dev); - device_set_pm_not_required(dev); - dev->parent = &port->dev; - dev->bus = &cxl_bus_type; - - /* root ports do not have a cxl_port_type parent */ - if (port->dev.parent->type == &cxl_port_type) - dev->type = &cxl_decoder_switch_type; - else - dev->type = &cxl_decoder_root_type; - - return cxld; -err: - kfree(cxld); - return ERR_PTR(rc); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_alloc, CXL); - -int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) -{ - struct cxl_port *port; - struct device *dev; - int rc; - - if (WARN_ON_ONCE(!cxld)) - return -EINVAL; - - if (WARN_ON_ONCE(IS_ERR(cxld))) - return PTR_ERR(cxld); - - if (cxld->interleave_ways < 1) - return -EINVAL; - - port = to_cxl_port(cxld->dev.parent); - rc = decoder_populate_targets(cxld, port, target_map); - if (rc) - return rc; - - dev = &cxld->dev; - rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); - if (rc) - return rc; - - return device_add(dev); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); - -static void cxld_unregister(void *dev) -{ - device_unregister(dev); -} - -int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld) -{ - return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev); -} -EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, 
CXL); - -/** - * __cxl_driver_register - register a driver for the cxl bus - * @cxl_drv: cxl driver structure to attach - * @owner: owning module/driver - * @modname: KBUILD_MODNAME for parent driver - */ -int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, - const char *modname) -{ - if (!cxl_drv->probe) { - pr_debug("%s ->probe() must be specified\n", modname); - return -EINVAL; - } - - if (!cxl_drv->name) { - pr_debug("%s ->name must be specified\n", modname); - return -EINVAL; - } - - if (!cxl_drv->id) { - pr_debug("%s ->id must be specified\n", modname); - return -EINVAL; - } - - cxl_drv->drv.bus = &cxl_bus_type; - cxl_drv->drv.owner = owner; - cxl_drv->drv.mod_name = modname; - cxl_drv->drv.name = cxl_drv->name; - - return driver_register(&cxl_drv->drv); -} -EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL); - -void cxl_driver_unregister(struct cxl_driver *cxl_drv) -{ - driver_unregister(&cxl_drv->drv); -} -EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL); - -static int cxl_device_id(struct device *dev) -{ - if (dev->type == &cxl_nvdimm_bridge_type) - return CXL_DEVICE_NVDIMM_BRIDGE; - if (dev->type == &cxl_nvdimm_type) - return CXL_DEVICE_NVDIMM; - return 0; -} - -static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) -{ - return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, - cxl_device_id(dev)); -} - -static int cxl_bus_match(struct device *dev, struct device_driver *drv) -{ - return cxl_device_id(dev) == to_cxl_drv(drv)->id; -} - -static int cxl_bus_probe(struct device *dev) -{ - return to_cxl_drv(dev->driver)->probe(dev); -} - -static void cxl_bus_remove(struct device *dev) -{ - struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); - - if (cxl_drv->remove) - cxl_drv->remove(dev); -} - -struct bus_type cxl_bus_type = { - .name = "cxl", - .uevent = cxl_bus_uevent, - .match = cxl_bus_match, - .probe = cxl_bus_probe, - .remove = cxl_bus_remove, -}; -EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL); - -static __init int cxl_core_init(void) -{ - int rc; - - cxl_mbox_init(); - - rc = cxl_memdev_init(); - if (rc) - return rc; - - rc = bus_register(&cxl_bus_type); - if (rc) - goto err; - return 0; - -err: - cxl_memdev_exit(); - cxl_mbox_exit(); - return rc; -} - -static void cxl_core_exit(void) -{ - bus_unregister(&cxl_bus_type); - cxl_memdev_exit(); - cxl_mbox_exit(); -} - -module_init(cxl_core_init); -module_exit(cxl_core_exit); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index e0c9aacc4e9c..1a50c0fc399c 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -14,6 +14,8 @@ struct cxl_mem_query_commands; int cxl_query_cmd(struct cxl_memdev *cxlmd, struct cxl_mem_query_commands __user *q); int cxl_send_cmd(struct cxl_memdev *cxlmd, struct cxl_send_command __user *s); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); int cxl_memdev_init(void); void cxl_memdev_exit(void); diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c new file mode 100644 index 000000000000..0e89a7a932d4 --- /dev/null +++ b/drivers/cxl/core/hdm.c @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. 
*/ +#include +#include +#include + +#include "cxlmem.h" +#include "core.h" + +/** + * DOC: cxl core hdm + * + * Compute Express Link Host Managed Device Memory, starting with the + * CXL 2.0 specification, is managed by an array of HDM Decoder register + * instances per CXL port and per CXL endpoint. Define common helpers + * for enumerating these registers and capabilities. + */ + +static int add_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map) +{ + int rc; + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + + return 0; +} + +/* + * Per the CXL specification (8.2.5.12 CXL HDM Decoder Capability Structure) + * single ported host-bridges need not publish a decoder capability when a + * passthrough decode can be assumed, i.e. all transactions that the uport sees + * are claimed and passed to the single dport. Disable the range until the first + * CXL region is enumerated / activated. + */ +int devm_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + struct cxl_decoder *cxld; + struct cxl_dport *dport; + int single_port_map[1]; + + cxld = cxl_switch_decoder_alloc(port, 1); + if (IS_ERR(cxld)) + return PTR_ERR(cxld); + + device_lock_assert(&port->dev); + + dport = list_first_entry(&port->dports, typeof(*dport), list); + single_port_map[0] = dport->port_id; + + return add_hdm_decoder(port, cxld, single_port_map); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_passthrough_decoder, CXL); + +static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm) +{ + u32 hdm_cap; + + hdm_cap = readl(cxlhdm->regs.hdm_decoder + CXL_HDM_DECODER_CAP_OFFSET); + cxlhdm->decoder_count = cxl_hdm_decoder_count(hdm_cap); + cxlhdm->target_count = + FIELD_GET(CXL_HDM_DECODER_TARGET_COUNT_MASK, hdm_cap); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_11_8, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(11, 8); + if (FIELD_GET(CXL_HDM_DECODER_INTERLEAVE_14_12, hdm_cap)) + cxlhdm->interleave_mask |= GENMASK(14, 12); +} + +static void __iomem *map_hdm_decoder_regs(struct cxl_port *port, + void __iomem *crb) +{ + struct cxl_component_reg_map map; + + cxl_probe_component_regs(&port->dev, crb, &map); + if (!map.hdm_decoder.valid) { + dev_err(&port->dev, "HDM decoder registers invalid\n"); + return IOMEM_ERR_PTR(-ENXIO); + } + + return crb + map.hdm_decoder.offset; +} + +/** + * devm_cxl_setup_hdm - map HDM decoder component registers + * @port: cxl_port to map + */ +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port) +{ + struct device *dev = &port->dev; + void __iomem *crb, *hdm; + struct cxl_hdm *cxlhdm; + + cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL); + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + crb = devm_cxl_iomap_block(dev, port->component_reg_phys, + CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_err(dev, "No component registers mapped\n"); + return ERR_PTR(-ENXIO); + } + + hdm = map_hdm_decoder_regs(port, crb); + if (IS_ERR(hdm)) + return ERR_CAST(hdm); + cxlhdm->regs.hdm_decoder = hdm; + + parse_hdm_decoder_caps(cxlhdm); + if (cxlhdm->decoder_count == 0) { + dev_err(dev, "Spec violation. 
Caps invalid\n"); + return ERR_PTR(-ENXIO); + } + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_setup_hdm, CXL); + +static int to_interleave_granularity(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IG_MASK, ctrl); + + return 256 << val; +} + +static int to_interleave_ways(u32 ctrl) +{ + int val = FIELD_GET(CXL_HDM_DECODER0_CTRL_IW_MASK, ctrl); + + switch (val) { + case 0 ... 4: + return 1 << val; + case 8 ... 10: + return 3 << (val - 8); + default: + return 0; + } +} + +static int init_hdm_decoder(struct cxl_port *port, struct cxl_decoder *cxld, + int *target_map, void __iomem *hdm, int which) +{ + u64 size, base; + u32 ctrl; + int i; + union { + u64 value; + unsigned char target_id[8]; + } target_list; + + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(which)); + base = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_BASE_LOW_OFFSET(which)); + size = ioread64_hi_lo(hdm + CXL_HDM_DECODER0_SIZE_LOW_OFFSET(which)); + + if (!(ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED)) + size = 0; + if (base == U64_MAX || size == U64_MAX) { + dev_warn(&port->dev, "decoder%d.%d: Invalid resource range\n", + port->id, cxld->id); + return -ENXIO; + } + + cxld->decoder_range = (struct range) { + .start = base, + .end = base + size - 1, + }; + + /* switch decoders are always enabled if committed */ + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) { + cxld->flags |= CXL_DECODER_F_ENABLE; + if (ctrl & CXL_HDM_DECODER0_CTRL_LOCK) + cxld->flags |= CXL_DECODER_F_LOCK; + } + cxld->interleave_ways = to_interleave_ways(ctrl); + if (!cxld->interleave_ways) { + dev_warn(&port->dev, + "decoder%d.%d: Invalid interleave ways (ctrl: %#x)\n", + port->id, cxld->id, ctrl); + return -ENXIO; + } + cxld->interleave_granularity = to_interleave_granularity(ctrl); + + if (FIELD_GET(CXL_HDM_DECODER0_CTRL_TYPE, ctrl)) + cxld->target_type = CXL_DECODER_EXPANDER; + else + cxld->target_type = CXL_DECODER_ACCELERATOR; + + if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent))) + return 0; + + target_list.value = + ioread64_hi_lo(hdm + CXL_HDM_DECODER0_TL_LOW(which)); + for (i = 0; i < cxld->interleave_ways; i++) + target_map[i] = target_list.target_id[i]; + + return 0; +} + +/** + * devm_cxl_enumerate_decoders - add decoder objects per HDM register set + * @cxlhdm: Structure to populate with HDM capabilities + */ +int devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + void __iomem *hdm = cxlhdm->regs.hdm_decoder; + struct cxl_port *port = cxlhdm->port; + int i, committed, failed; + u32 ctrl; + + /* + * Since the register resource was recently claimed via request_region() + * be careful about trusting the "not-committed" status until the commit + * timeout has elapsed. The commit timeout is 10ms (CXL 2.0 + * 8.2.5.12.20), but double it to be tolerant of any clock skew between + * host and target. 
+ */ + for (i = 0, committed = 0; i < cxlhdm->decoder_count; i++) { + ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(i)); + if (ctrl & CXL_HDM_DECODER0_CTRL_COMMITTED) + committed++; + } + + /* ensure that future checks of committed can be trusted */ + if (committed != cxlhdm->decoder_count) + msleep(20); + + for (i = 0, failed = 0; i < cxlhdm->decoder_count; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = { 0 }; + int rc, target_count = cxlhdm->target_count; + struct cxl_decoder *cxld; + + if (is_cxl_endpoint(port)) + cxld = cxl_endpoint_decoder_alloc(port); + else + cxld = cxl_switch_decoder_alloc(port, target_count); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + rc = init_hdm_decoder(port, cxld, target_map, + cxlhdm->regs.hdm_decoder, i); + if (rc) { + put_device(&cxld->dev); + failed++; + continue; + } + rc = add_hdm_decoder(port, cxld, target_map); + if (rc) { + dev_warn(&port->dev, + "Failed to add decoder to port\n"); + return rc; + } + } + + if (failed == cxlhdm->decoder_count) { + dev_err(&port->dev, "No valid decoders found\n"); + return -ENXIO; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_decoders, CXL); diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c index 61029cb7ac62..1f76b28f9826 100644 --- a/drivers/cxl/core/memdev.c +++ b/drivers/cxl/core/memdev.c @@ -89,10 +89,29 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr, static struct device_attribute dev_attr_pmem_size = __ATTR(size, 0444, pmem_size_show, NULL); +static ssize_t serial_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + + return sysfs_emit(buf, "%#llx\n", cxlds->serial); +} +static DEVICE_ATTR_RO(serial); + +static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "%d\n", dev_to_node(dev)); +} +static DEVICE_ATTR_RO(numa_node); + static struct attribute *cxl_memdev_attributes[] = { + &dev_attr_serial.attr, &dev_attr_firmware_version.attr, &dev_attr_payload_max.attr, &dev_attr_label_storage_size.attr, + &dev_attr_numa_node.attr, NULL, }; @@ -106,8 +125,17 @@ static struct attribute *cxl_memdev_ram_attributes[] = { NULL, }; +static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a, + int n) +{ + if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr) + return 0; + return a->mode; +} + static struct attribute_group cxl_memdev_attribute_group = { .attrs = cxl_memdev_attributes, + .is_visible = cxl_memdev_visible, }; static struct attribute_group cxl_memdev_ram_attribute_group = { @@ -134,6 +162,12 @@ static const struct device_type cxl_memdev_type = { .groups = cxl_memdev_attribute_groups, }; +bool is_cxl_memdev(struct device *dev) +{ + return dev->type == &cxl_memdev_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL); + /** * set_exclusive_cxl_commands() - atomically disable user cxl commands * @cxlds: The device state to operate on @@ -185,6 +219,15 @@ static void cxl_memdev_unregister(void *_cxlmd) put_device(dev); } +static void detach_memdev(struct work_struct *work) +{ + struct cxl_memdev *cxlmd; + + cxlmd = container_of(work, typeof(*cxlmd), detach_work); + device_release_driver(&cxlmd->dev); + put_device(&cxlmd->dev); +} + static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, const struct file_operations *fops) { @@ -209,6 +252,7 @@ static struct 
cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds, dev->devt = MKDEV(cxl_mem_major, cxlmd->id); dev->type = &cxl_memdev_type; device_set_pm_not_required(dev); + INIT_WORK(&cxlmd->detach_work, detach_memdev); cdev = &cxlmd->cdev; cdev_init(cdev, fops); diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c new file mode 100644 index 000000000000..c9a494d6976a --- /dev/null +++ b/drivers/cxl/core/pci.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include "core.h" + +/** + * DOC: cxl core pci + * + * Compute Express Link protocols are layered on top of PCIe. CXL core provides + * a set of helpers for CXL interactions which occur via PCIe. + */ + +struct cxl_walk_context { + struct pci_bus *bus; + struct cxl_port *port; + int type; + int error; + int count; +}; + +static int match_add_dports(struct pci_dev *pdev, void *data) +{ + struct cxl_walk_context *ctx = data; + struct cxl_port *port = ctx->port; + int type = pci_pcie_type(pdev); + struct cxl_register_map map; + struct cxl_dport *dport; + u32 lnkcap, port_num; + int rc; + + if (pdev->bus != ctx->bus) + return 0; + if (!pci_is_pcie(pdev)) + return 0; + if (type != ctx->type) + return 0; + if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, + &lnkcap)) + return 0; + + rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_dbg(&port->dev, "failed to find component registers\n"); + + port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); + dport = devm_cxl_add_dport(port, &pdev->dev, port_num, + cxl_regmap_to_base(pdev, &map)); + if (IS_ERR(dport)) { + ctx->error = PTR_ERR(dport); + return PTR_ERR(dport); + } + ctx->count++; + + dev_dbg(&port->dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); + + return 0; +} + +/** + * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port + * @port: cxl_port whose ->uport is the upstream of dports to be enumerated + * + * Returns a positive number of dports enumerated or a negative error + * code. 
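+ *
+ * Illustrative usage, assuming the cxl_port driver added by this change as
+ * the caller (a sketch of the expected call pattern, not code copied from
+ * that driver):
+ *
+ *	rc = devm_cxl_port_enumerate_dports(port);
+ *	if (rc < 0)
+ *		return rc;
+ *	if (rc == 1)
+ *		return devm_cxl_add_passthrough_decoder(port);
+ *	cxlhdm = devm_cxl_setup_hdm(port);
+ *	if (IS_ERR(cxlhdm))
+ *		return PTR_ERR(cxlhdm);
+ *	return devm_cxl_enumerate_decoders(cxlhdm);
+ *
+ * i.e. a single enumerated dport can be served by a passthrough decoder,
+ * otherwise the port's HDM decoder registers are mapped and walked.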
+ */ +int devm_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct pci_bus *bus = cxl_port_to_pci_bus(port); + struct cxl_walk_context ctx; + int type; + + if (!bus) + return -ENXIO; + + if (pci_is_root_bus(bus)) + type = PCI_EXP_TYPE_ROOT_PORT; + else + type = PCI_EXP_TYPE_DOWNSTREAM; + + ctx = (struct cxl_walk_context) { + .port = port, + .bus = bus, + .type = type, + }; + pci_walk_bus(bus, match_add_dports, &ctx); + + if (ctx.count == 0) + return -ENODEV; + if (ctx.error) + return ctx.error; + return ctx.count; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, CXL); diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c index b5fca97b0a07..8de240c4d96b 100644 --- a/drivers/cxl/core/pmem.c +++ b/drivers/cxl/core/pmem.c @@ -57,24 +57,30 @@ bool is_cxl_nvdimm_bridge(struct device *dev) } EXPORT_SYMBOL_NS_GPL(is_cxl_nvdimm_bridge, CXL); -__mock int match_nvdimm_bridge(struct device *dev, const void *data) +static int match_nvdimm_bridge(struct device *dev, void *data) { return is_cxl_nvdimm_bridge(dev); } struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd) { + struct cxl_port *port = find_cxl_root(&cxl_nvd->dev); struct device *dev; - dev = bus_find_device(&cxl_bus_type, NULL, cxl_nvd, match_nvdimm_bridge); + if (!port) + return NULL; + + dev = device_find_child(&port->dev, NULL, match_nvdimm_bridge); + put_device(&port->dev); + if (!dev) return NULL; + return to_cxl_nvdimm_bridge(dev); } EXPORT_SYMBOL_NS_GPL(cxl_find_nvdimm_bridge, CXL); -static struct cxl_nvdimm_bridge * -cxl_nvdimm_bridge_alloc(struct cxl_port *port) +static struct cxl_nvdimm_bridge *cxl_nvdimm_bridge_alloc(struct cxl_port *port) { struct cxl_nvdimm_bridge *cxl_nvb; struct device *dev; @@ -115,10 +121,10 @@ static void unregister_nvb(void *_cxl_nvb) * work to flush. Once the state has been changed to 'dead' then no new * work can be queued by user-triggered bind. */ - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); flush = cxl_nvb->state != CXL_NVB_NEW; cxl_nvb->state = CXL_NVB_DEAD; - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); /* * Even though the device core will trigger device_release_driver() diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c new file mode 100644 index 000000000000..2ab1ba4499b3 --- /dev/null +++ b/drivers/cxl/core/port.c @@ -0,0 +1,1568 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "core.h" + +/** + * DOC: cxl core + * + * The CXL core provides a set of interfaces that can be consumed by CXL aware + * drivers. The interfaces allow for creation, modification, and destruction of + * regions, memory devices, ports, and decoders. CXL aware drivers must register + * with the CXL core via these interfaces in order to be able to participate in + * cross-device interleave coordination. The CXL core also establishes and + * maintains the bridge to the nvdimm subsystem. + * + * CXL core introduces sysfs hierarchy to control the devices that are + * instantiated by the core. 
+ */ + +static DEFINE_IDA(cxl_port_ida); +static DEFINE_XARRAY(cxl_root_buses); + +static ssize_t devtype_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, "%s\n", dev->type->name); +} +static DEVICE_ATTR_RO(devtype); + +static int cxl_device_id(struct device *dev) +{ + if (dev->type == &cxl_nvdimm_bridge_type) + return CXL_DEVICE_NVDIMM_BRIDGE; + if (dev->type == &cxl_nvdimm_type) + return CXL_DEVICE_NVDIMM; + if (is_cxl_port(dev)) { + if (is_cxl_root(to_cxl_port(dev))) + return CXL_DEVICE_ROOT; + return CXL_DEVICE_PORT; + } + if (is_cxl_memdev(dev)) + return CXL_DEVICE_MEMORY_EXPANDER; + return 0; +} + +static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sysfs_emit(buf, CXL_MODALIAS_FMT "\n", cxl_device_id(dev)); +} +static DEVICE_ATTR_RO(modalias); + +static struct attribute *cxl_base_attributes[] = { + &dev_attr_devtype.attr, + &dev_attr_modalias.attr, + NULL, +}; + +struct attribute_group cxl_base_attribute_group = { + .attrs = cxl_base_attributes, +}; + +static ssize_t start_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 start; + + if (is_root_decoder(dev)) + start = cxld->platform_res.start; + else + start = cxld->decoder_range.start; + + return sysfs_emit(buf, "%#llx\n", start); +} +static DEVICE_ATTR_ADMIN_RO(start); + +static ssize_t size_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + u64 size; + + if (is_root_decoder(dev)) + size = resource_size(&cxld->platform_res); + else + size = range_len(&cxld->decoder_range); + + return sysfs_emit(buf, "%#llx\n", size); +} +static DEVICE_ATTR_RO(size); + +#define CXL_DECODER_FLAG_ATTR(name, flag) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct cxl_decoder *cxld = to_cxl_decoder(dev); \ + \ + return sysfs_emit(buf, "%s\n", \ + (cxld->flags & (flag)) ? "1" : "0"); \ +} \ +static DEVICE_ATTR_RO(name) + +CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM); +CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM); +CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2); +CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3); +CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK); + +static ssize_t target_type_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + + switch (cxld->target_type) { + case CXL_DECODER_ACCELERATOR: + return sysfs_emit(buf, "accelerator\n"); + case CXL_DECODER_EXPANDER: + return sysfs_emit(buf, "expander\n"); + } + return -ENXIO; +} +static DEVICE_ATTR_RO(target_type); + +static ssize_t emit_target_list(struct cxl_decoder *cxld, char *buf) +{ + ssize_t offset = 0; + int i, rc = 0; + + for (i = 0; i < cxld->interleave_ways; i++) { + struct cxl_dport *dport = cxld->target[i]; + struct cxl_dport *next = NULL; + + if (!dport) + break; + + if (i + 1 < cxld->interleave_ways) + next = cxld->target[i + 1]; + rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id, + next ? 
"," : ""); + if (rc < 0) + return rc; + offset += rc; + } + + return offset; +} + +static ssize_t target_list_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + ssize_t offset; + unsigned int seq; + int rc; + + do { + seq = read_seqbegin(&cxld->target_lock); + rc = emit_target_list(cxld, buf); + } while (read_seqretry(&cxld->target_lock, seq)); + + if (rc < 0) + return rc; + offset = rc; + + rc = sysfs_emit_at(buf, offset, "\n"); + if (rc < 0) + return rc; + + return offset + rc; +} +static DEVICE_ATTR_RO(target_list); + +static struct attribute *cxl_decoder_base_attrs[] = { + &dev_attr_start.attr, + &dev_attr_size.attr, + &dev_attr_locked.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_base_attribute_group = { + .attrs = cxl_decoder_base_attrs, +}; + +static struct attribute *cxl_decoder_root_attrs[] = { + &dev_attr_cap_pmem.attr, + &dev_attr_cap_ram.attr, + &dev_attr_cap_type2.attr, + &dev_attr_cap_type3.attr, + &dev_attr_target_list.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_root_attribute_group = { + .attrs = cxl_decoder_root_attrs, +}; + +static const struct attribute_group *cxl_decoder_root_attribute_groups[] = { + &cxl_decoder_root_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static struct attribute *cxl_decoder_switch_attrs[] = { + &dev_attr_target_type.attr, + &dev_attr_target_list.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_switch_attribute_group = { + .attrs = cxl_decoder_switch_attrs, +}; + +static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = { + &cxl_decoder_switch_attribute_group, + &cxl_decoder_base_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static struct attribute *cxl_decoder_endpoint_attrs[] = { + &dev_attr_target_type.attr, + NULL, +}; + +static struct attribute_group cxl_decoder_endpoint_attribute_group = { + .attrs = cxl_decoder_endpoint_attrs, +}; + +static const struct attribute_group *cxl_decoder_endpoint_attribute_groups[] = { + &cxl_decoder_base_attribute_group, + &cxl_decoder_endpoint_attribute_group, + &cxl_base_attribute_group, + NULL, +}; + +static void cxl_decoder_release(struct device *dev) +{ + struct cxl_decoder *cxld = to_cxl_decoder(dev); + struct cxl_port *port = to_cxl_port(dev->parent); + + ida_free(&port->decoder_ida, cxld->id); + kfree(cxld); + put_device(&port->dev); +} + +static const struct device_type cxl_decoder_endpoint_type = { + .name = "cxl_decoder_endpoint", + .release = cxl_decoder_release, + .groups = cxl_decoder_endpoint_attribute_groups, +}; + +static const struct device_type cxl_decoder_switch_type = { + .name = "cxl_decoder_switch", + .release = cxl_decoder_release, + .groups = cxl_decoder_switch_attribute_groups, +}; + +static const struct device_type cxl_decoder_root_type = { + .name = "cxl_decoder_root", + .release = cxl_decoder_release, + .groups = cxl_decoder_root_attribute_groups, +}; + +static bool is_endpoint_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_endpoint_type; +} + +bool is_root_decoder(struct device *dev) +{ + return dev->type == &cxl_decoder_root_type; +} +EXPORT_SYMBOL_NS_GPL(is_root_decoder, CXL); + +bool is_cxl_decoder(struct device *dev) +{ + return dev->type && dev->type->release == cxl_decoder_release; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_decoder, CXL); + +struct cxl_decoder *to_cxl_decoder(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type->release != 
cxl_decoder_release, + "not a cxl_decoder device\n")) + return NULL; + return container_of(dev, struct cxl_decoder, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_decoder, CXL); + +static void cxl_ep_release(struct cxl_ep *ep) +{ + if (!ep) + return; + list_del(&ep->list); + put_device(ep->ep); + kfree(ep); +} + +static void cxl_port_release(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_ep *ep, *_e; + + cxl_device_lock(dev); + list_for_each_entry_safe(ep, _e, &port->endpoints, list) + cxl_ep_release(ep); + cxl_device_unlock(dev); + ida_free(&cxl_port_ida, port->id); + kfree(port); +} + +static const struct attribute_group *cxl_port_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +static const struct device_type cxl_port_type = { + .name = "cxl_port", + .release = cxl_port_release, + .groups = cxl_port_attribute_groups, +}; + +bool is_cxl_port(struct device *dev) +{ + return dev->type == &cxl_port_type; +} +EXPORT_SYMBOL_NS_GPL(is_cxl_port, CXL); + +struct cxl_port *to_cxl_port(struct device *dev) +{ + if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type, + "not a cxl_port device\n")) + return NULL; + return container_of(dev, struct cxl_port, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_port, CXL); + +static void unregister_port(void *_port) +{ + struct cxl_port *port = _port; + struct cxl_port *parent; + struct device *lock_dev; + + if (is_cxl_root(port)) + parent = NULL; + else + parent = to_cxl_port(port->dev.parent); + + /* + * CXL root port's and the first level of ports are unregistered + * under the platform firmware device lock, all other ports are + * unregistered while holding their parent port lock. + */ + if (!parent) + lock_dev = port->uport; + else if (is_cxl_root(parent)) + lock_dev = parent->uport; + else + lock_dev = &parent->dev; + + device_lock_assert(lock_dev); + port->uport = NULL; + device_unregister(&port->dev); +} + +static void cxl_unlink_uport(void *_port) +{ + struct cxl_port *port = _port; + + sysfs_remove_link(&port->dev.kobj, "uport"); +} + +static int devm_cxl_link_uport(struct device *host, struct cxl_port *port) +{ + int rc; + + rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport"); + if (rc) + return rc; + return devm_add_action_or_reset(host, cxl_unlink_uport, port); +} + +static struct cxl_port *cxl_port_alloc(struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = kzalloc(sizeof(*port), GFP_KERNEL); + if (!port) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&cxl_port_ida, GFP_KERNEL); + if (rc < 0) + goto err; + port->id = rc; + + /* + * The top-level cxl_port "cxl_root" does not have a cxl_port as + * its parent and it does not have any corresponding component + * registers as its decode is described by a fixed platform + * description. 
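+ * In that case @parent_port is NULL, the port's device parent below
+ * falls back to @uport itself, and is_cxl_root() can later identify
+ * the root by checking port->uport == port->dev.parent.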
+ */ + dev = &port->dev; + if (parent_port) + dev->parent = &parent_port->dev; + else + dev->parent = uport; + + port->uport = uport; + port->component_reg_phys = component_reg_phys; + ida_init(&port->decoder_ida); + INIT_LIST_HEAD(&port->dports); + INIT_LIST_HEAD(&port->endpoints); + + device_initialize(dev); + device_set_pm_not_required(dev); + dev->bus = &cxl_bus_type; + dev->type = &cxl_port_type; + + return port; + +err: + kfree(port); + return ERR_PTR(rc); +} + +/** + * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy + * @host: host device for devm operations + * @uport: "physical" device implementing this upstream port + * @component_reg_phys: (optional) for configurable cxl_port instances + * @parent_port: next hop up in the CXL memory decode hierarchy + */ +struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, + resource_size_t component_reg_phys, + struct cxl_port *parent_port) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + port = cxl_port_alloc(uport, component_reg_phys, parent_port); + if (IS_ERR(port)) + return port; + + if (parent_port) + port->depth = parent_port->depth + 1; + dev = &port->dev; + if (is_cxl_memdev(uport)) + rc = dev_set_name(dev, "endpoint%d", port->id); + else if (parent_port) + rc = dev_set_name(dev, "port%d", port->id); + else + rc = dev_set_name(dev, "root%d", port->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + rc = devm_add_action_or_reset(host, unregister_port, port); + if (rc) + return ERR_PTR(rc); + + rc = devm_cxl_link_uport(host, port); + if (rc) + return ERR_PTR(rc); + + return port; + +err: + put_device(dev); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_port, CXL); + +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port) +{ + /* There is no pci_bus associated with a CXL platform-root port */ + if (is_cxl_root(port)) + return NULL; + + if (dev_is_pci(port->uport)) { + struct pci_dev *pdev = to_pci_dev(port->uport); + + return pdev->subordinate; + } + + return xa_load(&cxl_root_buses, (unsigned long)port->uport); +} +EXPORT_SYMBOL_NS_GPL(cxl_port_to_pci_bus, CXL); + +static void unregister_pci_bus(void *uport) +{ + xa_erase(&cxl_root_buses, (unsigned long)uport); +} + +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus) +{ + int rc; + + if (dev_is_pci(uport)) + return -EINVAL; + + rc = xa_insert(&cxl_root_buses, (unsigned long)uport, bus, GFP_KERNEL); + if (rc) + return rc; + return devm_add_action_or_reset(host, unregister_pci_bus, uport); +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_register_pci_bus, CXL); + +static bool dev_is_cxl_root_child(struct device *dev) +{ + struct cxl_port *port, *parent; + + if (!is_cxl_port(dev)) + return false; + + port = to_cxl_port(dev); + if (is_cxl_root(port)) + return false; + + parent = to_cxl_port(port->dev.parent); + if (is_cxl_root(parent)) + return true; + + return false; +} + +/* Find a 2nd level CXL port that has a dport that is an ancestor of @match */ +static int match_root_child(struct device *dev, const void *match) +{ + const struct device *iter = NULL; + struct cxl_dport *dport; + struct cxl_port *port; + + if (!dev_is_cxl_root_child(dev)) + return 0; + + port = to_cxl_port(dev); + cxl_device_lock(dev); + list_for_each_entry(dport, &port->dports, list) { + iter = match; + while (iter) { + if (iter == dport->dport) + goto out; + iter = iter->parent; + } + } +out: + cxl_device_unlock(dev); + + return !!iter; +} + +struct cxl_port *find_cxl_root(struct 
device *dev) +{ + struct device *port_dev; + struct cxl_port *root; + + port_dev = bus_find_device(&cxl_bus_type, NULL, dev, match_root_child); + if (!port_dev) + return NULL; + + root = to_cxl_port(port_dev->parent); + get_device(&root->dev); + put_device(port_dev); + return root; +} +EXPORT_SYMBOL_NS_GPL(find_cxl_root, CXL); + +static struct cxl_dport *find_dport(struct cxl_port *port, int id) +{ + struct cxl_dport *dport; + + device_lock_assert(&port->dev); + list_for_each_entry (dport, &port->dports, list) + if (dport->port_id == id) + return dport; + return NULL; +} + +static int add_dport(struct cxl_port *port, struct cxl_dport *new) +{ + struct cxl_dport *dup; + + device_lock_assert(&port->dev); + dup = find_dport(port, new->port_id); + if (dup) + dev_err(&port->dev, + "unable to add dport%d-%s non-unique port id (%s)\n", + new->port_id, dev_name(new->dport), + dev_name(dup->dport)); + else + list_add_tail(&new->list, &port->dports); + + return dup ? -EEXIST : 0; +} + +/* + * Since root-level CXL dports cannot be enumerated by PCI they are not + * enumerated by the common port driver that acquires the port lock over + * dport add/remove. Instead, root dports are manually added by a + * platform driver and cond_cxl_root_lock() is used to take the missing + * port lock in that case. + */ +static void cond_cxl_root_lock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_lock(&port->dev); +} + +static void cond_cxl_root_unlock(struct cxl_port *port) +{ + if (is_cxl_root(port)) + cxl_device_unlock(&port->dev); +} + +static void cxl_dport_remove(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + + put_device(dport->dport); + cond_cxl_root_lock(port); + list_del(&dport->list); + cond_cxl_root_unlock(port); +} + +static void cxl_dport_unlink(void *data) +{ + struct cxl_dport *dport = data; + struct cxl_port *port = dport->port; + char link_name[CXL_TARGET_STRLEN]; + + sprintf(link_name, "dport%d", dport->port_id); + sysfs_remove_link(&port->dev.kobj, link_name); +} + +/** + * devm_cxl_add_dport - append downstream port data to a cxl_port + * @port: the cxl_port that references this dport + * @dport_dev: firmware or PCI device representing the dport + * @port_id: identifier for this dport in a decoder's target list + * @component_reg_phys: optional location of CXL component registers + * + * Note that dports are appended to the devm release action's of the + * either the port's host (for root ports), or the port itself (for + * switch ports) + */ +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, + struct device *dport_dev, int port_id, + resource_size_t component_reg_phys) +{ + char link_name[CXL_TARGET_STRLEN]; + struct cxl_dport *dport; + struct device *host; + int rc; + + if (is_cxl_root(port)) + host = port->uport; + else + host = &port->dev; + + if (!host->driver) { + dev_WARN_ONCE(&port->dev, 1, "dport:%s bad devm context\n", + dev_name(dport_dev)); + return ERR_PTR(-ENXIO); + } + + if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >= + CXL_TARGET_STRLEN) + return ERR_PTR(-EINVAL); + + dport = devm_kzalloc(host, sizeof(*dport), GFP_KERNEL); + if (!dport) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dport->list); + dport->dport = dport_dev; + dport->port_id = port_id; + dport->component_reg_phys = component_reg_phys; + dport->port = port; + + cond_cxl_root_lock(port); + rc = add_dport(port, dport); + cond_cxl_root_unlock(port); + if (rc) + return ERR_PTR(rc); + + get_device(dport_dev); + rc = 
devm_add_action_or_reset(host, cxl_dport_remove, dport); + if (rc) + return ERR_PTR(rc); + + rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name); + if (rc) + return ERR_PTR(rc); + + rc = devm_add_action_or_reset(host, cxl_dport_unlink, dport); + if (rc) + return ERR_PTR(rc); + + return dport; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_add_dport, CXL); + +static struct cxl_ep *find_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + + device_lock_assert(&port->dev); + list_for_each_entry(ep, &port->endpoints, list) + if (ep->ep == ep_dev) + return ep; + return NULL; +} + +static int add_ep(struct cxl_port *port, struct cxl_ep *new) +{ + struct cxl_ep *dup; + + cxl_device_lock(&port->dev); + if (port->dead) { + cxl_device_unlock(&port->dev); + return -ENXIO; + } + dup = find_ep(port, new->ep); + if (!dup) + list_add_tail(&new->list, &port->endpoints); + cxl_device_unlock(&port->dev); + + return dup ? -EEXIST : 0; +} + +/** + * cxl_add_ep - register an endpoint's interest in a port + * @port: a port in the endpoint's topology ancestry + * @ep_dev: device representing the endpoint + * + * Intermediate CXL ports are scanned based on the arrival of endpoints. + * When those endpoints depart the port can be destroyed once all + * endpoints that care about that port have been removed. + */ +static int cxl_add_ep(struct cxl_port *port, struct device *ep_dev) +{ + struct cxl_ep *ep; + int rc; + + ep = kzalloc(sizeof(*ep), GFP_KERNEL); + if (!ep) + return -ENOMEM; + + INIT_LIST_HEAD(&ep->list); + ep->ep = get_device(ep_dev); + + rc = add_ep(port, ep); + if (rc) + cxl_ep_release(ep); + return rc; +} + +struct cxl_find_port_ctx { + const struct device *dport_dev; + const struct cxl_port *parent_port; +}; + +static int match_port_by_dport(struct device *dev, const void *data) +{ + const struct cxl_find_port_ctx *ctx = data; + struct cxl_port *port; + + if (!is_cxl_port(dev)) + return 0; + if (ctx->parent_port && dev->parent != &ctx->parent_port->dev) + return 0; + + port = to_cxl_port(dev); + return cxl_find_dport_by_dev(port, ctx->dport_dev) != NULL; +} + +static struct cxl_port *__find_cxl_port(struct cxl_find_port_ctx *ctx) +{ + struct device *dev; + + if (!ctx->dport_dev) + return NULL; + + dev = bus_find_device(&cxl_bus_type, NULL, ctx, match_port_by_dport); + if (dev) + return to_cxl_port(dev); + return NULL; +} + +static struct cxl_port *find_cxl_port(struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + }; + + return __find_cxl_port(&ctx); +} + +static struct cxl_port *find_cxl_port_at(struct cxl_port *parent_port, + struct device *dport_dev) +{ + struct cxl_find_port_ctx ctx = { + .dport_dev = dport_dev, + .parent_port = parent_port, + }; + + return __find_cxl_port(&ctx); +} + +/* + * All users of grandparent() are using it to walk PCIe-like swich port + * hierarchy. A PCIe switch is comprised of a bridge device representing the + * upstream switch port and N bridges representing downstream switch ports. When + * bridges stack the grand-parent of a downstream switch port is another + * downstream switch port in the immediate ancestor switch. 
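+ *
+ * As a rough sketch of an assumed, typical hierarchy (names are
+ * illustrative; for a cxl_pci-attached memdev the parent is the PCIe
+ * endpoint function):
+ *
+ *	memN (cxl_memdev)
+ *	  parent: PCIe endpoint function hosting the memdev
+ *	    grandparent: Downstream Switch Port / Root Port bridge
+ *
+ * so each grandparent() hop in the walks below advances one dport
+ * level toward the CXL root.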
+ */ +static struct device *grandparent(struct device *dev) +{ + if (dev && dev->parent) + return dev->parent->parent; + return NULL; +} + +static void delete_endpoint(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct cxl_port *endpoint = dev_get_drvdata(&cxlmd->dev); + struct cxl_port *parent_port; + struct device *parent; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) + goto out; + parent = &parent_port->dev; + + cxl_device_lock(parent); + if (parent->driver && endpoint->uport) { + devm_release_action(parent, cxl_unlink_uport, endpoint); + devm_release_action(parent, unregister_port, endpoint); + } + cxl_device_unlock(parent); + put_device(parent); +out: + put_device(&endpoint->dev); +} + +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) +{ + struct device *dev = &cxlmd->dev; + + get_device(&endpoint->dev); + dev_set_drvdata(dev, endpoint); + return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_autoremove, CXL); + +/* + * The natural end of life of a non-root 'cxl_port' is when its parent port goes + * through a ->remove() event ("top-down" unregistration). The unnatural trigger + * for a port to be unregistered is when all memdevs beneath that port have gone + * through ->remove(). This "bottom-up" removal selectively removes individual + * child ports manually. This depends on devm_cxl_add_port() to not change is + * devm action registration order. + */ +static void delete_switch_port(struct cxl_port *port, struct list_head *dports) +{ + struct cxl_dport *dport, *_d; + + list_for_each_entry_safe(dport, _d, dports, list) { + devm_release_action(&port->dev, cxl_dport_unlink, dport); + devm_release_action(&port->dev, cxl_dport_remove, dport); + devm_kfree(&port->dev, dport); + } + devm_release_action(port->dev.parent, cxl_unlink_uport, port); + devm_release_action(port->dev.parent, unregister_port, port); +} + +static void cxl_detach_ep(void *data) +{ + struct cxl_memdev *cxlmd = data; + struct device *iter; + + for (iter = &cxlmd->dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct cxl_port *port, *parent_port; + LIST_HEAD(reap_dports); + struct cxl_ep *ep; + + if (!dport_dev) + break; + + port = find_cxl_port(dport_dev); + if (!port) + continue; + + if (is_cxl_root(port)) { + put_device(&port->dev); + continue; + } + + parent_port = to_cxl_port(port->dev.parent); + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + /* + * The bottom-up race to delete the port lost to a + * top-down port disable, give up here, because the + * parent_port ->remove() will have cleaned up all + * descendants. + */ + cxl_device_unlock(&parent_port->dev); + put_device(&port->dev); + continue; + } + + cxl_device_lock(&port->dev); + ep = find_ep(port, &cxlmd->dev); + dev_dbg(&cxlmd->dev, "disconnect %s from %s\n", + ep ? dev_name(ep->ep) : "", dev_name(&port->dev)); + cxl_ep_release(ep); + if (ep && !port->dead && list_empty(&port->endpoints) && + !is_cxl_root(parent_port)) { + /* + * This was the last ep attached to a dynamically + * enumerated port. Block new cxl_add_ep() and garbage + * collect the port. 
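+ * The dports are spliced onto a local list here so that
+ * delete_switch_port() can unregister them, and then the port
+ * itself, after the port lock is dropped below.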
+ */ + port->dead = true; + list_splice_init(&port->dports, &reap_dports); + } + cxl_device_unlock(&port->dev); + + if (!list_empty(&reap_dports)) { + dev_dbg(&cxlmd->dev, "delete %s\n", + dev_name(&port->dev)); + delete_switch_port(port, &reap_dports); + } + put_device(&port->dev); + cxl_device_unlock(&parent_port->dev); + } +} + +static resource_size_t find_component_registers(struct device *dev) +{ + struct cxl_register_map map; + struct pci_dev *pdev; + + /* + * Theoretically, CXL component registers can be hosted on a + * non-PCI device, in practice, only cxl_test hits this case. + */ + if (!dev_is_pci(dev)) + return CXL_RESOURCE_NONE; + + pdev = to_pci_dev(dev); + + cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + return cxl_regmap_to_base(pdev, &map); +} + +static int add_port_attach_ep(struct cxl_memdev *cxlmd, + struct device *uport_dev, + struct device *dport_dev) +{ + struct device *dparent = grandparent(dport_dev); + struct cxl_port *port, *parent_port = NULL; + resource_size_t component_reg_phys; + int rc; + + if (!dparent) { + /* + * The iteration reached the topology root without finding the + * CXL-root 'cxl_port' on a previous iteration, fail for now to + * be re-probed after platform driver attaches. + */ + dev_dbg(&cxlmd->dev, "%s is a root dport\n", + dev_name(dport_dev)); + return -ENXIO; + } + + parent_port = find_cxl_port(dparent); + if (!parent_port) { + /* iterate to create this parent_port */ + return -EAGAIN; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_warn(&cxlmd->dev, + "port %s:%s disabled, failed to enumerate CXL.mem\n", + dev_name(&parent_port->dev), dev_name(uport_dev)); + port = ERR_PTR(-ENXIO); + goto out; + } + + port = find_cxl_port_at(parent_port, dport_dev); + if (!port) { + component_reg_phys = find_component_registers(uport_dev); + port = devm_cxl_add_port(&parent_port->dev, uport_dev, + component_reg_phys, parent_port); + if (!IS_ERR(port)) + get_device(&port->dev); + } +out: + cxl_device_unlock(&parent_port->dev); + + if (IS_ERR(port)) + rc = PTR_ERR(port); + else { + dev_dbg(&cxlmd->dev, "add to new port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + if (rc == -EEXIST) { + /* + * "can't" happen, but this error code means + * something to the caller, so translate it. + */ + rc = -ENXIO; + } + put_device(&port->dev); + } + + put_device(&parent_port->dev); + return rc; +} + +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd) +{ + struct device *dev = &cxlmd->dev; + struct device *iter; + int rc; + + rc = devm_add_action_or_reset(&cxlmd->dev, cxl_detach_ep, cxlmd); + if (rc) + return rc; + + /* + * Scan for and add all cxl_ports in this device's ancestry. + * Repeat until no more ports are added. Abort if a port add + * attempt fails. 
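+ *
+ * For example, assuming cxl_acpi has already registered the root and
+ * Host Bridge ports, a memdev behind a single switch typically takes
+ * two passes: the first adds the switch port via add_port_attach_ep()
+ * and restarts, the second finds that port already registered and
+ * terminates once it reaches a port whose parent is the root.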
+ */ +retry: + for (iter = dev; iter; iter = grandparent(iter)) { + struct device *dport_dev = grandparent(iter); + struct device *uport_dev; + struct cxl_port *port; + + if (!dport_dev) + return 0; + + uport_dev = dport_dev->parent; + if (!uport_dev) { + dev_warn(dev, "at %s no parent for dport: %s\n", + dev_name(iter), dev_name(dport_dev)); + return -ENXIO; + } + + dev_dbg(dev, "scan: iter: %s dport_dev: %s parent: %s\n", + dev_name(iter), dev_name(dport_dev), + dev_name(uport_dev)); + port = find_cxl_port(dport_dev); + if (port) { + dev_dbg(&cxlmd->dev, + "found already registered port %s:%s\n", + dev_name(&port->dev), dev_name(port->uport)); + rc = cxl_add_ep(port, &cxlmd->dev); + + /* + * If the endpoint already exists in the port's list, + * that's ok, it was added on a previous pass. + * Otherwise, retry in add_port_attach_ep() after taking + * the parent_port lock as the current port may be being + * reaped. + */ + if (rc && rc != -EEXIST) { + put_device(&port->dev); + return rc; + } + + /* Any more ports to add between this one and the root? */ + if (!dev_is_cxl_root_child(&port->dev)) { + put_device(&port->dev); + continue; + } + + put_device(&port->dev); + return 0; + } + + rc = add_port_attach_ep(cxlmd, uport_dev, dport_dev); + /* port missing, try to add parent */ + if (rc == -EAGAIN) + continue; + /* failed to add ep or port */ + if (rc) + return rc; + /* port added, new descendants possible, start over */ + goto retry; + } + + return 0; +} +EXPORT_SYMBOL_NS_GPL(devm_cxl_enumerate_ports, CXL); + +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd) +{ + return find_cxl_port(grandparent(&cxlmd->dev)); +} +EXPORT_SYMBOL_NS_GPL(cxl_mem_find_port, CXL); + +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev) +{ + struct cxl_dport *dport; + + cxl_device_lock(&port->dev); + list_for_each_entry(dport, &port->dports, list) + if (dport->dport == dev) { + cxl_device_unlock(&port->dev); + return dport; + } + + cxl_device_unlock(&port->dev); + return NULL; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_dport_by_dev, CXL); + +static int decoder_populate_targets(struct cxl_decoder *cxld, + struct cxl_port *port, int *target_map) +{ + int i, rc = 0; + + if (!target_map) + return 0; + + device_lock_assert(&port->dev); + + if (list_empty(&port->dports)) + return -EINVAL; + + write_seqlock(&cxld->target_lock); + for (i = 0; i < cxld->nr_targets; i++) { + struct cxl_dport *dport = find_dport(port, target_map[i]); + + if (!dport) { + rc = -ENXIO; + break; + } + cxld->target[i] = dport; + } + write_sequnlock(&cxld->target_lock); + + return rc; +} + +/** + * cxl_decoder_alloc - Allocate a new CXL decoder + * @port: owning port of this decoder + * @nr_targets: downstream targets accessible by this decoder. All upstream + * ports and root ports must have at least 1 target. Endpoint + * devices will have 0 targets. Callers wishing to register an + * endpoint device should specify 0. + * + * A port should contain one or more decoders. Each of those decoders enable + * some address space for CXL.mem utilization. A decoder is expected to be + * configured by the caller before registering. + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). The decoder + * is initialized to be a "passthrough" decoder. 
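+ *
+ * Callers of the public wrappers are expected to follow a pattern
+ * roughly like this sketch (error handling elided, the configuration
+ * step depends on the decoder type):
+ *
+ *	cxld = cxl_switch_decoder_alloc(port, nr_targets);
+ *	... program interleave_ways, target_type, flags ...
+ *	rc = cxl_decoder_add(cxld, target_map);
+ *	if (rc)
+ *		put_device(&cxld->dev);
+ *	else
+ *		rc = cxl_decoder_autoremove(host, cxld);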
+ */ +static struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + struct cxl_decoder *cxld; + struct device *dev; + int rc = 0; + + if (nr_targets > CXL_DECODER_MAX_INTERLEAVE) + return ERR_PTR(-EINVAL); + + cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL); + if (!cxld) + return ERR_PTR(-ENOMEM); + + rc = ida_alloc(&port->decoder_ida, GFP_KERNEL); + if (rc < 0) + goto err; + + /* need parent to stick around to release the id */ + get_device(&port->dev); + cxld->id = rc; + + cxld->nr_targets = nr_targets; + seqlock_init(&cxld->target_lock); + dev = &cxld->dev; + device_initialize(dev); + device_set_pm_not_required(dev); + dev->parent = &port->dev; + dev->bus = &cxl_bus_type; + if (is_cxl_root(port)) + cxld->dev.type = &cxl_decoder_root_type; + else if (is_cxl_endpoint(port)) + cxld->dev.type = &cxl_decoder_endpoint_type; + else + cxld->dev.type = &cxl_decoder_switch_type; + + /* Pre initialize an "empty" decoder */ + cxld->interleave_ways = 1; + cxld->interleave_granularity = PAGE_SIZE; + cxld->target_type = CXL_DECODER_EXPANDER; + cxld->platform_res = (struct resource)DEFINE_RES_MEM(0, 0); + + return cxld; +err: + kfree(cxld); + return ERR_PTR(rc); +} + +/** + * cxl_root_decoder_alloc - Allocate a root level decoder + * @port: owning CXL root of this decoder + * @nr_targets: static number of downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'CXL root' decoder is one that decodes from a top-level / static platform + * firmware description of CXL resources into a CXL standard decode + * topology. + */ +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (!is_cxl_root(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_root_decoder_alloc, CXL); + +/** + * cxl_switch_decoder_alloc - Allocate a switch level decoder + * @port: owning CXL switch port of this decoder + * @nr_targets: max number of dynamically addressable downstream targets + * + * Return: A new cxl decoder to be registered by cxl_decoder_add(). A + * 'switch' decoder is any decoder that can be enumerated by PCIe + * topology and the HDM Decoder Capability. This includes the decoders + * that sit between Switch Upstream Ports / Switch Downstream Ports and + * Host Bridges / Root Ports. + */ +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets) +{ + if (is_cxl_root(port) || is_cxl_endpoint(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, nr_targets); +} +EXPORT_SYMBOL_NS_GPL(cxl_switch_decoder_alloc, CXL); + +/** + * cxl_endpoint_decoder_alloc - Allocate an endpoint decoder + * @port: owning port of this decoder + * + * Return: A new cxl decoder to be registered by cxl_decoder_add() + */ +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port) +{ + if (!is_cxl_endpoint(port)) + return ERR_PTR(-EINVAL); + + return cxl_decoder_alloc(port, 0); +} +EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_alloc, CXL); + +/** + * cxl_decoder_add_locked - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * Certain types of decoders may not have any targets. The main example of this + * is an endpoint device. 
A more awkward example is a hostbridge whose root + * ports get hot added (technically possible, though unlikely). + * + * This is the locked variant of cxl_decoder_add(). + * + * Context: Process context. Expects the device lock of the port that owns the + * @cxld to be held. + * + * Return: Negative error code if the decoder wasn't properly configured; else + * returns 0. + */ +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map) +{ + struct cxl_port *port; + struct device *dev; + int rc; + + if (WARN_ON_ONCE(!cxld)) + return -EINVAL; + + if (WARN_ON_ONCE(IS_ERR(cxld))) + return PTR_ERR(cxld); + + if (cxld->interleave_ways < 1) + return -EINVAL; + + dev = &cxld->dev; + + port = to_cxl_port(cxld->dev.parent); + if (!is_endpoint_decoder(dev)) { + rc = decoder_populate_targets(cxld, port, target_map); + if (rc && (cxld->flags & CXL_DECODER_F_ENABLE)) { + dev_err(&port->dev, + "Failed to populate active decoder targets\n"); + return rc; + } + } + + rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id); + if (rc) + return rc; + + /* + * Platform decoder resources should show up with a reasonable name. All + * other resources are just sub ranges within the main decoder resource. + */ + if (is_root_decoder(dev)) + cxld->platform_res.name = dev_name(dev); + + return device_add(dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_add_locked, CXL); + +/** + * cxl_decoder_add - Add a decoder with targets + * @cxld: The cxl decoder allocated by cxl_decoder_alloc() + * @target_map: A list of downstream ports that this decoder can direct memory + * traffic to. These numbers should correspond with the port number + * in the PCIe Link Capabilities structure. + * + * This is the unlocked variant of cxl_decoder_add_locked(). + * See cxl_decoder_add_locked(). + * + * Context: Process context. Takes and releases the device lock of the port that + * owns the @cxld. 
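+ *
+ * Return: Negative error code if the decoder wasn't properly configured;
+ * else returns 0.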
+ */ +int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map) +{ + struct cxl_port *port; + int rc; + + if (WARN_ON_ONCE(!cxld)) + return -EINVAL; + + if (WARN_ON_ONCE(IS_ERR(cxld))) + return PTR_ERR(cxld); + + port = to_cxl_port(cxld->dev.parent); + + cxl_device_lock(&port->dev); + rc = cxl_decoder_add_locked(cxld, target_map); + cxl_device_unlock(&port->dev); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_add, CXL); + +static void cxld_unregister(void *dev) +{ + device_unregister(dev); +} + +int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld) +{ + return devm_add_action_or_reset(host, cxld_unregister, &cxld->dev); +} +EXPORT_SYMBOL_NS_GPL(cxl_decoder_autoremove, CXL); + +/** + * __cxl_driver_register - register a driver for the cxl bus + * @cxl_drv: cxl driver structure to attach + * @owner: owning module/driver + * @modname: KBUILD_MODNAME for parent driver + */ +int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, + const char *modname) +{ + if (!cxl_drv->probe) { + pr_debug("%s ->probe() must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->name) { + pr_debug("%s ->name must be specified\n", modname); + return -EINVAL; + } + + if (!cxl_drv->id) { + pr_debug("%s ->id must be specified\n", modname); + return -EINVAL; + } + + cxl_drv->drv.bus = &cxl_bus_type; + cxl_drv->drv.owner = owner; + cxl_drv->drv.mod_name = modname; + cxl_drv->drv.name = cxl_drv->name; + + return driver_register(&cxl_drv->drv); +} +EXPORT_SYMBOL_NS_GPL(__cxl_driver_register, CXL); + +void cxl_driver_unregister(struct cxl_driver *cxl_drv) +{ + driver_unregister(&cxl_drv->drv); +} +EXPORT_SYMBOL_NS_GPL(cxl_driver_unregister, CXL); + +static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env) +{ + return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT, + cxl_device_id(dev)); +} + +static int cxl_bus_match(struct device *dev, struct device_driver *drv) +{ + return cxl_device_id(dev) == to_cxl_drv(drv)->id; +} + +static int cxl_bus_probe(struct device *dev) +{ + int rc; + + /* + * Take the CXL nested lock since the driver core only holds + * @dev->mutex and not @dev->lockdep_mutex. 
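+ * (With CONFIG_PROVE_CXL_LOCKING enabled, cxl_nested_lock() selects a
+ * lockdep class by device type, and cxl_port objects are further keyed
+ * by their depth in the topology.)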
+ */ + cxl_nested_lock(dev); + rc = to_cxl_drv(dev->driver)->probe(dev); + cxl_nested_unlock(dev); + + dev_dbg(dev, "probe: %d\n", rc); + return rc; +} + +static void cxl_bus_remove(struct device *dev) +{ + struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver); + + cxl_nested_lock(dev); + if (cxl_drv->remove) + cxl_drv->remove(dev); + cxl_nested_unlock(dev); +} + +static struct workqueue_struct *cxl_bus_wq; + +int cxl_bus_rescan(void) +{ + return bus_rescan_devices(&cxl_bus_type); +} +EXPORT_SYMBOL_NS_GPL(cxl_bus_rescan, CXL); + +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) +{ + return queue_work(cxl_bus_wq, &cxlmd->detach_work); +} +EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); + +/* for user tooling to ensure port disable work has completed */ +static ssize_t flush_store(struct bus_type *bus, const char *buf, size_t count) +{ + if (sysfs_streq(buf, "1")) { + flush_workqueue(cxl_bus_wq); + return count; + } + + return -EINVAL; +} + +static BUS_ATTR_WO(flush); + +static struct attribute *cxl_bus_attributes[] = { + &bus_attr_flush.attr, + NULL, +}; + +static struct attribute_group cxl_bus_attribute_group = { + .attrs = cxl_bus_attributes, +}; + +static const struct attribute_group *cxl_bus_attribute_groups[] = { + &cxl_bus_attribute_group, + NULL, +}; + +struct bus_type cxl_bus_type = { + .name = "cxl", + .uevent = cxl_bus_uevent, + .match = cxl_bus_match, + .probe = cxl_bus_probe, + .remove = cxl_bus_remove, + .bus_groups = cxl_bus_attribute_groups, +}; +EXPORT_SYMBOL_NS_GPL(cxl_bus_type, CXL); + +static __init int cxl_core_init(void) +{ + int rc; + + cxl_mbox_init(); + + rc = cxl_memdev_init(); + if (rc) + return rc; + + cxl_bus_wq = alloc_ordered_workqueue("cxl_port", 0); + if (!cxl_bus_wq) { + rc = -ENOMEM; + goto err_wq; + } + + rc = bus_register(&cxl_bus_type); + if (rc) + goto err_bus; + + return 0; + +err_bus: + destroy_workqueue(cxl_bus_wq); +err_wq: + cxl_memdev_exit(); + cxl_mbox_exit(); + return rc; +} + +static void cxl_core_exit(void) +{ + bus_unregister(&cxl_bus_type); + destroy_workqueue(cxl_bus_wq); + cxl_memdev_exit(); + cxl_mbox_exit(); +} + +module_init(cxl_core_init); +module_exit(cxl_core_exit); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c index e37e23bf4355..39a129c57d40 100644 --- a/drivers/cxl/core/regs.c +++ b/drivers/cxl/core/regs.c @@ -5,6 +5,7 @@ #include #include #include +#include /** * DOC: cxl registers @@ -35,7 +36,7 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, struct cxl_component_reg_map *map) { int cap, cap_count; - u64 cap_array; + u32 cap_array; *map = (struct cxl_component_reg_map) { 0 }; @@ -45,11 +46,11 @@ void cxl_probe_component_regs(struct device *dev, void __iomem *base, */ base += CXL_CM_OFFSET; - cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET); + cap_array = readl(base + CXL_CM_CAP_HDR_OFFSET); if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) { dev_err(dev, - "Couldn't locate the CXL.cache and CXL.mem capability array header./n"); + "Couldn't locate the CXL.cache and CXL.mem capability array header.\n"); return; } @@ -158,9 +159,8 @@ void cxl_probe_device_regs(struct device *dev, void __iomem *base, } EXPORT_SYMBOL_NS_GPL(cxl_probe_device_regs, CXL); -static void __iomem *devm_cxl_iomap_block(struct device *dev, - resource_size_t addr, - resource_size_t length) +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length) { void __iomem *ret_val; struct resource *res; @@ -247,3 +247,58 @@ 
int cxl_map_device_regs(struct pci_dev *pdev, return 0; } EXPORT_SYMBOL_NS_GPL(cxl_map_device_regs, CXL); + +static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, + struct cxl_register_map *map) +{ + map->block_offset = ((u64)reg_hi << 32) | + (reg_lo & CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK); + map->barno = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BIR_MASK, reg_lo); + map->reg_type = FIELD_GET(CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK, reg_lo); +} + +/** + * cxl_find_regblock() - Locate register blocks by type + * @pdev: The CXL PCI device to enumerate. + * @type: Register Block Indicator id + * @map: Enumeration output, clobbered on error + * + * Return: 0 if register block enumerated, negative error code otherwise + * + * A CXL DVSEC may point to one or more register blocks, search for them + * by @type. + */ +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) +{ + u32 regloc_size, regblocks; + int regloc, i; + + map->block_offset = U64_MAX; + regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, + CXL_DVSEC_REG_LOCATOR); + if (!regloc) + return -ENXIO; + + pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); + regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + + regloc += CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET; + regblocks = (regloc_size - CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET) / 8; + + for (i = 0; i < regblocks; i++, regloc += 8) { + u32 reg_lo, reg_hi; + + pci_read_config_dword(pdev, regloc, ®_lo); + pci_read_config_dword(pdev, regloc + 4, ®_hi); + + cxl_decode_regblock(reg_lo, reg_hi, map); + + if (map->reg_type == type) + return 0; + } + + map->block_offset = U64_MAX; + return -ENODEV; +} +EXPORT_SYMBOL_NS_GPL(cxl_find_regblock, CXL); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index a5a0be3f088b..990b6670222e 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -17,6 +17,9 @@ * (port-driver, region-driver, nvdimm object-drivers... etc). 
*/ +/* CXL 2.0 8.2.4 CXL Component Register Layout and Definition */ +#define CXL_COMPONENT_REG_BLOCK_SIZE SZ_64K + /* CXL 2.0 8.2.5 CXL.cache and CXL.mem Registers*/ #define CXL_CM_OFFSET 0x1000 #define CXL_CM_CAP_HDR_OFFSET 0x0 @@ -36,11 +39,23 @@ #define CXL_HDM_DECODER_CAP_OFFSET 0x0 #define CXL_HDM_DECODER_COUNT_MASK GENMASK(3, 0) #define CXL_HDM_DECODER_TARGET_COUNT_MASK GENMASK(7, 4) -#define CXL_HDM_DECODER0_BASE_LOW_OFFSET 0x10 -#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET 0x14 -#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET 0x18 -#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET 0x1c -#define CXL_HDM_DECODER0_CTRL_OFFSET 0x20 +#define CXL_HDM_DECODER_INTERLEAVE_11_8 BIT(8) +#define CXL_HDM_DECODER_INTERLEAVE_14_12 BIT(9) +#define CXL_HDM_DECODER_CTRL_OFFSET 0x4 +#define CXL_HDM_DECODER_ENABLE BIT(1) +#define CXL_HDM_DECODER0_BASE_LOW_OFFSET(i) (0x20 * (i) + 0x10) +#define CXL_HDM_DECODER0_BASE_HIGH_OFFSET(i) (0x20 * (i) + 0x14) +#define CXL_HDM_DECODER0_SIZE_LOW_OFFSET(i) (0x20 * (i) + 0x18) +#define CXL_HDM_DECODER0_SIZE_HIGH_OFFSET(i) (0x20 * (i) + 0x1c) +#define CXL_HDM_DECODER0_CTRL_OFFSET(i) (0x20 * (i) + 0x20) +#define CXL_HDM_DECODER0_CTRL_IG_MASK GENMASK(3, 0) +#define CXL_HDM_DECODER0_CTRL_IW_MASK GENMASK(7, 4) +#define CXL_HDM_DECODER0_CTRL_LOCK BIT(8) +#define CXL_HDM_DECODER0_CTRL_COMMIT BIT(9) +#define CXL_HDM_DECODER0_CTRL_COMMITTED BIT(10) +#define CXL_HDM_DECODER0_CTRL_TYPE BIT(12) +#define CXL_HDM_DECODER0_TL_LOW(i) (0x20 * (i) + 0x24) +#define CXL_HDM_DECODER0_TL_HIGH(i) (0x20 * (i) + 0x28) static inline int cxl_hdm_decoder_count(u32 cap_hdr) { @@ -145,6 +160,12 @@ int cxl_map_device_regs(struct pci_dev *pdev, struct cxl_device_regs *regs, struct cxl_register_map *map); +enum cxl_regloc_type; +int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map); +void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr, + resource_size_t length); + #define CXL_RESOURCE_NONE ((resource_size_t) -1) #define CXL_TARGET_STRLEN 20 @@ -158,7 +179,8 @@ int cxl_map_device_regs(struct pci_dev *pdev, #define CXL_DECODER_F_TYPE2 BIT(2) #define CXL_DECODER_F_TYPE3 BIT(3) #define CXL_DECODER_F_LOCK BIT(4) -#define CXL_DECODER_F_MASK GENMASK(4, 0) +#define CXL_DECODER_F_ENABLE BIT(5) +#define CXL_DECODER_F_MASK GENMASK(5, 0) enum cxl_decoder_type { CXL_DECODER_ACCELERATOR = 2, @@ -175,22 +197,28 @@ enum cxl_decoder_type { * struct cxl_decoder - CXL address range decode configuration * @dev: this decoder's device * @id: kernel device name id - * @range: address range considered by this decoder + * @platform_res: address space resources considered by root decoder + * @decoder_range: address space resources considered by midlevel decoder * @interleave_ways: number of cxl_dports in this decode * @interleave_granularity: data stride per dport * @target_type: accelerator vs expander (type2 vs type3) selector * @flags: memory type capabilities and locking + * @target_lock: coordinate coherent reads of the target list * @nr_targets: number of elements in @target * @target: active ordered target list in current decoder configuration */ struct cxl_decoder { struct device dev; int id; - struct range range; + union { + struct resource platform_res; + struct range decoder_range; + }; int interleave_ways; int interleave_granularity; enum cxl_decoder_type target_type; unsigned long flags; + seqlock_t target_lock; int nr_targets; struct cxl_dport *target[]; }; @@ -226,14 +254,6 @@ struct cxl_nvdimm { struct nvdimm *nvdimm; }; -struct cxl_walk_context { - 
struct device *dev; - struct pci_bus *root; - struct cxl_port *port; - int error; - int count; -}; - /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -242,16 +262,22 @@ struct cxl_walk_context { * @uport: PCI or platform device implementing the upstream port capability * @id: id for port device-name * @dports: cxl_dport instances referenced by decoders + * @endpoints: cxl_ep instances, endpoints that are a descendant of this port * @decoder_ida: allocator for decoder ids * @component_reg_phys: component register capability base address (optional) + * @dead: last ep has been removed, force port re-creation + * @depth: How deep this port is relative to the root. depth 0 is the root. */ struct cxl_port { struct device dev; struct device *uport; int id; struct list_head dports; + struct list_head endpoints; struct ida decoder_ida; resource_size_t component_reg_phys; + bool dead; + unsigned int depth; }; /** @@ -270,19 +296,65 @@ struct cxl_dport { struct list_head list; }; +/** + * struct cxl_ep - track an endpoint's interest in a port + * @ep: device that hosts a generic CXL endpoint (expander or accelerator) + * @list: node on port->endpoints list + */ +struct cxl_ep { + struct device *ep; + struct list_head list; +}; + +/* + * The platform firmware device hosting the root is also the top of the + * CXL port topology. All other CXL ports have another CXL port as their + * parent and their ->uport / host device is out-of-line of the port + * ancestry. + */ +static inline bool is_cxl_root(struct cxl_port *port) +{ + return port->uport == port->dev.parent; +} + +bool is_cxl_port(struct device *dev); struct cxl_port *to_cxl_port(struct device *dev); +struct pci_bus; +int devm_cxl_register_pci_bus(struct device *host, struct device *uport, + struct pci_bus *bus); +struct pci_bus *cxl_port_to_pci_bus(struct cxl_port *port); struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport, resource_size_t component_reg_phys, struct cxl_port *parent_port); +struct cxl_port *find_cxl_root(struct device *dev); +int devm_cxl_enumerate_ports(struct cxl_memdev *cxlmd); +int cxl_bus_rescan(void); +struct cxl_port *cxl_mem_find_port(struct cxl_memdev *cxlmd); +bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd); -int cxl_add_dport(struct cxl_port *port, struct device *dport, int port_id, - resource_size_t component_reg_phys); +struct cxl_dport *devm_cxl_add_dport(struct cxl_port *port, + struct device *dport, int port_id, + resource_size_t component_reg_phys); +struct cxl_dport *cxl_find_dport_by_dev(struct cxl_port *port, + const struct device *dev); struct cxl_decoder *to_cxl_decoder(struct device *dev); bool is_root_decoder(struct device *dev); -struct cxl_decoder *cxl_decoder_alloc(struct cxl_port *port, int nr_targets); +bool is_cxl_decoder(struct device *dev); +struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); +struct cxl_decoder *cxl_switch_decoder_alloc(struct cxl_port *port, + unsigned int nr_targets); int cxl_decoder_add(struct cxl_decoder *cxld, int *target_map); +struct cxl_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port); +int cxl_decoder_add_locked(struct cxl_decoder *cxld, int *target_map); int cxl_decoder_autoremove(struct device *host, struct cxl_decoder *cxld); +int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint); + +struct cxl_hdm; +struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port); +int 
devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm); +int devm_cxl_add_passthrough_decoder(struct cxl_port *port); extern struct bus_type cxl_bus_type; @@ -304,8 +376,14 @@ int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner, #define cxl_driver_register(x) __cxl_driver_register(x, THIS_MODULE, KBUILD_MODNAME) void cxl_driver_unregister(struct cxl_driver *cxl_drv); +#define module_cxl_driver(__cxl_driver) \ + module_driver(__cxl_driver, cxl_driver_register, cxl_driver_unregister) + #define CXL_DEVICE_NVDIMM_BRIDGE 1 #define CXL_DEVICE_NVDIMM 2 +#define CXL_DEVICE_PORT 3 +#define CXL_DEVICE_ROOT 4 +#define CXL_DEVICE_MEMORY_EXPANDER 5 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" @@ -326,4 +404,83 @@ struct cxl_nvdimm_bridge *cxl_find_nvdimm_bridge(struct cxl_nvdimm *cxl_nvd); #ifndef __mock #define __mock static #endif + +#ifdef CONFIG_PROVE_CXL_LOCKING +enum cxl_lock_class { + CXL_ANON_LOCK, + CXL_NVDIMM_LOCK, + CXL_NVDIMM_BRIDGE_LOCK, + CXL_PORT_LOCK, + /* + * Be careful to add new lock classes here, CXL_PORT_LOCK is + * extended by the port depth, so a maximum CXL port topology + * depth would need to be defined first. + */ +}; + +static inline void cxl_nested_lock(struct device *dev) +{ + if (is_cxl_port(dev)) { + struct cxl_port *port = to_cxl_port(dev); + + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth); + } else if (is_cxl_decoder(dev)) { + struct cxl_port *port = to_cxl_port(dev->parent); + + /* + * A decoder is the immediate child of a port, so set + * its lock class equal to other child device siblings. + */ + mutex_lock_nested(&dev->lockdep_mutex, + CXL_PORT_LOCK + port->depth + 1); + } else if (is_cxl_nvdimm_bridge(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_BRIDGE_LOCK); + else if (is_cxl_nvdimm(dev)) + mutex_lock_nested(&dev->lockdep_mutex, CXL_NVDIMM_LOCK); + else + mutex_lock_nested(&dev->lockdep_mutex, CXL_ANON_LOCK); +} + +static inline void cxl_nested_unlock(struct device *dev) +{ + mutex_unlock(&dev->lockdep_mutex); +} + +static inline void cxl_device_lock(struct device *dev) +{ + /* + * For double lock errors the lockup will happen before lockdep + * warns at cxl_nested_lock(), so assert explicitly. + */ + lockdep_assert_not_held(&dev->lockdep_mutex); + + device_lock(dev); + cxl_nested_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + cxl_nested_unlock(dev); + device_unlock(dev); +} +#else +static inline void cxl_nested_lock(struct device *dev) +{ +} + +static inline void cxl_nested_unlock(struct device *dev) +{ +} + +static inline void cxl_device_lock(struct device *dev) +{ + device_lock(dev); +} + +static inline void cxl_device_unlock(struct device *dev) +{ + device_unlock(dev); +} +#endif #endif /* __CXL_H__ */ diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index 8d96d009ad90..5d33ce24fe09 100644 --- a/drivers/cxl/cxlmem.h +++ b/drivers/cxl/cxlmem.h @@ -34,12 +34,14 @@ * @dev: driver core device object * @cdev: char dev core object for ioctl operations * @cxlds: The device state backing this device + * @detach_work: active memdev lost a port in its ancestry * @id: id number of this memdev instance. 
*/ struct cxl_memdev { struct device dev; struct cdev cdev; struct cxl_dev_state *cxlds; + struct work_struct detach_work; int id; }; @@ -48,6 +50,12 @@ static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) return container_of(dev, struct cxl_memdev, dev); } +bool is_cxl_memdev(struct device *dev); +static inline bool is_cxl_endpoint(struct cxl_port *port) +{ + return is_cxl_memdev(port->uport); +} + struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds); /** @@ -89,6 +97,18 @@ struct cxl_mbox_cmd { */ #define CXL_CAPACITY_MULTIPLIER SZ_256M +/** + * struct cxl_endpoint_dvsec_info - Cached DVSEC info + * @mem_enabled: cached value of mem_enabled in the DVSEC, PCIE_DEVICE + * @ranges: Number of active HDM ranges this device uses. + * @dvsec_range: cached attributes of the ranges in the DVSEC, PCIE_DEVICE + */ +struct cxl_endpoint_dvsec_info { + bool mem_enabled; + int ranges; + struct range dvsec_range[2]; +}; + /** * struct cxl_dev_state - The driver device state * @@ -98,6 +118,7 @@ struct cxl_mbox_cmd { * * @dev: The device associated with this CXL state * @regs: Parsed register blocks + * @cxl_dvsec: Offset to the PCIe device DVSEC * @payload_size: Size of space for payload * (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register) * @lsa_size: Size of Label Storage Area @@ -116,7 +137,11 @@ struct cxl_mbox_cmd { * @active_persistent_bytes: sum of hard + soft persistent * @next_volatile_bytes: volatile capacity change pending device reset * @next_persistent_bytes: persistent capacity change pending device reset + * @component_reg_phys: register base of component registers + * @info: Cached DVSEC information about the device. + * @serial: PCIe Device Serial Number * @mbox_send: @dev specific transport for transmitting mailbox commands + * @wait_media_ready: @dev specific method to await media ready * * See section 8.2.9.5.2 Capacity Configuration and Label Storage for * details on capacity parameters. @@ -125,6 +150,7 @@ struct cxl_dev_state { struct device *dev; struct cxl_regs regs; + int cxl_dvsec; size_t payload_size; size_t lsa_size; @@ -145,7 +171,12 @@ struct cxl_dev_state { u64 next_volatile_bytes; u64 next_persistent_bytes; + resource_size_t component_reg_phys; + struct cxl_endpoint_dvsec_info info; + u64 serial; + int (*mbox_send)(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd); + int (*wait_media_ready)(struct cxl_dev_state *cxlds); }; enum cxl_opcode { @@ -264,4 +295,12 @@ int cxl_mem_create_range_info(struct cxl_dev_state *cxlds); struct cxl_dev_state *cxl_dev_state_create(struct device *dev); void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds); + +struct cxl_hdm { + struct cxl_component_regs regs; + unsigned int decoder_count; + unsigned int target_count; + unsigned int interleave_mask; + struct cxl_port *port; +}; #endif /* __CXL_MEM_H__ */ diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h new file mode 100644 index 000000000000..329e7ea3f36a --- /dev/null +++ b/drivers/cxl/cxlpci.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2020 Intel Corporation. All rights reserved. */ +#ifndef __CXL_PCI_H__ +#define __CXL_PCI_H__ +#include +#include "cxl.h" + +#define CXL_MEMORY_PROGIF 0x10 + +/* + * See section 8.1 Configuration Space Registers in the CXL 2.0 + * Specification. Names are taken straight from the specification with "CXL" and + * "DVSEC" redundancies removed. 
When obvious, abbreviations may be used. + */ +#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) +#define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 + +/* CXL 2.0 8.1.3: PCIe DVSEC for CXL Device */ +#define CXL_DVSEC_PCIE_DEVICE 0 +#define CXL_DVSEC_CAP_OFFSET 0xA +#define CXL_DVSEC_MEM_CAPABLE BIT(2) +#define CXL_DVSEC_HDM_COUNT_MASK GENMASK(5, 4) +#define CXL_DVSEC_CTRL_OFFSET 0xC +#define CXL_DVSEC_MEM_ENABLE BIT(2) +#define CXL_DVSEC_RANGE_SIZE_HIGH(i) (0x18 + (i * 0x10)) +#define CXL_DVSEC_RANGE_SIZE_LOW(i) (0x1C + (i * 0x10)) +#define CXL_DVSEC_MEM_INFO_VALID BIT(0) +#define CXL_DVSEC_MEM_ACTIVE BIT(1) +#define CXL_DVSEC_MEM_SIZE_LOW_MASK GENMASK(31, 28) +#define CXL_DVSEC_RANGE_BASE_HIGH(i) (0x20 + (i * 0x10)) +#define CXL_DVSEC_RANGE_BASE_LOW(i) (0x24 + (i * 0x10)) +#define CXL_DVSEC_MEM_BASE_LOW_MASK GENMASK(31, 28) + +/* CXL 2.0 8.1.4: Non-CXL Function Map DVSEC */ +#define CXL_DVSEC_FUNCTION_MAP 2 + +/* CXL 2.0 8.1.5: CXL 2.0 Extensions DVSEC for Ports */ +#define CXL_DVSEC_PORT_EXTENSIONS 3 + +/* CXL 2.0 8.1.6: GPF DVSEC for CXL Port */ +#define CXL_DVSEC_PORT_GPF 4 + +/* CXL 2.0 8.1.7: GPF DVSEC for CXL Device */ +#define CXL_DVSEC_DEVICE_GPF 5 + +/* CXL 2.0 8.1.8: PCIe DVSEC for Flex Bus Port */ +#define CXL_DVSEC_PCIE_FLEXBUS_PORT 7 + +/* CXL 2.0 8.1.9: Register Locator DVSEC */ +#define CXL_DVSEC_REG_LOCATOR 8 +#define CXL_DVSEC_REG_LOCATOR_BLOCK1_OFFSET 0xC +#define CXL_DVSEC_REG_LOCATOR_BIR_MASK GENMASK(2, 0) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_ID_MASK GENMASK(15, 8) +#define CXL_DVSEC_REG_LOCATOR_BLOCK_OFF_LOW_MASK GENMASK(31, 16) + +/* Register Block Identifier (RBI) */ +enum cxl_regloc_type { + CXL_REGLOC_RBI_EMPTY = 0, + CXL_REGLOC_RBI_COMPONENT, + CXL_REGLOC_RBI_VIRT, + CXL_REGLOC_RBI_MEMDEV, + CXL_REGLOC_RBI_TYPES +}; + +static inline resource_size_t cxl_regmap_to_base(struct pci_dev *pdev, + struct cxl_register_map *map) +{ + if (map->block_offset == U64_MAX) + return CXL_RESOURCE_NONE; + + return pci_resource_start(pdev, map->barno) + map->block_offset; +} + +int devm_cxl_port_enumerate_dports(struct cxl_port *port); +#endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c new file mode 100644 index 000000000000..49a4b1c47299 --- /dev/null +++ b/drivers/cxl/mem.c @@ -0,0 +1,228 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl mem + * + * CXL memory endpoint devices and switches are CXL capable devices that are + * participating in CXL.mem protocol. Their functionality builds on top of the + * CXL.io protocol that allows enumerating and configuring components via + * standard PCI mechanisms. + * + * The cxl_mem driver owns kicking off the enumeration of this CXL.mem + * capability. With the detection of a CXL capable endpoint, the driver will + * walk up to find the platform specific port it is connected to, and determine + * if there are intervening switches in the path. If there are switches, a + * secondary action is to enumerate those (implemented in cxl_core). Finally the + * cxl_mem driver adds the device it is bound to as a CXL endpoint-port for use + * in higher level operations. 
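+ *
+ * The result, roughly, is that a memdev such as "mem0" behind a switch
+ * ends up with an "endpointN" cxl_port whose chain of parent ports
+ * mirrors the PCIe path up to the platform CXL root (names here are
+ * illustrative only).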
+ */ + +static int wait_for_media(struct cxl_memdev *cxlmd) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int rc; + + if (!info->mem_enabled) + return -EBUSY; + + rc = cxlds->wait_media_ready(cxlds); + if (rc) + return rc; + + /* + * We know the device is active, and enabled, if any ranges are non-zero + * we'll need to check later before adding the port since that owns the + * HDM decoder registers. + */ + return 0; +} + +static int create_endpoint(struct cxl_memdev *cxlmd, + struct cxl_port *parent_port) +{ + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *endpoint; + + endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev, + cxlds->component_reg_phys, parent_port); + if (IS_ERR(endpoint)) + return PTR_ERR(endpoint); + + dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev)); + + if (!endpoint->dev.driver) { + dev_err(&cxlmd->dev, "%s failed probe\n", + dev_name(&endpoint->dev)); + return -ENXIO; + } + + return cxl_endpoint_autoremove(cxlmd, endpoint); +} + +/** + * cxl_dvsec_decode_init() - Setup HDM decoding for the endpoint + * @cxlds: Device state + * + * Additionally, enables global HDM decoding. Warning: don't call this outside + * of probe. Once probe is complete, the port driver owns all access to the HDM + * decoder registers. + * + * Returns: false if DVSEC Ranges are being used instead of HDM + * decoders, or if it can not be determined if DVSEC Ranges are in use. + * Otherwise, returns true. + */ +__mock bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct cxl_register_map map; + struct cxl_component_reg_map *cmap = &map.component_map; + bool global_enable, do_hdm_init = false; + void __iomem *crb; + u32 global_ctrl; + + /* map hdm decoder */ + crb = ioremap(cxlds->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE); + if (!crb) { + dev_dbg(cxlds->dev, "Failed to map component registers\n"); + return false; + } + + cxl_probe_component_regs(cxlds->dev, crb, cmap); + if (!cmap->hdm_decoder.valid) { + dev_dbg(cxlds->dev, "Invalid HDM decoder registers\n"); + goto out; + } + + global_ctrl = readl(crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + global_enable = global_ctrl & CXL_HDM_DECODER_ENABLE; + if (!global_enable && info->ranges) { + dev_dbg(cxlds->dev, + "DVSEC ranges already programmed and HDM decoders not enabled.\n"); + goto out; + } + + do_hdm_init = true; + + /* + * Permanently (for this boot at least) opt the device into HDM + * operation. Individual HDM decoders still need to be enabled after + * this point. + */ + if (!global_enable) { + dev_dbg(cxlds->dev, "Enabling HDM decode\n"); + writel(global_ctrl | CXL_HDM_DECODER_ENABLE, + crb + cmap->hdm_decoder.offset + + CXL_HDM_DECODER_CTRL_OFFSET); + } + +out: + iounmap(crb); + return do_hdm_init; +} + +static int cxl_mem_probe(struct device *dev) +{ + struct cxl_memdev *cxlmd = to_cxl_memdev(dev); + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_port *parent_port; + int rc; + + /* + * Someone is trying to reattach this device after it lost its port + * connection (an endpoint port previously registered by this memdev was + * disabled). This racy check is ok because if the port is still gone, + * no harm done, and if the port hierarchy comes back it will re-trigger + * this probe. Port rescan and memdev detach work share the same + * single-threaded workqueue. 
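+ * (The detach work runs on the ordered cxl_bus_wq allocated in
+ * cxl_core_init(); that is also the workqueue the bus-level 'flush'
+ * attribute drains on behalf of userspace.)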
+ */ + if (work_pending(&cxlmd->detach_work)) + return -EBUSY; + + rc = wait_for_media(cxlmd); + if (rc) { + dev_err(dev, "Media not active (%d)\n", rc); + return rc; + } + + /* + * If DVSEC ranges are being used instead of HDM decoder registers there + * is no use in trying to manage those. + */ + if (!cxl_dvsec_decode_init(cxlds)) { + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + int i; + + /* */ + for (i = 0; i < 2; i++) { + u64 base, size; + + /* + * Give a nice warning to the user that BIOS has really + * botched things for them if it didn't place DVSEC + * ranges in the memory map. + */ + base = info->dvsec_range[i].start; + size = range_len(&info->dvsec_range[i]); + if (size && !region_intersects(base, size, + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE)) { + dev_err(dev, + "DVSEC range %#llx-%#llx must be reserved by BIOS, but isn't\n", + base, base + size - 1); + } + } + dev_err(dev, + "Active DVSEC range registers in use. Will not bind.\n"); + return -EBUSY; + } + + rc = devm_cxl_enumerate_ports(cxlmd); + if (rc) + return rc; + + parent_port = cxl_mem_find_port(cxlmd); + if (!parent_port) { + dev_err(dev, "CXL port topology not found\n"); + return -ENXIO; + } + + cxl_device_lock(&parent_port->dev); + if (!parent_port->dev.driver) { + dev_err(dev, "CXL port topology %s not enabled\n", + dev_name(&parent_port->dev)); + rc = -ENXIO; + goto out; + } + + rc = create_endpoint(cxlmd, parent_port); +out: + cxl_device_unlock(&parent_port->dev); + put_device(&parent_port->dev); + return rc; +} + +static struct cxl_driver cxl_mem_driver = { + .name = "cxl_mem", + .probe = cxl_mem_probe, + .id = CXL_DEVICE_MEMORY_EXPANDER, +}; + +module_cxl_driver(cxl_mem_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_MEMORY_EXPANDER); +/* + * create_endpoint() wants to validate port driver attach immediately after + * endpoint registration. + */ +MODULE_SOFTDEP("pre: cxl_port"); diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8dc91fd3396a..8a7267d116b7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,14 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #include +#include #include +#include #include #include #include #include #include #include "cxlmem.h" -#include "pci.h" +#include "cxlpci.h" #include "cxl.h" /** @@ -35,6 +37,20 @@ /* CXL 2.0 - 8.2.8.4 */ #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) +/* + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to + * dictate how long to wait for the mailbox to become ready. The new + * field allows the device to tell software the amount of time to wait + * before mailbox ready. This field per the spec theoretically allows + * for up to 255 seconds. 255 seconds is unreasonably long, its longer + * than the maximum SATA port link recovery wait. Default to 60 seconds + * until someone builds a CXL device that needs more time in practice. 
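+ *
+ * The default can be overridden at load time, e.g. (assuming the
+ * module is named cxl_pci):
+ *
+ *	modprobe cxl_pci mbox_ready_timeout=120
+ *
+ * and, given the 0644 permissions below, adjusted afterwards via
+ * /sys/module/cxl_pci/parameters/mbox_ready_timeout for subsequent
+ * probes.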
+ */ +static unsigned short mbox_ready_timeout = 60; +module_param(mbox_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(mbox_ready_timeout, + "seconds to wait for mailbox ready / memory active status"); + static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { const unsigned long start = jiffies; @@ -57,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) return 0; } -static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *mbox_cmd) -{ - struct device *dev = cxlds->dev; +#define cxl_err(dev, status, msg) \ + dev_err_ratelimited(dev, msg ", device state %s%s\n", \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") - dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n", - mbox_cmd->opcode, mbox_cmd->size_in); -} +#define cxl_cmd_err(dev, cmd, status, msg) \ + dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ + (cmd)->opcode, \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command @@ -118,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #1 */ if (cxl_doorbell_busy(cxlds)) { - dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n"); + u64 md_status = + readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, + "mailbox queue busy"); return -EBUSY; } @@ -144,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #5 */ rc = cxl_pci_mbox_wait_for_doorbell(cxlds); if (rc == -ETIMEDOUT) { - cxl_pci_mbox_timeout(cxlds, mbox_cmd); + u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); return rc; } @@ -182,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return 0; } -/** - * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox. - * @cxlds: The device state to gain access to. - * - * Context: Any context. Takes the mbox_mutex. - * Return: 0 if exclusive access was acquired. - */ -static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds) -{ - struct device *dev = cxlds->dev; - u64 md_status; - int rc; - - mutex_lock_io(&cxlds->mbox_mutex); - - /* - * XXX: There is some amount of ambiguity in the 2.0 version of the spec - * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the - * bit is to allow firmware running on the device to notify the driver - * that it's ready to receive commands. It is unclear if the bit needs - * to be read for each transaction mailbox, ie. the firmware can switch - * it on and off as needed. Second, there is no defined timeout for - * mailbox ready, like there is for the doorbell interface. - * - * Assumptions: - * 1. The firmware might toggle the Mailbox Interface Ready bit, check - * it for every command. - * - * 2. If the doorbell is clear, the firmware should have first set the - * Mailbox Interface Ready bit. Therefore, waiting for the doorbell - * to be ready is sufficient. 
- */ - rc = cxl_pci_mbox_wait_for_doorbell(cxlds); - if (rc) { - dev_warn(dev, "Mailbox interface not ready\n"); - goto out; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) { - dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n"); - rc = -EBUSY; - goto out; - } - - /* - * Hardware shouldn't allow a ready status but also have failure bits - * set. Spit out an error, this should be a bug report - */ - rc = -EFAULT; - if (md_status & CXLMDEV_DEV_FATAL) { - dev_err(dev, "mbox: reported ready, but fatal\n"); - goto out; - } - if (md_status & CXLMDEV_FW_HALT) { - dev_err(dev, "mbox: reported ready, but halted\n"); - goto out; - } - if (CXLMDEV_RESET_NEEDED(md_status)) { - dev_err(dev, "mbox: reported ready, but reset needed\n"); - goto out; - } - - /* with lock held */ - return 0; - -out: - mutex_unlock(&cxlds->mbox_mutex); - return rc; -} - -/** - * cxl_pci_mbox_put() - Release exclusive access to the mailbox. - * @cxlds: The device state to communicate with. - * - * Context: Any context. Expects mbox_mutex to be held. - */ -static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds) -{ - mutex_unlock(&cxlds->mbox_mutex); -} - static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { int rc; - rc = cxl_pci_mbox_get(cxlds); - if (rc) - return rc; - + mutex_lock_io(&cxlds->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); - cxl_pci_mbox_put(cxlds); + mutex_unlock(&cxlds->mbox_mutex); return rc; } @@ -281,6 +220,34 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) { const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); + unsigned long timeout; + u64 md_status; + + timeout = jiffies + mbox_ready_timeout * HZ; + do { + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (md_status & CXLMDEV_MBOX_IF_READY) + break; + if (msleep_interruptible(100)) + break; + } while (!time_after(jiffies, timeout)); + + if (!(md_status & CXLMDEV_MBOX_IF_READY)) { + cxl_err(cxlds->dev, md_status, + "timeout awaiting mailbox ready"); + return -ETIMEDOUT; + } + + /* + * A command may be in flight from a previous driver instance, + * think kexec, do one doorbell wait so that + * __cxl_pci_mbox_send_cmd() can assume that it is the only + * source for future doorbell busy events. + */ + if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { + cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); + return -ETIMEDOUT; + } cxlds->mbox_send = cxl_pci_mbox_send; cxlds->payload_size = @@ -400,58 +367,6 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma return 0; } -static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, - struct cxl_register_map *map) -{ - map->block_offset = - ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); - map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); - map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); -} - -/** - * cxl_find_regblock() - Locate register blocks by type - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * - * Return: 0 if register block enumerated, negative error code otherwise - * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. 
- */ -static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) -{ - u32 regloc_size, regblocks; - int regloc, i; - - regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, - PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); - if (!regloc) - return -ENXIO; - - pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, ®loc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); - - regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; - regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; - - for (i = 0; i < regblocks; i++, regloc += 8) { - u32 reg_lo, reg_hi; - - pci_read_config_dword(pdev, regloc, ®_lo); - pci_read_config_dword(pdev, regloc + 4, ®_hi); - - cxl_decode_regblock(reg_lo, reg_hi, map); - - if (map->reg_type == type) - return 0; - } - - return -ENODEV; -} - static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, struct cxl_register_map *map) { @@ -471,6 +386,165 @@ static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, return rc; } +static int wait_for_valid(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; + + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + return -ETIMEDOUT; +} + +/* + * Wait up to @mbox_ready_timeout for the device to report memory + * active. + */ +static int wait_for_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + for (i = mbox_ready_timeout; i; i--) { + u32 temp; + int rc; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; + + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } + + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + mbox_ready_timeout); + return -ETIMEDOUT; + } + + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; +} + +static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + int hdm_count, rc, i; + u16 cap, ctrl; + + if (!d) + return -ENXIO; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); + if (rc) + return rc; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); + if (rc) + return rc; + + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + return -ENXIO; + + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). 
As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + info->ranges++; + } + + return 0; +} + static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct cxl_register_map map; @@ -493,6 +567,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pci_get_dsn(pdev); + cxlds->cxl_dvsec = pci_find_dvsec_capability( + pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + if (!cxlds->cxl_dvsec) + dev_warn(&pdev->dev, + "Device DVSEC not present, skip CXL.mem init\n"); + + cxlds->wait_media_ready = wait_for_media_ready; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; @@ -501,6 +584,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + /* + * If the component registers can't be found, the cxl_pci driver may + * still be useful for management functions so don't return an error. + */ + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_warn(&pdev->dev, "No component registers (%d)\n", rc); + + cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); + rc = cxl_pci_setup_mailbox(cxlds); if (rc) return rc; @@ -517,6 +611,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds); + if (rc) + dev_warn(&pdev->dev, + "Failed to get DVSEC range information (%d)\n", rc); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h deleted file mode 100644 index 7d3e4bf06b45..000000000000 --- a/drivers/cxl/pci.h +++ /dev/null @@ -1,34 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* Copyright(c) 2020 Intel Corporation. All rights reserved. 
*/ -#ifndef __CXL_PCI_H__ -#define __CXL_PCI_H__ - -#define CXL_MEMORY_PROGIF 0x10 - -/* - * See section 8.1 Configuration Space Registers in the CXL 2.0 - * Specification - */ -#define PCI_DVSEC_HEADER1_LENGTH_MASK GENMASK(31, 20) -#define PCI_DVSEC_VENDOR_ID_CXL 0x1E98 -#define PCI_DVSEC_ID_CXL 0x0 - -#define PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID 0x8 -#define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET 0xC - -/* BAR Indicator Register (BIR) */ -#define CXL_REGLOC_BIR_MASK GENMASK(2, 0) - -/* Register Block Identifier (RBI) */ -enum cxl_regloc_type { - CXL_REGLOC_RBI_EMPTY = 0, - CXL_REGLOC_RBI_COMPONENT, - CXL_REGLOC_RBI_VIRT, - CXL_REGLOC_RBI_MEMDEV, - CXL_REGLOC_RBI_TYPES -}; - -#define CXL_REGLOC_RBI_MASK GENMASK(15, 8) -#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16) - -#endif /* __CXL_PCI_H__ */ diff --git a/drivers/cxl/pmem.c b/drivers/cxl/pmem.c index b65a272a2d6d..15ad666ab03e 100644 --- a/drivers/cxl/pmem.c +++ b/drivers/cxl/pmem.c @@ -43,7 +43,7 @@ static int cxl_nvdimm_probe(struct device *dev) if (!cxl_nvb) return -ENXIO; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); if (!cxl_nvb->nvdimm_bus) { rc = -ENXIO; goto out; @@ -68,7 +68,7 @@ static int cxl_nvdimm_probe(struct device *dev) dev_set_drvdata(dev, nvdimm); rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm); out: - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); put_device(&cxl_nvb->dev); return rc; @@ -233,7 +233,7 @@ static void cxl_nvb_update_state(struct work_struct *work) struct nvdimm_bus *victim_bus = NULL; bool release = false, rescan = false; - device_lock(&cxl_nvb->dev); + cxl_device_lock(&cxl_nvb->dev); switch (cxl_nvb->state) { case CXL_NVB_ONLINE: if (!online_nvdimm_bus(cxl_nvb)) { @@ -251,7 +251,7 @@ static void cxl_nvb_update_state(struct work_struct *work) default: break; } - device_unlock(&cxl_nvb->dev); + cxl_device_unlock(&cxl_nvb->dev); if (release) device_release_driver(&cxl_nvb->dev); @@ -327,9 +327,9 @@ static int cxl_nvdimm_bridge_reset(struct device *dev, void *data) return 0; cxl_nvb = to_cxl_nvdimm_bridge(dev); - device_lock(dev); + cxl_device_lock(dev); cxl_nvb->state = CXL_NVB_NEW; - device_unlock(dev); + cxl_device_unlock(dev); return 0; } diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c new file mode 100644 index 000000000000..d420da5fc39c --- /dev/null +++ b/drivers/cxl/port.c @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ +#include +#include +#include + +#include "cxlmem.h" +#include "cxlpci.h" + +/** + * DOC: cxl port + * + * The port driver enumerates dport via PCI and scans for HDM + * (Host-managed-Device-Memory) decoder resources via the + * @component_reg_phys value passed in by the agent that registered the + * port. All descendant ports of a CXL root port (described by platform + * firmware) are managed in this drivers context. Each driver instance + * is responsible for tearing down the driver context of immediate + * descendant ports. The locking for this is validated by + * CONFIG_PROVE_CXL_LOCKING. + * + * The primary service this driver provides is presenting APIs to other + * drivers to utilize the decoders, and indicating to userspace (via bind + * status) the connectivity of the CXL.mem protocol throughout the + * PCIe topology. 
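+ *
+ * For example, right after registering an endpoint port the cxl_mem
+ * driver checks the bind status of this driver before going any further
+ * (sketch of the create_endpoint() flow, trimmed to the relevant lines):
+ *
+ *	endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
+ *				     cxlds->component_reg_phys, parent_port);
+ *	if (!endpoint->dev.driver)
+ *		return -ENXIO;
+ *
+ * i.e. if this driver did not attach, the endpoint is unusable and the
+ * memdev probe fails.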
+ */ + +static void schedule_detach(void *cxlmd) +{ + schedule_cxl_memdev_detach(cxlmd); +} + +static int cxl_port_probe(struct device *dev) +{ + struct cxl_port *port = to_cxl_port(dev); + struct cxl_hdm *cxlhdm; + int rc; + + if (is_cxl_endpoint(port)) { + struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport); + + get_device(&cxlmd->dev); + rc = devm_add_action_or_reset(dev, schedule_detach, cxlmd); + if (rc) + return rc; + } else { + rc = devm_cxl_port_enumerate_dports(port); + if (rc < 0) + return rc; + if (rc == 1) + return devm_cxl_add_passthrough_decoder(port); + } + + cxlhdm = devm_cxl_setup_hdm(port); + if (IS_ERR(cxlhdm)) + return PTR_ERR(cxlhdm); + + rc = devm_cxl_enumerate_decoders(cxlhdm); + if (rc) { + dev_err(dev, "Couldn't enumerate decoders (%d)\n", rc); + return rc; + } + + return 0; +} + +static struct cxl_driver cxl_port_driver = { + .name = "cxl_port", + .probe = cxl_port_probe, + .id = CXL_DEVICE_PORT, +}; + +module_cxl_driver(cxl_port_driver); +MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(CXL); +MODULE_ALIAS_CXL(CXL_DEVICE_PORT); diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index a11850dd475d..2650a852eeaf 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -185,7 +185,7 @@ static inline void devm_nsio_disable(struct device *dev, } #endif -#ifdef CONFIG_PROVE_LOCKING +#ifdef CONFIG_PROVE_NVDIMM_LOCKING extern struct class *nd_class; enum { diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cfe3b092c31d..075cd25363ac 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1544,6 +1544,29 @@ config CSD_LOCK_WAIT_DEBUG include the IPI handler function currently executing (if any) and relevant stack traces. +choice + prompt "Lock debugging: prove subsystem device_lock() correctness" + depends on PROVE_LOCKING + help + For subsystems that have instrumented their usage of the device_lock() + with nested annotations, enable lock dependency checking. The locking + hierarchy 'subclass' identifiers are not compatible across + sub-systems, so only one can be enabled at a time. + +config PROVE_NVDIMM_LOCKING + bool "NVDIMM" + depends on LIBNVDIMM + help + Enable lockdep to validate nd_device_lock() usage. + +config PROVE_CXL_LOCKING + bool "CXL" + depends on CXL_BUS + help + Enable lockdep to validate cxl_device_lock() usage. 
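+
+# Example .config fragment for debugging CXL lock ordering (illustrative;
+# PROVE_CXL_LOCKING is only selectable when PROVE_LOCKING and CXL_BUS are
+# enabled):
+#   CONFIG_PROVE_LOCKING=y
+#   CONFIG_PROVE_CXL_LOCKING=y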
+ +endchoice + endmenu # lock debugging config TRACE_IRQFLAGS diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 1acdf2fc31c5..82e49ab0937d 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -3,8 +3,11 @@ ldflags-y += --wrap=acpi_table_parse_cedt ldflags-y += --wrap=is_acpi_device_node ldflags-y += --wrap=acpi_evaluate_integer ldflags-y += --wrap=acpi_pci_find_root -ldflags-y += --wrap=pci_walk_bus ldflags-y += --wrap=nvdimm_bus_register +ldflags-y += --wrap=devm_cxl_port_enumerate_dports +ldflags-y += --wrap=devm_cxl_setup_hdm +ldflags-y += --wrap=devm_cxl_add_passthrough_decoder +ldflags-y += --wrap=devm_cxl_enumerate_decoders DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl @@ -23,15 +26,26 @@ obj-m += cxl_pmem.o cxl_pmem-y := $(CXL_SRC)/pmem.o cxl_pmem-y += config_check.o +obj-m += cxl_port.o + +cxl_port-y := $(CXL_SRC)/port.o +cxl_port-y += config_check.o + +obj-m += cxl_mem.o + +cxl_mem-y := $(CXL_SRC)/mem.o +cxl_mem-y += mock_mem.o +cxl_mem-y += config_check.o + obj-m += cxl_core.o -cxl_core-y := $(CXL_CORE_SRC)/bus.o +cxl_core-y := $(CXL_CORE_SRC)/port.o cxl_core-y += $(CXL_CORE_SRC)/pmem.o cxl_core-y += $(CXL_CORE_SRC)/regs.o cxl_core-y += $(CXL_CORE_SRC)/memdev.o cxl_core-y += $(CXL_CORE_SRC)/mbox.o +cxl_core-y += $(CXL_CORE_SRC)/pci.o +cxl_core-y += $(CXL_CORE_SRC)/hdm.o cxl_core-y += config_check.o -cxl_core-y += mock_pmem.o - obj-m += test/ diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index 4c8a493ace56..55813de26d46 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -4,7 +4,6 @@ #include #include #include -#include #include #include "test/mock.h" @@ -34,76 +33,3 @@ struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) put_cxl_mock_ops(index); return found; } - -static int match_add_root_port(struct pci_dev *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct pci_bus *root_bus = ctx->root; - struct cxl_port *port = ctx->port; - int type = pci_pcie_type(pdev); - struct device *dev = ctx->dev; - u32 lnkcap, port_num; - int rc; - - if (pdev->bus != root_bus) - return 0; - if (!pci_is_pcie(pdev)) - return 0; - if (type != PCI_EXP_TYPE_ROOT_PORT) - return 0; - if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP, - &lnkcap) != PCIBIOS_SUCCESSFUL) - return 0; - - /* TODO walk DVSEC to find component register base */ - port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap); - rc = cxl_add_dport(port, &pdev->dev, port_num, CXL_RESOURCE_NONE); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", port_num, dev_name(&pdev->dev)); - - return 0; -} - -static int mock_add_root_port(struct platform_device *pdev, void *data) -{ - struct cxl_walk_context *ctx = data; - struct cxl_port *port = ctx->port; - struct device *dev = ctx->dev; - int rc; - - rc = cxl_add_dport(port, &pdev->dev, pdev->id, CXL_RESOURCE_NONE); - if (rc) { - dev_err(dev, "failed to add dport: %s (%d)\n", - dev_name(&pdev->dev), rc); - ctx->error = rc; - return rc; - } - ctx->count++; - - dev_dbg(dev, "add dport%d: %s\n", pdev->id, dev_name(&pdev->dev)); - - return 0; -} - -int match_add_root_ports(struct pci_dev *dev, void *data) -{ - int index, rc; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - struct platform_device *pdev = (struct platform_device *) dev; - - if (ops && ops->is_mock_port(pdev)) - rc = mock_add_root_port(pdev, 
data); - else - rc = match_add_root_port(dev, data); - - put_cxl_mock_ops(index); - - return rc; -} diff --git a/tools/testing/cxl/mock_mem.c b/tools/testing/cxl/mock_mem.c new file mode 100644 index 000000000000..d1dec5845139 --- /dev/null +++ b/tools/testing/cxl/mock_mem.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */ + +#include + +struct cxl_dev_state; +bool cxl_dvsec_decode_init(struct cxl_dev_state *cxlds) +{ + return true; +} diff --git a/tools/testing/cxl/mock_pmem.c b/tools/testing/cxl/mock_pmem.c deleted file mode 100644 index f7315e6f52c0..000000000000 --- a/tools/testing/cxl/mock_pmem.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Copyright(c) 2021 Intel Corporation. All rights reserved. */ -#include -#include "test/mock.h" -#include - -int match_nvdimm_bridge(struct device *dev, const void *data) -{ - int index, rc = 0; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - const struct cxl_nvdimm *cxl_nvd = data; - - if (ops) { - if (dev->type == &cxl_nvdimm_bridge_type && - (ops->is_mock_dev(dev->parent->parent) == - ops->is_mock_dev(cxl_nvd->dev.parent->parent))) - rc = 1; - } else - rc = dev->type == &cxl_nvdimm_bridge_type; - - put_cxl_mock_ops(index); - - return rc; -} diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 736d99006fb7..431f2bddf6c8 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -8,16 +8,25 @@ #include #include #include +#include #include "mock.h" -#define NR_CXL_HOST_BRIDGES 4 +#define NR_CXL_HOST_BRIDGES 2 #define NR_CXL_ROOT_PORTS 2 +#define NR_CXL_SWITCH_PORTS 2 +#define NR_CXL_PORT_DECODERS 2 static struct platform_device *cxl_acpi; static struct platform_device *cxl_host_bridge[NR_CXL_HOST_BRIDGES]; static struct platform_device *cxl_root_port[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; -struct platform_device *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_uport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS]; +static struct platform_device + *cxl_switch_dport[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * + NR_CXL_SWITCH_PORTS]; +struct platform_device + *cxl_mem[NR_CXL_HOST_BRIDGES * NR_CXL_ROOT_PORTS * NR_CXL_SWITCH_PORTS]; static struct acpi_device acpi0017_mock; static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { @@ -27,12 +36,6 @@ static struct acpi_device host_bridge[NR_CXL_HOST_BRIDGES] = { [1] = { .handle = &host_bridge[1], }, - [2] = { - .handle = &host_bridge[2], - }, - [3] = { - .handle = &host_bridge[3], - }, }; static bool is_mock_dev(struct device *dev) @@ -70,7 +73,7 @@ static struct { } cfmws0; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws1; struct { struct acpi_cedt_cfmws cfmws; @@ -78,7 +81,7 @@ static struct { } cfmws2; struct { struct acpi_cedt_cfmws cfmws; - u32 target[4]; + u32 target[2]; } cfmws3; } __packed mock_cedt = { .cedt = { @@ -104,22 +107,6 @@ static struct { .uid = 1, .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, }, - .chbs[2] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 2, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, - .chbs[3] = { - .header = { - .type = ACPI_CEDT_TYPE_CHBS, - .length = sizeof(mock_cedt.chbs[0]), - }, - .uid = 3, - .cxl_version = ACPI_CEDT_CHBS_VERSION_CXL20, - }, .cfmws0 = { .cfmws = { .header = { @@ -141,14 +128,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = 
sizeof(mock_cedt.cfmws1), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_VOLATILE, .qtg_id = 1, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, .cfmws2 = { .cfmws = { @@ -171,14 +158,14 @@ static struct { .type = ACPI_CEDT_TYPE_CFMWS, .length = sizeof(mock_cedt.cfmws3), }, - .interleave_ways = 2, + .interleave_ways = 1, .granularity = 4, .restrictions = ACPI_CEDT_CFMWS_RESTRICT_TYPE3 | ACPI_CEDT_CFMWS_RESTRICT_PMEM, .qtg_id = 3, - .window_size = SZ_256M * 4, + .window_size = SZ_256M * 2, }, - .target = { 0, 1, 2, 3 }, + .target = { 0, 1, }, }, }; @@ -317,6 +304,30 @@ static bool is_mock_bridge(struct device *dev) for (i = 0; i < ARRAY_SIZE(cxl_host_bridge); i++) if (dev == &cxl_host_bridge[i]->dev) return true; + return false; +} + +static bool is_mock_port(struct device *dev) +{ + int i; + + if (is_mock_bridge(dev)) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) + if (dev == &cxl_root_port[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) + if (dev == &cxl_switch_uport[i]->dev) + return true; + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) + if (dev == &cxl_switch_dport[i]->dev) + return true; + + if (is_cxl_memdev(dev)) + return is_mock_dev(dev->parent); return false; } @@ -358,34 +369,8 @@ static struct acpi_pci_root mock_pci_root[NR_CXL_HOST_BRIDGES] = { [1] = { .bus = &mock_pci_bus[1], }, - [2] = { - .bus = &mock_pci_bus[2], - }, - [3] = { - .bus = &mock_pci_bus[3], - }, }; -static struct platform_device *mock_cxl_root_port(struct pci_bus *bus, int index) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(mock_pci_bus); i++) - if (bus == &mock_pci_bus[i]) - return cxl_root_port[index + i * NR_CXL_ROOT_PORTS]; - return NULL; -} - -static bool is_mock_port(struct platform_device *pdev) -{ - int i; - - for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) - if (pdev == cxl_root_port[i]) - return true; - return false; -} - static bool is_mock_bus(struct pci_bus *bus) { int i; @@ -405,16 +390,166 @@ static struct acpi_pci_root *mock_acpi_pci_find_root(acpi_handle handle) return &mock_pci_root[host_bridge_index(adev)]; } +static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port) +{ + struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL); + + if (!cxlhdm) + return ERR_PTR(-ENOMEM); + + cxlhdm->port = port; + return cxlhdm; +} + +static int mock_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + dev_err(&port->dev, "unexpected passthrough decoder for cxl_test\n"); + return -EOPNOTSUPP; +} + + +struct target_map_ctx { + int *target_map; + int index; + int target_count; +}; + +static int map_targets(struct device *dev, void *data) +{ + struct platform_device *pdev = to_platform_device(dev); + struct target_map_ctx *ctx = data; + + ctx->target_map[ctx->index++] = pdev->id; + + if (ctx->index > ctx->target_count) { + dev_WARN_ONCE(dev, 1, "too many targets found?\n"); + return -ENXIO; + } + + return 0; +} + +static int mock_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + struct cxl_port *port = cxlhdm->port; + struct cxl_port *parent_port = to_cxl_port(port->dev.parent); + int target_count, i; + + if (is_cxl_endpoint(port)) + target_count = 0; + else if (is_cxl_root(parent_port)) + target_count = NR_CXL_ROOT_PORTS; + else + target_count = NR_CXL_SWITCH_PORTS; + + for (i = 0; i < NR_CXL_PORT_DECODERS; i++) { + int target_map[CXL_DECODER_MAX_INTERLEAVE] = 
{ 0 }; + struct target_map_ctx ctx = { + .target_map = target_map, + .target_count = target_count, + }; + struct cxl_decoder *cxld; + int rc; + + if (target_count) + cxld = cxl_switch_decoder_alloc(port, target_count); + else + cxld = cxl_endpoint_decoder_alloc(port); + if (IS_ERR(cxld)) { + dev_warn(&port->dev, + "Failed to allocate the decoder\n"); + return PTR_ERR(cxld); + } + + cxld->decoder_range = (struct range) { + .start = 0, + .end = -1, + }; + + cxld->flags = CXL_DECODER_F_ENABLE; + cxld->interleave_ways = min_not_zero(target_count, 1); + cxld->interleave_granularity = SZ_4K; + cxld->target_type = CXL_DECODER_EXPANDER; + + if (target_count) { + rc = device_for_each_child(port->uport, &ctx, + map_targets); + if (rc) { + put_device(&cxld->dev); + return rc; + } + } + + rc = cxl_decoder_add_locked(cxld, target_map); + if (rc) { + put_device(&cxld->dev); + dev_err(&port->dev, "Failed to add decoder\n"); + return rc; + } + + rc = cxl_decoder_autoremove(&port->dev, cxld); + if (rc) + return rc; + dev_dbg(&cxld->dev, "Added to port %s\n", dev_name(&port->dev)); + } + + return 0; +} + +static int mock_cxl_port_enumerate_dports(struct cxl_port *port) +{ + struct device *dev = &port->dev; + struct platform_device **array; + int i, array_size; + + if (port->depth == 1) { + array_size = ARRAY_SIZE(cxl_root_port); + array = cxl_root_port; + } else if (port->depth == 2) { + array_size = ARRAY_SIZE(cxl_switch_dport); + array = cxl_switch_dport; + } else { + dev_WARN_ONCE(&port->dev, 1, "unexpected depth %d\n", + port->depth); + return -ENXIO; + } + + for (i = 0; i < array_size; i++) { + struct platform_device *pdev = array[i]; + struct cxl_dport *dport; + + if (pdev->dev.parent != port->uport) + continue; + + dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id, + CXL_RESOURCE_NONE); + + if (IS_ERR(dport)) { + dev_err(dev, "failed to add dport: %s (%ld)\n", + dev_name(&pdev->dev), PTR_ERR(dport)); + return PTR_ERR(dport); + } + + dev_dbg(dev, "add dport%d: %s\n", pdev->id, + dev_name(&pdev->dev)); + } + + return 0; +} + static struct cxl_mock_ops cxl_mock_ops = { .is_mock_adev = is_mock_adev, .is_mock_bridge = is_mock_bridge, .is_mock_bus = is_mock_bus, .is_mock_port = is_mock_port, .is_mock_dev = is_mock_dev, - .mock_port = mock_cxl_root_port, .acpi_table_parse_cedt = mock_acpi_table_parse_cedt, .acpi_evaluate_integer = mock_acpi_evaluate_integer, .acpi_pci_find_root = mock_acpi_pci_find_root, + .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports, + .devm_cxl_setup_hdm = mock_cxl_setup_hdm, + .devm_cxl_add_passthrough_decoder = mock_cxl_add_passthrough_decoder, + .devm_cxl_enumerate_decoders = mock_cxl_enumerate_decoders, .list = LIST_HEAD_INIT(cxl_mock_ops.list), }; @@ -506,12 +641,17 @@ static __init int cxl_test_init(void) platform_device_put(pdev); goto err_bridge; } + cxl_host_bridge[i] = pdev; + rc = sysfs_create_link(&pdev->dev.kobj, &pdev->dev.kobj, + "physical_node"); + if (rc) + goto err_bridge; } for (i = 0; i < ARRAY_SIZE(cxl_root_port); i++) { struct platform_device *bridge = - cxl_host_bridge[i / NR_CXL_ROOT_PORTS]; + cxl_host_bridge[i % ARRAY_SIZE(cxl_host_bridge)]; struct platform_device *pdev; pdev = platform_device_alloc("cxl_root_port", i); @@ -527,15 +667,52 @@ static __init int cxl_test_init(void) cxl_root_port[i] = pdev; } - BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_root_port)); + BUILD_BUG_ON(ARRAY_SIZE(cxl_switch_uport) != ARRAY_SIZE(cxl_root_port)); + for (i = 0; i < ARRAY_SIZE(cxl_switch_uport); i++) { + struct platform_device *root_port = 
cxl_root_port[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_uport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &root_port->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_uport; + } + cxl_switch_uport[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_switch_dport); i++) { + struct platform_device *uport = + cxl_switch_uport[i % ARRAY_SIZE(cxl_switch_uport)]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_switch_dport", i); + if (!pdev) + goto err_port; + pdev->dev.parent = &uport->dev; + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_dport; + } + cxl_switch_dport[i] = pdev; + } + + BUILD_BUG_ON(ARRAY_SIZE(cxl_mem) != ARRAY_SIZE(cxl_switch_dport)); for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { - struct platform_device *port = cxl_root_port[i]; + struct platform_device *dport = cxl_switch_dport[i]; struct platform_device *pdev; pdev = alloc_memdev(i); if (!pdev) goto err_mem; - pdev->dev.parent = &port->dev; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); rc = platform_device_add(pdev); if (rc) { @@ -563,12 +740,24 @@ static __init int cxl_test_init(void) err_mem: for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); +err_dport: + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); +err_uport: + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); err_port: for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); err_bridge: - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } err_populate: depopulate_all_mock_resources(); err_gen_pool_add: @@ -585,10 +774,20 @@ static __exit void cxl_test_exit(void) platform_device_unregister(cxl_acpi); for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) platform_device_unregister(cxl_mem[i]); + for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_dport[i]); + for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) + platform_device_unregister(cxl_switch_uport[i]); for (i = ARRAY_SIZE(cxl_root_port) - 1; i >= 0; i--) platform_device_unregister(cxl_root_port[i]); - for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) + for (i = ARRAY_SIZE(cxl_host_bridge) - 1; i >= 0; i--) { + struct platform_device *pdev = cxl_host_bridge[i]; + + if (!pdev) + continue; + sysfs_remove_link(&pdev->dev.kobj, "physical_node"); platform_device_unregister(cxl_host_bridge[i]); + } depopulate_all_mock_resources(); gen_pool_destroy(cxl_mock_pool); unregister_cxl_mock_ops(&cxl_mock_ops); @@ -598,3 +797,4 @@ module_init(cxl_test_init); module_exit(cxl_test_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 8c2086c4caef..b6b726eff3e2 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -236,11 +237,25 @@ static int cxl_mock_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd * return rc; } +static int 
cxl_mock_wait_media_ready(struct cxl_dev_state *cxlds) +{ + msleep(100); + return 0; +} + static void label_area_release(void *lsa) { vfree(lsa); } +static void mock_validate_dvsec_ranges(struct cxl_dev_state *cxlds) +{ + struct cxl_endpoint_dvsec_info *info; + + info = &cxlds->info; + info->mem_enabled = true; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -261,7 +276,9 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pdev->id; cxlds->mbox_send = cxl_mock_mbox_send; + cxlds->wait_media_ready = cxl_mock_wait_media_ready; cxlds->payload_size = SZ_4K; rc = cxl_enumerate_cmds(cxlds); @@ -276,6 +293,8 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; + mock_validate_dvsec_ranges(cxlds); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 17408f892df4..6e8c9d63c92d 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -7,6 +7,8 @@ #include #include #include +#include +#include #include "mock.h" static LIST_HEAD(mock); @@ -114,32 +116,6 @@ struct acpi_pci_root *__wrap_acpi_pci_find_root(acpi_handle handle) } EXPORT_SYMBOL_GPL(__wrap_acpi_pci_find_root); -void __wrap_pci_walk_bus(struct pci_bus *bus, - int (*cb)(struct pci_dev *, void *), void *userdata) -{ - int index; - struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); - - if (ops && ops->is_mock_bus(bus)) { - int rc, i; - - /* - * Simulate 2 root ports per host-bridge and no - * depth recursion. - */ - for (i = 0; i < 2; i++) { - rc = cb((struct pci_dev *) ops->mock_port(bus, i), - userdata); - if (rc) - break; - } - } else - pci_walk_bus(bus, cb, userdata); - - put_cxl_mock_ops(index); -} -EXPORT_SYMBOL_GPL(__wrap_pci_walk_bus); - struct nvdimm_bus * __wrap_nvdimm_bus_register(struct device *dev, struct nvdimm_bus_descriptor *nd_desc) @@ -155,5 +131,68 @@ __wrap_nvdimm_bus_register(struct device *dev, } EXPORT_SYMBOL_GPL(__wrap_nvdimm_bus_register); +struct cxl_hdm *__wrap_devm_cxl_setup_hdm(struct cxl_port *port) +{ + int index; + struct cxl_hdm *cxlhdm; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + cxlhdm = ops->devm_cxl_setup_hdm(port); + else + cxlhdm = devm_cxl_setup_hdm(port); + put_cxl_mock_ops(index); + + return cxlhdm; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_setup_hdm, CXL); + +int __wrap_devm_cxl_add_passthrough_decoder(struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_add_passthrough_decoder(port); + else + rc = devm_cxl_add_passthrough_decoder(port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_add_passthrough_decoder, CXL); + +int __wrap_devm_cxl_enumerate_decoders(struct cxl_hdm *cxlhdm) +{ + int rc, index; + struct cxl_port *port = cxlhdm->port; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_enumerate_decoders(cxlhdm); + else + rc = devm_cxl_enumerate_decoders(cxlhdm); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_enumerate_decoders, CXL); + +int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port) +{ + int rc, index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (ops && 
ops->is_mock_port(port->uport)) + rc = ops->devm_cxl_port_enumerate_dports(port); + else + rc = devm_cxl_port_enumerate_dports(port); + put_cxl_mock_ops(index); + + return rc; +} +EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); +MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h index 15ed0fd877e4..738f24e3988a 100644 --- a/tools/testing/cxl/test/mock.h +++ b/tools/testing/cxl/test/mock.h @@ -2,6 +2,7 @@ #include #include +#include struct cxl_mock_ops { struct list_head list; @@ -15,10 +16,13 @@ struct cxl_mock_ops { struct acpi_object_list *arguments, unsigned long long *data); struct acpi_pci_root *(*acpi_pci_find_root)(acpi_handle handle); - struct platform_device *(*mock_port)(struct pci_bus *bus, int index); bool (*is_mock_bus)(struct pci_bus *bus); - bool (*is_mock_port)(struct platform_device *pdev); + bool (*is_mock_port)(struct device *dev); bool (*is_mock_dev)(struct device *dev); + int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port); + struct cxl_hdm *(*devm_cxl_setup_hdm)(struct cxl_port *port); + int (*devm_cxl_add_passthrough_decoder)(struct cxl_port *port); + int (*devm_cxl_enumerate_decoders)(struct cxl_hdm *hdm); }; void register_cxl_mock_ops(struct cxl_mock_ops *ops);
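
/*
 * Usage sketch (illustrative, not part of the patch): a test environment
 * such as cxl_test registers a struct cxl_mock_ops instance so that the
 * __wrap_* shims in tools/testing/cxl/test/mock.c divert calls targeting
 * mock devices back into the emulated topology, while real devices fall
 * through to the native implementations. The handler names below are made
 * up for the example:
 *
 *	static struct cxl_mock_ops my_ops = {
 *		.is_mock_port = my_is_mock_port,
 *		.devm_cxl_setup_hdm = my_setup_hdm,
 *		.devm_cxl_enumerate_decoders = my_enumerate_decoders,
 *		.devm_cxl_port_enumerate_dports = my_enumerate_dports,
 *		.list = LIST_HEAD_INIT(my_ops.list),
 *	};
 *
 *	register_cxl_mock_ops(&my_ops);
 *	...
 *	unregister_cxl_mock_ops(&my_ops);
 *
 * Registration happens from the test module's init path and unregistration
 * on exit, as cxl_test does with its cxl_mock_ops instance.
 */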