From 57e29f4c8919af7276c35c2da0ae5efb4c36c33e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 15 Jul 2024 10:33:06 +0200 Subject: [PATCH 01/39] s390: Add runtime constant support Implement the runtime constant infrastructure for s390, allowing the dcache d_hash() function to be generated using the hash table address as a constant, followed by a shift of the hash index by a constant. This is the s390 variant of commit 94a2bc0f611c ("arm64: add 'runtime constant' support") and commit e3c92e81711d ("runtime constants: add x86 architecture support"). Signed-off-by: Heiko Carstens Acked-by: Vasily Gorbik Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/runtime-const.h | 77 +++++++++++++++++++++++++++ arch/s390/kernel/vmlinux.lds.S | 3 ++ 2 files changed, 80 insertions(+) create mode 100644 arch/s390/include/asm/runtime-const.h diff --git a/arch/s390/include/asm/runtime-const.h b/arch/s390/include/asm/runtime-const.h new file mode 100644 index 000000000000..17878b1d048c --- /dev/null +++ b/arch/s390/include/asm/runtime-const.h @@ -0,0 +1,77 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_RUNTIME_CONST_H +#define _ASM_S390_RUNTIME_CONST_H + +#include + +#define runtime_const_ptr(sym) \ +({ \ + typeof(sym) __ret; \ + \ + asm_inline( \ + "0: iihf %[__ret],%[c1]\n" \ + " iilf %[__ret],%[c2]\n" \ + ".pushsection runtime_ptr_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "=d" (__ret) \ + : [c1] "i" (0x01234567UL), \ + [c2] "i" (0x89abcdefUL)); \ + __ret; \ +}) + +#define runtime_const_shift_right_32(val, sym) \ +({ \ + unsigned int __ret = (val); \ + \ + asm_inline( \ + "0: srl %[__ret],12\n" \ + ".pushsection runtime_shift_" #sym ",\"a\"\n" \ + ".long 0b - .\n" \ + ".popsection" \ + : [__ret] "+d" (__ret)); \ + __ret; \ +}) + +#define runtime_const_init(type, sym) do { \ + extern s32 __start_runtime_##type##_##sym[]; \ + extern s32 __stop_runtime_##type##_##sym[]; \ + \ + runtime_const_fixup(__runtime_fixup_##type, \ + (unsigned long)(sym), \ + __start_runtime_##type##_##sym, \ + __stop_runtime_##type##_##sym); \ +} while (0) + +/* 32-bit immediate for iihf and iilf in the I2 field */ +static inline void __runtime_fixup_32(u32 *p, unsigned int val) +{ + s390_kernel_write(p, &val, sizeof(val)); +} + +static inline void __runtime_fixup_ptr(void *where, unsigned long val) +{ + __runtime_fixup_32(where + 2, val >> 32); + __runtime_fixup_32(where + 8, val); +} + +/* Immediate value is lower 12 bits of D2 field of srl */ +static inline void __runtime_fixup_shift(void *where, unsigned long val) +{ + u32 insn = *(u32 *)where; + + insn &= 0xfffff000; + insn |= (val & 63); + s390_kernel_write(where, &insn, sizeof(insn)); +} + +static inline void runtime_const_fixup(void (*fn)(void *, unsigned long), + unsigned long val, s32 *start, s32 *end) +{ + while (start < end) { + fn(*start + (void *)start, val); + start++; + } +} + +#endif /* _ASM_S390_RUNTIME_CONST_H */ diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index a1ce3925ec71..5128ccee9c67 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -190,6 +190,9 @@ SECTIONS . = ALIGN(PAGE_SIZE); INIT_DATA_SECTION(0x100) + RUNTIME_CONST(shift, d_hash_shift) + RUNTIME_CONST(ptr, dentry_hashtable) + PERCPU_SECTION(0x100) .
= ALIGN(PAGE_SIZE); From 5fd11b96b43708f2f6e3964412c301c1bd20ec0f Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Thu, 11 Jul 2024 15:45:26 +0200 Subject: [PATCH 02/39] s390/pci: Refactor arch_setup_msi_irqs() Factor out adapter interrupt allocation from arch_setup_msi_irqs() in preparation for enabling registration of multiple MSIs. Code movement only, no change of functionality intended. Signed-off-by: Gerd Bayer Reviewed-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_irq.c | 54 ++++++++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 0ef83b6ac0db..979f776b09b8 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq, } } -int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, + unsigned long *bit) { - struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs, cpu; - unsigned long bit; - struct msi_desc *msi; - struct msi_msg msg; - int cpu_addr; - int rc, irq; - - zdev->aisb = -1UL; - zdev->msi_first_bit = -1U; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; - msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); - if (irq_delivery == DIRECTED) { /* Allocate cpu vector bits */ - bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); - if (bit == -1UL) + *bit = airq_iv_alloc(zpci_ibv[0], msi_vecs); + if (*bit == -1UL) return -EIO; } else { /* Allocate adapter summary indicator bit */ - bit = airq_iv_alloc_bit(zpci_sbv); - if (bit == -1UL) + *bit = airq_iv_alloc_bit(zpci_sbv); + if (*bit == -1UL) return -EIO; - zdev->aisb = bit; + zdev->aisb = *bit; /* Create adapter interrupt vector */ zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL); @@ -302,10 +289,33 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) return -ENOMEM; /* Wire up shortcut pointer */ - zpci_ibv[bit] = zdev->aibv; + zpci_ibv[*bit] = zdev->aibv; /* Each function has its own interrupt vector */ - bit = 0; + *bit = 0; } + return 0; +} + +int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) +{ + struct zpci_dev *zdev = to_zpci(pdev); + unsigned int hwirq, msi_vecs, cpu; + struct msi_desc *msi; + struct msi_msg msg; + unsigned long bit; + int cpu_addr; + int rc, irq; + + zdev->aisb = -1UL; + zdev->msi_first_bit = -1U; + + if (type == PCI_CAP_ID_MSI && nvec > 1) + return 1; + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + + rc = __alloc_airq(zdev, msi_vecs, &bit); + if (rc < 0) + return rc; /* Request MSI interrupts */ hwirq = bit; From ab42fcb511fd9d241bbab7cc3ca04e34e9fc0666 Mon Sep 17 00:00:00 2001 From: Gerd Bayer Date: Thu, 11 Jul 2024 15:45:27 +0200 Subject: [PATCH 03/39] s390/pci: Allow allocation of more than 1 MSI interrupt On a PCI adapter that provides up to 8 MSI interrupt sources the s390 implementation of PCI interrupts refused to accommodate them, although the underlying hardware is able to support that. For MSI-X it is sufficient to allocate a single irq_desc per msi_desc, but for MSI multiple irq descriptors are attached to and controlled by a single msi descriptor. Add the appropriate loops to maintain multiple irq descriptors and tie/untie them to/from the appropriate AIBV bit, if a device driver allocates more than 1 MSI interrupt.
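For illustration, the driver-facing side of such an allocation can be sketched as follows. This is a hedged sketch using the generic PCI IRQ API (pci_alloc_irq_vectors() is what ultimately reaches arch_setup_msi_irqs() on the MSI path); the driver name, handler, and vector count are made-up examples, not part of this patch.

/* Sketch: a driver requesting up to 8 plain MSI vectors. The names
 * my_dev_irq/my_probe/"my_dev" are hypothetical.
 */
#include <linux/pci.h>
#include <linux/interrupt.h>

static irqreturn_t my_dev_irq(int irq, void *data)
{
	return IRQ_HANDLED;
}

static int my_probe(struct pci_dev *pdev)
{
	int nvec, i, rc;

	/* Ask for between 1 and 8 plain MSI (not MSI-X) vectors. */
	nvec = pci_alloc_irq_vectors(pdev, 1, 8, PCI_IRQ_MSI);
	if (nvec < 0)
		return nvec;
	for (i = 0; i < nvec; i++) {
		rc = request_irq(pci_irq_vector(pdev, i), my_dev_irq, 0,
				 "my_dev", pdev);
		if (rc) {
			while (--i >= 0)
				free_irq(pci_irq_vector(pdev, i), pdev);
			pci_free_irq_vectors(pdev);
			return rc;
		}
	}
	return 0;
}

Before this patch, such a request on s390 would be scaled back to a single MSI vector.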
Common PCI code passes on requests to allocate a number of interrupt vectors based on the device drivers' demand and the PCI functions' capabilities. However, the root-complex of s390 systems supports just a limited number of interrupt vectors per PCI function. Produce a kernel log message to inform about any architecture-specific capping that might be done. With this change, we had a PCI adapter successfully raising interrupts to its device driver via all 8 sources. Fixes: a384c8924a8b ("s390/PCI: Fix single MSI only check") Signed-off-by: Gerd Bayer Reviewed-by: Niklas Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/pci/pci_irq.c | 62 ++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/arch/s390/pci/pci_irq.c b/arch/s390/pci/pci_irq.c index 979f776b09b8..84482a921332 100644 --- a/arch/s390/pci/pci_irq.c +++ b/arch/s390/pci/pci_irq.c @@ -298,8 +298,8 @@ static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs, int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) { + unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu; struct zpci_dev *zdev = to_zpci(pdev); - unsigned int hwirq, msi_vecs, cpu; struct msi_desc *msi; struct msi_msg msg; unsigned long bit; @@ -309,30 +309,46 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) zdev->aisb = -1UL; zdev->msi_first_bit = -1U; - if (type == PCI_CAP_ID_MSI && nvec > 1) - return 1; msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); + if (msi_vecs < nvec) { + pr_info("%s requested %d irqs, allocate system limit of %d", + pci_name(pdev), nvec, zdev->max_msi); + } rc = __alloc_airq(zdev, msi_vecs, &bit); if (rc < 0) return rc; - /* Request MSI interrupts */ + /* + * Request MSI interrupts: + * When using MSI, nvec_used interrupt sources and their irq + * descriptors are controlled through one msi descriptor. + * Thus the outer loop over msi descriptors shall run only once, + * while two inner loops iterate over the interrupt vectors. + * When using MSI-X, each interrupt vector/irq descriptor + * is bound to exactly one msi descriptor (nvec_used is one). + * So the inner loops are executed once, while the outer iterates + * over the MSI-X descriptors. + */ hwirq = bit; msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) { - rc = -EIO; if (hwirq - bit >= msi_vecs) break; - irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE, - (irq_delivery == DIRECTED) ? - msi->affinity : NULL); + irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used); + irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE, + (irq_delivery == DIRECTED) ?
+ msi->affinity : NULL); if (irq < 0) return -ENOMEM; - rc = irq_set_msi_desc(irq, msi); - if (rc) - return rc; - irq_set_chip_and_handler(irq, &zpci_irq_chip, - handle_percpu_irq); + + for (i = 0; i < irqs_per_msi; i++) { + rc = irq_set_msi_desc_off(irq, i, msi); + if (rc) + return rc; + irq_set_chip_and_handler(irq + i, &zpci_irq_chip, + handle_percpu_irq); + } + msg.data = hwirq - bit; if (irq_delivery == DIRECTED) { if (msi->affinity) @@ -345,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) msg.address_lo |= (cpu_addr << 8); for_each_possible_cpu(cpu) { - airq_iv_set_data(zpci_ibv[cpu], hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zpci_ibv[cpu], + hwirq + i, irq + i); } } else { msg.address_lo = zdev->msi_addr & 0xffffffff; - airq_iv_set_data(zdev->aibv, hwirq, irq); + for (i = 0; i < irqs_per_msi; i++) + airq_iv_set_data(zdev->aibv, hwirq + i, irq + i); } msg.address_hi = zdev->msi_addr >> 32; pci_write_msi_msg(irq, &msg); - hwirq++; + hwirq += irqs_per_msi; } zdev->msi_first_bit = bit; - zdev->msi_nr_irqs = msi_vecs; + zdev->msi_nr_irqs = hwirq - bit; rc = zpci_set_irq(zdev); if (rc) return rc; - return (msi_vecs == nvec) ? 0 : msi_vecs; + return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs; } void arch_teardown_msi_irqs(struct pci_dev *pdev) { struct zpci_dev *zdev = to_zpci(pdev); struct msi_desc *msi; + unsigned int i; int rc; /* Disable interrupts */ @@ -379,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) /* Release MSI interrupts */ msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) { - irq_set_msi_desc(msi->irq, NULL); - irq_free_desc(msi->irq); + for (i = 0; i < msi->nvec_used; i++) { + irq_set_msi_desc(msi->irq + i, NULL); + irq_free_desc(msi->irq + i); + } msi->msg.address_lo = 0; msi->msg.address_hi = 0; msi->msg.data = 0; From e188e5d5ffd01d484b5255b88739fcf67b300223 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Thu, 11 Jul 2024 15:50:26 +0200 Subject: [PATCH 04/39] s390/setup: Fix __pa/__va for modules under non-GPL licenses The struct vm_layout contains fields used in __pa/__va calculations. Such fundamental things have to be exported with EXPORT_SYMBOL to avoid breakages of out-of-tree modules under non-GPL licenses. Fixes: 7de0446f0b26 ("s390/boot: Make identity mapping base address explicit") Acked-by: Heiko Carstens Acked-by: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 3993f4caf224..1faba11d5f0b 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -149,7 +149,7 @@ unsigned long __bootdata_preserved(max_mappable); struct physmem_info __bootdata(physmem_info); struct vm_layout __bootdata_preserved(vm_layout); -EXPORT_SYMBOL_GPL(vm_layout); +EXPORT_SYMBOL(vm_layout); int __bootdata_preserved(__kaslr_enabled); unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); From ec25f99cc834644e6577fa11582f7691589ed8cc Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 23 Jul 2024 14:44:11 +0200 Subject: [PATCH 05/39] s390/kmsan: Fix merge conflict with get_lowcore() introduction Resolve the conflict between commit 2a48c8c9cf87 ("s390/kmsan: implement the architecture-specific functions") and commit 39976f1278a9 ("s390: Remove S390_lowcore"). 
Fixes: 2a48c8c9cf87 ("s390/kmsan: implement the architecture-specific functions") Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20240723124441.120044-2-iii@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/kmsan.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/kmsan.h b/arch/s390/include/asm/kmsan.h index 27db65fbf3f6..f73e181d09ae 100644 --- a/arch/s390/include/asm/kmsan.h +++ b/arch/s390/include/asm/kmsan.h @@ -12,8 +12,8 @@ static inline bool is_lowcore_addr(void *addr) { - return addr >= (void *)&S390_lowcore && - addr < (void *)(&S390_lowcore + 1); + return addr >= (void *)get_lowcore() && + addr < (void *)(get_lowcore() + 1); } static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) @@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin) * order to get a distinct struct page. */ addr += (void *)lowcore_ptr[raw_smp_processor_id()] - - (void *)&S390_lowcore; + (void *)get_lowcore(); if (KMSAN_WARN_ON(is_lowcore_addr(addr))) return NULL; return kmsan_get_metadata(addr, is_origin); From 19af288706b25f2213e85b8b2df140c04fd7c63d Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 23 Jul 2024 14:44:12 +0200 Subject: [PATCH 06/39] s390/ptdump: Add KMSAN page markers Add KMSAN vmalloc metadata areas to /sys/kernel/debug/kernel_page_tables. Example output: 0x000003a95fff9000-0x000003a960000000 28K PTE I ---[ vmalloc Area End ]--- ---[ Kmsan vmalloc Shadow Start ]--- 0x000003a960000000-0x000003a960010000 64K PTE RW NX [...] 0x000003d3dfff9000-0x000003d3e0000000 28K PTE I ---[ Kmsan vmalloc Shadow End ]--- ---[ Kmsan vmalloc Origins Start ]--- 0x000003d3e0000000-0x000003d3e0010000 64K PTE RW NX [...] 0x000003fe5fff9000-0x000003fe60000000 28K PTE I ---[ Kmsan vmalloc Origins End ]--- ---[ Kmsan Modules Shadow Start ]--- 0x000003fe60000000-0x000003fe60001000 4K PTE RW NX [...] 0x000003fe60100000-0x000003fee0000000 2047M PMD I ---[ Kmsan Modules Shadow End ]--- ---[ Kmsan Modules Origins Start ]--- 0x000003fee0000000-0x000003fee0001000 4K PTE RW NX [...] 
0x000003fee0100000-0x000003ff60000000 2047M PMD I ---[ Kmsan Modules Origins End ]--- ---[ Modules Area Start ]--- 0x000003ff60000000-0x000003ff60001000 4K PTE RO X Signed-off-by: Ilya Leoshkevich Reviewed-by: Heiko Carstens Link: https://lore.kernel.org/r/20240723124441.120044-3-iii@linux.ibm.com Signed-off-by: Vasily Gorbik --- arch/s390/mm/dump_pagetables.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 45db5f47b22d..98dab3e049de 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -36,6 +36,16 @@ enum address_markers_idx { VMEMMAP_END_NR, VMALLOC_NR, VMALLOC_END_NR, +#ifdef CONFIG_KMSAN + KMSAN_VMALLOC_SHADOW_START_NR, + KMSAN_VMALLOC_SHADOW_END_NR, + KMSAN_VMALLOC_ORIGIN_START_NR, + KMSAN_VMALLOC_ORIGIN_END_NR, + KMSAN_MODULES_SHADOW_START_NR, + KMSAN_MODULES_SHADOW_END_NR, + KMSAN_MODULES_ORIGIN_START_NR, + KMSAN_MODULES_ORIGIN_END_NR, +#endif MODULES_NR, MODULES_END_NR, ABS_LOWCORE_NR, @@ -65,6 +75,16 @@ static struct addr_marker address_markers[] = { [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, [VMALLOC_NR] = {0, "vmalloc Area Start"}, [VMALLOC_END_NR] = {0, "vmalloc Area End"}, +#ifdef CONFIG_KMSAN + [KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"}, +#endif [MODULES_NR] = {0, "Modules Area Start"}, [MODULES_END_NR] = {0, "Modules Area End"}, [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, @@ -306,6 +326,16 @@ static int pt_dump_init(void) #ifdef CONFIG_KFENCE address_markers[KFENCE_START_NR].start_address = kfence_start; address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE; +#endif +#ifdef CONFIG_KMSAN + address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START; + address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END; + address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START; + address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END; + address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START; + address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END; + address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; + address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; #endif sort_address_markers(); #ifdef CONFIG_PTDUMP_DEBUGFS From e6ce1f12d777f6ee22b20e10ae6a771e7e6f44f5 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 15 Jul 2024 12:07:29 +0200 Subject: [PATCH 07/39] s390/cpum_cf: Fix endless loop in CF_DIAG event stop Event CF_DIAG reads out complete counter sets using stcctm instruction. This is done at event start time when the process starts execution and at event stop time when the process is removed from the CPU. During removal the difference of each counter in the counter sets is calculated and saved as raw data in the ring buffer. 
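The raw-data layout being walked here can be sketched as follows; the structure and field names are taken from this driver, but the snippet is condensed for illustration and is not the complete code.

/*
 * Each saved counter set is a struct cf_ctrset_entry header followed
 * directly by entry->ctr eight-byte counter values:
 *
 *   | cf_ctrset_entry | ctr x u64 | cf_ctrset_entry | ctr x u64 | ...
 *
 * A walker therefore advances through the buffer like this:
 */
struct cf_ctrset_entry *entry = (struct cf_ctrset_entry *)(cpuhw->start + offset);

offset += sizeof(*entry) + entry->ctr * sizeof(u64);

/* A counter set of size zero is never saved and occupies no space. */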
This works fine unless the number of counters in a counter set is zero. This may happen for the extended counter set. This set is machine specific and the size of the counter set can be zero even when the extended counter set is authorized for read access. This case is not handled. cfdiag_diffctr() checks authorization of the extended counter set. If true, the function assumes the extended counter set has been saved in a data buffer. However, this is not the case: cfdiag_getctrset() does not save a counter set with a counter set size of zero. This mismatch causes an endless loop in the counter set readout during event stop handling. The calculation of the difference of the counters in each counter set now verifies that the size of the counter set is non-zero. A counter set with size zero is skipped. Fixes: a029a4eab39e ("s390/cpumf: Allow concurrent access for CPU Measurement Counter Facility") Signed-off-by: Thomas Richter Acked-by: Sumanth Korikkar Acked-by: Heiko Carstens Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Alexander Gordeev Signed-off-by: Vasily Gorbik --- arch/s390/kernel/perf_cpum_cf.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 1434642e9cba..6968be98af11 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth) struct cf_trailer_entry *trailer_start, *trailer_stop; struct cf_ctrset_entry *ctrstart, *ctrstop; size_t offset = 0; + int i; - auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1; - do { + for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) { ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset); ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset); + /* Counter set not authorized */ + if (!(auth & cpumf_ctr_ctl[i])) + continue; + /* Counter set size zero was not saved */ + if (!cpum_cf_read_setsize(i)) + continue; + if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) { pr_err_once("cpum_cf_diag counter set compare error " "in set %i\n", ctrstart->set); return 0; } - auth &= ~cpumf_ctr_ctl[ctrstart->set]; if (ctrstart->def == CF_DIAG_CTRSET_DEF) { cfdiag_diffctrset((u64 *)(ctrstart + 1), (u64 *)(ctrstop + 1), ctrstart->ctr); offset += ctrstart->ctr * sizeof(u64) + sizeof(*ctrstart); } - } while (ctrstart->def && auth); + } /* Save time_stamp from start of event in stop's trailer */ trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset); From b798b685b42c9dbe508e59a74250d97c41bec35e Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Wed, 17 Jul 2024 21:43:22 +0200 Subject: [PATCH 08/39] s390/boot: Do not assume the decompressor range is reserved When allocating a random memory range for .amode31 sections the minimal randomization address is 0. That does not lead to a possible overlap with the decompressor image (which also starts from 0) since by that time the image range is already reserved. Do not assume the decompressor range is reserved and always provide the minimal randomization address for .amode31 sections beyond the decompressor. That is a prerequisite for moving the lowcore memory address away from NULL.
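For reference, the semantics randomize_within_range() must provide can be sketched as below. The sketch is inferred from the call site only (size, alignment, minimum and maximum address, 0 on failure) and deliberately ignores details such as skipping already reserved physical memory ranges; it is not the actual implementation.

/* Minimal sketch of randomize_within_range() semantics, NOT the real code. */
static unsigned long sketch_randomize_within_range(unsigned long size,
						   unsigned long align,
						   unsigned long min,
						   unsigned long max)
{
	unsigned long slots;

	min = ALIGN(min, align);
	if (min >= max || max - min < size)
		return 0;
	/* number of aligned start addresses that keep [addr, addr+size) in range */
	slots = (max - min - size) / align + 1;
	return min + (get_random_long() % slots) * align;
}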
Signed-off-by: Alexander Gordeev Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/startup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index c59014945af0..cc8753c0c121 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -478,8 +478,12 @@ void startup_kernel(void) * before the kernel started. Therefore, in case the two sections * overlap there is no risk of corrupting any data. */ - if (kaslr_enabled()) - amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G); + if (kaslr_enabled()) { + unsigned long amode31_min; + + amode31_min = (unsigned long)_decompressor_end; + amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); + } if (!amode31_lma) amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); From a795eeaf851b91c79fc357a245ca72fd1e7df906 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 16 Jul 2024 09:26:13 +0200 Subject: [PATCH 09/39] s390/smp: Handle restart interrupt on ipl cpu The current smp code allows triggering a restart interrupt on CPUs that are offline in Linux. To allow using the percpu infrastructure instead of the pcpu_devices array, switch to the ipl cpu, which is always online, before calling do_restart(). Reviewed-by: Heiko Carstens Reviewed-by: Alexander Gordeev Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/smp.h | 1 - arch/s390/kernel/ipl.c | 2 +- arch/s390/kernel/smp.c | 15 --------------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/arch/s390/include/asm/smp.h b/arch/s390/include/asm/smp.h index c13c79025348..cd835f4fb11a 100644 --- a/arch/s390/include/asm/smp.h +++ b/arch/s390/include/asm/smp.h @@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle); extern void arch_send_call_function_single_ipi(int cpu); extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); -extern void smp_call_online_cpu(void (*func)(void *), void *); extern void smp_call_ipl_cpu(void (*func)(void *), void *); extern void smp_emergency_stop(void); diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 3a7d6e172211..f17bb7bf9392 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -2112,7 +2112,7 @@ void do_restart(void *arg) tracing_off(); debug_locks_off(); lgr_info_log(); - smp_call_online_cpu(__do_restart, arg); + smp_call_ipl_cpu(__do_restart, arg); } /* on halt */ diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index c3c54adf67bc..1e1290525423 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -382,21 +382,6 @@ static int pcpu_set_smt(unsigned int mtid) return cc; } -/* - * Call function on an online CPU. - */ -void smp_call_online_cpu(void (*func)(void *), void *data) -{ - struct pcpu *pcpu; - - /* Use the current cpu if it is online. */ - pcpu = pcpu_find_address(cpu_online_mask, stap()); - if (!pcpu) - /* Use the first online cpu. */ - pcpu = pcpu_devices + cpumask_first(cpu_online_mask); - pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack); -} - /* * Call function on the ipl CPU.
*/ From 90fc5ac28235843b1d5070c5dac9b01e7d39b24c Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 16 Jul 2024 09:26:14 +0200 Subject: [PATCH 10/39] s390/smp: Switch pcpu_devices to percpu In preparation of moving the CIF flags from lowcore to pcpu_devices, convert the pcpu_devices array to use the percpu infrastructure. This is required because using the pcpu_devices array as it is would introduce a performance penalty due to the fact that CPU flags for multiple CPUs would end up in the same cacheline. Note that a pointer to the pcpu struct of the IPL CPU is still required. This is because a restart interrupt can be triggered on an offline CPU. s390 stores the percpu offset in lowcore, but offline CPUs have no lowcore area allocated. So percpu data cannot be used from an offline CPU and we need to get the pcpu pointer for the IPL cpu from somewhere else. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/smp.c | 113 +++++++++++++++++++++-------------------- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 1e1290525423..b36b089b9a26 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -83,7 +83,14 @@ struct pcpu { }; static u8 boot_core_type; -static struct pcpu pcpu_devices[NR_CPUS]; +static DEFINE_PER_CPU(struct pcpu, pcpu_devices); +/* + * Pointer to the pcpu area of the boot CPU. This is required when a restart + * interrupt is triggered on an offline CPU. For that case accessing percpu + * data with the common primitives does not work, since the percpu offset is + * stored in a non existent lowcore. + */ +static struct pcpu *ipl_pcpu; unsigned int smp_cpu_mt_shift; EXPORT_SYMBOL(smp_cpu_mt_shift); @@ -174,8 +181,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address) int cpu; for_each_cpu(cpu, mask) - if (pcpu_devices[cpu].address == address) - return pcpu_devices + cpu; + if (per_cpu(pcpu_devices, cpu).address == address) + return &per_cpu(pcpu_devices, cpu); return NULL; } @@ -230,13 +237,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu) return -ENOMEM; } -static void pcpu_free_lowcore(struct pcpu *pcpu) +static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu) { unsigned long async_stack, nodat_stack, mcck_stack; struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET; async_stack = lc->async_stack - STACK_INIT_OFFSET; @@ -277,12 +282,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) arch_spin_lock_setup(cpu); } -static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) +static void pcpu_attach_task(int cpu, struct task_struct *tsk) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET; lc->current_task = (unsigned long)tsk; @@ -296,18 +299,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk) lc->steal_timer = 0; } -static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data) +static void pcpu_start_fn(int cpu, void (*func)(void *), void *data) { struct lowcore *lc; - int cpu; - cpu = pcpu - pcpu_devices; lc = lowcore_ptr[cpu]; lc->restart_stack = lc->kernel_stack; lc->restart_fn = (unsigned long) func; lc->restart_data = (unsigned long) data; lc->restart_source = -1U; - pcpu_sigp_retry(pcpu, SIGP_RESTART, 0); + 
pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0); } typedef void (pcpu_delegate_fn)(void *); @@ -320,14 +321,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data) func(data); /* should not return */ } -static void pcpu_delegate(struct pcpu *pcpu, +static void pcpu_delegate(struct pcpu *pcpu, int cpu, pcpu_delegate_fn *func, void *data, unsigned long stack) { struct lowcore *lc, *abs_lc; unsigned int source_cpu; - lc = lowcore_ptr[pcpu - pcpu_devices]; + lc = lowcore_ptr[cpu]; source_cpu = stap(); if (pcpu->address == source_cpu) { @@ -377,7 +378,7 @@ static int pcpu_set_smt(unsigned int mtid) smp_cpu_mt_shift = 0; while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift)) smp_cpu_mt_shift++; - pcpu_devices[0].address = stap(); + per_cpu(pcpu_devices, 0).address = stap(); } return cc; } @@ -389,11 +390,10 @@ void smp_call_ipl_cpu(void (*func)(void *), void *data) { struct lowcore *lc = lowcore_ptr[0]; - if (pcpu_devices[0].address == stap()) + if (ipl_pcpu->address == stap()) lc = get_lowcore(); - pcpu_delegate(&pcpu_devices[0], func, data, - lc->nodat_stack); + pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack); } int smp_find_processor_id(u16 address) @@ -401,21 +401,21 @@ int smp_find_processor_id(u16 address) int cpu; for_each_present_cpu(cpu) - if (pcpu_devices[cpu].address == address) + if (per_cpu(pcpu_devices, cpu).address == address) return cpu; return -1; } void schedule_mcck_handler(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending); } bool notrace arch_vcpu_is_preempted(int cpu) { if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu)) return false; - if (pcpu_running(pcpu_devices + cpu)) + if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu))) return false; return true; } @@ -427,7 +427,7 @@ void notrace smp_yield_cpu(int cpu) return; diag_stat_inc_norecursion(DIAG_STAT_X09C); asm volatile("diag %0,0,0x9c" - : : "d" (pcpu_devices[cpu].address)); + : : "d" (per_cpu(pcpu_devices, cpu).address)); } EXPORT_SYMBOL_GPL(smp_yield_cpu); @@ -448,7 +448,7 @@ void notrace smp_emergency_stop(void) end = get_tod_clock() + (1000000UL << 12); for_each_cpu(cpu, &cpumask) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); set_bit(ec_stop_cpu, &pcpu->ec_mask); while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL, 0, NULL) == SIGP_CC_BUSY && @@ -457,7 +457,7 @@ void notrace smp_emergency_stop(void) } while (get_tod_clock() < end) { for_each_cpu(cpu, &cpumask) - if (pcpu_stopped(pcpu_devices + cpu)) + if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu))) cpumask_clear_cpu(cpu, &cpumask); if (cpumask_empty(&cpumask)) break; @@ -472,6 +472,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop); */ void smp_send_stop(void) { + struct pcpu *pcpu; int cpu; /* Disable all interrupts/machine checks */ @@ -487,8 +488,9 @@ void smp_send_stop(void) for_each_online_cpu(cpu) { if (cpu == smp_processor_id()) continue; - pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0); - while (!pcpu_stopped(pcpu_devices + cpu)) + pcpu = per_cpu_ptr(&pcpu_devices, cpu); + pcpu_sigp_retry(pcpu, SIGP_STOP, 0); + while (!pcpu_stopped(pcpu)) cpu_relax(); } } @@ -502,7 +504,7 @@ static void smp_handle_ext_call(void) unsigned long bits; /* handle bit signal external calls */ - bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0); + bits = this_cpu_xchg(pcpu_devices.ec_mask, 0); if (test_bit(ec_stop_cpu, &bits)) smp_stop_cpu(); if (test_bit(ec_schedule, &bits)) @@ -527,12 +529,12 @@ void 
arch_send_call_function_ipi_mask(const struct cpumask *mask) int cpu; for_each_cpu(cpu, mask) - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } void arch_send_call_function_single_ipi(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single); } /* @@ -542,13 +544,13 @@ void arch_send_call_function_single_ipi(int cpu) */ void arch_smp_send_reschedule(int cpu) { - pcpu_ec_call(pcpu_devices + cpu, ec_schedule); + pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule); } #ifdef CONFIG_IRQ_WORK void arch_irq_work_raise(void) { - pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work); + pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work); } #endif @@ -560,7 +562,7 @@ int smp_store_status(int cpu) struct pcpu *pcpu; unsigned long pa; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); lc = lowcore_ptr[cpu]; pa = __pa(&lc->floating_pt_save_area); if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS, @@ -668,17 +670,17 @@ void __init smp_save_dump_secondary_cpus(void) void smp_cpu_set_polarization(int cpu, int val) { - pcpu_devices[cpu].polarization = val; + per_cpu(pcpu_devices, cpu).polarization = val; } int smp_cpu_get_polarization(int cpu) { - return pcpu_devices[cpu].polarization; + return per_cpu(pcpu_devices, cpu).polarization; } int smp_cpu_get_cpu_address(int cpu) { - return pcpu_devices[cpu].address; + return per_cpu(pcpu_devices, cpu).address; } static void __ref smp_get_core_info(struct sclp_core_info *info, int early) @@ -717,7 +719,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail, for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) { if (pcpu_find_address(cpu_present_mask, address + i)) continue; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); pcpu->address = address + i; if (configured) pcpu->state = CPU_STATE_CONFIGURED; @@ -752,7 +754,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early) * that all SMT threads get subsequent logical CPU numbers. 
*/ if (early) { - core_id = pcpu_devices[0].address >> smp_cpu_mt_shift; + core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift; for (i = 0; i < info->configured; i++) { core = &info->core[i]; if (core->core_id == core_id) { @@ -852,7 +854,7 @@ static void smp_start_secondary(void *cpuvoid) /* Upping and downing of CPUs */ int __cpu_up(unsigned int cpu, struct task_struct *tidle) { - struct pcpu *pcpu = pcpu_devices + cpu; + struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu); int rc; if (pcpu->state != CPU_STATE_CONFIGURED) @@ -870,8 +872,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) */ system_ctlreg_lock(); pcpu_prepare_secondary(pcpu, cpu); - pcpu_attach_task(pcpu, tidle); - pcpu_start_fn(pcpu, smp_start_secondary, NULL); + pcpu_attach_task(cpu, tidle); + pcpu_start_fn(cpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); @@ -916,10 +918,10 @@ void __cpu_die(unsigned int cpu) struct pcpu *pcpu; /* Wait until target cpu is down */ - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); while (!pcpu_stopped(pcpu)) cpu_relax(); - pcpu_free_lowcore(pcpu); + pcpu_free_lowcore(pcpu, cpu); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); } @@ -927,7 +929,7 @@ void __cpu_die(unsigned int cpu) void __noreturn cpu_die(void) { idle_task_exit(); - pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0); + pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0); for (;;) ; } @@ -957,10 +959,9 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { - struct pcpu *pcpu = pcpu_devices; - WARN_ON(!cpu_present(0) || !cpu_online(0)); - pcpu->state = CPU_STATE_CONFIGURED; + ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); + ipl_pcpu->state = CPU_STATE_CONFIGURED; get_lowcore()->percpu_offset = __per_cpu_offset[0]; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } @@ -969,8 +970,8 @@ void __init smp_setup_processor_id(void) { struct lowcore *lc = get_lowcore(); - pcpu_devices[0].address = stap(); lc->cpu_nr = 0; + per_cpu(pcpu_devices, 0).address = stap(); lc->spinlock_lockval = arch_spin_lockval(0); lc->spinlock_index = 0; } @@ -992,7 +993,7 @@ static ssize_t cpu_configure_show(struct device *dev, ssize_t count; mutex_lock(&smp_cpu_state_mutex); - count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state); + count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state); mutex_unlock(&smp_cpu_state_mutex); return count; } @@ -1018,7 +1019,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) if (cpu_online(cpu + i)) goto out; - pcpu = pcpu_devices + cpu; + pcpu = per_cpu_ptr(&pcpu_devices, cpu); rc = 0; switch (val) { case 0: @@ -1030,7 +1031,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_STANDBY; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1045,7 +1046,7 @@ static ssize_t cpu_configure_store(struct device *dev, for (i = 0; i <= smp_cpu_mtid; i++) { if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i)) continue; - pcpu[i].state = CPU_STATE_CONFIGURED; + per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED; smp_cpu_set_polarization(cpu + i, POLARIZATION_UNKNOWN); } @@ -1064,7 +1065,7 @@ static DEVICE_ATTR(configure, 0644, 
cpu_configure_show, cpu_configure_store); static ssize_t show_cpu_address(struct device *dev, struct device_attribute *attr, char *buf) { - return sprintf(buf, "%d\n", pcpu_devices[dev->id].address); + return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address); } static DEVICE_ATTR(address, 0444, show_cpu_address, NULL); @@ -1090,14 +1091,14 @@ static struct attribute_group cpu_online_attr_group = { static int smp_cpu_online(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group); } static int smp_cpu_pre_down(unsigned int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group); return 0; @@ -1110,7 +1111,7 @@ bool arch_cpu_is_hotpluggable(int cpu) int arch_register_cpu(int cpu) { - struct cpu *c = &per_cpu(cpu_devices, cpu); + struct cpu *c = per_cpu_ptr(&cpu_devices, cpu); int rc; c->hotpluggable = arch_cpu_is_hotpluggable(cpu); From d3604ffba1521f59f312be3f19999084dddef446 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 16 Jul 2024 09:26:15 +0200 Subject: [PATCH 11/39] s390: Move CIF flags to struct pcpu To allow testing flags for offline CPUs, move the CIF flags to struct pcpu. To avoid having to calculate the array index for each access, add a pointer to the pcpu member for the current cpu to lowcore. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 3 +-- arch/s390/include/asm/processor.h | 26 ++++++++++++++++++++------ arch/s390/kernel/asm-offsets.c | 4 +++- arch/s390/kernel/entry.S | 3 ++- arch/s390/kernel/setup.c | 1 + arch/s390/kernel/smp.c | 17 +++++++---------- 6 files changed, 34 insertions(+), 20 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index c724e71e1785..bce3a69ab2a3 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -97,8 +97,7 @@ struct lowcore { __u64 save_area_async[8]; /* 0x0240 */ __u64 save_area_restart[1]; /* 0x0280 */ - /* CPU flags. */ - __u64 cpu_flags; /* 0x0288 */ + __u64 pcpu; /* 0x0288 */ /* Return psws. 
*/ psw_t return_psw; /* 0x0290 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index c87cf2b8e81a..5debb12614ad 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -42,21 +42,37 @@ #include #include +struct pcpu { + unsigned long ec_mask; /* bit mask for ec_xxx functions */ + unsigned long ec_clk; /* sigp timestamp for ec_xxx */ + unsigned long flags; /* per CPU flags */ + signed char state; /* physical cpu state */ + signed char polarization; /* physical polarization */ + u16 address; /* physical cpu address */ +}; + +DECLARE_PER_CPU(struct pcpu, pcpu_devices); + typedef long (*sys_call_ptr_t)(struct pt_regs *regs); +static __always_inline struct pcpu *this_pcpu(void) +{ + return (struct pcpu *)(get_lowcore()->pcpu); +} + static __always_inline void set_cpu_flag(int flag) { - get_lowcore()->cpu_flags |= (1UL << flag); + this_pcpu()->flags |= (1UL << flag); } static __always_inline void clear_cpu_flag(int flag) { - get_lowcore()->cpu_flags &= ~(1UL << flag); + this_pcpu()->flags &= ~(1UL << flag); } static __always_inline bool test_cpu_flag(int flag) { - return get_lowcore()->cpu_flags & (1UL << flag); + return this_pcpu()->flags & (1UL << flag); } static __always_inline bool test_and_set_cpu_flag(int flag) @@ -81,9 +97,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag) */ static __always_inline bool test_cpu_flag_of(int flag, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; - - return lc->cpu_flags & (1UL << flag); + return per_cpu(pcpu_devices, cpu).flags & (1UL << flag); } #define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY) diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 26bb45d0e6f1..58fc6b93b475 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -114,7 +114,7 @@ int main(void) OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync); OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async); OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart); - OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags); + OFFSET(__LC_PCPU, lowcore, pcpu); OFFSET(__LC_RETURN_PSW, lowcore, return_psw); OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw); OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer); @@ -186,5 +186,7 @@ int main(void) #endif OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); + + OFFSET(__PCPU_FLAGS, pcpu, flags); return 0; } diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 454b6b92c7f8..fa58bd2c48c9 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -480,7 +480,8 @@ SYM_CODE_START(mcck_int_handler) clgrjl %r9,%r14, 4f larl %r14,.Lsie_leave clgrjhe %r9,%r14, 4f - oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST + lg %r10,__LC_PCPU + oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15) #endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 1faba11d5f0b..178daf4e3563 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -406,6 +406,7 @@ static void __init setup_lowcore(void) panic("%s: Failed to allocate %zu bytes align=%zx\n", __func__, sizeof(*lc), sizeof(*lc)); + lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0); lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT; lc->restart_psw.addr = __pa(restart_int_handler); lc->external_new_psw.mask = PSW_KERNEL_BITS; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c 
index b36b089b9a26..fbba37ec53cf 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -74,16 +74,8 @@ enum { CPU_STATE_CONFIGURED, }; -struct pcpu { - unsigned long ec_mask; /* bit mask for ec_xxx functions */ - unsigned long ec_clk; /* sigp timestamp for ec_xxx */ - signed char state; /* physical cpu state */ - signed char polarization; /* physical polarization */ - u16 address; /* physical cpu address */ -}; - static u8 boot_core_type; -static DEFINE_PER_CPU(struct pcpu, pcpu_devices); +DEFINE_PER_CPU(struct pcpu, pcpu_devices); /* * Pointer to the pcpu area of the boot CPU. This is required when a restart * interrupt is triggered on an offline CPU. For that case accessing percpu * data with the common primitives does not work, since the percpu offset is * stored in a non existent lowcore. @@ -264,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; + lc->pcpu = (unsigned long)pcpu; lc->restart_flags = RESTART_FLAG_CTLREGS; lc->spinlock_lockval = arch_spin_lockval(cpu); lc->spinlock_index = 0; @@ -924,6 +917,7 @@ void __cpu_die(unsigned int cpu) pcpu_free_lowcore(pcpu, cpu); cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask); + pcpu->flags = 0; } void __noreturn cpu_die(void) @@ -959,10 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus) void __init smp_prepare_boot_cpu(void) { + struct lowcore *lc = get_lowcore(); + WARN_ON(!cpu_present(0) || !cpu_online(0)); + lc->percpu_offset = __per_cpu_offset[0]; ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0); ipl_pcpu->state = CPU_STATE_CONFIGURED; - get_lowcore()->percpu_offset = __per_cpu_offset[0]; + lc->pcpu = (unsigned long)ipl_pcpu; smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN); } From 035248a7843242d51f249444fbad7340b7336f68 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 16 Jul 2024 13:50:48 +0200 Subject: [PATCH 12/39] s390/alternatives: Remove noaltinstr option The current kernel doesn't boot without alternative patching on z16 machines. To avoid such bugs in the future, remove the option to disable alternative patching. Signed-off-by: Sven Schnelle Reviewed-by: Alexander Gordeev Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- .../admin-guide/kernel-parameters.txt | 3 --- arch/s390/kernel/alternative.c | 21 ++----------------- 2 files changed, 2 insertions(+), 22 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index c1134ad5f06d..f1384c7b59c9 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3830,9 +3830,6 @@ noalign [KNL,ARM] - noaltinstr [S390,EARLY] Disables alternative instructions - patching (CPU alternatives feature). - noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any IOAPICs that may be present in the system.
diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 1ac5f707dd70..7971bc1bf496 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -7,18 +7,8 @@ #include #include -static int __initdata_or_module alt_instr_disabled; - -static int __init disable_alternative_instructions(char *str) -{ - alt_instr_disabled = 1; - return 0; -} - -early_param("noaltinstr", disable_alternative_instructions); - -static void __init_or_module __apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __init_or_module apply_alternatives(struct alt_instr *start, + struct alt_instr *end) { struct alt_instr *a; u8 *instr, *replacement; @@ -37,13 +27,6 @@ static void __init_or_module __apply_alternatives(struct alt_instr *start, } } -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) -{ - if (!alt_instr_disabled) - __apply_alternatives(start, end); -} - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; void __init apply_alternative_instructions(void) { From 9be999a61232fd6748ebe8654c71bcde1a0fbed3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:49 +0200 Subject: [PATCH 13/39] s390/alternatives: Use consistent naming The alternative code uses the words facility and feature for the same thing. Rename facility to the more generic feature everywhere for consistent naming. Reviewed-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 30 ++++++++++++++--------------- arch/s390/kernel/alternative.c | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index dd93b92c3ab6..07459553d64f 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -11,7 +11,7 @@ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 facility; /* facility bit set for replacement */ + u16 feature; /* feature required for replacement */ u8 instrlen; /* length of original instruction */ } __packed; @@ -48,10 +48,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define OLDINSTR(oldinstr) \ "661:\n\t" oldinstr "\n662:\n" -#define ALTINSTR_ENTRY(facility, num) \ +#define ALTINSTR_ENTRY(feature, num) \ "\t.long 661b - .\n" /* old instruction */ \ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ - "\t.word " __stringify(facility) "\n" /* facility bit */ \ + "\t.word " __stringify(feature) "\n" /* feature */ \ "\t.byte " oldinstr_len "\n" /* instruction len */ \ "\t.org . - (" oldinstr_len ") & 1\n" \ "\t.org .
- (" oldinstr_len ") + (" altinstr_len(num) ")\n" \ @@ -61,24 +61,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n" /* alternative assembly primitive: */ -#define ALTERNATIVE(oldinstr, altinstr, facility) \ +#define ALTERNATIVE(oldinstr, altinstr, feature) \ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr, 1) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility, 1) \ + ALTINSTR_ENTRY(feature, 1) \ ".popsection\n" -#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\ +#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\ ".pushsection .altinstr_replacement, \"ax\"\n" \ ALTINSTR_REPLACEMENT(altinstr1, 1) \ ALTINSTR_REPLACEMENT(altinstr2, 2) \ ".popsection\n" \ OLDINSTR(oldinstr) \ ".pushsection .altinstructions,\"a\"\n" \ - ALTINSTR_ENTRY(facility1, 1) \ - ALTINSTR_ENTRY(facility2, 2) \ + ALTINSTR_ENTRY(feature1, 1) \ + ALTINSTR_ENTRY(feature2, 2) \ ".popsection\n" /* @@ -93,12 +93,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); * For non barrier like inlines please define new variants * without volatile and memory clobber. */ -#define alternative(oldinstr, altinstr, facility) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory") +#define alternative(oldinstr, altinstr, feature) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory") -#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \ - asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \ - altinstr2, facility2) ::: "memory") +#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \ + asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \ + altinstr2, feature2) ::: "memory") /* Alternative inline assembly with input. */ #define alternative_input(oldinstr, newinstr, feature, input...) \ @@ -106,8 +106,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); : : input) /* Like alternative_input, but with a single output argument */ -#define alternative_io(oldinstr, altinstr, facility, output, input...) \ - asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \ +#define alternative_io(oldinstr, altinstr, feature, output, input...) \ + asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \ : output : input) /* Use this macro if more than one output parameter is needed. */ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 7971bc1bf496..e2b6549f8eaf 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -21,7 +21,7 @@ void __init_or_module apply_alternatives(struct alt_instr *start, instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - if (!__test_facility(a->facility, alt_stfle_fac_list)) + if (!__test_facility(a->feature, alt_stfle_fac_list)) continue; s390_kernel_write(instr, replacement, a->instrlen); } From c77f7354c4478bf4560b546913e097b3d4ab50c1 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:50 +0200 Subject: [PATCH 14/39] s390/alternatives: Merge both alternative header files The two alternative header files must stay in sync. This is easier to achieve within one header file. Therefore merge both of them and have only one file, like most other architectures. 
Reviewed-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative-asm.h | 57 ------------------------- arch/s390/include/asm/alternative.h | 50 ++++++++++++++++++++++ arch/s390/kernel/entry.S | 2 +- 3 files changed, 51 insertions(+), 58 deletions(-) delete mode 100644 arch/s390/include/asm/alternative-asm.h diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h deleted file mode 100644 index 608f6287ca9c..000000000000 --- a/arch/s390/include/asm/alternative-asm.h +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_S390_ALTERNATIVE_ASM_H -#define _ASM_S390_ALTERNATIVE_ASM_H - -#ifdef __ASSEMBLY__ - -/* - * Issue one struct alt_instr descriptor entry (need to put it into - * the section .altinstructions, see below). This entry contains - * enough information for the alternatives patching code to patch an - * instruction. See apply_alternatives(). - */ -.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature - .long \orig_start - . - .long \alt_start - . - .word \feature - .byte \orig_end - \orig_start - .org . - ( \orig_end - \orig_start ) & 1 - .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) - .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE oldinstr, newinstr, feature - .pushsection .altinstr_replacement,"ax" -770: \newinstr -771: .popsection -772: \oldinstr -773: .pushsection .altinstructions,"a" - alt_entry 772b, 773b, 770b, 771b, \feature - .popsection -.endm - -/* - * Define an alternative between two instructions. If @feature is - * present, early code in apply_alternatives() replaces @oldinstr with - * @newinstr. - */ -.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 - .pushsection .altinstr_replacement,"ax" -770: \newinstr1 -771: \newinstr2 -772: .popsection -773: \oldinstr -774: .pushsection .altinstructions,"a" - alt_entry 773b, 774b, 770b, 771b,\feature1 - alt_entry 773b, 774b, 771b, 772b,\feature2 - .popsection -.endm - -#endif /* __ASSEMBLY__ */ - -#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 07459553d64f..16de33750d6c 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -116,6 +116,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); /* Use this macro if clobbers are needed without inputs. */ #define ASM_NO_INPUT_CLOBBER(clobber...) : clobber +#else /* __ASSEMBLY__ */ + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .word \feature + .byte \orig_end - \orig_start + .org . - ( \orig_end - \orig_start ) & 1 + .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) + .org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start ) +.endm + +/* + * Define an alternative between two instructions. 
If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: .pushsection .altinstructions,"a" + alt_entry 772b, 773b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: .pushsection .altinstructions,"a" + alt_entry 773b, 774b, 770b, 771b,\feature1 + alt_entry 773b, 774b, 771b, 772b,\feature2 + .popsection +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_ALTERNATIVE_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index fa58bd2c48c9..866917ff013f 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include From ace76fac944b06e46427e0406f315609f278ef91 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:51 +0200 Subject: [PATCH 15/39] s390/alternatives: Move text sync functions Move all text sync functions from alternative.c to processor.c. This way only minimal code is left in alternative.c, which is a prerequisite for using the C file within boot code as well. Reviewed-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/kernel/alternative.c | 20 -------------------- arch/s390/kernel/processor.c | 20 +++++++++++++++++++- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index e2b6549f8eaf..33debc2a26c9 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,8 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include -#include -#include #include #include #include @@ -32,20 +29,3 @@ void __init apply_alternative_instructions(void) { apply_alternatives(__alt_instructions, __alt_instructions_end); } - -static void do_sync_core(void *info) -{ - sync_core(); -} - -void text_poke_sync(void) -{ - on_each_cpu(do_sync_core, NULL, 1); -} - -void text_poke_sync_lock(void) -{ - cpus_read_lock(); - text_poke_sync(); - cpus_read_unlock(); -} diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 65c1464eea4f..5ce9a795a0fe 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -17,7 +17,8 @@ #include #include #include - +#include +#include #include #include #include @@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask) } } +static void do_sync_core(void *info) +{ + sync_core(); +} + +void text_poke_sync(void) +{ + on_each_cpu(do_sync_core, NULL, 1); +} + +void text_poke_sync_lock(void) +{ + cpus_read_lock(); + text_poke_sync(); + cpus_read_unlock(); +} + /* * cpu_init - initializes state that is per-CPU.
*/ From 030f7951c5b293739301fb616add0f1d3fb46073 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:52 +0200 Subject: [PATCH 16/39] s390/uaccess: Make s390_kernel_write() usable for decompressor To avoid lots of ifdefs in C code make s390_kernel_write() usable for the decompressor: simply use memcpy() for this case since there is no write protection enabled that early. Reviewed-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/uaccess.h | 9 ++++++++- arch/s390/mm/maccess.c | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 9213be0529ee..a81f897a81ce 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo return __clear_user(to, n); } -void *s390_kernel_write(void *dst, const void *src, size_t size); +void *__s390_kernel_write(void *dst, const void *src, size_t size); + +static inline void *s390_kernel_write(void *dst, const void *src, size_t size) +{ + if (__is_defined(__DECOMPRESSOR)) + return memcpy(dst, src, size); + return __s390_kernel_write(dst, src, size); +} int __noreturn __put_kernel_bad(void); diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index 632c3a55feed..28a18c42ba99 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz } /* - * s390_kernel_write - write to kernel memory bypassing DAT + * __s390_kernel_write - write to kernel memory bypassing DAT * @dst: destination address * @src: source address * @size: number of bytes to copy @@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz */ static DEFINE_SPINLOCK(s390_kernel_write_lock); -notrace void *s390_kernel_write(void *dst, const void *src, size_t size) +notrace void *__s390_kernel_write(void *dst, const void *src, size_t size) { void *tmp = dst; unsigned long flags; From b3e0c5f734f934dab1cfdef669e3baa165a0cbfe Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:53 +0200 Subject: [PATCH 17/39] s390/alternatives: Rework to allow for callbacks Rework alternatives to allow for callbacks. With this every alternative entry has additional data encoded: - When (aka context) an alternative is supposed to be applied - The type of an alternative, which allows for type specific handling and callbacks - Extra type specific payload (patch information), which can be passed to callbacks in order to decide if an alternative should be applied or not With this only the "late" context is implemented, which means there is no change to the previous behaviour. All code is just converted to the more generic new infrastructure. 
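As an aside, not part of the patch itself: the three fields fit into
the 32 bit feature word with data in bits 0-19, type in bits 20-27,
and ctx in bits 28-31, as defined in the alternative.h hunk below. A
stand-alone C model of the encoding and decoding:

  #include <stdint.h>
  #include <stdio.h>

  #define ALT_DATA_SHIFT	0
  #define ALT_TYPE_SHIFT	20
  #define ALT_CTX_SHIFT		28

  #define ALT_CTX_LATE		1
  #define ALT_TYPE_FACILITY	0

  #define ALT_FACILITY(facility)	(ALT_CTX_LATE << ALT_CTX_SHIFT |	\
					 ALT_TYPE_FACILITY << ALT_TYPE_SHIFT |	\
					 (facility) << ALT_DATA_SHIFT)

  int main(void)
  {
	uint32_t feature = ALT_FACILITY(82);	/* facility 82 as example */

	/* decode the same way the struct alt_instr bitfields do */
	printf("ctx=%u type=%u data=%u\n",
	       feature >> ALT_CTX_SHIFT,
	       (feature >> ALT_TYPE_SHIFT) & 0xff,
	       feature & 0xfffff);	/* prints: ctx=1 type=0 data=82 */
	return 0;
  }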
Reviewed-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 66 ++++++++++++++++++++++++++--- arch/s390/include/asm/processor.h | 2 +- arch/s390/include/asm/spinlock.h | 2 +- arch/s390/kernel/alternative.c | 30 +++++++------ arch/s390/kernel/entry.S | 22 +++++----- arch/s390/lib/spinlock.c | 4 +- 6 files changed, 92 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 16de33750d6c..5b931070be16 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -2,6 +2,44 @@ #ifndef _ASM_S390_ALTERNATIVE_H #define _ASM_S390_ALTERNATIVE_H +/* + * Each alternative comes with a 32 bit feature field: + * union { + * u32 feature; + * struct { + * u32 ctx : 4; + * u32 type : 8; + * u32 data : 20; + * }; + * } + * + * @ctx is a bitfield, where only one bit must be set. Each bit defines + * in which context an alternative is supposed to be applied to the + * kernel image: + * + * - from the decompressor before the kernel itself is executed + * - from early kernel code from within the kernel + * + * @type is a number which defines the type and with that the type + * specific alternative patching. + * + * @data is additional type specific information which defines if an + * alternative should be applied. + */ + +#define ALT_CTX_LATE 1 +#define ALT_CTX_ALL ALT_CTX_LATE + +#define ALT_TYPE_FACILITY 0 + +#define ALT_DATA_SHIFT 0 +#define ALT_TYPE_SHIFT 20 +#define ALT_CTX_SHIFT 28 + +#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + #ifndef __ASSEMBLY__ #include @@ -11,12 +49,30 @@ struct alt_instr { s32 instr_offset; /* original instruction */ s32 repl_offset; /* offset to replacement instruction */ - u16 feature; /* feature required for replacement */ + union { + u32 feature; /* feature required for replacement */ + struct { + u32 ctx : 4; /* context */ + u32 type : 8; /* type of alternative */ + u32 data : 20; /* patching information */ + }; + }; u8 instrlen; /* length of original instruction */ } __packed; -void apply_alternative_instructions(void); -void apply_alternatives(struct alt_instr *start, struct alt_instr *end); +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; + +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx); + +static inline void apply_alternative_instructions(void) +{ + __apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE); +} + +static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + __apply_alternatives(start, end, ALT_CTX_ALL); +} /* * +---------------------------------+ @@ -51,7 +107,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); #define ALTINSTR_ENTRY(feature, num) \ "\t.long 661b - .\n" /* old instruction */ \ "\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \ - "\t.word " __stringify(feature) "\n" /* feature */ \ + "\t.long " __stringify(feature) "\n" /* feature */ \ "\t.byte " oldinstr_len "\n" /* instruction len */ \ "\t.org . - (" oldinstr_len ") & 1\n" \ "\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \ @@ -127,7 +183,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end); .macro alt_entry orig_start, orig_end, alt_start, alt_end, feature .long \orig_start - . .long \alt_start - . 
- .word \feature + .long \feature .byte \orig_end - \orig_start .org . - ( \orig_end - \orig_start ) & 1 .org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start ) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 5debb12614ad..8a52554f49f0 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -419,7 +419,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82)); + asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82))); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index 3e43c90ff135..77d5e804af93 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp) typecheck(int, lp->lock); kcsan_release(); asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */ " sth %1,%0\n" : "=R" (((unsigned short *) &lp->lock)[1]) : "d" (0) : "cc", "memory"); diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 33debc2a26c9..ecabdff89bce 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,31 +1,33 @@ // SPDX-License-Identifier: GPL-2.0 -#include + +#include #include #include -#include -void __init_or_module apply_alternatives(struct alt_instr *start, - struct alt_instr *end) +void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx) { - struct alt_instr *a; u8 *instr, *replacement; + struct alt_instr *a; + bool replace; /* * The scan order should be from start to end. A later scanned * alternative code can overwrite previously scanned alternative code. 
*/ for (a = start; a < end; a++) { + if (!(a->ctx & ctx)) + continue; + switch (a->type) { + case ALT_TYPE_FACILITY: + replace = __test_facility(a->data, alt_stfle_fac_list); + break; + default: + replace = false; + } + if (!replace) + continue; instr = (u8 *)&a->instr_offset + a->instr_offset; replacement = (u8 *)&a->repl_offset + a->repl_offset; - - if (!__test_facility(a->feature, alt_stfle_fac_list)) - continue; s390_kernel_write(instr, replacement, a->instrlen); } } - -extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; -void __init apply_alternative_instructions(void) -{ - apply_alternatives(__alt_instructions, __alt_instructions_end); -} diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 866917ff013f..90027a57a524 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -32,19 +32,19 @@ _LPP_OFFSET = __LC_LPP .macro STBEAR address - ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193) .endm .macro LBEAR address - ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193 + ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm .macro LPSWEY address,lpswe - ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193 + ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193) .endm .macro MBEAR reg - ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193 + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), ALT_FACILITY(193) .endm .macro CHECK_STACK savearea @@ -100,22 +100,22 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_FACILITY(82) .endm .macro BPON - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82 + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82) .endm .macro BPENTER tif_ptr,tif_mask ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ - "j .+12; nop; nop", 82 + "j .+12; nop; nop", ALT_FACILITY(82) .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ - "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82 + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82) .endm #if IS_ENABLED(CONFIG_KVM) @@ -169,7 +169,7 @@ SYM_FUNC_START(__switch_to_asm) aghi %r3,__TASK_pid mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) BR_EX %r14 SYM_FUNC_END(__switch_to_asm) @@ -515,7 +515,7 @@ SYM_CODE_START(mcck_int_handler) jno 0f BPON stpt __LC_EXIT_TIMER -0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193 +0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA), ALT_FACILITY(193) LBEAR 0(%r12) lmg %r11,%r15,__PT_R11(%r11) LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE @@ -551,7 +551,7 @@ SYM_CODE_START(mcck_int_handler) SYM_CODE_END(mcck_int_handler) SYM_CODE_START(restart_int_handler) - ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40 + ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) stg %r15,__LC_SAVE_AREA_RESTART TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4 jz 0f diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c index 0c9a73a18826..9f86ad8fa8b4 100644 --- a/arch/s390/lib/spinlock.c +++ b/arch/s390/lib/spinlock.c 
@@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock) int owner; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */ " l %0,%1\n" : "=d" (owner) : "Q" (*lock) : "memory"); return owner; @@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new) int expected = old; asm_inline volatile( - ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */ + ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */ " cs %0,%3,%1\n" : "=d" (old), "=Q" (*lock) : "0" (old), "d" (new), "Q" (*lock) From 7f9d85998f6c5b989796470fd1ac066232c60723 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Tue, 16 Jul 2024 13:50:54 +0200 Subject: [PATCH 18/39] s390/alternatives: Allow early alternative patching in decompressor Add the required code to patch alternatives early in the decompressor. This is required for the upcoming lowcore relocation changes, where alternatives for facility 193 need to get patched before lowcore alternatives. Reviewed-by: Alexander Gordeev Co-developed-by: Heiko Carstens Signed-off-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/Makefile | 2 +- arch/s390/boot/alternative.c | 3 +++ arch/s390/boot/boot.h | 2 ++ arch/s390/boot/startup.c | 5 +++++ arch/s390/include/asm/alternative.h | 12 +++++++++--- arch/s390/kernel/alternative.c | 5 +++++ arch/s390/kernel/vmlinux.lds.S | 2 ++ 7 files changed, 27 insertions(+), 4 deletions(-) create mode 100644 arch/s390/boot/alternative.c diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index e7658997452b..5d8cb7e3b096 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -39,7 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o diff --git a/arch/s390/boot/alternative.c b/arch/s390/boot/alternative.c new file mode 100644 index 000000000000..abc08d2c873d --- /dev/null +++ b/arch/s390/boot/alternative.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "../kernel/alternative.c" diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index 18027fdc92b0..ed2f0ec24f0d 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -30,6 +30,8 @@ struct vmlinux_info { unsigned long init_mm_off; unsigned long swapper_pg_dir_off; unsigned long invalid_pg_dir_off; + unsigned long alt_instructions; + unsigned long alt_instructions_end; #ifdef CONFIG_KASAN unsigned long kasan_early_shadow_page_off; unsigned long kasan_early_shadow_pte_off; diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cc8753c0c121..cca2f1bad33c 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -376,6 +376,8 @@ static void kaslr_adjust_vmlinux_info(long offset) vmlinux.init_mm_off += offset; vmlinux.swapper_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset; + vmlinux.alt_instructions += offset; + vmlinux.alt_instructions_end += offset; #ifdef CONFIG_KASAN 
vmlinux.kasan_early_shadow_page_off += offset; vmlinux.kasan_early_shadow_pte_off += offset; @@ -507,6 +509,9 @@ void startup_kernel(void) kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); copy_bootdata(); + __apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions, + (struct alt_instr *)_vmlinux_info.alt_instructions_end, + ALT_CTX_EARLY); /* * Save KASLR offset for early dumps, before vmcore_info is set. diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 5b931070be16..32c208332e57 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -27,15 +27,21 @@ * alternative should be applied. */ -#define ALT_CTX_LATE 1 -#define ALT_CTX_ALL ALT_CTX_LATE +#define ALT_CTX_EARLY 1 +#define ALT_CTX_LATE 2 +#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) -#define ALT_TYPE_FACILITY 0 +#define ALT_TYPE_FACILITY_EARLY 0 +#define ALT_TYPE_FACILITY 1 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 #define ALT_CTX_SHIFT 28 +#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY_EARLY << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + #define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index ecabdff89bce..de89c9e8b1a3 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -18,9 +18,14 @@ void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsign if (!(a->ctx & ctx)) continue; switch (a->type) { + case ALT_TYPE_FACILITY_EARLY: + replace = test_facility(a->data); + break; +#ifndef __DECOMPRESSOR case ALT_TYPE_FACILITY: replace = __test_facility(a->data, alt_stfle_fac_list); break; +#endif default: replace = false; } diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 5128ccee9c67..975c654cf5a5 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -222,6 +222,8 @@ SECTIONS QUAD(init_mm) QUAD(swapper_pg_dir) QUAD(invalid_pg_dir) + QUAD(__alt_instructions) + QUAD(__alt_instructions_end) #ifdef CONFIG_KASAN QUAD(kasan_early_shadow_page) QUAD(kasan_early_shadow_pte) From 47837a5c74f432ad992239cfa5966543f466d4df Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:55 +0200 Subject: [PATCH 19/39] s390/nospec: Push down alternative handling The nospec implementation is deeply integrated into the alternatives code: only for nospec an alternative facility list is implemented and used by the alternative code, while it is modified by nospec specific needs. Push down the nospec alternative handling into the nospec by introducing a new alternative type and a specific nospec callback to decide if alternatives should be applied. Also introduce a new global nobp variable which together with facility 82 can be used to decide if nobp is enabled or not. 
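For illustration only (a model, not code from the patch): with the
constants defined at this point in the series, ALT_FACILITY(82) and
ALT_SPEC(82) differ only in the type bits, so the same facility number
can drive two different decision paths in __apply_alternatives():

  #define ALT_CTX_LATE		2
  #define ALT_TYPE_FACILITY	1
  #define ALT_TYPE_SPEC		2

  #define ALT_FACILITY(f)	(ALT_CTX_LATE << 28 | ALT_TYPE_FACILITY << 20 | (f))
  #define ALT_SPEC(f)		(ALT_CTX_LATE << 28 | ALT_TYPE_SPEC << 20 | (f))

  /*
   * ALT_FACILITY(82) == 0x20100052 -> applied iff test_facility(82)
   * ALT_SPEC(82)     == 0x20200052 -> applied iff nobp_enabled(),
   *                                   i.e. nobp && test_facility(82)
   */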
Acked-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 5 +++++ arch/s390/include/asm/nospec-branch.h | 9 +++++++++ arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/alternative.c | 4 ++++ arch/s390/kernel/early.c | 2 -- arch/s390/kernel/entry.S | 8 ++++---- arch/s390/kernel/nospec-branch.c | 16 +++++++++------- arch/s390/kernel/nospec-sysfs.c | 2 +- 8 files changed, 33 insertions(+), 15 deletions(-) diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 32c208332e57..5f56a2f3aba6 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -33,6 +33,7 @@ #define ALT_TYPE_FACILITY_EARLY 0 #define ALT_TYPE_FACILITY 1 +#define ALT_TYPE_SPEC 2 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 @@ -46,6 +47,10 @@ ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) +#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ + ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ + (facility) << ALT_DATA_SHIFT) + #ifndef __ASSEMBLY__ #include diff --git a/arch/s390/include/asm/nospec-branch.h b/arch/s390/include/asm/nospec-branch.h index b9c1f3cae842..192835a3e24d 100644 --- a/arch/s390/include/asm/nospec-branch.h +++ b/arch/s390/include/asm/nospec-branch.h @@ -5,8 +5,17 @@ #ifndef __ASSEMBLY__ #include +#include extern int nospec_disable; +extern int nobp; + +static inline bool nobp_enabled(void) +{ + if (__is_defined(__DECOMPRESSOR)) + return false; + return nobp && test_facility(82); +} void nospec_init_branches(void); void nospec_auto_detect(void); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 8a52554f49f0..3063488014eb 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -419,7 +419,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs) static __always_inline void bpon(void) { - asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82))); + asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82))); } #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index de89c9e8b1a3..05545669552f 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -26,6 +27,9 @@ void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsign replace = __test_facility(a->data, alt_stfle_fac_list); break; #endif + case ALT_TYPE_SPEC: + replace = nobp_enabled(); + break; default: replace = false; } diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 467ed4dba817..d142598e0532 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -193,8 +193,6 @@ static noinline __init void setup_lowcore_early(void) static noinline __init void setup_facility_list(void) { memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); - if (!IS_ENABLED(CONFIG_KERNEL_NOBP)) - __clear_facility(82, alt_stfle_fac_list); } static __init void detect_diag9c(void) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 90027a57a524..8caf893d1b59 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -100,22 +100,22 @@ _LPP_OFFSET = __LC_LPP .endm .macro BPOFF - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_FACILITY(82) + ALTERNATIVE "nop", ".insn 
rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82) .endm .macro BPON - ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82) + ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm .macro BPENTER tif_ptr,tif_mask ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \ - "j .+12; nop; nop", ALT_FACILITY(82) + "j .+12; nop; nop", ALT_SPEC(82) .endm .macro BPEXIT tif_ptr,tif_mask TSTMSK \tif_ptr,\tif_mask ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \ - "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_FACILITY(82) + "jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82) .endm #if IS_ENABLED(CONFIG_KVM) diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9b8c24ebb008..e11ec15960a1 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -4,6 +4,8 @@ #include #include +int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP); + static int __init nobp_setup_early(char *str) { bool enabled; @@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str) * The user explicitly requested nobp=1, enable it and * disable the expoline support. */ - __set_facility(82, alt_stfle_fac_list); + nobp = 1; if (IS_ENABLED(CONFIG_EXPOLINE)) nospec_disable = 1; } else { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } return 0; } @@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early); static int __init nospec_setup_early(char *str) { - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; return 0; } early_param("nospec", nospec_setup_early); @@ -40,7 +42,7 @@ static int __init nospec_report(void) pr_info("Spectre V2 mitigation: etokens\n"); if (nospec_uses_trampoline()) pr_info("Spectre V2 mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } @@ -66,14 +68,14 @@ void __init nospec_auto_detect(void) */ if (__is_defined(CC_USING_EXPOLINE)) nospec_disable = 1; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } else if (__is_defined(CC_USING_EXPOLINE)) { /* * The kernel has been compiled with expolines. * Keep expolines enabled and disable nobp. */ nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } /* * If the kernel has not been compiled with expolines the @@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str) { if (str && !strncmp(str, "on", 2)) { nospec_disable = 0; - __clear_facility(82, alt_stfle_fac_list); + nobp = 0; } if (str && !strncmp(str, "off", 3)) nospec_disable = 1; diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c index 52d4353188ad..a95188818637 100644 --- a/arch/s390/kernel/nospec-sysfs.c +++ b/arch/s390/kernel/nospec-sysfs.c @@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev, return sprintf(buf, "Mitigation: etokens\n"); if (nospec_uses_trampoline()) return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, alt_stfle_fac_list)) + if (nobp_enabled()) return sprintf(buf, "Mitigation: limited branch prediction\n"); return sprintf(buf, "Vulnerable\n"); } From beb8cee06f9b8726616ba87783116cb8fb889c7a Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 16 Jul 2024 13:50:56 +0200 Subject: [PATCH 20/39] s390/alternatives: Remove alternative facility list The alternative and the normal facility list are always identical. Remove the alternative facility list, which allows to simplify the alternatives code. 
Acked-by: Alexander Gordeev Tested-by: Sven Schnelle Signed-off-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/alternative.h | 9 ++++----- arch/s390/include/asm/facility.h | 1 - arch/s390/kernel/alternative.c | 7 +------ arch/s390/kernel/early.c | 6 ------ arch/s390/kernel/setup.c | 1 - 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 5f56a2f3aba6..3ddd6dbe5635 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -31,16 +31,15 @@ #define ALT_CTX_LATE 2 #define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE) -#define ALT_TYPE_FACILITY_EARLY 0 -#define ALT_TYPE_FACILITY 1 -#define ALT_TYPE_SPEC 2 +#define ALT_TYPE_FACILITY 0 +#define ALT_TYPE_SPEC 1 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 #define ALT_CTX_SHIFT 28 -#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ - ALT_TYPE_FACILITY_EARLY << ALT_TYPE_SHIFT | \ +#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) #define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \ diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index d46cc725f024..b7d234838a36 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -20,7 +20,6 @@ #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) extern u64 stfle_fac_list[16]; -extern u64 alt_stfle_fac_list[16]; static inline void __set_facility(unsigned long nr, void *facilities) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index 05545669552f..eae254466192 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -19,14 +19,9 @@ void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsign if (!(a->ctx & ctx)) continue; switch (a->type) { - case ALT_TYPE_FACILITY_EARLY: + case ALT_TYPE_FACILITY: replace = test_facility(a->data); break; -#ifndef __DECOMPRESSOR - case ALT_TYPE_FACILITY: - replace = __test_facility(a->data, alt_stfle_fac_list); - break; -#endif case ALT_TYPE_SPEC: replace = nobp_enabled(); break; diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index d142598e0532..3ce77cee272d 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -190,11 +190,6 @@ static noinline __init void setup_lowcore_early(void) get_lowcore()->preempt_count = INIT_PREEMPT_COUNT; } -static noinline __init void setup_facility_list(void) -{ - memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list)); -} - static __init void detect_diag9c(void) { unsigned int cpu_address; @@ -289,7 +284,6 @@ void __init startup_init(void) lockdep_off(); sort_amode31_extable(); setup_lowcore_early(); - setup_facility_list(); detect_machine_type(); setup_arch_string(); setup_boot_command_line(); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 178daf4e3563..700003e1bc76 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -155,7 +155,6 @@ unsigned int __bootdata_preserved(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support); u64 __bootdata_preserved(stfle_fac_list[16]); EXPORT_SYMBOL(stfle_fac_list); -u64 alt_stfle_fac_list[16]; struct oldmem_data __bootdata_preserved(oldmem_data); unsigned long VMALLOC_START; From 213400c4afd5c89fd8bd17d06addf145f6c8f0d5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Jul 2024 15:41:11 
+0200 Subject: [PATCH 21/39] s390/nmi: Simplify ptregs setup The low level machine check handler code fills the ptregs structure partially with the register contents present at machine check handler entry and partially with contents from the machine check save area. In case of a machine check the contents of all general purpose registers are saved by the CPU to the machine check save area. Therefore simplify the code and fill the ptregs structure by only using the machine check save area as source. Signed-off-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 8caf893d1b59..a72d6494701d 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -491,8 +491,8 @@ SYM_CODE_START(mcck_int_handler) stctg %c1,%c1,__PT_CR1(%r11) lctlg %c1,%c1,__LC_KERNEL_ASCE xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r14,__LC_GPREGS_SAVE_AREA+64 - stmg %r0,%r7,__PT_R0(%r11) + lghi %r14,__LC_GPREGS_SAVE_AREA + mvc __PT_R0(128,%r11),0(%r14) # clear user controlled registers to prevent speculative use xgr %r0,%r0 xgr %r1,%r1 @@ -502,7 +502,6 @@ SYM_CODE_START(mcck_int_handler) xgr %r6,%r6 xgr %r7,%r7 xgr %r10,%r10 - mvc __PT_R8(64,%r11),0(%r14) stmg %r8,%r9,__PT_PSW(%r11) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) From fc8eac33ad93420b4e51cdd811e12d3fc9b531a5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Jul 2024 15:41:12 +0200 Subject: [PATCH 22/39] s390/entry: Move SIE indicator flag to thread info CIF_SIE indicates if a thread is running in SIE context. This is the state of a thread and not the CPU. Therefore move this indicator to thread info. 
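Sketched in C (struct layout as added by the diff below, helper name
made up for illustration): the indicator becomes a per-task byte
instead of a bit in the per-CPU flags word, so it naturally follows
the task when it migrates between CPUs:

  struct thread_info {
	unsigned long flags;		/* low level flags */
	unsigned long syscall_work;	/* SYSCALL_WORK_ flags */
	unsigned int cpu;		/* current CPU */
	unsigned char sie;		/* running in SIE context */
  };

  /* hypothetical C-side check; entry.S tests __TI_sie directly */
  static inline bool running_in_sie(struct task_struct *tsk)
  {
	return task_thread_info(tsk)->sie;
  }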
Signed-off-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/processor.h | 2 -- arch/s390/include/asm/thread_info.h | 1 + arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kernel/entry.S | 20 ++++++++++++-------- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 3063488014eb..5ecd442535b9 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -14,13 +14,11 @@ #include -#define CIF_SIE 0 /* CPU needs SIE exit cleanup */ #define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */ #define CIF_ENABLED_WAIT 5 /* in enabled wait state */ #define CIF_MCCK_GUEST 6 /* machine check happening in guest */ #define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */ -#define _CIF_SIE BIT(CIF_SIE) #define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY) #define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT) #define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST) diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h index d02a709717b8..00ac01874a12 100644 --- a/arch/s390/include/asm/thread_info.h +++ b/arch/s390/include/asm/thread_info.h @@ -40,6 +40,7 @@ struct thread_info { unsigned long flags; /* low level flags */ unsigned long syscall_work; /* SYSCALL_WORK_ flags */ unsigned int cpu; /* current CPU */ + unsigned char sie; /* running in SIE context */ }; /* diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 58fc6b93b475..ffa0dd2dbaac 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -28,6 +28,7 @@ int main(void) BLANK(); /* thread info offsets */ OFFSET(__TI_flags, task_struct, thread_info.flags); + OFFSET(__TI_sie, task_struct, thread_info.sie); BLANK(); /* pt_regs offsets */ OFFSET(__PT_PSW, pt_regs, psw); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a72d6494701d..df351622c94c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -123,7 +123,8 @@ _LPP_OFFSET = __LC_LPP lg %r9,\sie_control # get control block pointer ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + lg %r9,__LC_CURRENT + mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit .endm #endif @@ -183,15 +184,15 @@ SYM_FUNC_END(__switch_to_asm) */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - lg %r12,__LC_CURRENT + lg %r14,__LC_CURRENT stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. 
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 - mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags + mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags lmg %r0,%r13,0(%r4) # load guest gprs 0-13 - oi __LC_CPU_FLAGS+7,_CIF_SIE + mvi __TI_sie(%r14),1 lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now @@ -211,7 +212,8 @@ SYM_FUNC_START(__sie64a) lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - ni __LC_CPU_FLAGS+7,255-_CIF_SIE + lg %r14,__LC_CURRENT + mvi __TI_sie(%r14),0 # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. @@ -394,7 +396,8 @@ SYM_CODE_START(\name) tmhh %r8,0x0001 # interrupting from user ? jnz 1f #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT + tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15) @@ -469,9 +472,10 @@ SYM_CODE_START(mcck_int_handler) TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - TSTMSK __LC_CPU_FLAGS,_CIF_SIE + lg %r10,__LC_CURRENT + tm __TI_sie(%r10),0xff jz .Lmcck_user - # Need to compare the address instead of a CIF_SIE* flag. + # Need to compare the address instead of __TI_SIE flag. # Otherwise there would be a race between setting the flag # and entering SIE (or leaving and clearing the flag). This # would cause machine checks targeted at the guest to be From 13be21f39ab58184fb91844d2242e33805dda40e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 22 Jul 2024 15:41:13 +0200 Subject: [PATCH 23/39] s390/atomic_ops: Disable flag outputs constraint for GCC versions below 14.2.0 GCC may die with an ICE if the flag outputs constraint is used in combination with other inline assemblies. This will be fixed with GCC 14.2.0. Therefore disable the use of the constraint for now. Link: https://gcc.gnu.org/git?p=gcc.git;a=commit;h=cd11413ff7c4353a3e336db415304f788d23a393 Signed-off-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/atomic_ops.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 2b379d1d9046..742c7919cbcd 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -#ifdef __GCC_ASM_FLAG_OUTPUTS__ +/* GCC versions before 14.2.0 may die with an ICE in some configurations. 
*/ +#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200)) static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { From 5ade5be4edf855245955108860d2016af3065a37 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:14 +0200 Subject: [PATCH 24/39] s390: Add infrastructure to patch lowcore accesses The s390 architecture defines two special per-CPU data pages called the "prefix area". In s390-linux terminology this is usually called "lowcore". This memory area contains system configuration data like old/new PSW's for system call/interrupt/machine check handlers and lots of other data. It is normally mapped to logical address 0. This area can only be accessed when in supervisor mode. This means that kernel code can dereference NULL pointers, because accesses to address 0 are allowed. Parts of lowcore can be write protected, but read accesses and write accesses outside of the write protected areas are not caught. To remove this limitation for debugging and testing, remap lowcore to another address and define a function get_lowcore() which simply returns the address where lowcore is mapped at. This would normally introduce a pointer dereference (=memory read). As lowcore is used for several very often used variables, add code to patch this function during runtime, so we avoid the memory reads. For C code get_lowcore() has to be used, for assembly code it is the GET_LC macro. When using this macro/function a reference is added to alternative patching. All these locations will be patched to the actual lowcore location when the kernel is booted or a module is loaded. To make debugging/bisecting problems easier, this patch adds all the infrastructure but the lowcore address is still hardwired to 0. This way the code can be converted on a per function basis, and the functionality is enabled in a patch after all the functions have been converted. Note that this requires at least z16 because the old lpsw instruction only allowed a 12 bit displacement. z16 introduced lpswey which allows 20 bits (signed), so the lowcore can effectively be mapped from address 0 - 0x7e000. To use 0x7e000 as address, a 6 byte lgfi instruction would have to be used in the alternative. To save two bytes, llilh can be used, but this only allows to set bits 16-31 of the address. In order to use the llilh instruction, use 0x70000 as alternative lowcore address. This is still large enough to catch NULL pointer dereferences into large arrays. 
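To make the encoding arithmetic concrete (illustration only, not part
of the patch): llilh places its 16 bit immediate into bits 16-31 of
the target register and zeroes all other bits, and the lpswey
displacement is a 20 bit signed field:

  #include <stdio.h>

  int main(void)
  {
	/* llilh %rX,7 : immediate 7 lands in bits 16-31 */
	unsigned long lc = 0x7UL << 16;

	printf("alternative lowcore base: 0x%lx\n", lc);	/* 0x70000 */
	/* largest positive lpswey displacement */
	printf("lpswey reach: 0x%x\n", (1 << 19) - 1);		/* 0x7ffff */
	return 0;
  }

The two-page lowcore at 0x70000 ends at 0x72000, comfortably within
the 0x7ffff reach, consistent with the 0 - 0x7e000 window quoted
above.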
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/boot.h | 2 ++ arch/s390/boot/ipl_parm.c | 1 + arch/s390/boot/startup.c | 1 + arch/s390/boot/vmem.c | 11 ++++++++++- arch/s390/include/asm/abs_lowcore.h | 8 ++++++++ arch/s390/include/asm/alternative.h | 4 ++++ arch/s390/include/asm/lowcore.h | 23 ++++++++++++++++++++++- arch/s390/kernel/abs_lowcore.c | 1 + arch/s390/kernel/alternative.c | 4 ++++ arch/s390/kernel/alternative.h | 0 arch/s390/kernel/early.c | 1 + arch/s390/kernel/setup.c | 3 +++ 12 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 arch/s390/kernel/alternative.h diff --git a/arch/s390/boot/boot.h b/arch/s390/boot/boot.h index ed2f0ec24f0d..83e2ce050b6c 100644 --- a/arch/s390/boot/boot.h +++ b/arch/s390/boot/boot.h @@ -91,8 +91,10 @@ extern char _end[], _decompressor_end[]; extern unsigned char _compressed_start[]; extern unsigned char _compressed_end[]; extern struct vmlinux_info _vmlinux_info; + #define vmlinux _vmlinux_info +#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset)) #define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index a21f301acd29..337c14931ccb 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index cca2f1bad33c..ce232552bc1c 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(max_mappable); +int __bootdata_preserved(relocate_lowcore); u64 __bootdata_preserved(stfle_fac_list[16]); struct oldmem_data __bootdata_preserved(oldmem_data); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index a255ca189aaa..2847cc059ab7 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]); enum populate_mode { POPULATE_NONE, POPULATE_DIRECT, + POPULATE_LOWCORE, POPULATE_ABS_LOWCORE, POPULATE_IDENTITY, POPULATE_KERNEL, @@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m return -1; case POPULATE_DIRECT: return addr; + case POPULATE_LOWCORE: + return __lowcore_pa(addr); case POPULATE_ABS_LOWCORE: return __abs_lowcore_pa(addr); case POPULATE_KERNEL: @@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit) { + unsigned long lowcore_address = 0; unsigned long start, end; unsigned long asce_type; unsigned long asce_bits; @@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l __arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER); __arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER); + if (relocate_lowcore) + lowcore_address = LOWCORE_ALT_ADDRESS; + /* * To allow prefixing the lowcore must be mapped with 4KB pages. 
* To prevent creation of a large page at address 0 first map * the lowcore and create the identity mapping only afterwards. */ - pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT); + pgtable_populate(lowcore_address, + lowcore_address + sizeof(struct lowcore), + POPULATE_LOWCORE); for_each_physmem_usable_range(i, &start, &end) { pgtable_populate((unsigned long)__identity_va(start), (unsigned long)__identity_va(end), diff --git a/arch/s390/include/asm/abs_lowcore.h b/arch/s390/include/asm/abs_lowcore.h index 6f264b79e377..d20df8c923fc 100644 --- a/arch/s390/include/asm/abs_lowcore.h +++ b/arch/s390/include/asm/abs_lowcore.h @@ -2,6 +2,7 @@ #ifndef _ASM_S390_ABS_LOWCORE_H #define _ASM_S390_ABS_LOWCORE_H +#include #include #define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore)) @@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc) put_cpu(); } +extern int __bootdata_preserved(relocate_lowcore); + +static inline int have_relocated_lowcore(void) +{ + return relocate_lowcore; +} + #endif /* _ASM_S390_ABS_LOWCORE_H */ diff --git a/arch/s390/include/asm/alternative.h b/arch/s390/include/asm/alternative.h index 3ddd6dbe5635..de980c938a3e 100644 --- a/arch/s390/include/asm/alternative.h +++ b/arch/s390/include/asm/alternative.h @@ -33,6 +33,7 @@ #define ALT_TYPE_FACILITY 0 #define ALT_TYPE_SPEC 1 +#define ALT_TYPE_LOWCORE 2 #define ALT_DATA_SHIFT 0 #define ALT_TYPE_SHIFT 20 @@ -50,6 +51,9 @@ ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \ (facility) << ALT_DATA_SHIFT) +#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \ + ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT) + #ifndef __ASSEMBLY__ #include diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index bce3a69ab2a3..52c90b65a2b8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -14,10 +14,15 @@ #include #include #include +#include #define LC_ORDER 1 #define LC_PAGES 2 +#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL) + +#ifndef __ASSEMBLY__ + struct pgm_tdb { u64 data[32]; }; @@ -214,7 +219,14 @@ struct lowcore { static __always_inline struct lowcore *get_lowcore(void) { - return NULL; + struct lowcore *lc; + + if (__is_defined(__DECOMPRESSOR)) + return NULL; + asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE) + : [lc] "=d" (lc) + : [alt] "i" (LOWCORE_ALT_ADDRESS >> 16)); + return lc; } extern struct lowcore *lowcore_ptr[]; @@ -224,4 +236,13 @@ static inline void set_prefix(__u32 address) asm volatile("spx %0" : : "Q" (address) : "memory"); } +#else /* __ASSEMBLY__ */ + +.macro GET_LC reg + ALTERNATIVE "llilh \reg,0", \ + __stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \ + ALT_LOWCORE +.endm + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/kernel/abs_lowcore.c b/arch/s390/kernel/abs_lowcore.c index f9efc54ec4b7..09cd24cbe74e 100644 --- a/arch/s390/kernel/abs_lowcore.c +++ b/arch/s390/kernel/abs_lowcore.c @@ -4,6 +4,7 @@ #include unsigned long __bootdata_preserved(__abs_lowcore); +int __bootdata_preserved(relocate_lowcore); int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc) { diff --git a/arch/s390/kernel/alternative.c b/arch/s390/kernel/alternative.c index eae254466192..8d5d0de35de0 100644 --- a/arch/s390/kernel/alternative.c +++ b/arch/s390/kernel/alternative.c @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -25,6 +26,9 @@ void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsign case ALT_TYPE_SPEC: replace = nobp_enabled(); break; + case ALT_TYPE_LOWCORE: + 
replace = have_relocated_lowcore(); + break; default: replace = false; } diff --git a/arch/s390/kernel/alternative.h b/arch/s390/kernel/alternative.h new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 3ce77cee272d..14d324865e33 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc); decompressor_handled_param(facilities); decompressor_handled_param(nokaslr); decompressor_handled_param(cmma); +decompressor_handled_param(relocate_lowcore); #if IS_ENABLED(CONFIG_KVM) decompressor_handled_param(prot_virt); #endif diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 700003e1bc76..4ec99f73fa27 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p) else pr_info("Linux is running as a guest in 64-bit mode\n"); + if (have_relocated_lowcore()) + pr_info("Lowcore relocated to 0x%px\n", get_lowcore()); + log_component_list(); /* Have one command line that is parsed and saved in /proc/cmdline */ From 39e8c5d6a4ce6512af5178f70c0c5d735141fc10 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:15 +0200 Subject: [PATCH 25/39] s390/head64: Make startup code ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in startup_continue(). Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/head64.S | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 45413b04efc5..396034b2fe67 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -10,6 +10,7 @@ #include #include +#include #include #include #include @@ -18,14 +19,15 @@ __HEAD SYM_CODE_START(startup_continue) larl %r1,tod_clock_base - mvc 0(16,%r1),__LC_BOOT_CLOCK + GET_LC %r2 + mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2) # # Setup stack # larl %r14,init_task - stg %r14,__LC_CURRENT + stg %r14,__LC_CURRENT(%r2) larl %r15,init_thread_union+STACK_INIT_OFFSET - stg %r15,__LC_KERNEL_STACK + stg %r15,__LC_KERNEL_STACK(%r2) brasl %r14,sclp_early_adjust_va # allow sclp_early_printk brasl %r14,startup_init # s390 specific early init brasl %r14,start_kernel # common init code From 12184a46767b40c1c9b022cd96a9b4019ebd368f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:16 +0200 Subject: [PATCH 26/39] s390/entry: Make __sie64a() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in __sie64a(). Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index df351622c94c..618b8b774932 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include _LPP_OFFSET = __LC_LPP @@ -184,7 +185,8 @@ SYM_FUNC_END(__switch_to_asm) */ SYM_FUNC_START(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers - lg %r14,__LC_CURRENT + GET_LC %r13 + lg %r14,__LC_CURRENT(%r13) stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. 
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area @@ -211,8 +213,9 @@ SYM_FUNC_START(__sie64a) .Lsie_skip: lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - lg %r14,__LC_CURRENT + GET_LC %r14 + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce + lg %r14,__LC_CURRENT(%r14) mvi __TI_sie(%r14),0 # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There From ca2f0a26c498c42fcccdf09527e8755481801eea Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:17 +0200 Subject: [PATCH 27/39] s390/entry: Add base register to MBEAR macro In preparation of having lowcore at different address than zero, add the base register to MBEAR. No functional change, because %r0 is passed to the macro. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 618b8b774932..0d624045f2a6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -44,8 +44,9 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193) .endm - .macro MBEAR reg - ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), ALT_FACILITY(193) + .macro MBEAR reg, lowcore + ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\ + ALT_FACILITY(193) .endm .macro CHECK_STACK savearea @@ -282,7 +283,7 @@ SYM_CODE_START(system_call) xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC - MBEAR %r2 + MBEAR %r2,%r0 lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE @@ -424,7 +425,7 @@ SYM_CODE_START(\name) xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - MBEAR %r11 + MBEAR %r11,%r0 stmg %r8,%r9,__PT_PSW(%r11) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,\handler From 6908f8f916f24636621a5b4c300bdf9a0155f07e Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:18 +0200 Subject: [PATCH 28/39] s390/entry: Add base register to SIEEXIT macro In preparation of having lowcore at different address than zero, add the base register to SIEEXIT. No functional change, because %r0 is passed to the macro. 
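Background on why %r0 is a no-op here (architecture behaviour, not
wording from the patch): in the s390 base-displacement format D(B), a
B field of zero means "no base register"; general register 0 is never
read and the effective address is just the displacement. A C model of
the address generation:

  /* sketch of D2(B2) effective address computation */
  static unsigned long effective_address(unsigned long disp,
					 unsigned int base,
					 unsigned long gprs[16])
  {
	return disp + (base ? gprs[base] : 0);
  }

So "lctlg %c1,%c1,__LC_KERNEL_ASCE(%r0)" behaves exactly like the old
"lctlg %c1,%c1,__LC_KERNEL_ASCE"; the macros merely gain a slot that
later patches fill with a register holding the relocated lowcore base.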
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 0d624045f2a6..7a5e11d9db8b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -121,11 +121,11 @@ _LPP_OFFSET = __LC_LPP .endm #if IS_ENABLED(CONFIG_KVM) - .macro SIEEXIT sie_control - lg %r9,\sie_control # get control block pointer - ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE - lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce - lg %r9,__LC_CURRENT + .macro SIEEXIT sie_control,lowcore + lg %r9,\sie_control # get control block pointer + ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE + lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce + lg %r9,__LC_CURRENT(\lowcore) mvi __TI_sie(%r9),0 larl %r9,sie_exit # skip forward to sie_exit .endm @@ -349,7 +349,7 @@ SYM_CODE_START(pgm_check_handler) clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10) + SIEEXIT __SF_SIE_CONTROL(%r10),%r0 #endif 5: stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -404,7 +404,7 @@ SYM_CODE_START(\name) tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r0 #endif 0: CHECK_STACK __LC_SAVE_AREA_ASYNC aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) @@ -491,7 +491,7 @@ SYM_CODE_START(mcck_int_handler) lg %r10,__LC_PCPU oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15) + SIEEXIT __SF_SIE_CONTROL(%r15),%r0 #endif .Lmcck_user: lg %r15,__LC_MCCK_STACK From 86e08d64eec35cbe6a85798add4bfc1218ca9513 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:19 +0200 Subject: [PATCH 29/39] s390/entry: Add base register to CHECK_VMAP_STACK/CHECK_STACK macro In preparation of having lowcore at different address than zero, add the base register to CHECK_VMAP_STACK and CHECK_STACK. No functional change, because %r0 is passed to the macro. 
Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 7a5e11d9db8b..1d12f3c29a43 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -49,30 +49,30 @@ _LPP_OFFSET = __LC_LPP ALT_FACILITY(193) .endm - .macro CHECK_STACK savearea + .macro CHECK_STACK savearea, lowcore #ifdef CONFIG_CHECK_STACK tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD - lghi %r14,\savearea + la %r14,\savearea(\lowcore) jz stack_overflow #endif .endm - .macro CHECK_VMAP_STACK savearea,oklabel + .macro CHECK_VMAP_STACK savearea, lowcore, oklabel #ifdef CONFIG_VMAP_STACK lgr %r14,%r15 nill %r14,0x10000 - THREAD_SIZE oill %r14,STACK_INIT_OFFSET - clg %r14,__LC_KERNEL_STACK + clg %r14,__LC_KERNEL_STACK(\lowcore) je \oklabel - clg %r14,__LC_ASYNC_STACK + clg %r14,__LC_ASYNC_STACK(\lowcore) je \oklabel - clg %r14,__LC_MCCK_STACK + clg %r14,__LC_MCCK_STACK(\lowcore) je \oklabel - clg %r14,__LC_NODAT_STACK + clg %r14,__LC_NODAT_STACK(\lowcore) je \oklabel - clg %r14,__LC_RESTART_STACK + clg %r14,__LC_RESTART_STACK(\lowcore) je \oklabel - lghi %r14,\savearea + la %r14,\savearea(\lowcore) j stack_overflow #else j \oklabel @@ -331,10 +331,10 @@ SYM_CODE_START(pgm_check_handler) jnz 2f # -> enabled, can't be a double fault tm __LC_PGM_ILC+3,0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA_SYNC +2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r0 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r0,4f 3: lg %r15,__LC_KERNEL_STACK 4: la %r11,STACK_FRAME_OVERHEAD(%r15) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) @@ -406,7 +406,7 @@ SYM_CODE_START(\name) BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST SIEEXIT __SF_SIE_CONTROL(%r15),%r0 #endif -0: CHECK_STACK __LC_SAVE_AREA_ASYNC +0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r0 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f 1: lctlg %c1,%c1,__LC_KERNEL_ASCE From 9e1e275fa28d5896ca7cdf8afa5eb58c0117a303 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:20 +0200 Subject: [PATCH 30/39] s390/entry: Make pgm_check_handler() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in pgm_check_handler(). 
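One ordering detail worth spelling out (our reading of the diff below,
not wording from the patch): at program check entry no general purpose
register is free yet, so the gprs cannot be saved relative to a base
obtained via GET_LC without clobbering a register first. The new
STMG_LC macro therefore encodes the lowcore address directly in the
stmg displacement via an alternative; stmg is an RSY-format
instruction with a 20 bit signed displacement, so 0x70000 plus the
save area offset still encodes. Only after %r8-%r15 are saved does
GET_LC load the base into %r13 for all further accesses. A sketch of
the two patched variants:

  /* model only; real selection happens at boot via ALT_LOWCORE */
  static unsigned long save_area_address(unsigned long offset,
					 int relocate_lowcore)
  {
	return (relocate_lowcore ? 0x70000UL : 0x0UL) + offset;
  }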
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/include/asm/lowcore.h | 6 +++++ arch/s390/kernel/entry.S | 47 ++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 21 deletions(-) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 52c90b65a2b8..183ac29afaf8 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -244,5 +244,11 @@ static inline void set_prefix(__u32 address) ALT_LOWCORE .endm +.macro STMG_LC start, end, savearea + ALTERNATIVE "stmg \start, \end, \savearea", \ + __stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \ + ALT_LOWCORE +.endm + #endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_LOWCORE_H */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 1d12f3c29a43..5f63f3fbb34c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -40,8 +40,11 @@ _LPP_OFFSET = __LC_LPP ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193) .endm - .macro LPSWEY address,lpswe - ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY(193) + .macro LPSWEY address, lpswe + ALTERNATIVE_2 "b \lpswe;nopr", \ + ".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \ + __stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \ + ALT_LOWCORE .endm .macro MBEAR reg, lowcore @@ -317,39 +320,40 @@ SYM_CODE_END(ret_from_fork) */ SYM_CODE_START(pgm_check_handler) - stpt __LC_SYS_ENTER_TIMER + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_SYNC lgr %r10,%r15 - lmg %r8,%r9,__LC_PGM_OLD_PSW + lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13) tmhh %r8,0x0001 # coming from user space? jno .Lpgm_skip_asce - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) j 3f # -> fault in user space .Lpgm_skip_asce: 1: tmhh %r8,0x4000 # PER bit set in old PSW ? jnz 2f # -> enabled, can't be a double fault - tm __LC_PGM_ILC+3,0x80 # check for per exception + tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception jnz .Lpgm_svcper # -> single stepped svc -2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r0 +2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) # CHECK_VMAP_STACK branches to stack_overflow or 4f - CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r0,4f -3: lg %r15,__LC_KERNEL_STACK + CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f +3: lg %r15,__LC_KERNEL_STACK(%r13) 4: la %r11,STACK_FRAME_OVERHEAD(%r15) xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,__PT_R0(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC - mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13) + mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13) stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - ltg %r12,__LC_GMAP + ltg %r12,__LC_GMAP(%r13) jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r10),%r0 + SIEEXIT __SF_SIE_CONTROL(%r10),%r13 #endif 5: stmg %r8,%r9,__PT_PSW(%r11) # clear user controlled registers to prevent speculative use @@ -365,11 +369,11 @@ SYM_CODE_START(pgm_check_handler) tmhh %r8,0x0001 # returning to user space? 
jno .Lpgm_exit_kernel STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) .Lpgm_exit_kernel: - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE @@ -378,11 +382,11 @@ SYM_CODE_START(pgm_check_handler) # single stepped system call # .Lpgm_svcper: - mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW + mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13) larl %r14,.Lsysc_per - stg %r14,__LC_RETURN_PSW+8 + stg %r14,__LC_RETURN_PSW+8(%r13) lghi %r14,1 - LBEAR __LC_PGM_LAST_BREAK + LBEAR __LC_PGM_LAST_BREAK(%r13) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per SYM_CODE_END(pgm_check_handler) @@ -596,7 +600,8 @@ SYM_CODE_END(restart_int_handler) * Setup a pt_regs so that show_trace can provide a good call trace. */ SYM_CODE_START(stack_overflow) - lg %r15,__LC_NODAT_STACK # change to panic stack + GET_LC %r15 + lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) stmg %r8,%r9,__PT_PSW(%r11) From bd2c55b307f77fbf19d76250672266ff06f4a324 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:21 +0200 Subject: [PATCH 31/39] s390/entry: Make int handlers ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in the ext/io interrupt handlers. Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5f63f3fbb34c..5c303eff08e0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -395,26 +395,27 @@ SYM_CODE_END(pgm_check_handler) */ .macro INT_HANDLER name,lc_old_psw,handler SYM_CODE_START(\name) - stckf __LC_INT_CLOCK - stpt __LC_SYS_ENTER_TIMER - STBEAR __LC_LAST_BREAK + STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC + GET_LC %r13 + stckf __LC_INT_CLOCK(%r13) + stpt __LC_SYS_ENTER_TIMER(%r13) + STBEAR __LC_LAST_BREAK(%r13) BPOFF - stmg %r8,%r15,__LC_SAVE_AREA_ASYNC - lmg %r8,%r9,\lc_old_psw + lmg %r8,%r9,\lc_old_psw(%r13) tmhh %r8,0x0001 # interrupting from user ? 
jnz 1f #if IS_ENABLED(CONFIG_KVM) - lg %r10,__LC_CURRENT + lg %r10,__LC_CURRENT(%r13) tm __TI_sie(%r10),0xff jz 0f BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15),%r0 + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif -0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r0 +0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13 aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE) j 2f -1: lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK +1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) 2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) la %r11,STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r7,__PT_R0(%r11) @@ -428,18 +429,18 @@ SYM_CODE_START(\name) xgr %r7,%r7 xgr %r10,%r10 xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11) - mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC - MBEAR %r11,%r0 + mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13) + MBEAR %r11,%r13 stmg %r8,%r9,__PT_PSW(%r11) lgr %r2,%r11 # pass pointer to pt_regs brasl %r14,\handler - mvc __LC_RETURN_PSW(16),__PT_PSW(%r11) + mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11) tmhh %r8,0x0001 # returning to user ? jno 2f STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE + lctlg %c1,%c1,__LC_USER_ASCE(%r13) BPON - stpt __LC_EXIT_TIMER + stpt __LC_EXIT_TIMER(%r13) 2: LBEAR __PT_LAST_BREAK(%r11) lmg %r0,%r15,__PT_R0(%r11) LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE From 0001b7bbc53aeb8d31f650701d2a55e498634a2d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:22 +0200 Subject: [PATCH 32/39] s390/entry: Make mchk_int_handler() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in mcck_int_handler(). Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 48 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 5c303eff08e0..a855f901f6e6 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -455,33 +455,34 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq */ SYM_CODE_START(mcck_int_handler) BPOFF - lmg %r8,%r9,__LC_MCK_OLD_PSW - TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE + GET_LC %r13 + lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE jo .Lmcck_panic # yes -> rest of mcck code invalid - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID jno .Lmcck_panic # control registers invalid -> panic ptlb - lghi %r14,__LC_CPU_TIMER_SAVE_AREA - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) - TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID + lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13) + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID jo 3f - la %r14,__LC_SYS_ENTER_TIMER - clc 0(8,%r14),__LC_EXIT_TIMER + la %r14,__LC_SYS_ENTER_TIMER(%r13) + clc 0(8,%r14),__LC_EXIT_TIMER(%r13) jl 1f - la %r14,__LC_EXIT_TIMER -1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER + la %r14,__LC_EXIT_TIMER(%r13) +1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13) jl 2f - la %r14,__LC_LAST_UPDATE_TIMER + la %r14,__LC_LAST_UPDATE_TIMER(%r13) 2: spt 0(%r14) - mvc __LC_MCCK_ENTER_TIMER(8),0(%r14) -3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID + mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14) +3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID jno .Lmcck_panic tmhh %r8,0x0001 # interrupting from user ? 
jnz .Lmcck_user - TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID + TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID jno .Lmcck_panic #if IS_ENABLED(CONFIG_KVM) - lg %r10,__LC_CURRENT + lg %r10,__LC_CURRENT(%r13) tm __TI_sie(%r10),0xff jz .Lmcck_user # Need to compare the address instead of __TI_SIE flag. @@ -496,15 +497,15 @@ SYM_CODE_START(mcck_int_handler) lg %r10,__LC_PCPU oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST 4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST - SIEEXIT __SF_SIE_CONTROL(%r15),%r0 + SIEEXIT __SF_SIE_CONTROL(%r15),%r13 #endif .Lmcck_user: - lg %r15,__LC_MCCK_STACK + lg %r15,__LC_MCCK_STACK(%r13) la %r11,STACK_FRAME_OVERHEAD(%r15) stctg %c1,%c1,__PT_CR1(%r11) - lctlg %c1,%c1,__LC_KERNEL_ASCE + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) - lghi %r14,__LC_GPREGS_SAVE_AREA + lay %r14,__LC_GPREGS_SAVE_AREA(%r13) mvc __PT_R0(128,%r11),0(%r14) # clear user controlled registers to prevent speculative use xgr %r0,%r0 @@ -522,12 +523,13 @@ SYM_CODE_START(mcck_int_handler) brasl %r14,s390_do_machine_check lctlg %c1,%c1,__PT_CR1(%r11) lmg %r0,%r10,__PT_R0(%r11) - mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW - tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ? + mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW + tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ? jno 0f BPON - stpt __LC_EXIT_TIMER -0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA), ALT_FACILITY(193) + stpt __LC_EXIT_TIMER(%r13) +0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\ + ALT_FACILITY(193) LBEAR 0(%r12) lmg %r11,%r15,__PT_R11(%r11) LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE From 4064b711127e1eb5b5fd42d539fd45e3e33c9b6f Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:23 +0200 Subject: [PATCH 33/39] s390/entry: Make restart_int_handler() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in restart_int_handler(). 
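For reference, the tail of restart_int_handler() can be written in C roughly as follows (a sketch under the assumption that the struct lowcore field names match the __LC_* offsets used below; stop_source_cpu() is a hypothetical name for the sigp sense loop, and the calling convention is simplified):

  struct lowcore *lc = get_lowcore();
  void (*fn)(void *) = (void *)lc->restart_fn;  /* __LC_RESTART_FN */
  void *data = (void *)lc->restart_data;        /* __LC_RESTART_DATA */
  int source = (int)lc->restart_source;         /* __LC_RESTART_SOURCE */

  if (source >= 0)
          stop_source_cpu(source);      /* negative -> skip source stop */
  fn(data);

The only change in this patch is that these loads now go through an explicit base register instead of assuming the lowcore at address zero.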
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a855f901f6e6..ca58b3da3916 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -573,15 +573,17 @@ SYM_CODE_START(restart_int_handler) 0: larl %r15,daton_psw lpswe 0(%r15) # turn dat on, keep irqs off .Ldaton: - lg %r15,__LC_RESTART_STACK + GET_LC %r15 + lg %r15,__LC_RESTART_STACK(%r15) xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15) stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART - mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW + GET_LC %r13 + mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13) + mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13) xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15) - lg %r1,__LC_RESTART_FN # load fn, parm & source cpu - lg %r2,__LC_RESTART_DATA - lgf %r3,__LC_RESTART_SOURCE + lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu + lg %r2,__LC_RESTART_DATA(%r13) + lgf %r3,__LC_RESTART_SOURCE(%r13) ltgr %r3,%r3 # test source cpu address jm 1f # negative -> skip source stop 0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu From 7cc86dee44a47d961fd6195fd91f75ce176b992d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:24 +0200 Subject: [PATCH 34/39] s390/entry: Make __switch_to() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in __switch_to(). Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ca58b3da3916..bbdbe3c3a770 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -169,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm) stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev lg %r15,0(%r4,%r3) # start of kernel stack of next agr %r15,%r5 # end of kernel stack of next - stg %r3,__LC_CURRENT # store task struct of next - stg %r15,__LC_KERNEL_STACK # store end of kernel stack + GET_LC %r13 + stg %r3,__LC_CURRENT(%r13) # store task struct of next + stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next aghi %r3,__TASK_pid - mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next + mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next + ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40) lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task - ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40) BR_EX %r14 SYM_FUNC_END(__switch_to_asm) From 9b3dcae128f8803950d646329f2301cae3fe8f4d Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:25 +0200 Subject: [PATCH 35/39] s390/entry: Make ret_from_fork() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in ret_from_fork(). 
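One detail worth calling out in the hunk below: stpt __LC_EXIT_TIMER moves up, in front of the lmg that reloads %r0-%r15. Once that lmg has executed, %r13 no longer holds the lowcore base, so any (%r13)-relative access must happen first. In C terms the constraint looks like this (illustrative names, store_cpu_timer() and restore_user_gprs() are not kernel functions):

  struct lowcore *lc = get_lowcore();   /* GET_LC %r13 */
  store_cpu_timer(&lc->exit_timer);     /* stpt: needs 'lc' - do it now */
  restore_user_gprs();                  /* lmg clobbers the register holding 'lc' */

The same reordering appears again in system_call() in the next patch but one.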
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index bbdbe3c3a770..2bd9ef24ace3 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -307,12 +307,13 @@ SYM_CODE_START(ret_from_fork) lgr %r3,%r11 brasl %r14,__ret_from_fork STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + GET_LC %r13 + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(ret_from_fork) From 361f6ec2fe203760353c708480099e0325295b21 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:26 +0200 Subject: [PATCH 36/39] s390/entry: Make system_call() ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in system_call(). Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/entry.S | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 2bd9ef24ace3..749410cfdbc0 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -264,14 +264,15 @@ EXPORT_SYMBOL(sie_exit) */ SYM_CODE_START(system_call) - stpt __LC_SYS_ENTER_TIMER - stmg %r8,%r15,__LC_SAVE_AREA_SYNC + STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC + GET_LC %r13 + stpt __LC_SYS_ENTER_TIMER(%r13) BPOFF lghi %r14,0 .Lsysc_per: - STBEAR __LC_LAST_BREAK - lctlg %c1,%c1,__LC_KERNEL_ASCE - lg %r15,__LC_KERNEL_STACK + STBEAR __LC_LAST_BREAK(%r13) + lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13) + lg %r15,__LC_KERNEL_STACK(%r13) xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15) # clear user controlled register to prevent speculative use @@ -286,17 +287,17 @@ SYM_CODE_START(system_call) xgr %r10,%r10 xgr %r11,%r11 la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs - mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC - MBEAR %r2,%r0 + mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13) + MBEAR %r2,%r13 lgr %r3,%r14 brasl %r14,__do_syscall STACKLEAK_ERASE - lctlg %c1,%c1,__LC_USER_ASCE - mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) + lctlg %c1,%c1,__LC_USER_ASCE(%r13) + mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15) BPON LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15) + stpt __LC_EXIT_TIMER(%r13) lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15) - stpt __LC_EXIT_TIMER LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE SYM_CODE_END(system_call) From 97cee3dd4a07413a4175e247f550a4931d39cee1 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:27 +0200 Subject: [PATCH 37/39] s390/kdump: Make kdump ready for lowcore relocation In preparation of having lowcore at different address than zero, add the base register to all lowcore accesses in store_status() and __do_machine_kdump(). 
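The __LC_* constants used in store_status() are not magic numbers; they are generated from struct lowcore via asm-offsets, which is what keeps the C accessor in __do_machine_kdump() and the assembly in reipl.S consistent. Roughly (sketch; the exact entries live in arch/s390/kernel/asm-offsets.c and may differ in detail):

  /* asm-offsets style entries assumed by the code below */
  OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
  OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);

Note also the instruction substitutions in the hunk: lghi %r1,<offset> only worked because the offset equalled the absolute address, while lay %r1,<offset>(%r13) forms the address relative to the base. The long-displacement forms (lay, stamy) are also needed because the register save areas sit in the second 4KB page of the lowcore, beyond the 12-bit displacement of the short forms.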
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/kernel/machine_kexec.c | 2 +- arch/s390/kernel/reipl.S | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index f4cf65da6d49..8f681ccfb83a 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data) * This need to be done *after* s390_reset_system set the * prefix register of this CPU to zero */ - memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA), + memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area), phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512); call_nodat(1, int, purgatory, int, 1); diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 88087a32ebc6..69fcaf54d5ca 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -9,6 +9,7 @@ #include #include #include +#include GEN_BR_THUNK %r9 @@ -20,20 +21,15 @@ # r3 = Parameter for function # SYM_CODE_START(store_status) - /* Save register one and load save area base */ - stg %r1,__LC_SAVE_AREA_RESTART + STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA /* General purpose registers */ - lghi %r1,__LC_GPREGS_SAVE_AREA - stmg %r0,%r15,0(%r1) - mvc 8(8,%r1),__LC_SAVE_AREA_RESTART + GET_LC %r13 /* Control registers */ - lghi %r1,__LC_CREGS_SAVE_AREA - stctg %c0,%c15,0(%r1) + stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13) /* Access registers */ - lghi %r1,__LC_AREGS_SAVE_AREA - stam %a0,%a15,0(%r1) + stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13) /* Floating point registers */ - lghi %r1,__LC_FPREGS_SAVE_AREA + lay %r1,__LC_FPREGS_SAVE_AREA(%r13) std %f0, 0x00(%r1) std %f1, 0x08(%r1) std %f2, 0x10(%r1) @@ -51,21 +47,21 @@ SYM_CODE_START(store_status) std %f14,0x70(%r1) std %f15,0x78(%r1) /* Floating point control register */ - lghi %r1,__LC_FP_CREG_SAVE_AREA + lay %r1,__LC_FP_CREG_SAVE_AREA(%r13) stfpc 0(%r1) /* CPU timer */ - lghi %r1,__LC_CPU_TIMER_SAVE_AREA + lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13) stpt 0(%r1) /* Store prefix register */ - lghi %r1,__LC_PREFIX_SAVE_AREA + lay %r1,__LC_PREFIX_SAVE_AREA(%r13) stpx 0(%r1) /* Clock comparator - seven bytes */ - lghi %r1,__LC_CLOCK_COMP_SAVE_AREA larl %r4,clkcmp stckc 0(%r4) + lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13) mvc 1(7,%r1),1(%r4) /* Program status word */ - lghi %r1,__LC_PSW_SAVE_AREA + lay %r1,__LC_PSW_SAVE_AREA(%r13) epsw %r4,%r5 st %r4,0(%r1) st %r5,4(%r1) From 8f1e70adb1a3ecb982bb6c475209b080bf985074 Mon Sep 17 00:00:00 2001 From: Sven Schnelle Date: Mon, 22 Jul 2024 15:41:28 +0200 Subject: [PATCH 38/39] s390/boot: Add cmdline option to relocate lowcore Now that everything has been converted, add the option 'relocate_lowcore' to enable relocating the lowcore. 
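Usage note: with this in place, booting with a command line such as the following (hypothetical example) moves the lowcore to LOWCORE_ALT_ADDRESS:

  root=/dev/dasda1 ... relocate_lowcore

The option is gated on test_facility(193), matching the ALT_FACILITY(193)/ALT_FACILITY_EARLY(193) alternatives used earlier in the series; presumably the lpswey-based exit path is what makes a non-zero lowcore address reachable, so on machines without that facility the option is silently ignored.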
Reviewed-by: Heiko Carstens Signed-off-by: Sven Schnelle Signed-off-by: Vasily Gorbik --- arch/s390/boot/ipl_parm.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/s390/boot/ipl_parm.c b/arch/s390/boot/ipl_parm.c index 337c14931ccb..1773b72a6a7b 100644 --- a/arch/s390/boot/ipl_parm.c +++ b/arch/s390/boot/ipl_parm.c @@ -311,5 +311,7 @@ void parse_boot_command_line(void) prot_virt_host = 1; } #endif + if (!strcmp(param, "relocate_lowcore") && test_facility(193)) + relocate_lowcore = 1; } } From 6dc2e98d5f1de162d1777aee97e59d75d70d07c5 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 4 Jul 2024 11:02:46 +0000 Subject: [PATCH 39/39] s390: Remove protvirt and kvm config guards for uv code Removing the CONFIG_PROTECTED_VIRTUALIZATION_GUEST ifdefs and config option as well as CONFIG_KVM ifdefs in uv files. Having this configurable has been more of a pain than a help. It's time to remove the ifdefs and the config option. Signed-off-by: Janosch Frank Acked-by: Christian Borntraeger Acked-by: Heiko Carstens Signed-off-by: Vasily Gorbik --- arch/s390/Kconfig | 11 ---------- arch/s390/boot/Makefile | 3 +-- arch/s390/boot/uv.c | 8 ------- arch/s390/boot/uv.h | 13 ------------ arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/include/asm/page.h | 2 -- arch/s390/include/asm/uv.h | 32 ---------------------------- arch/s390/kernel/Makefile | 3 +-- arch/s390/kernel/uv.c | 35 +++++++++++++------------------ drivers/s390/char/Kconfig | 2 +- 11 files changed, 18 insertions(+), 93 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 59e0d861e26f..a822f952f64a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -799,17 +799,6 @@ config HAVE_PNETID menu "Virtualization" -config PROTECTED_VIRTUALIZATION_GUEST - def_bool n - prompt "Protected virtualization guest support" - help - Select this option, if you want to be able to run this - kernel as a protected virtualization KVM guest. - Protected virtualization capable machines have a mini hypervisor - located at machine level (an ultravisor). With help of the - Ultravisor, KVM will be able to run "protected" VMs, special - VMs whose memory and management data are unavailable to KVM. 
- config PFAULT def_bool y prompt "Pseudo page fault support" diff --git a/arch/s390/boot/Makefile b/arch/s390/boot/Makefile index 5d8cb7e3b096..4f476884d340 100644 --- a/arch/s390/boot/Makefile +++ b/arch/s390/boot/Makefile @@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o -obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o +obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o diff --git a/arch/s390/boot/uv.c b/arch/s390/boot/uv.c index 1e66d2cbb096..318e6ba95bfd 100644 --- a/arch/s390/boot/uv.c +++ b/arch/s390/boot/uv.c @@ -8,12 +8,8 @@ #include "uv.h" /* will be used in arch/s390/kernel/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); -#endif -#if IS_ENABLED(CONFIG_KVM) int __bootdata_preserved(prot_virt_host); -#endif struct uv_info __bootdata_preserved(uv_info); void uv_query_info(void) @@ -53,14 +49,11 @@ void uv_query_info(void) uv_info.max_secrets = uvcb.max_secrets; } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) && test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list)) prot_virt_guest = 1; -#endif } -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit) { if (is_prot_virt_host() && uv_info.max_sec_stor_addr) @@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void) { prot_virt_host = is_prot_virt_host_capable(); } -#endif diff --git a/arch/s390/boot/uv.h b/arch/s390/boot/uv.h index 0f3070856f8d..da4a4a8d48e0 100644 --- a/arch/s390/boot/uv.h +++ b/arch/s390/boot/uv.h @@ -2,21 +2,8 @@ #ifndef BOOT_UV_H #define BOOT_UV_H -#if IS_ENABLED(CONFIG_KVM) unsigned long adjust_to_uv_max(unsigned long limit); void sanitize_prot_virt_host(void); -#else -static inline unsigned long adjust_to_uv_max(unsigned long limit) -{ - return limit; -} -static inline void sanitize_prot_virt_host(void) {} -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) void uv_query_info(void); -#else -static inline void uv_query_info(void) {} -#endif #endif /* BOOT_UV_H */ diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index f3602414a961..ea63a7342f5f 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index d0d8925fdf09..d8b28ff8ff45 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y CONFIG_CHSC_SCH=y CONFIG_VFIO_CCW=m CONFIG_VFIO_AP=m -CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y CONFIG_CMM=m CONFIG_APPLDATA_BASE=y CONFIG_S390_HYPFS_FS=y diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 5ec41ec3d761..06416b3f94f5 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -174,12 +174,10 @@ static 
inline int devmem_is_allowed(unsigned long pfn) #define HAVE_ARCH_FREE_PAGE #define HAVE_ARCH_ALLOC_PAGE -#if IS_ENABLED(CONFIG_PGSTE) int arch_make_folio_accessible(struct folio *folio); #define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE int arch_make_page_accessible(struct page *page); #define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE -#endif struct vm_layout { unsigned long kaslr_offset; diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0679445cac0b..0b5f8f3e84f1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit) return test_bit_inv(feature_bit, &uv_info.uv_feature_indications); } -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST extern int prot_virt_guest; static inline int is_prot_virt_guest(void) @@ -466,13 +465,6 @@ static inline int uv_remove_shared(unsigned long addr) return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS); } -#else -#define is_prot_virt_guest() 0 -static inline int uv_set_shared(unsigned long addr) { return 0; } -static inline int uv_remove_shared(unsigned long addr) { return 0; } -#endif - -#if IS_ENABLED(CONFIG_KVM) extern int prot_virt_host; static inline int is_prot_virt_host(void) @@ -489,29 +481,5 @@ int uv_convert_from_secure_pte(pte_t pte); int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr); void setup_uv(void); -#else -#define is_prot_virt_host() 0 -static inline void setup_uv(void) {} - -static inline int uv_pin_shared(unsigned long paddr) -{ - return 0; -} - -static inline int uv_destroy_folio(struct folio *folio) -{ - return 0; -} - -static inline int uv_destroy_pte(pte_t pte) -{ - return 0; -} - -static inline int uv_convert_from_secure_pte(pte_t pte) -{ - return 0; -} -#endif #endif /* _ASM_S390_UV_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 7241fa194709..e47a4be54ff8 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o -obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o +obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o extra-y += vmlinux.lds @@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o obj-$(CONFIG_TRACEPOINTS) += trace.o -obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o # vdso obj-y += vdso64/ diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index fa62fa0e369f..36db065c7cf7 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -18,11 +18,22 @@ #include #include +#if !IS_ENABLED(CONFIG_KVM) +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + return 0; +} + +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + return 0; +} +#endif + /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); -#endif /* * uv_info contains both host and guest information but it's currently only @@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest); struct uv_info __bootdata_preserved(uv_info); EXPORT_SYMBOL(uv_info); -#if IS_ENABLED(CONFIG_KVM) int 
__bootdata_preserved(prot_virt_host); EXPORT_SYMBOL(prot_virt_host); @@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page) return arch_make_folio_accessible(page_folio(page)); } EXPORT_SYMBOL_GPL(arch_make_page_accessible); -#endif - -#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM) static ssize_t uv_query_facilities(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { @@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = { static ssize_t uv_is_prot_virt_guest(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST - val = prot_virt_guest; -#endif - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_guest); } static ssize_t uv_is_prot_virt_host(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - int val = 0; - -#if IS_ENABLED(CONFIG_KVM) - val = prot_virt_host; -#endif - - return sysfs_emit(buf, "%d\n", val); + return sysfs_emit(buf, "%d\n", prot_virt_host); } static struct kobj_attribute uv_prot_virt_guest = @@ -790,4 +786,3 @@ static int __init uv_info_init(void) return rc; } device_initcall(uv_info_init); -#endif diff --git a/drivers/s390/char/Kconfig b/drivers/s390/char/Kconfig index 8a03af5ee5b3..80c4e5101c97 100644 --- a/drivers/s390/char/Kconfig +++ b/drivers/s390/char/Kconfig @@ -96,7 +96,7 @@ config SCLP_OFB config S390_UV_UAPI def_tristate m prompt "Ultravisor userspace API" - depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST) + depends on S390 help Selecting exposes parts of the UV interface to userspace by providing a misc character device at /dev/uv.