Merge tag 's390-6.11-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull more s390 updates from Vasily Gorbik:

 - Fix KMSAN build breakage caused by the conflict between s390 and
   mm-stable trees

 - Add KMSAN page markers for ptdump

 - Add runtime constant support

 - Fix __pa/__va for modules under non-GPL licenses by exporting the
   necessary vm_layout struct with EXPORT_SYMBOL to prevent linkage
   problems

 - Fix an endless loop in the CF_DIAG event stop in the CPU Measurement
   Counter Facility code when the counter set size is zero

 - Remove the PROTECTED_VIRTUALIZATION_GUEST config option and enable
   its functionality by default

 - Support allocation of multiple MSI interrupts per device and improve
   logging of architecture-specific limitations

 - Add support for lowcore relocation as a debugging feature to catch
   all NULL pointer dereferences in the kernel address space, improving
   detection beyond the current implementation's limited write access
   protection (a conceptual sketch follows this list)

 - Clean up and rework CPU alternatives to allow for callbacks and early
   patching for the lowcore relocation
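
As a rough illustration of the lowcore relocation item: with the lowcore at
absolute address 0, a kernel NULL pointer dereference hits valid, mapped data
and can go unnoticed, whereas a relocated lowcore lets page 0 stay unmapped so
the access faults immediately. The plain-C sketch below only mimics the base
selection. In the kernel the choice is patched into an "llilh" instruction
through the new ALT_LOWCORE alternative; LOWCORE_ALT_ADDRESS (0x70000) comes
from the patches below, and the flag is set when booting with the new
"relocate_lowcore" command-line option on machines that have facility 193.

    /* Conceptual sketch only -- not kernel code. */
    #include <stdio.h>

    #define LOWCORE_ALT_ADDRESS 0x70000UL

    /* set when "relocate_lowcore" is on the command line and facility
       193 is available */
    static int relocate_lowcore = 1;

    static unsigned long lowcore_base(void)
    {
            /* the kernel patches this constant in at early boot instead
               of branching at runtime */
            return relocate_lowcore ? LOWCORE_ALT_ADDRESS : 0UL;
    }

    int main(void)
    {
            printf("lowcore base: 0x%lx\n", lowcore_base());
            /* with the lowcore away from address 0, page 0 can stay
               unmapped and every NULL pointer dereference faults */
            return 0;
    }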

* tag 's390-6.11-2' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (39 commits)
  s390: Remove protvirt and kvm config guards for uv code
  s390/boot: Add cmdline option to relocate lowcore
  s390/kdump: Make kdump ready for lowcore relocation
  s390/entry: Make system_call() ready for lowcore relocation
  s390/entry: Make ret_from_fork() ready for lowcore relocation
  s390/entry: Make __switch_to() ready for lowcore relocation
  s390/entry: Make restart_int_handler() ready for lowcore relocation
  s390/entry: Make mchk_int_handler() ready for lowcore relocation
  s390/entry: Make int handlers ready for lowcore relocation
  s390/entry: Make pgm_check_handler() ready for lowcore relocation
  s390/entry: Add base register to CHECK_VMAP_STACK/CHECK_STACK macro
  s390/entry: Add base register to SIEEXIT macro
  s390/entry: Add base register to MBEAR macro
  s390/entry: Make __sie64a() ready for lowcore relocation
  s390/head64: Make startup code ready for lowcore relocation
  s390: Add infrastructure to patch lowcore accesses
  s390/atomic_ops: Disable flag outputs constraint for GCC versions below 14.2.0
  s390/entry: Move SIE indicator flag to thread info
  s390/nmi: Simplify ptregs setup
  s390/alternatives: Remove alternative facility list
  ...
Linus Torvalds 2024-07-26 10:47:53 -07:00
commit 65ad409e63
52 changed files with 746 additions and 525 deletions


@ -3830,9 +3830,6 @@
noalign [KNL,ARM]
noaltinstr [S390,EARLY] Disables alternative instructions
patching (CPU alternatives feature).
noapic [SMP,APIC,EARLY] Tells the kernel to not make use of any
IOAPICs that may be present in the system.


@ -799,17 +799,6 @@ config HAVE_PNETID
menu "Virtualization"
config PROTECTED_VIRTUALIZATION_GUEST
def_bool n
prompt "Protected virtualization guest support"
help
Select this option, if you want to be able to run this
kernel as a protected virtualization KVM guest.
Protected virtualization capable machines have a mini hypervisor
located at machine level (an ultravisor). With help of the
Ultravisor, KVM will be able to run "protected" VMs, special
VMs whose memory and management data are unavailable to KVM.
config PFAULT
def_bool y
prompt "Pseudo page fault support"


@ -39,8 +39,7 @@ CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
obj-y := head.o als.o startup.o physmem_info.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o relocs.o alternative.o uv.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
obj-y += $(if $(CONFIG_KERNEL_UNCOMPRESSED),,decompressor.o) info.o
obj-$(CONFIG_KERNEL_ZSTD) += clz_ctz.o


@ -0,0 +1,3 @@
// SPDX-License-Identifier: GPL-2.0
#include "../kernel/alternative.c"


@ -30,6 +30,8 @@ struct vmlinux_info {
unsigned long init_mm_off;
unsigned long swapper_pg_dir_off;
unsigned long invalid_pg_dir_off;
unsigned long alt_instructions;
unsigned long alt_instructions_end;
#ifdef CONFIG_KASAN
unsigned long kasan_early_shadow_page_off;
unsigned long kasan_early_shadow_pte_off;
@ -89,8 +91,10 @@ extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info;
#define vmlinux _vmlinux_info
#define __lowcore_pa(x) ((unsigned long)(x) % sizeof(struct lowcore))
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
#define __kernel_va(x) ((void *)((unsigned long)(x) - __kaslr_offset_phys + __kaslr_offset))
#define __kernel_pa(x) ((unsigned long)(x) - __kaslr_offset + __kaslr_offset_phys)


@ -3,6 +3,7 @@
#include <linux/init.h>
#include <linux/ctype.h>
#include <linux/pgtable.h>
#include <asm/abs_lowcore.h>
#include <asm/page-states.h>
#include <asm/ebcdic.h>
#include <asm/sclp.h>
@ -310,5 +311,7 @@ void parse_boot_command_line(void)
prot_virt_host = 1;
}
#endif
if (!strcmp(param, "relocate_lowcore") && test_facility(193))
relocate_lowcore = 1;
}
}


@ -30,6 +30,7 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata_preserved(max_mappable);
int __bootdata_preserved(relocate_lowcore);
u64 __bootdata_preserved(stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);
@ -376,6 +377,8 @@ static void kaslr_adjust_vmlinux_info(long offset)
vmlinux.init_mm_off += offset;
vmlinux.swapper_pg_dir_off += offset;
vmlinux.invalid_pg_dir_off += offset;
vmlinux.alt_instructions += offset;
vmlinux.alt_instructions_end += offset;
#ifdef CONFIG_KASAN
vmlinux.kasan_early_shadow_page_off += offset;
vmlinux.kasan_early_shadow_pte_off += offset;
@ -478,8 +481,12 @@ void startup_kernel(void)
* before the kernel started. Therefore, in case the two sections
* overlap there is no risk of corrupting any data.
*/
if (kaslr_enabled())
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
if (kaslr_enabled()) {
unsigned long amode31_min;
amode31_min = (unsigned long)_decompressor_end;
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G);
}
if (!amode31_lma)
amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
@ -503,6 +510,9 @@ void startup_kernel(void)
kaslr_adjust_got(__kaslr_offset);
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
copy_bootdata();
__apply_alternatives((struct alt_instr *)_vmlinux_info.alt_instructions,
(struct alt_instr *)_vmlinux_info.alt_instructions_end,
ALT_CTX_EARLY);
/*
* Save KASLR offset for early dumps, before vmcore_info is set.


@ -8,12 +8,8 @@
#include "uv.h"
/* will be used in arch/s390/kernel/uv.c */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
#endif
#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
#endif
struct uv_info __bootdata_preserved(uv_info);
void uv_query_info(void)
@ -53,14 +49,11 @@ void uv_query_info(void)
uv_info.max_secrets = uvcb.max_secrets;
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
if (test_bit_inv(BIT_UVC_CMD_SET_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list) &&
test_bit_inv(BIT_UVC_CMD_REMOVE_SHARED_ACCESS, (unsigned long *)uvcb.inst_calls_list))
prot_virt_guest = 1;
#endif
}
#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit)
{
if (is_prot_virt_host() && uv_info.max_sec_stor_addr)
@ -92,4 +85,3 @@ void sanitize_prot_virt_host(void)
{
prot_virt_host = is_prot_virt_host_capable();
}
#endif


@ -2,21 +2,8 @@
#ifndef BOOT_UV_H
#define BOOT_UV_H
#if IS_ENABLED(CONFIG_KVM)
unsigned long adjust_to_uv_max(unsigned long limit);
void sanitize_prot_virt_host(void);
#else
static inline unsigned long adjust_to_uv_max(unsigned long limit)
{
return limit;
}
static inline void sanitize_prot_virt_host(void) {}
#endif
#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
void uv_query_info(void);
#else
static inline void uv_query_info(void) {}
#endif
#endif /* BOOT_UV_H */


@ -26,6 +26,7 @@ atomic_long_t __bootdata_preserved(direct_pages_count[PG_DIRECT_MAP_MAX]);
enum populate_mode {
POPULATE_NONE,
POPULATE_DIRECT,
POPULATE_LOWCORE,
POPULATE_ABS_LOWCORE,
POPULATE_IDENTITY,
POPULATE_KERNEL,
@ -242,6 +243,8 @@ static unsigned long _pa(unsigned long addr, unsigned long size, enum populate_m
return -1;
case POPULATE_DIRECT:
return addr;
case POPULATE_LOWCORE:
return __lowcore_pa(addr);
case POPULATE_ABS_LOWCORE:
return __abs_lowcore_pa(addr);
case POPULATE_KERNEL:
@ -418,6 +421,7 @@ static void pgtable_populate(unsigned long addr, unsigned long end, enum populat
void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned long asce_limit)
{
unsigned long lowcore_address = 0;
unsigned long start, end;
unsigned long asce_type;
unsigned long asce_bits;
@ -455,12 +459,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l
__arch_set_page_dat((void *)swapper_pg_dir, 1UL << CRST_ALLOC_ORDER);
__arch_set_page_dat((void *)invalid_pg_dir, 1UL << CRST_ALLOC_ORDER);
if (relocate_lowcore)
lowcore_address = LOWCORE_ALT_ADDRESS;
/*
* To allow prefixing the lowcore must be mapped with 4KB pages.
* To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards.
*/
pgtable_populate(0, sizeof(struct lowcore), POPULATE_DIRECT);
pgtable_populate(lowcore_address,
lowcore_address + sizeof(struct lowcore),
POPULATE_LOWCORE);
for_each_physmem_usable_range(i, &start, &end) {
pgtable_populate((unsigned long)__identity_va(start),
(unsigned long)__identity_va(end),


@ -55,7 +55,6 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y


@ -53,7 +53,6 @@ CONFIG_EXPOLINE_AUTO=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_CCW=m
CONFIG_VFIO_AP=m
CONFIG_PROTECTED_VIRTUALIZATION_GUEST=y
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_S390_HYPFS_FS=y


@ -2,6 +2,7 @@
#ifndef _ASM_S390_ABS_LOWCORE_H
#define _ASM_S390_ABS_LOWCORE_H
#include <asm/sections.h>
#include <asm/lowcore.h>
#define ABS_LOWCORE_MAP_SIZE (NR_CPUS * sizeof(struct lowcore))
@ -24,4 +25,11 @@ static inline void put_abs_lowcore(struct lowcore *lc)
put_cpu();
}
extern int __bootdata_preserved(relocate_lowcore);
static inline int have_relocated_lowcore(void)
{
return relocate_lowcore;
}
#endif /* _ASM_S390_ABS_LOWCORE_H */


@ -1,57 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_ALTERNATIVE_ASM_H
#define _ASM_S390_ALTERNATIVE_ASM_H
#ifdef __ASSEMBLY__
/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
* instruction. See apply_alternatives().
*/
.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
.long \orig_start - .
.long \alt_start - .
.word \feature
.byte \orig_end - \orig_start
.org . - ( \orig_end - \orig_start ) & 1
.org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
.org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, feature
.pushsection .altinstr_replacement,"ax"
770: \newinstr
771: .popsection
772: \oldinstr
773: .pushsection .altinstructions,"a"
alt_entry 772b, 773b, 770b, 771b, \feature
.popsection
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
.pushsection .altinstr_replacement,"ax"
770: \newinstr1
771: \newinstr2
772: .popsection
773: \oldinstr
774: .pushsection .altinstructions,"a"
alt_entry 773b, 774b, 770b, 771b,\feature1
alt_entry 773b, 774b, 771b, 772b,\feature2
.popsection
.endm
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_ALTERNATIVE_ASM_H */


@ -2,6 +2,58 @@
#ifndef _ASM_S390_ALTERNATIVE_H
#define _ASM_S390_ALTERNATIVE_H
/*
* Each alternative comes with a 32 bit feature field:
* union {
* u32 feature;
* struct {
* u32 ctx : 4;
* u32 type : 8;
* u32 data : 20;
* };
* }
*
* @ctx is a bitfield, where only one bit must be set. Each bit defines
* in which context an alternative is supposed to be applied to the
* kernel image:
*
* - from the decompressor before the kernel itself is executed
* - from early kernel code from within the kernel
*
* @type is a number which defines the type and with that the type
* specific alternative patching.
*
* @data is additional type specific information which defines if an
* alternative should be applied.
*/
#define ALT_CTX_EARLY 1
#define ALT_CTX_LATE 2
#define ALT_CTX_ALL (ALT_CTX_EARLY | ALT_CTX_LATE)
#define ALT_TYPE_FACILITY 0
#define ALT_TYPE_SPEC 1
#define ALT_TYPE_LOWCORE 2
#define ALT_DATA_SHIFT 0
#define ALT_TYPE_SHIFT 20
#define ALT_CTX_SHIFT 28
#define ALT_FACILITY_EARLY(facility) (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
(facility) << ALT_DATA_SHIFT)
#define ALT_FACILITY(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
(facility) << ALT_DATA_SHIFT)
#define ALT_SPEC(facility) (ALT_CTX_LATE << ALT_CTX_SHIFT | \
ALT_TYPE_SPEC << ALT_TYPE_SHIFT | \
(facility) << ALT_DATA_SHIFT)
#define ALT_LOWCORE (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
ALT_TYPE_LOWCORE << ALT_TYPE_SHIFT)
#ifndef __ASSEMBLY__
#include <linux/types.h>
@ -11,12 +63,30 @@
struct alt_instr {
s32 instr_offset; /* original instruction */
s32 repl_offset; /* offset to replacement instruction */
u16 facility; /* facility bit set for replacement */
union {
u32 feature; /* feature required for replacement */
struct {
u32 ctx : 4; /* context */
u32 type : 8; /* type of alternative */
u32 data : 20; /* patching information */
};
};
u8 instrlen; /* length of original instruction */
} __packed;
void apply_alternative_instructions(void);
void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx);
static inline void apply_alternative_instructions(void)
{
__apply_alternatives(__alt_instructions, __alt_instructions_end, ALT_CTX_LATE);
}
static inline void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
__apply_alternatives(start, end, ALT_CTX_ALL);
}
/*
* +---------------------------------+
@ -48,10 +118,10 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
#define OLDINSTR(oldinstr) \
"661:\n\t" oldinstr "\n662:\n"
#define ALTINSTR_ENTRY(facility, num) \
#define ALTINSTR_ENTRY(feature, num) \
"\t.long 661b - .\n" /* old instruction */ \
"\t.long " b_altinstr(num)"b - .\n" /* alt instruction */ \
"\t.word " __stringify(facility) "\n" /* facility bit */ \
"\t.long " __stringify(feature) "\n" /* feature */ \
"\t.byte " oldinstr_len "\n" /* instruction len */ \
"\t.org . - (" oldinstr_len ") & 1\n" \
"\t.org . - (" oldinstr_len ") + (" altinstr_len(num) ")\n" \
@ -61,24 +131,24 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
b_altinstr(num)":\n\t" altinstr "\n" e_altinstr(num) ":\n"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, altinstr, facility) \
#define ALTERNATIVE(oldinstr, altinstr, feature) \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr, 1) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility, 1) \
ALTINSTR_ENTRY(feature, 1) \
".popsection\n"
#define ALTERNATIVE_2(oldinstr, altinstr1, facility1, altinstr2, facility2)\
#define ALTERNATIVE_2(oldinstr, altinstr1, feature1, altinstr2, feature2)\
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(altinstr1, 1) \
ALTINSTR_REPLACEMENT(altinstr2, 2) \
".popsection\n" \
OLDINSTR(oldinstr) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(facility1, 1) \
ALTINSTR_ENTRY(facility2, 2) \
ALTINSTR_ENTRY(feature1, 1) \
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n"
/*
@ -93,12 +163,12 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
* For non barrier like inlines please define new variants
* without volatile and memory clobber.
*/
#define alternative(oldinstr, altinstr, facility) \
asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) : : : "memory")
#define alternative(oldinstr, altinstr, feature) \
asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) : : : "memory")
#define alternative_2(oldinstr, altinstr1, facility1, altinstr2, facility2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, facility1, \
altinstr2, facility2) ::: "memory")
#define alternative_2(oldinstr, altinstr1, feature1, altinstr2, feature2) \
asm_inline volatile(ALTERNATIVE_2(oldinstr, altinstr1, feature1, \
altinstr2, feature2) ::: "memory")
/* Alternative inline assembly with input. */
#define alternative_input(oldinstr, newinstr, feature, input...) \
@ -106,8 +176,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
: : input)
/* Like alternative_input, but with a single output argument */
#define alternative_io(oldinstr, altinstr, facility, output, input...) \
asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, facility) \
#define alternative_io(oldinstr, altinstr, feature, output, input...) \
asm_inline volatile(ALTERNATIVE(oldinstr, altinstr, feature) \
: output : input)
/* Use this macro if more than one output parameter is needed. */
@ -116,6 +186,56 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
/* Use this macro if clobbers are needed without inputs. */
#define ASM_NO_INPUT_CLOBBER(clobber...) : clobber
#else /* __ASSEMBLY__ */
/*
* Issue one struct alt_instr descriptor entry (need to put it into
* the section .altinstructions, see below). This entry contains
* enough information for the alternatives patching code to patch an
* instruction. See apply_alternatives().
*/
.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
.long \orig_start - .
.long \alt_start - .
.long \feature
.byte \orig_end - \orig_start
.org . - ( \orig_end - \orig_start ) & 1
.org . - ( \orig_end - \orig_start ) + ( \alt_end - \alt_start )
.org . - ( \alt_end - \alt_start ) + ( \orig_end - \orig_start )
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr.
*/
.macro ALTERNATIVE oldinstr, newinstr, feature
.pushsection .altinstr_replacement,"ax"
770: \newinstr
771: .popsection
772: \oldinstr
773: .pushsection .altinstructions,"a"
alt_entry 772b, 773b, 770b, 771b, \feature
.popsection
.endm
/*
* Define an alternative between two instructions. If @feature is
* present, early code in apply_alternatives() replaces @oldinstr with
* @newinstr.
*/
.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
.pushsection .altinstr_replacement,"ax"
770: \newinstr1
771: \newinstr2
772: .popsection
773: \oldinstr
774: .pushsection .altinstructions,"a"
alt_entry 773b, 774b, 770b, 771b,\feature1
alt_entry 773b, 774b, 771b, 772b,\feature2
.popsection
.endm
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_ALTERNATIVE_H */
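
For reference, the feature word described in the comment at the top of this
header packs a 4-bit context, an 8-bit type and 20 bits of data into one
32-bit value. A stand-alone sketch of that packing, using the shift and
constant values from the diff above (illustrative only, not kernel code):

    #include <stdio.h>

    #define ALT_DATA_SHIFT 0
    #define ALT_TYPE_SHIFT 20
    #define ALT_CTX_SHIFT  28

    #define ALT_CTX_EARLY     1
    #define ALT_TYPE_FACILITY 0

    #define ALT_FACILITY_EARLY(facility) \
            (ALT_CTX_EARLY << ALT_CTX_SHIFT | \
             ALT_TYPE_FACILITY << ALT_TYPE_SHIFT | \
             (facility) << ALT_DATA_SHIFT)

    int main(void)
    {
            unsigned int feature = ALT_FACILITY_EARLY(193);

            /* 4-bit ctx, 8-bit type, 20-bit data -- the same split as the
               union in struct alt_instr */
            printf("ctx=%u type=%u data=%u\n",
                   (feature >> ALT_CTX_SHIFT) & 0xf,
                   (feature >> ALT_TYPE_SHIFT) & 0xff,
                   (feature >> ALT_DATA_SHIFT) & 0xfffff);
            return 0;
    }

Written this way, ALT_FACILITY_EARLY(193) describes an alternative that the
decompressor already patches when facility 193 is installed.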


@ -188,7 +188,8 @@ static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new)
return old;
}
#ifdef __GCC_ASM_FLAG_OUTPUTS__
/* GCC versions before 14.2.0 may die with an ICE in some configurations. */
#if defined(__GCC_ASM_FLAG_OUTPUTS__) && !(IS_ENABLED(CONFIG_CC_IS_GCC) && (GCC_VERSION < 140200))
static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new)
{


@ -20,7 +20,6 @@
#define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8)
extern u64 stfle_fac_list[16];
extern u64 alt_stfle_fac_list[16];
static inline void __set_facility(unsigned long nr, void *facilities)
{


@ -12,8 +12,8 @@
static inline bool is_lowcore_addr(void *addr)
{
return addr >= (void *)&S390_lowcore &&
addr < (void *)(&S390_lowcore + 1);
return addr >= (void *)get_lowcore() &&
addr < (void *)(get_lowcore() + 1);
}
static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
@ -25,7 +25,7 @@ static inline void *arch_kmsan_get_meta_or_null(void *addr, bool is_origin)
* order to get a distinct struct page.
*/
addr += (void *)lowcore_ptr[raw_smp_processor_id()] -
(void *)&S390_lowcore;
(void *)get_lowcore();
if (KMSAN_WARN_ON(is_lowcore_addr(addr)))
return NULL;
return kmsan_get_metadata(addr, is_origin);


@ -14,10 +14,15 @@
#include <asm/ctlreg.h>
#include <asm/cpu.h>
#include <asm/types.h>
#include <asm/alternative.h>
#define LC_ORDER 1
#define LC_PAGES 2
#define LOWCORE_ALT_ADDRESS _AC(0x70000, UL)
#ifndef __ASSEMBLY__
struct pgm_tdb {
u64 data[32];
};
@ -97,8 +102,7 @@ struct lowcore {
__u64 save_area_async[8]; /* 0x0240 */
__u64 save_area_restart[1]; /* 0x0280 */
/* CPU flags. */
__u64 cpu_flags; /* 0x0288 */
__u64 pcpu; /* 0x0288 */
/* Return psws. */
psw_t return_psw; /* 0x0290 */
@ -215,7 +219,14 @@ struct lowcore {
static __always_inline struct lowcore *get_lowcore(void)
{
return NULL;
struct lowcore *lc;
if (__is_defined(__DECOMPRESSOR))
return NULL;
asm(ALTERNATIVE("llilh %[lc],0", "llilh %[lc],%[alt]", ALT_LOWCORE)
: [lc] "=d" (lc)
: [alt] "i" (LOWCORE_ALT_ADDRESS >> 16));
return lc;
}
extern struct lowcore *lowcore_ptr[];
@ -225,4 +236,19 @@ static inline void set_prefix(__u32 address)
asm volatile("spx %0" : : "Q" (address) : "memory");
}
#else /* __ASSEMBLY__ */
.macro GET_LC reg
ALTERNATIVE "llilh \reg,0", \
__stringify(llilh \reg, LOWCORE_ALT_ADDRESS >> 16), \
ALT_LOWCORE
.endm
.macro STMG_LC start, end, savearea
ALTERNATIVE "stmg \start, \end, \savearea", \
__stringify(stmg \start, \end, LOWCORE_ALT_ADDRESS + \savearea), \
ALT_LOWCORE
.endm
#endif /* __ASSEMBLY__ */
#endif /* _ASM_S390_LOWCORE_H */


@ -5,8 +5,17 @@
#ifndef __ASSEMBLY__
#include <linux/types.h>
#include <asm/facility.h>
extern int nospec_disable;
extern int nobp;
static inline bool nobp_enabled(void)
{
if (__is_defined(__DECOMPRESSOR))
return false;
return nobp && test_facility(82);
}
void nospec_init_branches(void);
void nospec_auto_detect(void);


@ -174,12 +174,10 @@ static inline int devmem_is_allowed(unsigned long pfn)
#define HAVE_ARCH_FREE_PAGE
#define HAVE_ARCH_ALLOC_PAGE
#if IS_ENABLED(CONFIG_PGSTE)
int arch_make_folio_accessible(struct folio *folio);
#define HAVE_ARCH_MAKE_FOLIO_ACCESSIBLE
int arch_make_page_accessible(struct page *page);
#define HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
#endif
struct vm_layout {
unsigned long kaslr_offset;


@ -14,13 +14,11 @@
#include <linux/bits.h>
#define CIF_SIE 0 /* CPU needs SIE exit cleanup */
#define CIF_NOHZ_DELAY 2 /* delay HZ disable for a tick */
#define CIF_ENABLED_WAIT 5 /* in enabled wait state */
#define CIF_MCCK_GUEST 6 /* machine check happening in guest */
#define CIF_DEDICATED_CPU 7 /* this CPU is dedicated */
#define _CIF_SIE BIT(CIF_SIE)
#define _CIF_NOHZ_DELAY BIT(CIF_NOHZ_DELAY)
#define _CIF_ENABLED_WAIT BIT(CIF_ENABLED_WAIT)
#define _CIF_MCCK_GUEST BIT(CIF_MCCK_GUEST)
@ -42,21 +40,37 @@
#include <asm/irqflags.h>
#include <asm/alternative.h>
struct pcpu {
unsigned long ec_mask; /* bit mask for ec_xxx functions */
unsigned long ec_clk; /* sigp timestamp for ec_xxx */
unsigned long flags; /* per CPU flags */
signed char state; /* physical cpu state */
signed char polarization; /* physical polarization */
u16 address; /* physical cpu address */
};
DECLARE_PER_CPU(struct pcpu, pcpu_devices);
typedef long (*sys_call_ptr_t)(struct pt_regs *regs);
static __always_inline struct pcpu *this_pcpu(void)
{
return (struct pcpu *)(get_lowcore()->pcpu);
}
static __always_inline void set_cpu_flag(int flag)
{
get_lowcore()->cpu_flags |= (1UL << flag);
this_pcpu()->flags |= (1UL << flag);
}
static __always_inline void clear_cpu_flag(int flag)
{
get_lowcore()->cpu_flags &= ~(1UL << flag);
this_pcpu()->flags &= ~(1UL << flag);
}
static __always_inline bool test_cpu_flag(int flag)
{
return get_lowcore()->cpu_flags & (1UL << flag);
return this_pcpu()->flags & (1UL << flag);
}
static __always_inline bool test_and_set_cpu_flag(int flag)
@ -81,9 +95,7 @@ static __always_inline bool test_and_clear_cpu_flag(int flag)
*/
static __always_inline bool test_cpu_flag_of(int flag, int cpu)
{
struct lowcore *lc = lowcore_ptr[cpu];
return lc->cpu_flags & (1UL << flag);
return per_cpu(pcpu_devices, cpu).flags & (1UL << flag);
}
#define arch_needs_cpu() test_cpu_flag(CIF_NOHZ_DELAY)
@ -405,7 +417,7 @@ static __always_inline bool regs_irqs_disabled(struct pt_regs *regs)
static __always_inline void bpon(void)
{
asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", 82));
asm volatile(ALTERNATIVE("nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)));
}
#endif /* __ASSEMBLY__ */
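
The processor.h hunks above move the per-CPU CIF_* flag word out of the
lowcore and into struct pcpu, reached through the new lowcore "pcpu" field. A
minimal stand-alone model of that indirection (the struct layouts are
simplified stand-ins, not the kernel definitions):

    #include <stdio.h>

    struct pcpu    { unsigned long flags; };
    struct lowcore { unsigned long pcpu; }; /* address of this CPU's pcpu */

    static struct pcpu    pcpu_device;
    static struct lowcore lc;

    static struct lowcore *get_lowcore(void) { return &lc; }

    static struct pcpu *this_pcpu(void)
    {
            /* same indirection as the kernel helper: the lowcore holds
               the address of the current CPU's struct pcpu */
            return (struct pcpu *)(get_lowcore()->pcpu);
    }

    int main(void)
    {
            enum { CIF_NOHZ_DELAY = 2 };

            lc.pcpu = (unsigned long)&pcpu_device;
            this_pcpu()->flags |= 1UL << CIF_NOHZ_DELAY; /* set_cpu_flag() */
            printf("flags=%#lx\n", this_pcpu()->flags);
            return 0;
    }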


@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_S390_RUNTIME_CONST_H
#define _ASM_S390_RUNTIME_CONST_H
#include <linux/uaccess.h>
#define runtime_const_ptr(sym) \
({ \
typeof(sym) __ret; \
\
asm_inline( \
"0: iihf %[__ret],%[c1]\n" \
" iilf %[__ret],%[c2]\n" \
".pushsection runtime_ptr_" #sym ",\"a\"\n" \
".long 0b - .\n" \
".popsection" \
: [__ret] "=d" (__ret) \
: [c1] "i" (0x01234567UL), \
[c2] "i" (0x89abcdefUL)); \
__ret; \
})
#define runtime_const_shift_right_32(val, sym) \
({ \
unsigned int __ret = (val); \
\
asm_inline( \
"0: srl %[__ret],12\n" \
".pushsection runtime_shift_" #sym ",\"a\"\n" \
".long 0b - .\n" \
".popsection" \
: [__ret] "+d" (__ret)); \
__ret; \
})
#define runtime_const_init(type, sym) do { \
extern s32 __start_runtime_##type##_##sym[]; \
extern s32 __stop_runtime_##type##_##sym[]; \
\
runtime_const_fixup(__runtime_fixup_##type, \
(unsigned long)(sym), \
__start_runtime_##type##_##sym, \
__stop_runtime_##type##_##sym); \
} while (0)
/* 32-bit immediate for iihf and iilf in bits in I2 field */
static inline void __runtime_fixup_32(u32 *p, unsigned int val)
{
s390_kernel_write(p, &val, sizeof(val));
}
static inline void __runtime_fixup_ptr(void *where, unsigned long val)
{
__runtime_fixup_32(where + 2, val >> 32);
__runtime_fixup_32(where + 8, val);
}
/* Immediate value is lower 12 bits of D2 field of srl */
static inline void __runtime_fixup_shift(void *where, unsigned long val)
{
u32 insn = *(u32 *)where;
insn &= 0xfffff000;
insn |= (val & 63);
s390_kernel_write(where, &insn, sizeof(insn));
}
static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
unsigned long val, s32 *start, s32 *end)
{
while (start < end) {
fn(*start + (void *)start, val);
start++;
}
}
#endif /* _ASM_S390_RUNTIME_CONST_H */
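
The new header above compiles each runtime-constant user into placeholder
immediates (0x01234567/0x89abcdef) and records the patch site in a section of
self-relative 32-bit offsets (".long 0b - ."); at boot, runtime_const_init()
walks that section and rewrites the immediates with the real value via
s390_kernel_write(). Below is a stand-alone mock of that table walk, using
ordinary memory instead of instructions and memcpy() instead of
s390_kernel_write() (illustrative only):

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    static unsigned char text[32];  /* stands in for patchable code */
    static int32_t table[1];        /* stands in for the runtime_ptr_<sym> section */

    static void fixup_site(void *where, unsigned long val)
    {
            uint32_t v = (uint32_t)val;
            memcpy(where, &v, sizeof(v));   /* kernel: __runtime_fixup_32() */
    }

    static void const_fixup(void (*fn)(void *, unsigned long),
                            unsigned long val, int32_t *start, int32_t *end)
    {
            while (start < end) {
                    /* ".long 0b - ." stored an entry-relative offset; add
                       it back to the entry address to find the patch site */
                    fn((char *)start + *start, val);
                    start++;
            }
    }

    int main(void)
    {
            table[0] = (int32_t)((char *)&text[8] - (char *)&table[0]);
            const_fixup(fixup_site, 0x89abcdefUL, table, table + 1);
            printf("patched: %02x %02x %02x %02x\n",
                   text[8], text[9], text[10], text[11]);
            return 0;
    }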


@ -24,7 +24,6 @@ extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
extern void arch_send_call_function_single_ipi(int cpu);
extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
extern void smp_call_online_cpu(void (*func)(void *), void *);
extern void smp_call_ipl_cpu(void (*func)(void *), void *);
extern void smp_emergency_stop(void);


@ -79,7 +79,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
typecheck(int, lp->lock);
kcsan_release();
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", 49) /* NIAI 7 */
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,7,0", ALT_FACILITY(49)) /* NIAI 7 */
" sth %1,%0\n"
: "=R" (((unsigned short *) &lp->lock)[1])
: "d" (0) : "cc", "memory");


@ -40,6 +40,7 @@ struct thread_info {
unsigned long flags; /* low level flags */
unsigned long syscall_work; /* SYSCALL_WORK_ flags */
unsigned int cpu; /* current CPU */
unsigned char sie; /* running in SIE context */
};
/*


@ -332,7 +332,14 @@ static inline unsigned long __must_check clear_user(void __user *to, unsigned lo
return __clear_user(to, n);
}
void *s390_kernel_write(void *dst, const void *src, size_t size);
void *__s390_kernel_write(void *dst, const void *src, size_t size);
static inline void *s390_kernel_write(void *dst, const void *src, size_t size)
{
if (__is_defined(__DECOMPRESSOR))
return memcpy(dst, src, size);
return __s390_kernel_write(dst, src, size);
}
int __noreturn __put_kernel_bad(void);


@ -414,7 +414,6 @@ static inline bool uv_has_feature(u8 feature_bit)
return test_bit_inv(feature_bit, &uv_info.uv_feature_indications);
}
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
extern int prot_virt_guest;
static inline int is_prot_virt_guest(void)
@ -466,13 +465,6 @@ static inline int uv_remove_shared(unsigned long addr)
return share(addr, UVC_CMD_REMOVE_SHARED_ACCESS);
}
#else
#define is_prot_virt_guest() 0
static inline int uv_set_shared(unsigned long addr) { return 0; }
static inline int uv_remove_shared(unsigned long addr) { return 0; }
#endif
#if IS_ENABLED(CONFIG_KVM)
extern int prot_virt_host;
static inline int is_prot_virt_host(void)
@ -489,29 +481,5 @@ int uv_convert_from_secure_pte(pte_t pte);
int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
void setup_uv(void);
#else
#define is_prot_virt_host() 0
static inline void setup_uv(void) {}
static inline int uv_pin_shared(unsigned long paddr)
{
return 0;
}
static inline int uv_destroy_folio(struct folio *folio)
{
return 0;
}
static inline int uv_destroy_pte(pte_t pte)
{
return 0;
}
static inline int uv_convert_from_secure_pte(pte_t pte)
{
return 0;
}
#endif
#endif /* _ASM_S390_UV_H */


@ -43,7 +43,7 @@ obj-y += sysinfo.o lgr.o os_info.o ctlreg.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o
obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o facility.o uv.o
extra-y += vmlinux.lds
@ -80,7 +80,6 @@ obj-$(CONFIG_PERF_EVENTS) += perf_cpum_cf_events.o perf_regs.o
obj-$(CONFIG_PERF_EVENTS) += perf_pai_crypto.o perf_pai_ext.o
obj-$(CONFIG_TRACEPOINTS) += trace.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
# vdso
obj-y += vdso64/


@ -4,6 +4,7 @@
#include <asm/abs_lowcore.h>
unsigned long __bootdata_preserved(__abs_lowcore);
int __bootdata_preserved(relocate_lowcore);
int abs_lowcore_map(int cpu, struct lowcore *lc, bool alloc)
{


@ -1,68 +1,41 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <asm/text-patching.h>
#include <linux/uaccess.h>
#include <asm/nospec-branch.h>
#include <asm/abs_lowcore.h>
#include <asm/alternative.h>
#include <asm/facility.h>
#include <asm/nospec-branch.h>
static int __initdata_or_module alt_instr_disabled;
static int __init disable_alternative_instructions(char *str)
void __apply_alternatives(struct alt_instr *start, struct alt_instr *end, unsigned int ctx)
{
alt_instr_disabled = 1;
return 0;
}
early_param("noaltinstr", disable_alternative_instructions);
static void __init_or_module __apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
struct alt_instr *a;
u8 *instr, *replacement;
struct alt_instr *a;
bool replace;
/*
* The scan order should be from start to end. A later scanned
* alternative code can overwrite previously scanned alternative code.
*/
for (a = start; a < end; a++) {
if (!(a->ctx & ctx))
continue;
switch (a->type) {
case ALT_TYPE_FACILITY:
replace = test_facility(a->data);
break;
case ALT_TYPE_SPEC:
replace = nobp_enabled();
break;
case ALT_TYPE_LOWCORE:
replace = have_relocated_lowcore();
break;
default:
replace = false;
}
if (!replace)
continue;
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
if (!__test_facility(a->facility, alt_stfle_fac_list))
continue;
s390_kernel_write(instr, replacement, a->instrlen);
}
}
void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
if (!alt_instr_disabled)
__apply_alternatives(start, end);
}
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
void __init apply_alternative_instructions(void)
{
apply_alternatives(__alt_instructions, __alt_instructions_end);
}
static void do_sync_core(void *info)
{
sync_core();
}
void text_poke_sync(void)
{
on_each_cpu(do_sync_core, NULL, 1);
}
void text_poke_sync_lock(void)
{
cpus_read_lock();
text_poke_sync();
cpus_read_unlock();
}


@ -28,6 +28,7 @@ int main(void)
BLANK();
/* thread info offsets */
OFFSET(__TI_flags, task_struct, thread_info.flags);
OFFSET(__TI_sie, task_struct, thread_info.sie);
BLANK();
/* pt_regs offsets */
OFFSET(__PT_PSW, pt_regs, psw);
@ -114,7 +115,7 @@ int main(void)
OFFSET(__LC_SAVE_AREA_SYNC, lowcore, save_area_sync);
OFFSET(__LC_SAVE_AREA_ASYNC, lowcore, save_area_async);
OFFSET(__LC_SAVE_AREA_RESTART, lowcore, save_area_restart);
OFFSET(__LC_CPU_FLAGS, lowcore, cpu_flags);
OFFSET(__LC_PCPU, lowcore, pcpu);
OFFSET(__LC_RETURN_PSW, lowcore, return_psw);
OFFSET(__LC_RETURN_MCCK_PSW, lowcore, return_mcck_psw);
OFFSET(__LC_SYS_ENTER_TIMER, lowcore, sys_enter_timer);
@ -186,5 +187,7 @@ int main(void)
#endif
OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs);
DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs));
OFFSET(__PCPU_FLAGS, pcpu, flags);
return 0;
}


@ -48,6 +48,7 @@ decompressor_handled_param(dfltcc);
decompressor_handled_param(facilities);
decompressor_handled_param(nokaslr);
decompressor_handled_param(cmma);
decompressor_handled_param(relocate_lowcore);
#if IS_ENABLED(CONFIG_KVM)
decompressor_handled_param(prot_virt);
#endif
@ -190,13 +191,6 @@ static noinline __init void setup_lowcore_early(void)
get_lowcore()->preempt_count = INIT_PREEMPT_COUNT;
}
static noinline __init void setup_facility_list(void)
{
memcpy(alt_stfle_fac_list, stfle_fac_list, sizeof(alt_stfle_fac_list));
if (!IS_ENABLED(CONFIG_KERNEL_NOBP))
__clear_facility(82, alt_stfle_fac_list);
}
static __init void detect_diag9c(void)
{
unsigned int cpu_address;
@ -291,7 +285,6 @@ void __init startup_init(void)
lockdep_off();
sort_amode31_extable();
setup_lowcore_early();
setup_facility_list();
detect_machine_type();
setup_arch_string();
setup_boot_command_line();


@ -12,7 +12,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/asm-extable.h>
#include <asm/alternative-asm.h>
#include <asm/alternative.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/dwarf.h>
@ -28,49 +28,54 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/nospec-insn.h>
#include <asm/lowcore.h>
_LPP_OFFSET = __LC_LPP
.macro STBEAR address
ALTERNATIVE "nop", ".insn s,0xb2010000,\address", 193
ALTERNATIVE "nop", ".insn s,0xb2010000,\address", ALT_FACILITY(193)
.endm
.macro LBEAR address
ALTERNATIVE "nop", ".insn s,0xb2000000,\address", 193
ALTERNATIVE "nop", ".insn s,0xb2000000,\address", ALT_FACILITY(193)
.endm
.macro LPSWEY address,lpswe
ALTERNATIVE "b \lpswe; nopr", ".insn siy,0xeb0000000071,\address,0", 193
.macro LPSWEY address, lpswe
ALTERNATIVE_2 "b \lpswe;nopr", \
".insn siy,0xeb0000000071,\address,0", ALT_FACILITY_EARLY(193), \
__stringify(.insn siy,0xeb0000000071,LOWCORE_ALT_ADDRESS+\address,0), \
ALT_LOWCORE
.endm
.macro MBEAR reg
ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
.macro MBEAR reg, lowcore
ALTERNATIVE "brcl 0,0", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK(\lowcore)),\
ALT_FACILITY(193)
.endm
.macro CHECK_STACK savearea
.macro CHECK_STACK savearea, lowcore
#ifdef CONFIG_CHECK_STACK
tml %r15,THREAD_SIZE - CONFIG_STACK_GUARD
lghi %r14,\savearea
la %r14,\savearea(\lowcore)
jz stack_overflow
#endif
.endm
.macro CHECK_VMAP_STACK savearea,oklabel
.macro CHECK_VMAP_STACK savearea, lowcore, oklabel
#ifdef CONFIG_VMAP_STACK
lgr %r14,%r15
nill %r14,0x10000 - THREAD_SIZE
oill %r14,STACK_INIT_OFFSET
clg %r14,__LC_KERNEL_STACK
clg %r14,__LC_KERNEL_STACK(\lowcore)
je \oklabel
clg %r14,__LC_ASYNC_STACK
clg %r14,__LC_ASYNC_STACK(\lowcore)
je \oklabel
clg %r14,__LC_MCCK_STACK
clg %r14,__LC_MCCK_STACK(\lowcore)
je \oklabel
clg %r14,__LC_NODAT_STACK
clg %r14,__LC_NODAT_STACK(\lowcore)
je \oklabel
clg %r14,__LC_RESTART_STACK
clg %r14,__LC_RESTART_STACK(\lowcore)
je \oklabel
lghi %r14,\savearea
la %r14,\savearea(\lowcore)
j stack_overflow
#else
j \oklabel
@ -100,30 +105,31 @@ _LPP_OFFSET = __LC_LPP
.endm
.macro BPOFF
ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", 82
ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,12,0", ALT_SPEC(82)
.endm
.macro BPON
ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", 82
ALTERNATIVE "nop", ".insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
.macro BPENTER tif_ptr,tif_mask
ALTERNATIVE "TSTMSK \tif_ptr,\tif_mask; jz .+8; .insn rrf,0xb2e80000,0,0,13,0", \
"j .+12; nop; nop", 82
"j .+12; nop; nop", ALT_SPEC(82)
.endm
.macro BPEXIT tif_ptr,tif_mask
TSTMSK \tif_ptr,\tif_mask
ALTERNATIVE "jz .+8; .insn rrf,0xb2e80000,0,0,12,0", \
"jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", 82
"jnz .+8; .insn rrf,0xb2e80000,0,0,13,0", ALT_SPEC(82)
.endm
#if IS_ENABLED(CONFIG_KVM)
.macro SIEEXIT sie_control
lg %r9,\sie_control # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
ni __LC_CPU_FLAGS+7,255-_CIF_SIE
.macro SIEEXIT sie_control,lowcore
lg %r9,\sie_control # get control block pointer
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_KERNEL_ASCE(\lowcore) # load primary asce
lg %r9,__LC_CURRENT(\lowcore)
mvi __TI_sie(%r9),0
larl %r9,sie_exit # skip forward to sie_exit
.endm
#endif
@ -163,13 +169,14 @@ SYM_FUNC_START(__switch_to_asm)
stg %r15,__THREAD_ksp(%r1,%r2) # store kernel stack of prev
lg %r15,0(%r4,%r3) # start of kernel stack of next
agr %r15,%r5 # end of kernel stack of next
stg %r3,__LC_CURRENT # store task struct of next
stg %r15,__LC_KERNEL_STACK # store end of kernel stack
GET_LC %r13
stg %r3,__LC_CURRENT(%r13) # store task struct of next
stg %r15,__LC_KERNEL_STACK(%r13) # store end of kernel stack
lg %r15,__THREAD_ksp(%r1,%r3) # load kernel stack of next
aghi %r3,__TASK_pid
mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next
mvc __LC_CURRENT_PID(4,%r13),0(%r3) # store pid of next
ALTERNATIVE "nop", "lpp _LPP_OFFSET(%r13)", ALT_FACILITY(40)
lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task
ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
BR_EX %r14
SYM_FUNC_END(__switch_to_asm)
@ -183,15 +190,16 @@ SYM_FUNC_END(__switch_to_asm)
*/
SYM_FUNC_START(__sie64a)
stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers
lg %r12,__LC_CURRENT
GET_LC %r13
lg %r14,__LC_CURRENT(%r13)
stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical..
stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses
stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area
stg %r5,__SF_SIE_GUEST_ASCE(%r15) # save guest asce
xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r14) # copy thread flags
lmg %r0,%r13,0(%r4) # load guest gprs 0-13
oi __LC_CPU_FLAGS+7,_CIF_SIE
mvi __TI_sie(%r14),1
lctlg %c1,%c1,__SF_SIE_GUEST_ASCE(%r15) # load primary asce
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
oi __SIE_PROG0C+3(%r14),1 # we are going into SIE now
@ -210,8 +218,10 @@ SYM_FUNC_START(__sie64a)
.Lsie_skip:
lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer
ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce
ni __LC_CPU_FLAGS+7,255-_CIF_SIE
GET_LC %r14
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r14) # load primary asce
lg %r14,__LC_CURRENT(%r14)
mvi __TI_sie(%r14),0
# some program checks are suppressing. C code (e.g. do_protection_exception)
# will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
# are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
@ -254,14 +264,15 @@ EXPORT_SYMBOL(sie_exit)
*/
SYM_CODE_START(system_call)
stpt __LC_SYS_ENTER_TIMER
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
GET_LC %r13
stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
lghi %r14,0
.Lsysc_per:
STBEAR __LC_LAST_BREAK
lctlg %c1,%c1,__LC_KERNEL_ASCE
lg %r15,__LC_KERNEL_STACK
STBEAR __LC_LAST_BREAK(%r13)
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
# clear user controlled register to prevent speculative use
@ -276,17 +287,17 @@ SYM_CODE_START(system_call)
xgr %r10,%r10
xgr %r11,%r11
la %r2,STACK_FRAME_OVERHEAD(%r15) # pointer to pt_regs
mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
MBEAR %r2
mvc __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC(%r13)
MBEAR %r2,%r13
lgr %r3,%r14
brasl %r14,__do_syscall
STACKLEAK_ERASE
lctlg %c1,%c1,__LC_USER_ASCE
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(system_call)
@ -297,12 +308,13 @@ SYM_CODE_START(ret_from_fork)
lgr %r3,%r11
brasl %r14,__ret_from_fork
STACKLEAK_ERASE
lctlg %c1,%c1,__LC_USER_ASCE
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
GET_LC %r13
lctlg %c1,%c1,__LC_USER_ASCE(%r13)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
BPON
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
stpt __LC_EXIT_TIMER(%r13)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
stpt __LC_EXIT_TIMER
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
SYM_CODE_END(ret_from_fork)
@ -311,39 +323,40 @@ SYM_CODE_END(ret_from_fork)
*/
SYM_CODE_START(pgm_check_handler)
stpt __LC_SYS_ENTER_TIMER
STMG_LC %r8,%r15,__LC_SAVE_AREA_SYNC
GET_LC %r13
stpt __LC_SYS_ENTER_TIMER(%r13)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_SYNC
lgr %r10,%r15
lmg %r8,%r9,__LC_PGM_OLD_PSW
lmg %r8,%r9,__LC_PGM_OLD_PSW(%r13)
tmhh %r8,0x0001 # coming from user space?
jno .Lpgm_skip_asce
lctlg %c1,%c1,__LC_KERNEL_ASCE
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
j 3f # -> fault in user space
.Lpgm_skip_asce:
1: tmhh %r8,0x4000 # PER bit set in old PSW ?
jnz 2f # -> enabled, can't be a double fault
tm __LC_PGM_ILC+3,0x80 # check for per exception
tm __LC_PGM_ILC+3(%r13),0x80 # check for per exception
jnz .Lpgm_svcper # -> single stepped svc
2: CHECK_STACK __LC_SAVE_AREA_SYNC
2: CHECK_STACK __LC_SAVE_AREA_SYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
# CHECK_VMAP_STACK branches to stack_overflow or 4f
CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,4f
3: lg %r15,__LC_KERNEL_STACK
CHECK_VMAP_STACK __LC_SAVE_AREA_SYNC,%r13,4f
3: lg %r15,__LC_KERNEL_STACK(%r13)
4: la %r11,STACK_FRAME_OVERHEAD(%r15)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
stmg %r0,%r7,__PT_R0(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC(%r13)
mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK(%r13)
stctg %c1,%c1,__PT_CR1(%r11)
#if IS_ENABLED(CONFIG_KVM)
ltg %r12,__LC_GMAP
ltg %r12,__LC_GMAP(%r13)
jz 5f
clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11)
jne 5f
BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r10)
SIEEXIT __SF_SIE_CONTROL(%r10),%r13
#endif
5: stmg %r8,%r9,__PT_PSW(%r11)
# clear user controlled registers to prevent speculative use
@ -359,11 +372,11 @@ SYM_CODE_START(pgm_check_handler)
tmhh %r8,0x0001 # returning to user space?
jno .Lpgm_exit_kernel
STACKLEAK_ERASE
lctlg %c1,%c1,__LC_USER_ASCE
lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER
stpt __LC_EXIT_TIMER(%r13)
.Lpgm_exit_kernel:
mvc __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
mvc __LC_RETURN_PSW(16,%r13),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
LBEAR STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
lmg %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@ -372,11 +385,11 @@ SYM_CODE_START(pgm_check_handler)
# single stepped system call
#
.Lpgm_svcper:
mvc __LC_RETURN_PSW(8),__LC_SVC_NEW_PSW
mvc __LC_RETURN_PSW(8,%r13),__LC_SVC_NEW_PSW(%r13)
larl %r14,.Lsysc_per
stg %r14,__LC_RETURN_PSW+8
stg %r14,__LC_RETURN_PSW+8(%r13)
lghi %r14,1
LBEAR __LC_PGM_LAST_BREAK
LBEAR __LC_PGM_LAST_BREAK(%r13)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
SYM_CODE_END(pgm_check_handler)
@ -385,25 +398,27 @@ SYM_CODE_END(pgm_check_handler)
*/
.macro INT_HANDLER name,lc_old_psw,handler
SYM_CODE_START(\name)
stckf __LC_INT_CLOCK
stpt __LC_SYS_ENTER_TIMER
STBEAR __LC_LAST_BREAK
STMG_LC %r8,%r15,__LC_SAVE_AREA_ASYNC
GET_LC %r13
stckf __LC_INT_CLOCK(%r13)
stpt __LC_SYS_ENTER_TIMER(%r13)
STBEAR __LC_LAST_BREAK(%r13)
BPOFF
stmg %r8,%r15,__LC_SAVE_AREA_ASYNC
lmg %r8,%r9,\lc_old_psw
lmg %r8,%r9,\lc_old_psw(%r13)
tmhh %r8,0x0001 # interrupting from user ?
jnz 1f
#if IS_ENABLED(CONFIG_KVM)
TSTMSK __LC_CPU_FLAGS,_CIF_SIE
lg %r10,__LC_CURRENT(%r13)
tm __TI_sie(%r10),0xff
jz 0f
BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15)
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
0: CHECK_STACK __LC_SAVE_AREA_ASYNC
0: CHECK_STACK __LC_SAVE_AREA_ASYNC,%r13
aghi %r15,-(STACK_FRAME_OVERHEAD + __PT_SIZE)
j 2f
1: lctlg %c1,%c1,__LC_KERNEL_ASCE
lg %r15,__LC_KERNEL_STACK
1: lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
lg %r15,__LC_KERNEL_STACK(%r13)
2: xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
@ -417,18 +432,18 @@ SYM_CODE_START(\name)
xgr %r7,%r7
xgr %r10,%r10
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
MBEAR %r11
mvc __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC(%r13)
MBEAR %r11,%r13
stmg %r8,%r9,__PT_PSW(%r11)
lgr %r2,%r11 # pass pointer to pt_regs
brasl %r14,\handler
mvc __LC_RETURN_PSW(16),__PT_PSW(%r11)
mvc __LC_RETURN_PSW(16,%r13),__PT_PSW(%r11)
tmhh %r8,0x0001 # returning to user ?
jno 2f
STACKLEAK_ERASE
lctlg %c1,%c1,__LC_USER_ASCE
lctlg %c1,%c1,__LC_USER_ASCE(%r13)
BPON
stpt __LC_EXIT_TIMER
stpt __LC_EXIT_TIMER(%r13)
2: LBEAR __PT_LAST_BREAK(%r11)
lmg %r0,%r15,__PT_R0(%r11)
LPSWEY __LC_RETURN_PSW,__LC_RETURN_LPSWE
@ -443,35 +458,37 @@ INT_HANDLER io_int_handler,__LC_IO_OLD_PSW,do_io_irq
*/
SYM_CODE_START(mcck_int_handler)
BPOFF
lmg %r8,%r9,__LC_MCK_OLD_PSW
TSTMSK __LC_MCCK_CODE,MCCK_CODE_SYSTEM_DAMAGE
GET_LC %r13
lmg %r8,%r9,__LC_MCK_OLD_PSW(%r13)
TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_SYSTEM_DAMAGE
jo .Lmcck_panic # yes -> rest of mcck code invalid
TSTMSK __LC_MCCK_CODE,MCCK_CODE_CR_VALID
TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CR_VALID
jno .Lmcck_panic # control registers invalid -> panic
ptlb
lghi %r14,__LC_CPU_TIMER_SAVE_AREA
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
TSTMSK __LC_MCCK_CODE,MCCK_CODE_CPU_TIMER_VALID
lay %r14,__LC_CPU_TIMER_SAVE_AREA(%r13)
mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_CPU_TIMER_VALID
jo 3f
la %r14,__LC_SYS_ENTER_TIMER
clc 0(8,%r14),__LC_EXIT_TIMER
la %r14,__LC_SYS_ENTER_TIMER(%r13)
clc 0(8,%r14),__LC_EXIT_TIMER(%r13)
jl 1f
la %r14,__LC_EXIT_TIMER
1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER
la %r14,__LC_EXIT_TIMER(%r13)
1: clc 0(8,%r14),__LC_LAST_UPDATE_TIMER(%r13)
jl 2f
la %r14,__LC_LAST_UPDATE_TIMER
la %r14,__LC_LAST_UPDATE_TIMER(%r13)
2: spt 0(%r14)
mvc __LC_MCCK_ENTER_TIMER(8),0(%r14)
3: TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_MWP_VALID
mvc __LC_MCCK_ENTER_TIMER(8,%r13),0(%r14)
3: TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_MWP_VALID
jno .Lmcck_panic
tmhh %r8,0x0001 # interrupting from user ?
jnz .Lmcck_user
TSTMSK __LC_MCCK_CODE,MCCK_CODE_PSW_IA_VALID
TSTMSK __LC_MCCK_CODE(%r13),MCCK_CODE_PSW_IA_VALID
jno .Lmcck_panic
#if IS_ENABLED(CONFIG_KVM)
TSTMSK __LC_CPU_FLAGS,_CIF_SIE
lg %r10,__LC_CURRENT(%r13)
tm __TI_sie(%r10),0xff
jz .Lmcck_user
# Need to compare the address instead of a CIF_SIE* flag.
# Need to compare the address instead of __TI_SIE flag.
# Otherwise there would be a race between setting the flag
# and entering SIE (or leaving and clearing the flag). This
# would cause machine checks targeted at the guest to be
@ -480,18 +497,19 @@ SYM_CODE_START(mcck_int_handler)
clgrjl %r9,%r14, 4f
larl %r14,.Lsie_leave
clgrjhe %r9,%r14, 4f
oi __LC_CPU_FLAGS+7, _CIF_MCCK_GUEST
lg %r10,__LC_PCPU
oi __PCPU_FLAGS+7(%r10), _CIF_MCCK_GUEST
4: BPENTER __SF_SIE_FLAGS(%r15),_TIF_ISOLATE_BP_GUEST
SIEEXIT __SF_SIE_CONTROL(%r15)
SIEEXIT __SF_SIE_CONTROL(%r15),%r13
#endif
.Lmcck_user:
lg %r15,__LC_MCCK_STACK
lg %r15,__LC_MCCK_STACK(%r13)
la %r11,STACK_FRAME_OVERHEAD(%r15)
stctg %c1,%c1,__PT_CR1(%r11)
lctlg %c1,%c1,__LC_KERNEL_ASCE
lctlg %c1,%c1,__LC_KERNEL_ASCE(%r13)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
lghi %r14,__LC_GPREGS_SAVE_AREA+64
stmg %r0,%r7,__PT_R0(%r11)
lay %r14,__LC_GPREGS_SAVE_AREA(%r13)
mvc __PT_R0(128,%r11),0(%r14)
# clear user controlled registers to prevent speculative use
xgr %r0,%r0
xgr %r1,%r1
@ -501,7 +519,6 @@ SYM_CODE_START(mcck_int_handler)
xgr %r6,%r6
xgr %r7,%r7
xgr %r10,%r10
mvc __PT_R8(64,%r11),0(%r14)
stmg %r8,%r9,__PT_PSW(%r11)
xc __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
xc __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
@ -509,12 +526,13 @@ SYM_CODE_START(mcck_int_handler)
brasl %r14,s390_do_machine_check
lctlg %c1,%c1,__PT_CR1(%r11)
lmg %r0,%r10,__PT_R0(%r11)
mvc __LC_RETURN_MCCK_PSW(16),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1,0x01 # returning to user ?
mvc __LC_RETURN_MCCK_PSW(16,%r13),__PT_PSW(%r11) # move return PSW
tm __LC_RETURN_MCCK_PSW+1(%r13),0x01 # returning to user ?
jno 0f
BPON
stpt __LC_EXIT_TIMER
0: ALTERNATIVE "nop", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
stpt __LC_EXIT_TIMER(%r13)
0: ALTERNATIVE "brcl 0,0", __stringify(lay %r12,__LC_LAST_BREAK_SAVE_AREA(%r13)),\
ALT_FACILITY(193)
LBEAR 0(%r12)
lmg %r11,%r15,__PT_R11(%r11)
LPSWEY __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
@ -550,7 +568,7 @@ SYM_CODE_START(mcck_int_handler)
SYM_CODE_END(mcck_int_handler)
SYM_CODE_START(restart_int_handler)
ALTERNATIVE "nop", "lpp _LPP_OFFSET", 40
ALTERNATIVE "nop", "lpp _LPP_OFFSET", ALT_FACILITY(40)
stg %r15,__LC_SAVE_AREA_RESTART
TSTMSK __LC_RESTART_FLAGS,RESTART_FLAG_CTLREGS,4
jz 0f
@ -558,15 +576,17 @@ SYM_CODE_START(restart_int_handler)
0: larl %r15,daton_psw
lpswe 0(%r15) # turn dat on, keep irqs off
.Ldaton:
lg %r15,__LC_RESTART_STACK
GET_LC %r15
lg %r15,__LC_RESTART_STACK(%r15)
xc STACK_FRAME_OVERHEAD(__PT_SIZE,%r15),STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r14,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART
mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW
GET_LC %r13
mvc STACK_FRAME_OVERHEAD+__PT_R15(8,%r15),__LC_SAVE_AREA_RESTART(%r13)
mvc STACK_FRAME_OVERHEAD+__PT_PSW(16,%r15),__LC_RST_OLD_PSW(%r13)
xc 0(STACK_FRAME_OVERHEAD,%r15),0(%r15)
lg %r1,__LC_RESTART_FN # load fn, parm & source cpu
lg %r2,__LC_RESTART_DATA
lgf %r3,__LC_RESTART_SOURCE
lg %r1,__LC_RESTART_FN(%r13) # load fn, parm & source cpu
lg %r2,__LC_RESTART_DATA(%r13)
lgf %r3,__LC_RESTART_SOURCE(%r13)
ltgr %r3,%r3 # test source cpu address
jm 1f # negative -> skip source stop
0: sigp %r4,%r3,SIGP_SENSE # sigp sense to source cpu
@ -588,7 +608,8 @@ SYM_CODE_END(restart_int_handler)
* Setup a pt_regs so that show_trace can provide a good call trace.
*/
SYM_CODE_START(stack_overflow)
lg %r15,__LC_NODAT_STACK # change to panic stack
GET_LC %r15
lg %r15,__LC_NODAT_STACK(%r15) # change to panic stack
la %r11,STACK_FRAME_OVERHEAD(%r15)
stmg %r0,%r7,__PT_R0(%r11)
stmg %r8,%r9,__PT_PSW(%r11)


@ -10,6 +10,7 @@
#include <linux/init.h>
#include <linux/linkage.h>
#include <asm/lowcore.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@ -18,14 +19,15 @@
__HEAD
SYM_CODE_START(startup_continue)
larl %r1,tod_clock_base
mvc 0(16,%r1),__LC_BOOT_CLOCK
GET_LC %r2
mvc 0(16,%r1),__LC_BOOT_CLOCK(%r2)
#
# Setup stack
#
larl %r14,init_task
stg %r14,__LC_CURRENT
stg %r14,__LC_CURRENT(%r2)
larl %r15,init_thread_union+STACK_INIT_OFFSET
stg %r15,__LC_KERNEL_STACK
stg %r15,__LC_KERNEL_STACK(%r2)
brasl %r14,sclp_early_adjust_va # allow sclp_early_printk
brasl %r14,startup_init # s390 specific early init
brasl %r14,start_kernel # common init code


@ -2112,7 +2112,7 @@ void do_restart(void *arg)
tracing_off();
debug_locks_off();
lgr_info_log();
smp_call_online_cpu(__do_restart, arg);
smp_call_ipl_cpu(__do_restart, arg);
}
/* on halt */


@ -62,7 +62,7 @@ static void __do_machine_kdump(void *data)
* This need to be done *after* s390_reset_system set the
* prefix register of this CPU to zero
*/
memcpy(absolute_pointer(__LC_FPREGS_SAVE_AREA),
memcpy(absolute_pointer(get_lowcore()->floating_pt_save_area),
phys_to_virt(prefix + __LC_FPREGS_SAVE_AREA), 512);
call_nodat(1, int, purgatory, int, 1);


@ -4,6 +4,8 @@
#include <linux/cpu.h>
#include <asm/nospec-branch.h>
int nobp = IS_ENABLED(CONFIG_KERNEL_NOBP);
static int __init nobp_setup_early(char *str)
{
bool enabled;
@ -17,11 +19,11 @@ static int __init nobp_setup_early(char *str)
* The user explicitly requested nobp=1, enable it and
* disable the expoline support.
*/
__set_facility(82, alt_stfle_fac_list);
nobp = 1;
if (IS_ENABLED(CONFIG_EXPOLINE))
nospec_disable = 1;
} else {
__clear_facility(82, alt_stfle_fac_list);
nobp = 0;
}
return 0;
}
@ -29,7 +31,7 @@ early_param("nobp", nobp_setup_early);
static int __init nospec_setup_early(char *str)
{
__clear_facility(82, alt_stfle_fac_list);
nobp = 0;
return 0;
}
early_param("nospec", nospec_setup_early);
@ -40,7 +42,7 @@ static int __init nospec_report(void)
pr_info("Spectre V2 mitigation: etokens\n");
if (nospec_uses_trampoline())
pr_info("Spectre V2 mitigation: execute trampolines\n");
if (__test_facility(82, alt_stfle_fac_list))
if (nobp_enabled())
pr_info("Spectre V2 mitigation: limited branch prediction\n");
return 0;
}
@ -66,14 +68,14 @@ void __init nospec_auto_detect(void)
*/
if (__is_defined(CC_USING_EXPOLINE))
nospec_disable = 1;
__clear_facility(82, alt_stfle_fac_list);
nobp = 0;
} else if (__is_defined(CC_USING_EXPOLINE)) {
/*
* The kernel has been compiled with expolines.
* Keep expolines enabled and disable nobp.
*/
nospec_disable = 0;
__clear_facility(82, alt_stfle_fac_list);
nobp = 0;
}
/*
* If the kernel has not been compiled with expolines the
@ -86,7 +88,7 @@ static int __init spectre_v2_setup_early(char *str)
{
if (str && !strncmp(str, "on", 2)) {
nospec_disable = 0;
__clear_facility(82, alt_stfle_fac_list);
nobp = 0;
}
if (str && !strncmp(str, "off", 3))
nospec_disable = 1;


@ -17,7 +17,7 @@ ssize_t cpu_show_spectre_v2(struct device *dev,
return sprintf(buf, "Mitigation: etokens\n");
if (nospec_uses_trampoline())
return sprintf(buf, "Mitigation: execute trampolines\n");
if (__test_facility(82, alt_stfle_fac_list))
if (nobp_enabled())
return sprintf(buf, "Mitigation: limited branch prediction\n");
return sprintf(buf, "Vulnerable\n");
}


@ -556,25 +556,31 @@ static int cfdiag_diffctr(struct cpu_cf_events *cpuhw, unsigned long auth)
struct cf_trailer_entry *trailer_start, *trailer_stop;
struct cf_ctrset_entry *ctrstart, *ctrstop;
size_t offset = 0;
int i;
auth &= (1 << CPUMF_LCCTL_ENABLE_SHIFT) - 1;
do {
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
ctrstart = (struct cf_ctrset_entry *)(cpuhw->start + offset);
ctrstop = (struct cf_ctrset_entry *)(cpuhw->stop + offset);
/* Counter set not authorized */
if (!(auth & cpumf_ctr_ctl[i]))
continue;
/* Counter set size zero was not saved */
if (!cpum_cf_read_setsize(i))
continue;
if (memcmp(ctrstop, ctrstart, sizeof(*ctrstop))) {
pr_err_once("cpum_cf_diag counter set compare error "
"in set %i\n", ctrstart->set);
return 0;
}
auth &= ~cpumf_ctr_ctl[ctrstart->set];
if (ctrstart->def == CF_DIAG_CTRSET_DEF) {
cfdiag_diffctrset((u64 *)(ctrstart + 1),
(u64 *)(ctrstop + 1), ctrstart->ctr);
offset += ctrstart->ctr * sizeof(u64) +
sizeof(*ctrstart);
}
} while (ctrstart->def && auth);
}
/* Save time_stamp from start of event in stop's trailer */
trailer_start = (struct cf_trailer_entry *)(cpuhw->start + offset);
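
The rework above replaces a do-while loop that advanced through the measurement buffer using each entry's own size field, and could therefore spin forever on a zero-sized counter set, with a for loop bounded by the fixed number of counter sets. A minimal standalone illustration of that pattern (all names and the record layout are invented for the example):

	#include <stddef.h>

	#define NKINDS	6	/* fixed number of possible record kinds */

	/* Iterating over the kinds bounds the walk; a kind whose recorded size is
	 * zero is skipped instead of stalling the loop at the same offset. */
	static size_t walk_records(const unsigned char *buf,
				   size_t (*size_of)(unsigned int kind))
	{
		size_t offset = 0;
		unsigned int kind;

		for (kind = 0; kind < NKINDS; kind++) {
			size_t sz = size_of(kind);

			if (!sz)	/* nothing was saved for this kind */
				continue;
			/* ... compare/process the record at buf + offset ... */
			offset += sz;
		}
		(void)buf;
		return offset;
	}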


@ -17,7 +17,8 @@
#include <linux/mm_types.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/smp.h>
#include <asm/text-patching.h>
#include <asm/diag.h>
#include <asm/facility.h>
#include <asm/elf.h>
@ -79,6 +80,23 @@ void notrace stop_machine_yield(const struct cpumask *cpumask)
}
}
static void do_sync_core(void *info)
{
sync_core();
}
void text_poke_sync(void)
{
on_each_cpu(do_sync_core, NULL, 1);
}
void text_poke_sync_lock(void)
{
cpus_read_lock();
text_poke_sync();
cpus_read_unlock();
}
/*
* cpu_init - initializes state that is per-CPU.
*/
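
text_poke_sync() and text_poke_sync_lock() give the rest of the kernel a way to force every online CPU through a serializing operation after kernel text has been modified. A hedged sketch of a caller; the pairing with s390_kernel_write() is an assumption for illustration, not code from this series:

	/* Illustrative only: rewrite an instruction in the kernel image, then make
	 * all CPUs execute sync_core() so none keeps running stale bytes. */
	static void example_patch_text(void *addr, const void *insn, size_t len)
	{
		s390_kernel_write(addr, insn, len);	/* DAT-bypassing text write */
		text_poke_sync_lock();			/* cpus_read_lock() + IPI to all CPUs */
	}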


@ -9,6 +9,7 @@
#include <asm/asm-offsets.h>
#include <asm/nospec-insn.h>
#include <asm/sigp.h>
#include <asm/lowcore.h>
GEN_BR_THUNK %r9
@ -20,20 +21,15 @@
# r3 = Parameter for function
#
SYM_CODE_START(store_status)
/* Save register one and load save area base */
stg %r1,__LC_SAVE_AREA_RESTART
STMG_LC %r0,%r15,__LC_GPREGS_SAVE_AREA
/* General purpose registers */
lghi %r1,__LC_GPREGS_SAVE_AREA
stmg %r0,%r15,0(%r1)
mvc 8(8,%r1),__LC_SAVE_AREA_RESTART
GET_LC %r13
/* Control registers */
lghi %r1,__LC_CREGS_SAVE_AREA
stctg %c0,%c15,0(%r1)
stctg %c0,%c15,__LC_CREGS_SAVE_AREA(%r13)
/* Access registers */
lghi %r1,__LC_AREGS_SAVE_AREA
stam %a0,%a15,0(%r1)
stamy %a0,%a15,__LC_AREGS_SAVE_AREA(%r13)
/* Floating point registers */
lghi %r1,__LC_FPREGS_SAVE_AREA
lay %r1,__LC_FPREGS_SAVE_AREA(%r13)
std %f0, 0x00(%r1)
std %f1, 0x08(%r1)
std %f2, 0x10(%r1)
@ -51,21 +47,21 @@ SYM_CODE_START(store_status)
std %f14,0x70(%r1)
std %f15,0x78(%r1)
/* Floating point control register */
lghi %r1,__LC_FP_CREG_SAVE_AREA
lay %r1,__LC_FP_CREG_SAVE_AREA(%r13)
stfpc 0(%r1)
/* CPU timer */
lghi %r1,__LC_CPU_TIMER_SAVE_AREA
lay %r1,__LC_CPU_TIMER_SAVE_AREA(%r13)
stpt 0(%r1)
/* Store prefix register */
lghi %r1,__LC_PREFIX_SAVE_AREA
lay %r1,__LC_PREFIX_SAVE_AREA(%r13)
stpx 0(%r1)
/* Clock comparator - seven bytes */
lghi %r1,__LC_CLOCK_COMP_SAVE_AREA
larl %r4,clkcmp
stckc 0(%r4)
lay %r1,__LC_CLOCK_COMP_SAVE_AREA(%r13)
mvc 1(7,%r1),1(%r4)
/* Program status word */
lghi %r1,__LC_PSW_SAVE_AREA
lay %r1,__LC_PSW_SAVE_AREA(%r13)
epsw %r4,%r5
st %r4,0(%r1)
st %r5,4(%r1)


@ -149,13 +149,12 @@ unsigned long __bootdata_preserved(max_mappable);
struct physmem_info __bootdata(physmem_info);
struct vm_layout __bootdata_preserved(vm_layout);
EXPORT_SYMBOL_GPL(vm_layout);
EXPORT_SYMBOL(vm_layout);
int __bootdata_preserved(__kaslr_enabled);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 alt_stfle_fac_list[16];
struct oldmem_data __bootdata_preserved(oldmem_data);
unsigned long VMALLOC_START;
@ -406,6 +405,7 @@ static void __init setup_lowcore(void)
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
lc->pcpu = (unsigned long)per_cpu_ptr(&pcpu_devices, 0);
lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
lc->restart_psw.addr = __pa(restart_int_handler);
lc->external_new_psw.mask = PSW_KERNEL_BITS;
@ -889,6 +889,9 @@ void __init setup_arch(char **cmdline_p)
else
pr_info("Linux is running as a guest in 64-bit mode\n");
if (have_relocated_lowcore())
pr_info("Lowcore relocated to 0x%px\n", get_lowcore());
log_component_list();
/* Have one command line that is parsed and saved in /proc/cmdline */


@ -74,16 +74,15 @@ enum {
CPU_STATE_CONFIGURED,
};
struct pcpu {
unsigned long ec_mask; /* bit mask for ec_xxx functions */
unsigned long ec_clk; /* sigp timestamp for ec_xxx */
signed char state; /* physical cpu state */
signed char polarization; /* physical polarization */
u16 address; /* physical cpu address */
};
static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
DEFINE_PER_CPU(struct pcpu, pcpu_devices);
/*
* Pointer to the pcpu area of the boot CPU. This is required when a restart
* interrupt is triggered on an offline CPU. For that case accessing percpu
* data with the common primitives does not work, since the percpu offset is
* stored in a non existent lowcore.
*/
static struct pcpu *ipl_pcpu;
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@ -174,8 +173,8 @@ static struct pcpu *pcpu_find_address(const struct cpumask *mask, u16 address)
int cpu;
for_each_cpu(cpu, mask)
if (pcpu_devices[cpu].address == address)
return pcpu_devices + cpu;
if (per_cpu(pcpu_devices, cpu).address == address)
return &per_cpu(pcpu_devices, cpu);
return NULL;
}
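
With pcpu_devices converted from a static NR_CPUS-sized array to per-CPU data, the open-coded pointer arithmetic used throughout this file turns into per-CPU accessor calls, as in pcpu_find_address() above. A reduced sketch of the access pattern (struct contents trimmed and names suffixed _demo to mark them as illustrative):

	#include <linux/percpu.h>
	#include <linux/types.h>

	struct pcpu_demo {
		unsigned long ec_mask;
		u16 address;
	};

	static DEFINE_PER_CPU(struct pcpu_demo, pcpu_demo_devices);

	static struct pcpu_demo *pcpu_demo_of(int cpu)
	{
		/* before the conversion: pcpu_devices + cpu */
		return per_cpu_ptr(&pcpu_demo_devices, cpu);
	}

	static struct pcpu_demo *pcpu_demo_self(void)
	{
		/* before: pcpu_devices + smp_processor_id() */
		return this_cpu_ptr(&pcpu_demo_devices);
	}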
@ -230,13 +229,11 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
return -ENOMEM;
}
static void pcpu_free_lowcore(struct pcpu *pcpu)
static void pcpu_free_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, nodat_stack, mcck_stack;
struct lowcore *lc;
int cpu;
cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
nodat_stack = lc->nodat_stack - STACK_INIT_OFFSET;
async_stack = lc->async_stack - STACK_INIT_OFFSET;
@ -259,6 +256,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask);
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
lc->pcpu = (unsigned long)pcpu;
lc->restart_flags = RESTART_FLAG_CTLREGS;
lc->spinlock_lockval = arch_spin_lockval(cpu);
lc->spinlock_index = 0;
@ -277,12 +275,10 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
arch_spin_lock_setup(cpu);
}
static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
static void pcpu_attach_task(int cpu, struct task_struct *tsk)
{
struct lowcore *lc;
int cpu;
cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->kernel_stack = (unsigned long)task_stack_page(tsk) + STACK_INIT_OFFSET;
lc->current_task = (unsigned long)tsk;
@ -296,18 +292,16 @@ static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
lc->steal_timer = 0;
}
static void pcpu_start_fn(struct pcpu *pcpu, void (*func)(void *), void *data)
static void pcpu_start_fn(int cpu, void (*func)(void *), void *data)
{
struct lowcore *lc;
int cpu;
cpu = pcpu - pcpu_devices;
lc = lowcore_ptr[cpu];
lc->restart_stack = lc->kernel_stack;
lc->restart_fn = (unsigned long) func;
lc->restart_data = (unsigned long) data;
lc->restart_source = -1U;
pcpu_sigp_retry(pcpu, SIGP_RESTART, 0);
pcpu_sigp_retry(per_cpu_ptr(&pcpu_devices, cpu), SIGP_RESTART, 0);
}
typedef void (pcpu_delegate_fn)(void *);
@ -320,14 +314,14 @@ static void __pcpu_delegate(pcpu_delegate_fn *func, void *data)
func(data); /* should not return */
}
static void pcpu_delegate(struct pcpu *pcpu,
static void pcpu_delegate(struct pcpu *pcpu, int cpu,
pcpu_delegate_fn *func,
void *data, unsigned long stack)
{
struct lowcore *lc, *abs_lc;
unsigned int source_cpu;
lc = lowcore_ptr[pcpu - pcpu_devices];
lc = lowcore_ptr[cpu];
source_cpu = stap();
if (pcpu->address == source_cpu) {
@ -377,26 +371,11 @@ static int pcpu_set_smt(unsigned int mtid)
smp_cpu_mt_shift = 0;
while (smp_cpu_mtid >= (1U << smp_cpu_mt_shift))
smp_cpu_mt_shift++;
pcpu_devices[0].address = stap();
per_cpu(pcpu_devices, 0).address = stap();
}
return cc;
}
/*
* Call function on an online CPU.
*/
void smp_call_online_cpu(void (*func)(void *), void *data)
{
struct pcpu *pcpu;
/* Use the current cpu if it is online. */
pcpu = pcpu_find_address(cpu_online_mask, stap());
if (!pcpu)
/* Use the first online cpu. */
pcpu = pcpu_devices + cpumask_first(cpu_online_mask);
pcpu_delegate(pcpu, func, data, (unsigned long) restart_stack);
}
/*
* Call function on the ipl CPU.
*/
@ -404,11 +383,10 @@ void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
struct lowcore *lc = lowcore_ptr[0];
if (pcpu_devices[0].address == stap())
if (ipl_pcpu->address == stap())
lc = get_lowcore();
pcpu_delegate(&pcpu_devices[0], func, data,
lc->nodat_stack);
pcpu_delegate(ipl_pcpu, 0, func, data, lc->nodat_stack);
}
int smp_find_processor_id(u16 address)
@ -416,21 +394,21 @@ int smp_find_processor_id(u16 address)
int cpu;
for_each_present_cpu(cpu)
if (pcpu_devices[cpu].address == address)
if (per_cpu(pcpu_devices, cpu).address == address)
return cpu;
return -1;
}
void schedule_mcck_handler(void)
{
pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_mcck_pending);
pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_mcck_pending);
}
bool notrace arch_vcpu_is_preempted(int cpu)
{
if (test_cpu_flag_of(CIF_ENABLED_WAIT, cpu))
return false;
if (pcpu_running(pcpu_devices + cpu))
if (pcpu_running(per_cpu_ptr(&pcpu_devices, cpu)))
return false;
return true;
}
@ -442,7 +420,7 @@ void notrace smp_yield_cpu(int cpu)
return;
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
: : "d" (pcpu_devices[cpu].address));
: : "d" (per_cpu(pcpu_devices, cpu).address));
}
EXPORT_SYMBOL_GPL(smp_yield_cpu);
@ -463,7 +441,7 @@ void notrace smp_emergency_stop(void)
end = get_tod_clock() + (1000000UL << 12);
for_each_cpu(cpu, &cpumask) {
struct pcpu *pcpu = pcpu_devices + cpu;
struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
set_bit(ec_stop_cpu, &pcpu->ec_mask);
while (__pcpu_sigp(pcpu->address, SIGP_EMERGENCY_SIGNAL,
0, NULL) == SIGP_CC_BUSY &&
@ -472,7 +450,7 @@ void notrace smp_emergency_stop(void)
}
while (get_tod_clock() < end) {
for_each_cpu(cpu, &cpumask)
if (pcpu_stopped(pcpu_devices + cpu))
if (pcpu_stopped(per_cpu_ptr(&pcpu_devices, cpu)))
cpumask_clear_cpu(cpu, &cpumask);
if (cpumask_empty(&cpumask))
break;
@ -487,6 +465,7 @@ NOKPROBE_SYMBOL(smp_emergency_stop);
*/
void smp_send_stop(void)
{
struct pcpu *pcpu;
int cpu;
/* Disable all interrupts/machine checks */
@ -502,8 +481,9 @@ void smp_send_stop(void)
for_each_online_cpu(cpu) {
if (cpu == smp_processor_id())
continue;
pcpu_sigp_retry(pcpu_devices + cpu, SIGP_STOP, 0);
while (!pcpu_stopped(pcpu_devices + cpu))
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu_sigp_retry(pcpu, SIGP_STOP, 0);
while (!pcpu_stopped(pcpu))
cpu_relax();
}
}
@ -517,7 +497,7 @@ static void smp_handle_ext_call(void)
unsigned long bits;
/* handle bit signal external calls */
bits = xchg(&pcpu_devices[smp_processor_id()].ec_mask, 0);
bits = this_cpu_xchg(pcpu_devices.ec_mask, 0);
if (test_bit(ec_stop_cpu, &bits))
smp_stop_cpu();
if (test_bit(ec_schedule, &bits))
@ -542,12 +522,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
int cpu;
for_each_cpu(cpu, mask)
pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
void arch_send_call_function_single_ipi(int cpu)
{
pcpu_ec_call(pcpu_devices + cpu, ec_call_function_single);
pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_call_function_single);
}
/*
@ -557,13 +537,13 @@ void arch_send_call_function_single_ipi(int cpu)
*/
void arch_smp_send_reschedule(int cpu)
{
pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
pcpu_ec_call(per_cpu_ptr(&pcpu_devices, cpu), ec_schedule);
}
#ifdef CONFIG_IRQ_WORK
void arch_irq_work_raise(void)
{
pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
pcpu_ec_call(this_cpu_ptr(&pcpu_devices), ec_irq_work);
}
#endif
@ -575,7 +555,7 @@ int smp_store_status(int cpu)
struct pcpu *pcpu;
unsigned long pa;
pcpu = pcpu_devices + cpu;
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
lc = lowcore_ptr[cpu];
pa = __pa(&lc->floating_pt_save_area);
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
@ -683,17 +663,17 @@ void __init smp_save_dump_secondary_cpus(void)
void smp_cpu_set_polarization(int cpu, int val)
{
pcpu_devices[cpu].polarization = val;
per_cpu(pcpu_devices, cpu).polarization = val;
}
int smp_cpu_get_polarization(int cpu)
{
return pcpu_devices[cpu].polarization;
return per_cpu(pcpu_devices, cpu).polarization;
}
int smp_cpu_get_cpu_address(int cpu)
{
return pcpu_devices[cpu].address;
return per_cpu(pcpu_devices, cpu).address;
}
static void __ref smp_get_core_info(struct sclp_core_info *info, int early)
@ -732,7 +712,7 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
for (i = 0; (i <= smp_cpu_mtid) && (cpu < nr_cpu_ids); i++) {
if (pcpu_find_address(cpu_present_mask, address + i))
continue;
pcpu = pcpu_devices + cpu;
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
pcpu->address = address + i;
if (configured)
pcpu->state = CPU_STATE_CONFIGURED;
@ -767,7 +747,7 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
* that all SMT threads get subsequent logical CPU numbers.
*/
if (early) {
core_id = pcpu_devices[0].address >> smp_cpu_mt_shift;
core_id = per_cpu(pcpu_devices, 0).address >> smp_cpu_mt_shift;
for (i = 0; i < info->configured; i++) {
core = &info->core[i];
if (core->core_id == core_id) {
@ -867,7 +847,7 @@ static void smp_start_secondary(void *cpuvoid)
/* Upping and downing of CPUs */
int __cpu_up(unsigned int cpu, struct task_struct *tidle)
{
struct pcpu *pcpu = pcpu_devices + cpu;
struct pcpu *pcpu = per_cpu_ptr(&pcpu_devices, cpu);
int rc;
if (pcpu->state != CPU_STATE_CONFIGURED)
@ -885,8 +865,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
*/
system_ctlreg_lock();
pcpu_prepare_secondary(pcpu, cpu);
pcpu_attach_task(pcpu, tidle);
pcpu_start_fn(pcpu, smp_start_secondary, NULL);
pcpu_attach_task(cpu, tidle);
pcpu_start_fn(cpu, smp_start_secondary, NULL);
/* Wait until cpu puts itself in the online & active maps */
while (!cpu_online(cpu))
cpu_relax();
@ -931,18 +911,19 @@ void __cpu_die(unsigned int cpu)
struct pcpu *pcpu;
/* Wait until target cpu is down */
pcpu = pcpu_devices + cpu;
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
while (!pcpu_stopped(pcpu))
cpu_relax();
pcpu_free_lowcore(pcpu);
pcpu_free_lowcore(pcpu, cpu);
cpumask_clear_cpu(cpu, mm_cpumask(&init_mm));
cpumask_clear_cpu(cpu, &init_mm.context.cpu_attach_mask);
pcpu->flags = 0;
}
void __noreturn cpu_die(void)
{
idle_task_exit();
pcpu_sigp_retry(pcpu_devices + smp_processor_id(), SIGP_STOP, 0);
pcpu_sigp_retry(this_cpu_ptr(&pcpu_devices), SIGP_STOP, 0);
for (;;) ;
}
@ -972,11 +953,13 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
void __init smp_prepare_boot_cpu(void)
{
struct pcpu *pcpu = pcpu_devices;
struct lowcore *lc = get_lowcore();
WARN_ON(!cpu_present(0) || !cpu_online(0));
pcpu->state = CPU_STATE_CONFIGURED;
get_lowcore()->percpu_offset = __per_cpu_offset[0];
lc->percpu_offset = __per_cpu_offset[0];
ipl_pcpu = per_cpu_ptr(&pcpu_devices, 0);
ipl_pcpu->state = CPU_STATE_CONFIGURED;
lc->pcpu = (unsigned long)ipl_pcpu;
smp_cpu_set_polarization(0, POLARIZATION_UNKNOWN);
}
@ -984,8 +967,8 @@ void __init smp_setup_processor_id(void)
{
struct lowcore *lc = get_lowcore();
pcpu_devices[0].address = stap();
lc->cpu_nr = 0;
per_cpu(pcpu_devices, 0).address = stap();
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
}
@ -1007,7 +990,7 @@ static ssize_t cpu_configure_show(struct device *dev,
ssize_t count;
mutex_lock(&smp_cpu_state_mutex);
count = sprintf(buf, "%d\n", pcpu_devices[dev->id].state);
count = sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).state);
mutex_unlock(&smp_cpu_state_mutex);
return count;
}
@ -1033,7 +1016,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++)
if (cpu_online(cpu + i))
goto out;
pcpu = pcpu_devices + cpu;
pcpu = per_cpu_ptr(&pcpu_devices, cpu);
rc = 0;
switch (val) {
case 0:
@ -1045,7 +1028,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
pcpu[i].state = CPU_STATE_STANDBY;
per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_STANDBY;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@ -1060,7 +1043,7 @@ static ssize_t cpu_configure_store(struct device *dev,
for (i = 0; i <= smp_cpu_mtid; i++) {
if (cpu + i >= nr_cpu_ids || !cpu_present(cpu + i))
continue;
pcpu[i].state = CPU_STATE_CONFIGURED;
per_cpu(pcpu_devices, cpu + i).state = CPU_STATE_CONFIGURED;
smp_cpu_set_polarization(cpu + i,
POLARIZATION_UNKNOWN);
}
@ -1079,7 +1062,7 @@ static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
{
return sprintf(buf, "%d\n", pcpu_devices[dev->id].address);
return sprintf(buf, "%d\n", per_cpu(pcpu_devices, dev->id).address);
}
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
@ -1105,14 +1088,14 @@ static struct attribute_group cpu_online_attr_group = {
static int smp_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
return sysfs_create_group(&c->dev.kobj, &cpu_online_attr_group);
}
static int smp_cpu_pre_down(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
sysfs_remove_group(&c->dev.kobj, &cpu_online_attr_group);
return 0;
@ -1125,7 +1108,7 @@ bool arch_cpu_is_hotpluggable(int cpu)
int arch_register_cpu(int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct cpu *c = per_cpu_ptr(&cpu_devices, cpu);
int rc;
c->hotpluggable = arch_cpu_is_hotpluggable(cpu);


@ -18,11 +18,22 @@
#include <asm/sections.h>
#include <asm/uv.h>
#if !IS_ENABLED(CONFIG_KVM)
unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
{
return 0;
}
int gmap_fault(struct gmap *gmap, unsigned long gaddr,
unsigned int fault_flags)
{
return 0;
}
#endif
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
#endif
/*
* uv_info contains both host and guest information but it's currently only
@ -35,7 +46,6 @@ EXPORT_SYMBOL(prot_virt_guest);
struct uv_info __bootdata_preserved(uv_info);
EXPORT_SYMBOL(uv_info);
#if IS_ENABLED(CONFIG_KVM)
int __bootdata_preserved(prot_virt_host);
EXPORT_SYMBOL(prot_virt_host);
@ -543,9 +553,6 @@ int arch_make_page_accessible(struct page *page)
return arch_make_folio_accessible(page_folio(page));
}
EXPORT_SYMBOL_GPL(arch_make_page_accessible);
#endif
#if defined(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) || IS_ENABLED(CONFIG_KVM)
static ssize_t uv_query_facilities(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@ -721,24 +728,13 @@ static struct attribute_group uv_query_attr_group = {
static ssize_t uv_is_prot_virt_guest(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
int val = 0;
#ifdef CONFIG_PROTECTED_VIRTUALIZATION_GUEST
val = prot_virt_guest;
#endif
return sysfs_emit(buf, "%d\n", val);
return sysfs_emit(buf, "%d\n", prot_virt_guest);
}
static ssize_t uv_is_prot_virt_host(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
int val = 0;
#if IS_ENABLED(CONFIG_KVM)
val = prot_virt_host;
#endif
return sysfs_emit(buf, "%d\n", val);
return sysfs_emit(buf, "%d\n", prot_virt_host);
}
static struct kobj_attribute uv_prot_virt_guest =
@ -790,4 +786,3 @@ static int __init uv_info_init(void)
return rc;
}
device_initcall(uv_info_init);
#endif


@ -190,6 +190,9 @@ SECTIONS
. = ALIGN(PAGE_SIZE);
INIT_DATA_SECTION(0x100)
RUNTIME_CONST(shift, d_hash_shift)
RUNTIME_CONST(ptr, dentry_hashtable)
PERCPU_SECTION(0x100)
. = ALIGN(PAGE_SIZE);
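
The RUNTIME_CONST() entries above reserve the relocation tables that the generic runtime-constant machinery expects, so boot code can patch the dentry-hash shift and table pointer directly into the instruction stream. For context, this is roughly how the generic side consumes such constants in fs/dcache.c (quoted from memory as an illustration, not code from this series):

	/* d_hash() resolves the hash parameters through boot-patched immediates
	 * instead of loading them from memory on every lookup. */
	static inline struct hlist_bl_head *d_hash(unsigned long hashlen)
	{
		return runtime_const_ptr(dentry_hashtable) +
		       runtime_const_shift_right_32(hashlen, d_hash_shift);
	}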
@ -219,6 +222,8 @@ SECTIONS
QUAD(init_mm)
QUAD(swapper_pg_dir)
QUAD(invalid_pg_dir)
QUAD(__alt_instructions)
QUAD(__alt_instructions_end)
#ifdef CONFIG_KASAN
QUAD(kasan_early_shadow_page)
QUAD(kasan_early_shadow_pte)


@ -75,7 +75,7 @@ static inline int arch_load_niai4(int *lock)
int owner;
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", 49) /* NIAI 4 */
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,4,0", ALT_FACILITY(49)) /* NIAI 4 */
" l %0,%1\n"
: "=d" (owner) : "Q" (*lock) : "memory");
return owner;
@ -86,7 +86,7 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
int expected = old;
asm_inline volatile(
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", 49) /* NIAI 8 */
ALTERNATIVE("nop", ".insn rre,0xb2fa0000,8,0", ALT_FACILITY(49)) /* NIAI 8 */
" cs %0,%3,%1\n"
: "=d" (old), "=Q" (*lock)
: "0" (old), "d" (new), "Q" (*lock)


@ -36,6 +36,16 @@ enum address_markers_idx {
VMEMMAP_END_NR,
VMALLOC_NR,
VMALLOC_END_NR,
#ifdef CONFIG_KMSAN
KMSAN_VMALLOC_SHADOW_START_NR,
KMSAN_VMALLOC_SHADOW_END_NR,
KMSAN_VMALLOC_ORIGIN_START_NR,
KMSAN_VMALLOC_ORIGIN_END_NR,
KMSAN_MODULES_SHADOW_START_NR,
KMSAN_MODULES_SHADOW_END_NR,
KMSAN_MODULES_ORIGIN_START_NR,
KMSAN_MODULES_ORIGIN_END_NR,
#endif
MODULES_NR,
MODULES_END_NR,
ABS_LOWCORE_NR,
@ -65,6 +75,16 @@ static struct addr_marker address_markers[] = {
[VMEMMAP_END_NR] = {0, "vmemmap Area End"},
[VMALLOC_NR] = {0, "vmalloc Area Start"},
[VMALLOC_END_NR] = {0, "vmalloc Area End"},
#ifdef CONFIG_KMSAN
[KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"},
[KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"},
[KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"},
[KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"},
[KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"},
[KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"},
[KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"},
[KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"},
#endif
[MODULES_NR] = {0, "Modules Area Start"},
[MODULES_END_NR] = {0, "Modules Area End"},
[ABS_LOWCORE_NR] = {0, "Lowcore Area Start"},
@ -306,6 +326,16 @@ static int pt_dump_init(void)
#ifdef CONFIG_KFENCE
address_markers[KFENCE_START_NR].start_address = kfence_start;
address_markers[KFENCE_END_NR].start_address = kfence_start + KFENCE_POOL_SIZE;
#endif
#ifdef CONFIG_KMSAN
address_markers[KMSAN_VMALLOC_SHADOW_START_NR].start_address = KMSAN_VMALLOC_SHADOW_START;
address_markers[KMSAN_VMALLOC_SHADOW_END_NR].start_address = KMSAN_VMALLOC_SHADOW_END;
address_markers[KMSAN_VMALLOC_ORIGIN_START_NR].start_address = KMSAN_VMALLOC_ORIGIN_START;
address_markers[KMSAN_VMALLOC_ORIGIN_END_NR].start_address = KMSAN_VMALLOC_ORIGIN_END;
address_markers[KMSAN_MODULES_SHADOW_START_NR].start_address = KMSAN_MODULES_SHADOW_START;
address_markers[KMSAN_MODULES_SHADOW_END_NR].start_address = KMSAN_MODULES_SHADOW_END;
address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START;
address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END;
#endif
sort_address_markers();
#ifdef CONFIG_PTDUMP_DEBUGFS


@ -48,7 +48,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
}
/*
* s390_kernel_write - write to kernel memory bypassing DAT
* __s390_kernel_write - write to kernel memory bypassing DAT
* @dst: destination address
* @src: source address
* @size: number of bytes to copy
@ -61,7 +61,7 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
notrace void *s390_kernel_write(void *dst, const void *src, size_t size)
notrace void *__s390_kernel_write(void *dst, const void *src, size_t size)
{
void *tmp = dst;
unsigned long flags;


@ -268,33 +268,20 @@ static void zpci_floating_irq_handler(struct airq_struct *airq,
}
}
int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
static int __alloc_airq(struct zpci_dev *zdev, int msi_vecs,
unsigned long *bit)
{
struct zpci_dev *zdev = to_zpci(pdev);
unsigned int hwirq, msi_vecs, cpu;
unsigned long bit;
struct msi_desc *msi;
struct msi_msg msg;
int cpu_addr;
int rc, irq;
zdev->aisb = -1UL;
zdev->msi_first_bit = -1U;
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
if (irq_delivery == DIRECTED) {
/* Allocate cpu vector bits */
bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
if (bit == -1UL)
*bit = airq_iv_alloc(zpci_ibv[0], msi_vecs);
if (*bit == -1UL)
return -EIO;
} else {
/* Allocate adapter summary indicator bit */
bit = airq_iv_alloc_bit(zpci_sbv);
if (bit == -1UL)
*bit = airq_iv_alloc_bit(zpci_sbv);
if (*bit == -1UL)
return -EIO;
zdev->aisb = bit;
zdev->aisb = *bit;
/* Create adapter interrupt vector */
zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | AIRQ_IV_BITLOCK, NULL);
@ -302,27 +289,66 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
return -ENOMEM;
/* Wire up shortcut pointer */
zpci_ibv[bit] = zdev->aibv;
zpci_ibv[*bit] = zdev->aibv;
/* Each function has its own interrupt vector */
bit = 0;
*bit = 0;
}
return 0;
}
int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
{
unsigned int hwirq, msi_vecs, irqs_per_msi, i, cpu;
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
struct msi_msg msg;
unsigned long bit;
int cpu_addr;
int rc, irq;
zdev->aisb = -1UL;
zdev->msi_first_bit = -1U;
msi_vecs = min_t(unsigned int, nvec, zdev->max_msi);
if (msi_vecs < nvec) {
pr_info("%s requested %d irqs, allocate system limit of %d",
pci_name(pdev), nvec, zdev->max_msi);
}
/* Request MSI interrupts */
rc = __alloc_airq(zdev, msi_vecs, &bit);
if (rc < 0)
return rc;
/*
* Request MSI interrupts:
* When using MSI, nvec_used interrupt sources and their irq
* descriptors are controlled through one msi descriptor.
* Thus the outer loop over msi descriptors shall run only once,
* while two inner loops iterate over the interrupt vectors.
* When using MSI-X, each interrupt vector/irq descriptor
* is bound to exactly one msi descriptor (nvec_used is one).
* So the inner loops are executed once, while the outer iterates
* over the MSI-X descriptors.
*/
hwirq = bit;
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_NOTASSOCIATED) {
rc = -EIO;
if (hwirq - bit >= msi_vecs)
break;
irq = __irq_alloc_descs(-1, 0, 1, 0, THIS_MODULE,
(irq_delivery == DIRECTED) ?
msi->affinity : NULL);
irqs_per_msi = min_t(unsigned int, msi_vecs, msi->nvec_used);
irq = __irq_alloc_descs(-1, 0, irqs_per_msi, 0, THIS_MODULE,
(irq_delivery == DIRECTED) ?
msi->affinity : NULL);
if (irq < 0)
return -ENOMEM;
rc = irq_set_msi_desc(irq, msi);
if (rc)
return rc;
irq_set_chip_and_handler(irq, &zpci_irq_chip,
handle_percpu_irq);
for (i = 0; i < irqs_per_msi; i++) {
rc = irq_set_msi_desc_off(irq, i, msi);
if (rc)
return rc;
irq_set_chip_and_handler(irq + i, &zpci_irq_chip,
handle_percpu_irq);
}
msg.data = hwirq - bit;
if (irq_delivery == DIRECTED) {
if (msi->affinity)
@ -335,31 +361,35 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
msg.address_lo |= (cpu_addr << 8);
for_each_possible_cpu(cpu) {
airq_iv_set_data(zpci_ibv[cpu], hwirq, irq);
for (i = 0; i < irqs_per_msi; i++)
airq_iv_set_data(zpci_ibv[cpu],
hwirq + i, irq + i);
}
} else {
msg.address_lo = zdev->msi_addr & 0xffffffff;
airq_iv_set_data(zdev->aibv, hwirq, irq);
for (i = 0; i < irqs_per_msi; i++)
airq_iv_set_data(zdev->aibv, hwirq + i, irq + i);
}
msg.address_hi = zdev->msi_addr >> 32;
pci_write_msi_msg(irq, &msg);
hwirq++;
hwirq += irqs_per_msi;
}
zdev->msi_first_bit = bit;
zdev->msi_nr_irqs = msi_vecs;
zdev->msi_nr_irqs = hwirq - bit;
rc = zpci_set_irq(zdev);
if (rc)
return rc;
return (msi_vecs == nvec) ? 0 : msi_vecs;
return (zdev->msi_nr_irqs == nvec) ? 0 : zdev->msi_nr_irqs;
}
void arch_teardown_msi_irqs(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
struct msi_desc *msi;
unsigned int i;
int rc;
/* Disable interrupts */
@ -369,8 +399,10 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
/* Release MSI interrupts */
msi_for_each_desc(msi, &pdev->dev, MSI_DESC_ASSOCIATED) {
irq_set_msi_desc(msi->irq, NULL);
irq_free_desc(msi->irq);
for (i = 0; i < msi->nvec_used; i++) {
irq_set_msi_desc(msi->irq + i, NULL);
irq_free_desc(msi->irq + i);
}
msi->msg.address_lo = 0;
msi->msg.address_hi = 0;
msi->msg.data = 0;
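
From a driver's point of view, the effect of the allocation changes above is that a plain-MSI request on s390 may now return more than one vector. A generic driver-side sketch using the standard PCI helpers (nothing here is s390-specific, and the vector count is made up):

	#include <linux/pci.h>

	/* Ask for up to 4 vectors, accept MSI or MSI-X, then look up the Linux
	 * irq number of each vector that was actually granted. */
	static int demo_setup_irqs(struct pci_dev *pdev)
	{
		int i, nvecs;

		nvecs = pci_alloc_irq_vectors(pdev, 1, 4, PCI_IRQ_MSI | PCI_IRQ_MSIX);
		if (nvecs < 0)
			return nvecs;
		for (i = 0; i < nvecs; i++) {
			int irq = pci_irq_vector(pdev, i);

			/* request_irq(irq, handler, 0, "demo", pdev) would go here */
			(void)irq;
		}
		return 0;
	}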


@ -96,7 +96,7 @@ config SCLP_OFB
config S390_UV_UAPI
def_tristate m
prompt "Ultravisor userspace API"
depends on S390 && (KVM || PROTECTED_VIRTUALIZATION_GUEST)
depends on S390
help
Selecting exposes parts of the UV interface to userspace
by providing a misc character device at /dev/uv.