powerpc fixes for 5.5 #4

Two weeks worth of accumulated fixes.
 
 A fix for a performance regression seen on PowerVM LPARs using dedicated CPUs,
 caused by our vcpu_is_preempted() returning true even for idle CPUs.
 
 One of the ultravisor support patches broke KVM on big endian hosts in v5.4.
 
 Our KUAP (Kernel User Access Prevention) code missed allowing access in
 __clear_user(), which could lead to an oops or erroneous SEGV when triggered via
 PTRACE_GETREGSET.
 
 Two fixes for the ocxl driver, an open/remove race, and a memory leak in an
 error path.
 
 A handful of other small fixes.
 
 Thanks to:
   Andrew Donnellan, Christian Zigotzky, Christophe Leroy, Christoph Hellwig,
   Daniel Axtens, David Hildenbrand, Frederic Barrat, Gautham R. Shenoy, Greg
   Kurz, Ihor Pasichnyk, Juri Lelli, Marcus Comstedt, Mike Rapoport, Parth Shah,
   Srikar Dronamraju, Vaidyanathan Srinivasan.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCAAxFiEEJFGtCPCthwEv2Y/bUevqPMjhpYAFAl39/EETHG1wZUBlbGxl
 cm1hbi5pZC5hdQAKCRBR6+o8yOGlgNkxD/9o9t+e3IvmZSFNrN0krQCiQCQntNp9
 vYZSCq734ZKu1pIQxTOpOEAn11DhOcZZNULg3EC3WVFuTxz8i84yh2kd+rx94XO8
 diRwXIdowR3KkqP9spq3dlPZOJtSZxgj56uCrtdt3diTQNT3xEwT2h9Y2wQWKiTF
 GrRoDmgxlByVcOPinUobgFmho5DXWp7XLcJoV9skB/pHEPwo9a5DgN60jGDtlnkn
 Vpb2zQZDK2p6OhcZGdVbfrIcphK/uAR2a6uWWVdsRbreSj1UV03IlvipSthNPiQi
 Dv+B+gtQZJ/w1LvddE9R2ySfTeaeARd7XjwfZf/nn+oTEi+XHsRFmP+x73KDJllx
 GXGMckiNq21eii2O4xXvs/NiXJmFfbpg3tkkAJxrL45Ikv2/eWn3ugs07HHbS5uG
 NiM8b8o2PgR4XSCULc0LgJflVSqCEaujm4CukZ7Jbn4QMHe1edQFqu2W8cmY0Sxy
 C1h0DQWJXbAozGpF0+3mB8DNl6ru0CibIaXgWVSsF7nAc3w343o3HhtK5sX/UqGa
 fFPIRXuAr0fdkeb5Kv5Q5uzos3bdC5vJ77aQ+QvkyStg1fD+uf1ChUg2Uc+XFGBC
 oxN9hg8BA7HEoxzaUC5nzEOw1Abl1CssIB2vTUAkiOps1ypMk4HOR796MIQ+3EDO
 VnEfIK73yBmoLA==
 =d/F6
 -----END PGP SIGNATURE-----

Merge tag 'powerpc-5.5-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc fixes from Michael Ellerman:
 "Two weeks worth of accumulated fixes:

   - A fix for a performance regression seen on PowerVM LPARs using
     dedicated CPUs, caused by our vcpu_is_preempted() returning true
     even for idle CPUs.

   - One of the ultravisor support patches broke KVM on big endian hosts
     in v5.4.

   - Our KUAP (Kernel User Access Prevention) code missed allowing
     access in __clear_user(), which could lead to an oops or erroneous
     SEGV when triggered via PTRACE_GETREGSET.

   - Two fixes for the ocxl driver, an open/remove race, and a memory
     leak in an error path.

   - A handful of other small fixes.

  Thanks to: Andrew Donnellan, Christian Zigotzky, Christophe Leroy,
  Christoph Hellwig, Daniel Axtens, David Hildenbrand, Frederic Barrat,
  Gautham R. Shenoy, Greg Kurz, Ihor Pasichnyk, Juri Lelli, Marcus
  Comstedt, Mike Rapoport, Parth Shah, Srikar Dronamraju, Vaidyanathan
  Srinivasan"

* tag 'powerpc-5.5-4' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux:
  KVM: PPC: Book3S HV: Fix regression on big endian hosts
  powerpc: Fix __clear_user() with KUAP enabled
  powerpc/pseries/cmm: fix managed page counts when migrating pages between zones
  powerpc/8xx: fix bogus __init on mmu_mapin_ram_chunk()
  ocxl: Fix potential memory leak on context creation
  powerpc/irq: fix stack overflow verification
  powerpc: Ensure that swiotlb buffer is allocated from low memory
  powerpc/shared: Use static key to detect shared processor
  powerpc/vcpu: Assume dedicated processors as non-preempt
  ocxl: Fix concurrent AFU open and device removal
This commit is contained in:
Linus Torvalds 2019-12-21 06:17:05 -08:00
commit 6d04182dd3
12 changed files with 62 additions and 36 deletions

View file

@ -36,10 +36,12 @@
#endif
#ifdef CONFIG_PPC_PSERIES
DECLARE_STATIC_KEY_FALSE(shared_processor);
#define vcpu_is_preempted vcpu_is_preempted
static inline bool vcpu_is_preempted(int cpu)
{
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
if (!static_branch_unlikely(&shared_processor))
return false;
return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
}
@ -110,13 +112,8 @@ static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
static inline bool is_shared_processor(void)
{
/*
* LPPACA is only available on Pseries so guard anything LPPACA related to
* allow other platforms (which include this common header) to compile.
*/
#ifdef CONFIG_PPC_PSERIES
return (IS_ENABLED(CONFIG_PPC_SPLPAR) &&
lppaca_shared_proc(local_paca->lppaca_ptr));
#ifdef CONFIG_PPC_SPLPAR
return static_branch_unlikely(&shared_processor);
#else
return false;
#endif

View file

@ -401,7 +401,7 @@ copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
return n;
}
extern unsigned long __clear_user(void __user *addr, unsigned long size);
unsigned long __arch_clear_user(void __user *addr, unsigned long size);
static inline unsigned long clear_user(void __user *addr, unsigned long size)
{
@ -409,12 +409,17 @@ static inline unsigned long clear_user(void __user *addr, unsigned long size)
might_fault();
if (likely(access_ok(addr, size))) {
allow_write_to_user(addr, size);
ret = __clear_user(addr, size);
ret = __arch_clear_user(addr, size);
prevent_write_to_user(addr, size);
}
return ret;
}
static inline unsigned long __clear_user(void __user *addr, unsigned long size)
{
return clear_user(addr, size);
}
extern long strncpy_from_user(char *dst, const char __user *src, long count);
extern __must_check long strnlen_user(const char __user *str, long n);

View file

@ -619,8 +619,6 @@ void __do_irq(struct pt_regs *regs)
trace_irq_entry(regs);
check_stack_overflow();
/*
* Query the platform PIC for the interrupt & ack it.
*
@ -652,6 +650,8 @@ void do_IRQ(struct pt_regs *regs)
irqsp = hardirq_ctx[raw_smp_processor_id()];
sirqsp = softirq_ctx[raw_smp_processor_id()];
check_stack_overflow();
/* Already there ? */
if (unlikely(cursp == irqsp || cursp == sirqsp)) {
__do_irq(regs);

View file

@ -1117,7 +1117,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
ld r7, VCPU_GPR(R7)(r4)
bne ret_to_ultra
lwz r0, VCPU_CR(r4)
ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R0)(r4)
@ -1137,7 +1137,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
* R3 = UV_RETURN
*/
ret_to_ultra:
lwz r0, VCPU_CR(r4)
ld r0, VCPU_CR(r4)
mtcr r0
ld r0, VCPU_GPR(R3)(r4)

View file

@ -17,7 +17,7 @@ CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)
_GLOBAL(__clear_user)
_GLOBAL(__arch_clear_user)
/*
* Use dcbz on the complete cache lines in the destination
* to set them to zero. This requires that the destination
@ -87,4 +87,4 @@ _GLOBAL(__clear_user)
EX_TABLE(8b, 91b)
EX_TABLE(9b, 91b)
EXPORT_SYMBOL(__clear_user)
EXPORT_SYMBOL(__arch_clear_user)

View file

@ -17,7 +17,7 @@ PPC64_CACHES:
.section ".text"
/**
* __clear_user: - Zero a block of memory in user space, with less checking.
* __arch_clear_user: - Zero a block of memory in user space, with less checking.
* @to: Destination address, in user space.
* @n: Number of bytes to zero.
*
@ -58,7 +58,7 @@ err3; stb r0,0(r3)
mr r3,r4
blr
_GLOBAL_TOC(__clear_user)
_GLOBAL_TOC(__arch_clear_user)
cmpdi r4,32
neg r6,r3
li r0,0
@ -181,4 +181,4 @@ err1; dcbz 0,r3
cmpdi r4,32
blt .Lshort_clear
b .Lmedium_clear
EXPORT_SYMBOL(__clear_user)
EXPORT_SYMBOL(__arch_clear_user)

View file

@ -289,6 +289,14 @@ void __init mem_init(void)
BUILD_BUG_ON(MMU_PAGE_COUNT > 16);
#ifdef CONFIG_SWIOTLB
/*
* Some platforms (e.g. 85xx) limit DMA-able memory way below
* 4G. We force memblock to bottom-up mode to ensure that the
* memory allocated in swiotlb_init() is DMA-able.
* As it's the last memblock allocation, no need to reset it
* back to to-down.
*/
memblock_set_bottom_up(true);
swiotlb_init(0);
#endif

View file

@ -103,7 +103,7 @@ static void mmu_patch_addis(s32 *site, long simm)
patch_instruction_site(site, instr);
}
void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
{
unsigned long s = offset;
unsigned long v = PAGE_OFFSET + s;

View file

@ -539,6 +539,16 @@ static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
/* balloon page list reference */
get_page(newpage);
/*
* When we migrate a page to a different zone, we have to fixup the
* count of both involved zones as we adjusted the managed page count
* when inflating.
*/
if (page_zone(page) != page_zone(newpage)) {
adjust_managed_page_count(page, 1);
adjust_managed_page_count(newpage, -1);
}
spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_insert(b_dev_info, newpage);
balloon_page_delete(page);

View file

@ -74,6 +74,9 @@
#include "pseries.h"
#include "../../../../drivers/pci/pci.h"
DEFINE_STATIC_KEY_FALSE(shared_processor);
EXPORT_SYMBOL_GPL(shared_processor);
int CMO_PrPSP = -1;
int CMO_SecPSP = -1;
unsigned long CMO_PageSize = (ASM_CONST(1) << IOMMU_PAGE_SHIFT_4K);
@ -758,6 +761,10 @@ static void __init pSeries_setup_arch(void)
if (firmware_has_feature(FW_FEATURE_LPAR)) {
vpa_init(boot_cpuid);
if (lppaca_shared_proc(get_lppaca()))
static_branch_enable(&shared_processor);
ppc_md.power_save = pseries_lpar_idle;
ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
#ifdef CONFIG_PCI_IOV

View file

@ -10,18 +10,17 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
int pasid;
struct ocxl_context *ctx;
*context = kzalloc(sizeof(struct ocxl_context), GFP_KERNEL);
if (!*context)
ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
if (!ctx)
return -ENOMEM;
ctx = *context;
ctx->afu = afu;
mutex_lock(&afu->contexts_lock);
pasid = idr_alloc(&afu->contexts_idr, ctx, afu->pasid_base,
afu->pasid_base + afu->pasid_max, GFP_KERNEL);
if (pasid < 0) {
mutex_unlock(&afu->contexts_lock);
kfree(ctx);
return pasid;
}
afu->pasid_count++;
@ -43,6 +42,7 @@ int ocxl_context_alloc(struct ocxl_context **context, struct ocxl_afu *afu,
* duration of the life of the context
*/
ocxl_afu_get(afu);
*context = ctx;
return 0;
}
EXPORT_SYMBOL_GPL(ocxl_context_alloc);

View file

@ -18,18 +18,15 @@ static struct class *ocxl_class;
static struct mutex minors_idr_lock;
static struct idr minors_idr;
static struct ocxl_file_info *find_file_info(dev_t devno)
static struct ocxl_file_info *find_and_get_file_info(dev_t devno)
{
struct ocxl_file_info *info;
/*
* We don't declare an RCU critical section here, as our AFU
* is protected by a reference counter on the device. By the time the
* info reference is removed from the idr, the ref count of
* the device is already at 0, so no user API will access that AFU and
* this function can't return it.
*/
mutex_lock(&minors_idr_lock);
info = idr_find(&minors_idr, MINOR(devno));
if (info)
get_device(&info->dev);
mutex_unlock(&minors_idr_lock);
return info;
}
@ -58,14 +55,16 @@ static int afu_open(struct inode *inode, struct file *file)
pr_debug("%s for device %x\n", __func__, inode->i_rdev);
info = find_file_info(inode->i_rdev);
info = find_and_get_file_info(inode->i_rdev);
if (!info)
return -ENODEV;
rc = ocxl_context_alloc(&ctx, info->afu, inode->i_mapping);
if (rc)
if (rc) {
put_device(&info->dev);
return rc;
}
put_device(&info->dev);
file->private_data = ctx;
return 0;
}
@ -487,7 +486,6 @@ static void info_release(struct device *dev)
{
struct ocxl_file_info *info = container_of(dev, struct ocxl_file_info, dev);
free_minor(info);
ocxl_afu_put(info->afu);
kfree(info);
}
@ -577,6 +575,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu)
ocxl_file_make_invisible(info);
ocxl_sysfs_unregister_afu(info);
free_minor(info);
device_unregister(&info->dev);
}