kernel-hacking-2024-linux-s.../arch/riscv/kernel/process.c

236 lines
6.2 KiB
C
Raw Normal View History

// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2009 Sunplus Core Technology Co., Ltd.
* Chen Liqin <liqin.chen@sunplusct.com>
* Lennox Wu <lennox.wu@sunplusct.com>
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
*/
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/tick.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
#include <asm/processor.h>
#include <asm/csr.h>
#include <asm/stacktrace.h>
#include <asm/string.h>
#include <asm/switch_to.h>
#include <asm/thread_info.h>
#include <asm/cpuidle.h>
#include <asm/vector.h>
#include <asm/cpufeature.h>
riscv: Enable per-task stack canaries This enables the use of per-task stack canary values if GCC has support for emitting the stack canary reference relative to the value of tp, which holds the task struct pointer in the riscv kernel. After compare arm64 and x86 implementations, seems arm64's is more flexible and readable. The key point is how gcc get the offset of stack_canary from gs/el0_sp. x86: Use a fix offset from gs, not flexible. struct fixed_percpu_data { /* * GCC hardcodes the stack canary as %gs:40. Since the * irq_stack is the object at %gs:0, we reserve the bottom * 48 bytes of the irq stack for the canary. */ char gs_base[40]; // :( unsigned long stack_canary; }; arm64: Use -mstack-protector-guard-offset & guard-reg gcc options: -mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=xxx riscv: Use -mstack-protector-guard-offset & guard-reg gcc options: -mstack-protector-guard=tls -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=xxx GCC's implementation has been merged: commit c931e8d5a96463427040b0d11f9c4352ac22b2b0 Author: Cooper Qu <cooper.qu@linux.alibaba.com> Date: Mon Jul 13 16:15:08 2020 +0800 RISC-V: Add support for TLS stack protector canary access In the end, these codes are inserted by gcc before return: * 0xffffffe00020b396 <+120>: ld a5,1008(tp) # 0x3f0 * 0xffffffe00020b39a <+124>: xor a5,a5,a4 * 0xffffffe00020b39c <+126>: mv a0,s5 * 0xffffffe00020b39e <+128>: bnez a5,0xffffffe00020b61c <_do_fork+766> 0xffffffe00020b3a2 <+132>: ld ra,136(sp) 0xffffffe00020b3a4 <+134>: ld s0,128(sp) 0xffffffe00020b3a6 <+136>: ld s1,120(sp) 0xffffffe00020b3a8 <+138>: ld s2,112(sp) 0xffffffe00020b3aa <+140>: ld s3,104(sp) 0xffffffe00020b3ac <+142>: ld s4,96(sp) 0xffffffe00020b3ae <+144>: ld s5,88(sp) 0xffffffe00020b3b0 <+146>: ld s6,80(sp) 0xffffffe00020b3b2 <+148>: ld s7,72(sp) 0xffffffe00020b3b4 <+150>: addi sp,sp,144 0xffffffe00020b3b6 <+152>: ret ... * 0xffffffe00020b61c <+766>: auipc ra,0x7f8 * 0xffffffe00020b620 <+770>: jalr -1764(ra) # 0xffffffe000a02f38 <__stack_chk_fail> Signed-off-by: Guo Ren <guoren@linux.alibaba.com> Signed-off-by: Cooper Qu <cooper.qu@linux.alibaba.com> Reviewed-by: Kees Cook <keescook@chromium.org> Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
2020-12-17 16:29:18 +00:00
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
unsigned long __stack_chk_guard __read_mostly;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
extern asmlinkage void ret_from_fork(void);
riscv: Fix warning by declaring arch_cpu_idle() as noinstr The following warning appears when using ftrace: [89855.443413] RCU not on for: arch_cpu_idle+0x0/0x1c [89855.445640] WARNING: CPU: 5 PID: 0 at include/linux/trace_recursion.h:162 arch_ftrace_ops_list_func+0x208/0x228 [89855.445824] Modules linked in: xt_conntrack(E) nft_chain_nat(E) xt_MASQUERADE(E) nf_conntrack_netlink(E) xt_addrtype(E) nft_compat(E) nf_tables(E) nfnetlink(E) br_netfilter(E) cfg80211(E) nls_iso8859_1(E) ofpart(E) redboot(E) cmdlinepart(E) cfi_cmdset_0001(E) virtio_net(E) cfi_probe(E) cfi_util(E) 9pnet_virtio(E) gen_probe(E) net_failover(E) virtio_rng(E) failover(E) 9pnet(E) physmap(E) map_funcs(E) chipreg(E) mtd(E) uio_pdrv_genirq(E) uio(E) dm_multipath(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) drm(E) efi_pstore(E) backlight(E) ip_tables(E) x_tables(E) raid10(E) raid456(E) async_raid6_recov(E) async_memcpy(E) async_pq(E) async_xor(E) xor(E) async_tx(E) raid6_pq(E) raid1(E) raid0(E) virtio_blk(E) [89855.451563] CPU: 5 PID: 0 Comm: swapper/5 Tainted: G E 6.8.0-rc6ubuntu-defconfig #2 [89855.451726] Hardware name: riscv-virtio,qemu (DT) [89855.451899] epc : arch_ftrace_ops_list_func+0x208/0x228 [89855.452016] ra : arch_ftrace_ops_list_func+0x208/0x228 [89855.452119] epc : ffffffff8016b216 ra : ffffffff8016b216 sp : ffffaf808090fdb0 [89855.452171] gp : ffffffff827c7680 tp : ffffaf808089ad40 t0 : ffffffff800c0dd8 [89855.452216] t1 : 0000000000000001 t2 : 0000000000000000 s0 : ffffaf808090fe30 [89855.452306] s1 : 0000000000000000 a0 : 0000000000000026 a1 : ffffffff82cd6ac8 [89855.452423] a2 : ffffffff800458c8 a3 : ffffaf80b1870640 a4 : 0000000000000000 [89855.452646] a5 : 0000000000000000 a6 : 00000000ffffffff a7 : ffffffffffffffff [89855.452698] s2 : ffffffff82766872 s3 : ffffffff80004caa s4 : ffffffff80ebea90 [89855.452743] s5 : ffffaf808089bd40 s6 : 8000000a00006e00 s7 : 0000000000000008 [89855.452787] s8 : 0000000000002000 s9 : 0000000080043700 s10: 0000000000000000 [89855.452831] s11: 0000000000000000 t3 : 0000000000100000 t4 : 0000000000000064 [89855.452874] t5 : 000000000000000c t6 : ffffaf80b182dbfc [89855.452929] status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003 [89855.453053] [<ffffffff8016b216>] arch_ftrace_ops_list_func+0x208/0x228 [89855.453191] [<ffffffff8000e082>] ftrace_call+0x8/0x22 [89855.453265] [<ffffffff800a149c>] do_idle+0x24c/0x2ca [89855.453357] [<ffffffff8000da54>] return_to_handler+0x0/0x26 [89855.453429] [<ffffffff8000b716>] smp_callin+0x92/0xb6 [89855.453785] ---[ end trace 0000000000000000 ]--- To fix this, mark arch_cpu_idle() as noinstr, like it is done in commit a9cbc1b471d2 ("s390/idle: mark arch_cpu_idle() noinstr"). Reported-by: Evgenii Shatokhin <e.shatokhin@yadro.com> Closes: https://lore.kernel.org/linux-riscv/51f21b87-ebed-4411-afbc-c00d3dea2bab@yadro.com/ Fixes: cfbc4f81c9d0 ("riscv: Select ARCH_WANTS_NO_INSTR") Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com> Reviewed-by: Andy Chiu <andy.chiu@sifive.com> Tested-by: Andy Chiu <andy.chiu@sifive.com> Acked-by: Puranjay Mohan <puranjay12@gmail.com> Link: https://lore.kernel.org/r/20240326203017.310422-2-alexghiti@rivosinc.com Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
2024-03-26 20:30:16 +00:00
void noinstr arch_cpu_idle(void)
{
cpu_do_idle();
}
int set_unalign_ctl(struct task_struct *tsk, unsigned int val)
{
if (!unaligned_ctl_available())
return -EINVAL;
tsk->thread.align_ctl = val;
return 0;
}
int get_unalign_ctl(struct task_struct *tsk, unsigned long adr)
{
if (!unaligned_ctl_available())
return -EINVAL;
return put_user(tsk->thread.align_ctl, (unsigned long __user *)adr);
}
void __show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
if (!user_mode(regs)) {
pr_cont("epc : %pS\n", (void *)regs->epc);
pr_cont(" ra : %pS\n", (void *)regs->ra);
}
pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
regs->epc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
regs->t1, regs->t2, regs->s0);
pr_cont(" s1 : " REG_FMT " a0 : " REG_FMT " a1 : " REG_FMT "\n",
regs->s1, regs->a0, regs->a1);
pr_cont(" a2 : " REG_FMT " a3 : " REG_FMT " a4 : " REG_FMT "\n",
regs->a2, regs->a3, regs->a4);
pr_cont(" a5 : " REG_FMT " a6 : " REG_FMT " a7 : " REG_FMT "\n",
regs->a5, regs->a6, regs->a7);
pr_cont(" s2 : " REG_FMT " s3 : " REG_FMT " s4 : " REG_FMT "\n",
regs->s2, regs->s3, regs->s4);
pr_cont(" s5 : " REG_FMT " s6 : " REG_FMT " s7 : " REG_FMT "\n",
regs->s5, regs->s6, regs->s7);
pr_cont(" s8 : " REG_FMT " s9 : " REG_FMT " s10: " REG_FMT "\n",
regs->s8, regs->s9, regs->s10);
pr_cont(" s11: " REG_FMT " t3 : " REG_FMT " t4 : " REG_FMT "\n",
regs->s11, regs->t3, regs->t4);
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
regs->t5, regs->t6);
pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
regs->status, regs->badaddr, regs->cause);
}
void show_regs(struct pt_regs *regs)
{
__show_regs(regs);
if (!user_mode(regs))
dump_backtrace(regs, NULL, KERN_DEFAULT);
}
#ifdef CONFIG_COMPAT
static bool compat_mode_supported __read_mostly;
bool compat_elf_check_arch(Elf32_Ehdr *hdr)
{
return compat_mode_supported &&
hdr->e_machine == EM_RISCV &&
hdr->e_ident[EI_CLASS] == ELFCLASS32;
}
static int __init compat_mode_detect(void)
{
unsigned long tmp = csr_read(CSR_STATUS);
csr_write(CSR_STATUS, (tmp & ~SR_UXL) | SR_UXL_32);
compat_mode_supported =
(csr_read(CSR_STATUS) & SR_UXL) == SR_UXL_32;
csr_write(CSR_STATUS, tmp);
pr_info("riscv: ELF compat mode %s",
compat_mode_supported ? "supported" : "unsupported");
return 0;
}
early_initcall(compat_mode_detect);
#endif
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->status = SR_PIE;
if (has_fpu()) {
regs->status |= SR_FS_INITIAL;
riscv: Correct the initialized flow of FP register The following two reasons cause FP registers are sometimes not initialized before starting the user program. 1. Currently, the FP context is initialized in flush_thread() function and we expect these initial values to be restored to FP register when doing FP context switch. However, the FP context switch only occurs in switch_to function. Hence, if this process does not be scheduled out and scheduled in before entering the user space, the FP registers have no chance to initialize. 2. In flush_thread(), the state of reg->sstatus.FS inherits from the parent. Hence, the state of reg->sstatus.FS may be dirty. If this process is scheduled out during flush_thread() and initializing the FP register, the fstate_save() in switch_to will corrupt the FP context which has been initialized until flush_thread(). To solve the 1st case, the initialization of the FP register will be completed in start_thread(). It makes sure all FP registers are initialized before starting the user program. For the 2nd case, the state of reg->sstatus.FS in start_thread will be set to SR_FS_OFF to prevent this process from corrupting FP context in doing context save. The FP state is set to SR_FS_INITIAL in start_trhead(). Signed-off-by: Vincent Chen <vincent.chen@sifive.com> Reviewed-by: Anup Patel <anup@brainfault.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Fixes: 7db91e57a0acd ("RISC-V: Task implementation") Cc: stable@vger.kernel.org [paul.walmsley@sifive.com: fixed brace alignment issue reported by checkpatch] Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
2019-08-14 08:23:52 +00:00
/*
* Restore the initial value to the FP register
* before starting the user program.
*/
fstate_restore(current, regs);
}
regs->epc = pc;
regs->sp = sp;
#ifdef CONFIG_64BIT
regs->status &= ~SR_UXL;
if (is_compat_task())
regs->status |= SR_UXL_32;
else
regs->status |= SR_UXL_64;
#endif
}
void flush_thread(void)
{
#ifdef CONFIG_FPU
/*
riscv: Correct the initialized flow of FP register The following two reasons cause FP registers are sometimes not initialized before starting the user program. 1. Currently, the FP context is initialized in flush_thread() function and we expect these initial values to be restored to FP register when doing FP context switch. However, the FP context switch only occurs in switch_to function. Hence, if this process does not be scheduled out and scheduled in before entering the user space, the FP registers have no chance to initialize. 2. In flush_thread(), the state of reg->sstatus.FS inherits from the parent. Hence, the state of reg->sstatus.FS may be dirty. If this process is scheduled out during flush_thread() and initializing the FP register, the fstate_save() in switch_to will corrupt the FP context which has been initialized until flush_thread(). To solve the 1st case, the initialization of the FP register will be completed in start_thread(). It makes sure all FP registers are initialized before starting the user program. For the 2nd case, the state of reg->sstatus.FS in start_thread will be set to SR_FS_OFF to prevent this process from corrupting FP context in doing context save. The FP state is set to SR_FS_INITIAL in start_trhead(). Signed-off-by: Vincent Chen <vincent.chen@sifive.com> Reviewed-by: Anup Patel <anup@brainfault.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Fixes: 7db91e57a0acd ("RISC-V: Task implementation") Cc: stable@vger.kernel.org [paul.walmsley@sifive.com: fixed brace alignment issue reported by checkpatch] Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
2019-08-14 08:23:52 +00:00
* Reset FPU state and context
* frm: round to nearest, ties to even (IEEE default)
* fflags: accrued exceptions cleared
*/
riscv: Correct the initialized flow of FP register The following two reasons cause FP registers are sometimes not initialized before starting the user program. 1. Currently, the FP context is initialized in flush_thread() function and we expect these initial values to be restored to FP register when doing FP context switch. However, the FP context switch only occurs in switch_to function. Hence, if this process does not be scheduled out and scheduled in before entering the user space, the FP registers have no chance to initialize. 2. In flush_thread(), the state of reg->sstatus.FS inherits from the parent. Hence, the state of reg->sstatus.FS may be dirty. If this process is scheduled out during flush_thread() and initializing the FP register, the fstate_save() in switch_to will corrupt the FP context which has been initialized until flush_thread(). To solve the 1st case, the initialization of the FP register will be completed in start_thread(). It makes sure all FP registers are initialized before starting the user program. For the 2nd case, the state of reg->sstatus.FS in start_thread will be set to SR_FS_OFF to prevent this process from corrupting FP context in doing context save. The FP state is set to SR_FS_INITIAL in start_trhead(). Signed-off-by: Vincent Chen <vincent.chen@sifive.com> Reviewed-by: Anup Patel <anup@brainfault.org> Reviewed-by: Christoph Hellwig <hch@lst.de> Fixes: 7db91e57a0acd ("RISC-V: Task implementation") Cc: stable@vger.kernel.org [paul.walmsley@sifive.com: fixed brace alignment issue reported by checkpatch] Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
2019-08-14 08:23:52 +00:00
fstate_off(current, task_pt_regs(current));
memset(&current->thread.fstate, 0, sizeof(current->thread.fstate));
#endif
#ifdef CONFIG_RISCV_ISA_V
/* Reset vector state */
riscv_v_vstate_ctrl_init(current);
riscv_v_vstate_off(task_pt_regs(current));
kfree(current->thread.vstate.datap);
memset(&current->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(current, TIF_RISCV_V_DEFER_RESTORE);
#endif
}
void arch_release_task_struct(struct task_struct *tsk)
{
/* Free the vector context of datap. */
if (has_vector())
riscv_v_thread_free(tsk);
}
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
fstate_save(src, task_pt_regs(src));
*dst = *src;
/* clear entire V context, including datap for a new task */
memset(&dst->thread.vstate, 0, sizeof(struct __riscv_v_ext_state));
memset(&dst->thread.kernel_vstate, 0, sizeof(struct __riscv_v_ext_state));
clear_tsk_thread_flag(dst, TIF_RISCV_V_DEFER_RESTORE);
return 0;
}
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long clone_flags = args->flags;
unsigned long usp = args->stack;
unsigned long tls = args->tls;
struct pt_regs *childregs = task_pt_regs(p);
memset(&p->thread.s, 0, sizeof(p->thread.s));
/* p->thread holds context to be restored by __switch_to() */
if (unlikely(args->fn)) {
/* Kernel thread */
memset(childregs, 0, sizeof(struct pt_regs));
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
*childregs = *(current_pt_regs());
/* Turn off status.VS */
riscv_v_vstate_off(childregs);
if (usp) /* User fork */
childregs->sp = usp;
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
p->thread.s[0] = 0;
}
p->thread.riscv_v_flags = 0;
if (has_vector())
riscv_v_thread_alloc(p);
p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
void __init arch_task_cache_init(void)
{
riscv_v_setup_ctx_cache();
}