xtensa: support coprocessors on SMP

Current coprocessor support on xtensa only works correctly on
uniprocessor configurations. Make it work on SMP too and keep it lazy.

Make coprocessor_owner array per-CPU and move it to struct exc_table for
easy access from the fast_coprocessor exception handler. Allow task to
have live coprocessors only on single CPU, record this CPU number in the
struct thread_info::cp_owner_cpu. Change struct thread_info::cpenable
meaning to be 'coprocessors live on cp_owner_cpu'.
Introduce C-level coprocessor exception handler that flushes and
releases live coprocessors of the task taking 'coprocessor disabled'
exception and call it from the fast_coprocessor handler when the task
has live coprocessors on other CPU.
Make coprocessor_flush_all and coprocessor_release_all work correctly
when called from any CPU by sending IPI to the cp_owner_cpu. Add
function coprocessor_flush_release_all to do flush followed by release
atomically. Add function local_coprocessors_flush_release_all to flush
and release all coprocessors on the local CPU and use it to flush
coprocessor contexts from the CPU that goes offline.

Signed-off-by: Max Filippov <jcmvbkbc@gmail.com>
This commit is contained in:
Max Filippov 2022-04-15 03:05:31 -07:00
parent f29cab2906
commit 11e969bc96
11 changed files with 238 additions and 75 deletions

View file

@ -142,10 +142,12 @@ typedef struct { XCHAL_CP6_SA_LIST(2) } xtregs_cp6_t
typedef struct { XCHAL_CP7_SA_LIST(2) } xtregs_cp7_t
__attribute__ ((aligned (XCHAL_CP7_SA_ALIGN)));
extern struct thread_info* coprocessor_owner[XCHAL_CP_MAX];
struct thread_info;
void coprocessor_flush(struct thread_info *ti, int cp_index);
void coprocessor_release_all(struct thread_info *ti);
void coprocessor_flush_all(struct thread_info *ti);
void coprocessor_flush_release_all(struct thread_info *ti);
void local_coprocessors_flush_release_all(void);
#endif /* XTENSA_HAVE_COPROCESSORS */

View file

@ -52,12 +52,17 @@ struct thread_info {
__u32 cpu; /* current CPU */
__s32 preempt_count; /* 0 => preemptable,< 0 => BUG*/
unsigned long cpenable;
#if XCHAL_HAVE_EXCLUSIVE
/* result of the most recent exclusive store */
unsigned long atomctl8;
#endif
/*
* If i-th bit is set then coprocessor state is loaded into the
* coprocessor i on CPU cp_owner_cpu.
*/
unsigned long cpenable;
u32 cp_owner_cpu;
/* Allocate storage for extra user states and coprocessor states. */
#if XTENSA_HAVE_COPROCESSORS
xtregs_coprocessor_t xtregs_cp;

View file

@ -27,6 +27,10 @@ struct exc_table {
void *fixup;
/* For passing a parameter to fixup */
void *fixup_param;
#if XTENSA_HAVE_COPROCESSORS
/* Pointers to owner struct thread_info */
struct thread_info *coprocessor_owner[XCHAL_CP_MAX];
#endif
/* Fast user exception handlers */
void *fast_user_handler[EXCCAUSE_N];
/* Fast kernel exception handlers */
@ -35,6 +39,8 @@ struct exc_table {
xtensa_exception_handler *default_handler[EXCCAUSE_N];
};
DECLARE_PER_CPU(struct exc_table, exc_table);
xtensa_exception_handler *
__init trap_set_handler(int cause, xtensa_exception_handler *handler);

View file

@ -91,10 +91,12 @@ int main(void)
/* struct thread_info (offset from start_struct) */
DEFINE(THREAD_RA, offsetof (struct task_struct, thread.ra));
DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp));
DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable));
#if XCHAL_HAVE_EXCLUSIVE
DEFINE(THREAD_ATOMCTL8, offsetof (struct thread_info, atomctl8));
#endif
DEFINE(THREAD_CPENABLE, offsetof(struct thread_info, cpenable));
DEFINE(THREAD_CPU, offsetof(struct thread_info, cpu));
DEFINE(THREAD_CP_OWNER_CPU, offsetof(struct thread_info, cp_owner_cpu));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0));
DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1));
@ -137,6 +139,10 @@ int main(void)
DEFINE(EXC_TABLE_DOUBLE_SAVE, offsetof(struct exc_table, double_save));
DEFINE(EXC_TABLE_FIXUP, offsetof(struct exc_table, fixup));
DEFINE(EXC_TABLE_PARAM, offsetof(struct exc_table, fixup_param));
#if XTENSA_HAVE_COPROCESSORS
DEFINE(EXC_TABLE_COPROCESSOR_OWNER,
offsetof(struct exc_table, coprocessor_owner));
#endif
DEFINE(EXC_TABLE_FAST_USER,
offsetof(struct exc_table, fast_user_handler));
DEFINE(EXC_TABLE_FAST_KERNEL,

View file

@ -19,6 +19,26 @@
#include <asm/current.h>
#include <asm/regs.h>
/*
* Rules for coprocessor state manipulation on SMP:
*
* - a task may have live coprocessors only on one CPU.
*
* - whether coprocessor context of task T is live on some CPU is
* denoted by T's thread_info->cpenable.
*
* - non-zero thread_info->cpenable means that thread_info->cp_owner_cpu
* is valid in the T's thread_info. Zero thread_info->cpenable means that
* coprocessor context is valid in the T's thread_info.
*
* - if a coprocessor context of task T is live on CPU X, only CPU X changes
* T's thread_info->cpenable, cp_owner_cpu and coprocessor save area.
* This is done by making sure that for the task T with live coprocessor
* on CPU X cpenable SR is 0 when T runs on any other CPU Y.
* When fast_coprocessor exception is taken on CPU Y it goes to the
* C-level do_coprocessor that uses IPI to make CPU X flush T's coprocessors.
*/
#if XTENSA_HAVE_COPROCESSORS
/*
@ -101,9 +121,37 @@
ENTRY(fast_coprocessor)
s32i a3, a2, PT_AREG3
#ifdef CONFIG_SMP
/*
* Check if any coprocessor context is live on another CPU
* and if so go through the C-level coprocessor exception handler
* to flush it to memory.
*/
GET_THREAD_INFO (a0, a2)
l32i a3, a0, THREAD_CPENABLE
beqz a3, .Lload_local
/*
* Pairs with smp_wmb in local_coprocessor_release_all
* and with both memws below.
*/
memw
l32i a3, a0, THREAD_CPU
l32i a0, a0, THREAD_CP_OWNER_CPU
beq a0, a3, .Lload_local
rsr a0, ps
l32i a3, a2, PT_AREG3
bbci.l a0, PS_UM_BIT, 1f
call0 user_exception
1: call0 kernel_exception
#endif
/* Save remaining registers a1-a3 and SAR */
s32i a3, a2, PT_AREG3
.Lload_local:
rsr a3, sar
s32i a1, a2, PT_AREG1
s32i a3, a2, PT_SAR
@ -117,6 +165,9 @@ ENTRY(fast_coprocessor)
s32i a5, a1, PT_AREG5
s32i a6, a1, PT_AREG6
s32i a7, a1, PT_AREG7
s32i a8, a1, PT_AREG8
s32i a9, a1, PT_AREG9
s32i a10, a1, PT_AREG10
/* Find coprocessor number. Subtract first CP EXCCAUSE from EXCCAUSE */
@ -139,51 +190,66 @@ ENTRY(fast_coprocessor)
addx8 a7, a3, a7
addx4 a7, a3, a7
/* Retrieve previous owner. (a3 still holds CP number) */
/* Retrieve previous owner (a8). */
movi a0, coprocessor_owner # list of owners
rsr a0, excsave1 # exc_table
addx4 a0, a3, a0 # entry for CP
l32i a4, a0, 0
l32i a8, a0, EXC_TABLE_COPROCESSOR_OWNER
beqz a4, 1f # skip 'save' if no previous owner
/* Set new owner (a9). */
GET_THREAD_INFO (a9, a1)
l32i a4, a9, THREAD_CPU
s32i a9, a0, EXC_TABLE_COPROCESSOR_OWNER
s32i a4, a9, THREAD_CP_OWNER_CPU
/*
* Enable coprocessor for the new owner. (a2 = 1 << CP number)
* This can be done before loading context into the coprocessor.
*/
l32i a4, a9, THREAD_CPENABLE
or a4, a4, a2
/*
* Make sure THREAD_CP_OWNER_CPU is in memory before updating
* THREAD_CPENABLE
*/
memw # (2)
s32i a4, a9, THREAD_CPENABLE
beqz a8, 1f # skip 'save' if no previous owner
/* Disable coprocessor for previous owner. (a2 = 1 << CP number) */
l32i a5, a4, THREAD_CPENABLE
xor a5, a5, a2 # (1 << cp-id) still in a2
s32i a5, a4, THREAD_CPENABLE
l32i a10, a8, THREAD_CPENABLE
xor a10, a10, a2
/*
* Get context save area and call save routine.
* (a4 still holds previous owner (thread_info), a3 CP number)
*/
/* Get context save area and call save routine. */
l32i a2, a7, CP_REGS_TAB_OFFSET
l32i a3, a7, CP_REGS_TAB_SAVE
add a2, a2, a4
add a2, a2, a8
callx0 a3
/* Note that only a0 and a1 were preserved. */
rsr a3, exccause
addi a3, a3, -EXCCAUSE_COPROCESSOR0_DISABLED
movi a0, coprocessor_owner
addx4 a0, a3, a0
/* Set new 'owner' (a0 points to the CP owner, a3 contains the CP nr) */
1: GET_THREAD_INFO (a4, a1)
s32i a4, a0, 0
/*
* Make sure coprocessor context and THREAD_CP_OWNER_CPU are in memory
* before updating THREAD_CPENABLE
*/
memw # (3)
s32i a10, a8, THREAD_CPENABLE
1:
/* Get context save area and call load routine. */
l32i a2, a7, CP_REGS_TAB_OFFSET
l32i a3, a7, CP_REGS_TAB_LOAD
add a2, a2, a4
add a2, a2, a9
callx0 a3
/* Restore all registers and return from exception handler. */
l32i a10, a1, PT_AREG10
l32i a9, a1, PT_AREG9
l32i a8, a1, PT_AREG8
l32i a7, a1, PT_AREG7
l32i a6, a1, PT_AREG6
l32i a5, a1, PT_AREG5
@ -233,12 +299,4 @@ ENTRY(coprocessor_flush)
ENDPROC(coprocessor_flush)
.data
ENTRY(coprocessor_owner)
.fill XCHAL_CP_MAX, 4, 0
END(coprocessor_owner)
#endif /* XTENSA_HAVE_COPROCESSORS */

View file

@ -2071,8 +2071,16 @@ ENTRY(_switch_to)
#if (XTENSA_HAVE_COPROCESSORS || XTENSA_HAVE_IO_PORTS)
l32i a3, a5, THREAD_CPENABLE
xsr a3, cpenable
s32i a3, a4, THREAD_CPENABLE
#ifdef CONFIG_SMP
beqz a3, 1f
memw # pairs with memw (2) in fast_coprocessor
l32i a6, a5, THREAD_CP_OWNER_CPU
l32i a7, a5, THREAD_CPU
beq a6, a7, 1f # load 0 into CPENABLE if current CPU is not the owner
movi a3, 0
1:
#endif
wsr a3, cpenable
#endif
#if XCHAL_HAVE_EXCLUSIVE

View file

@ -47,6 +47,7 @@
#include <asm/asm-offsets.h>
#include <asm/regs.h>
#include <asm/hw_breakpoint.h>
#include <asm/traps.h>
extern void ret_from_fork(void);
extern void ret_from_kernel_thread(void);
@ -63,52 +64,114 @@ EXPORT_SYMBOL(__stack_chk_guard);
#if XTENSA_HAVE_COPROCESSORS
void coprocessor_release_all(struct thread_info *ti)
void local_coprocessors_flush_release_all(void)
{
unsigned long cpenable;
struct thread_info **coprocessor_owner;
struct thread_info *unique_owner[XCHAL_CP_MAX];
int n = 0;
int i, j;
coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
xtensa_set_sr(XCHAL_CP_MASK, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
struct thread_info *ti = coprocessor_owner[i];
if (ti) {
coprocessor_flush(ti, i);
for (j = 0; j < n; j++)
if (unique_owner[j] == ti)
break;
if (j == n)
unique_owner[n++] = ti;
coprocessor_owner[i] = NULL;
}
}
for (i = 0; i < n; i++) {
/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
smp_wmb();
unique_owner[i]->cpenable = 0;
}
xtensa_set_sr(0, cpenable);
}
static void local_coprocessor_release_all(void *info)
{
struct thread_info *ti = info;
struct thread_info **coprocessor_owner;
int i;
/* Make sure we don't switch tasks during this operation. */
preempt_disable();
coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
/* Walk through all cp owners and release it for the requested one. */
cpenable = ti->cpenable;
for (i = 0; i < XCHAL_CP_MAX; i++) {
if (coprocessor_owner[i] == ti) {
coprocessor_owner[i] = 0;
cpenable &= ~(1 << i);
}
if (coprocessor_owner[i] == ti)
coprocessor_owner[i] = NULL;
}
ti->cpenable = cpenable;
/* pairs with memw (1) in fast_coprocessor and memw in switch_to */
smp_wmb();
ti->cpenable = 0;
if (ti == current_thread_info())
xtensa_set_sr(0, cpenable);
}
preempt_enable();
void coprocessor_release_all(struct thread_info *ti)
{
if (ti->cpenable) {
/* pairs with memw (2) in fast_coprocessor */
smp_rmb();
smp_call_function_single(ti->cp_owner_cpu,
local_coprocessor_release_all,
ti, true);
}
}
static void local_coprocessor_flush_all(void *info)
{
struct thread_info *ti = info;
struct thread_info **coprocessor_owner;
unsigned long old_cpenable;
int i;
coprocessor_owner = this_cpu_ptr(&exc_table)->coprocessor_owner;
old_cpenable = xtensa_xsr(ti->cpenable, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if (coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
}
xtensa_set_sr(old_cpenable, cpenable);
}
void coprocessor_flush_all(struct thread_info *ti)
{
unsigned long cpenable, old_cpenable;
int i;
preempt_disable();
old_cpenable = xtensa_get_sr(cpenable);
cpenable = ti->cpenable;
xtensa_set_sr(cpenable, cpenable);
for (i = 0; i < XCHAL_CP_MAX; i++) {
if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti)
coprocessor_flush(ti, i);
cpenable >>= 1;
if (ti->cpenable) {
/* pairs with memw (2) in fast_coprocessor */
smp_rmb();
smp_call_function_single(ti->cp_owner_cpu,
local_coprocessor_flush_all,
ti, true);
}
xtensa_set_sr(old_cpenable, cpenable);
}
preempt_enable();
static void local_coprocessor_flush_release_all(void *info)
{
local_coprocessor_flush_all(info);
local_coprocessor_release_all(info);
}
void coprocessor_flush_release_all(struct thread_info *ti)
{
if (ti->cpenable) {
/* pairs with memw (2) in fast_coprocessor */
smp_rmb();
smp_call_function_single(ti->cp_owner_cpu,
local_coprocessor_flush_release_all,
ti, true);
}
}
#endif
@ -140,8 +203,7 @@ void flush_thread(void)
{
#if XTENSA_HAVE_COPROCESSORS
struct thread_info *ti = current_thread_info();
coprocessor_flush_all(ti);
coprocessor_release_all(ti);
coprocessor_flush_release_all(ti);
#endif
flush_ptrace_hw_breakpoint(current);
}

View file

@ -171,8 +171,7 @@ static int tie_set(struct task_struct *target,
#if XTENSA_HAVE_COPROCESSORS
/* Flush all coprocessors before we overwrite them. */
coprocessor_flush_all(ti);
coprocessor_release_all(ti);
coprocessor_flush_release_all(ti);
ti->xtregs_cp.cp0 = newregs->cp0;
ti->xtregs_cp.cp1 = newregs->cp1;
ti->xtregs_cp.cp2 = newregs->cp2;

View file

@ -162,8 +162,7 @@ setup_sigcontext(struct rt_sigframe __user *frame, struct pt_regs *regs)
return err;
#if XTENSA_HAVE_COPROCESSORS
coprocessor_flush_all(ti);
coprocessor_release_all(ti);
coprocessor_flush_release_all(ti);
err |= __copy_to_user(&frame->xtregs.cp, &ti->xtregs_cp,
sizeof (frame->xtregs.cp));
#endif

View file

@ -30,6 +30,7 @@
#include <linux/thread_info.h>
#include <asm/cacheflush.h>
#include <asm/coprocessor.h>
#include <asm/kdebug.h>
#include <asm/mmu_context.h>
#include <asm/mxregs.h>
@ -272,6 +273,12 @@ int __cpu_disable(void)
*/
set_cpu_online(cpu, false);
#if XTENSA_HAVE_COPROCESSORS
/*
* Flush coprocessor contexts that are active on the current CPU.
*/
local_coprocessors_flush_release_all();
#endif
/*
* OK - migrate IRQs away from this CPU
*/

View file

@ -57,6 +57,9 @@ static void do_nmi(struct pt_regs *regs);
static void do_unaligned_user(struct pt_regs *regs);
#endif
static void do_multihit(struct pt_regs *regs);
#if XTENSA_HAVE_COPROCESSORS
static void do_coprocessor(struct pt_regs *regs);
#endif
static void do_debug(struct pt_regs *regs);
/*
@ -69,7 +72,8 @@ static void do_debug(struct pt_regs *regs);
#define USER 0x02
#define COPROCESSOR(x) \
{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor }
{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, USER|KRNL, fast_coprocessor },\
{ EXCCAUSE_COPROCESSOR ## x ## _DISABLED, 0, do_coprocessor }
typedef struct {
int cause;
@ -327,6 +331,13 @@ static void do_unaligned_user(struct pt_regs *regs)
}
#endif
#if XTENSA_HAVE_COPROCESSORS
static void do_coprocessor(struct pt_regs *regs)
{
coprocessor_flush_release_all(current_thread_info());
}
#endif
/* Handle debug events.
* When CONFIG_HAVE_HW_BREAKPOINT is on this handler is called with
* preemption disabled to avoid rescheduling and keep mapping of hardware