From d1752aa884ec0ac3027c1a3d456bf69bf765c8b8 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 25 Oct 2006 01:00:22 +0200
Subject: [PATCH 1/2] [PATCH] x86-64: Simplify the vector allocator.

There is no reason to remember a per cpu position of which vector
to try.  Keeping a global position is simpler and more likely to
result in a global vector allocation even if I don't need or require
it.  For level triggered interrupts this means we are less likely to
acknowledge another cpu's irq, and cause the level triggered irq to
harmlessly refire.

This simplification makes it easier to only access data structures
of online cpus, by having fewer special cases to deal with.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index b000017e4b5d..0e89ae7e7b22 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -612,10 +612,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	 * Also, we've got to be careful not to trash gate
 	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
 	 */
-	static struct {
-		int vector;
-		int offset;
-	} pos[NR_CPUS] = { [ 0 ... NR_CPUS - 1] = {FIRST_DEVICE_VECTOR, 0} };
+	static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
 	int old_vector = -1;
 	int cpu;
 
@@ -631,14 +628,13 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 
 	for_each_cpu_mask(cpu, mask) {
 		cpumask_t domain;
-		int first, new_cpu;
+		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
-		first = first_cpu(domain);
 
-		vector = pos[first].vector;
-		offset = pos[first].offset;
+		vector = current_vector;
+		offset = current_offset;
 next:
 		vector += 8;
 		if (vector >= FIRST_SYSTEM_VECTOR) {
@@ -646,7 +642,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 			offset = (offset + 1) % 8;
 			vector = FIRST_DEVICE_VECTOR + offset;
 		}
-		if (unlikely(pos[first].vector == vector))
+		if (unlikely(current_vector == vector))
 			continue;
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
@@ -654,10 +650,8 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
-		for_each_cpu_mask(new_cpu, domain) {
-			pos[new_cpu].vector = vector;
-			pos[new_cpu].offset = offset;
-		}
+		current_vector = vector;
+		current_offset = offset;
 		if (old_vector >= 0) {
 			int old_cpu;
 			for_each_cpu_mask(old_cpu, irq_domain[irq])

From 70a0a5357db20c291d46c04011d646d5d84c868c Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 25 Oct 2006 01:00:23 +0200
Subject: [PATCH 2/2] [PATCH] x86-64: Only look at per_cpu data for online
 cpus.

When I generalized __assign_irq_vector I failed to pay attention
to what happens when you access a per cpu data structure for
a cpu that is not online.  That access is undefined, so any
code that performs it has undefined behavior as well.

The code still needs to be able to allocate a vector across cpus
that are not online to properly handle combinations like lowest
priority interrupt delivery and cpu_hotplug.  Not that we can do
that today but the infrastructure shouldn't prevent it.

So this patch updates the places where we touch per cpu data
to only touch online cpus, makes cpu vector allocation
an atomic operation with respect to cpu hotplug, and updates
the cpu start code to properly initialize vector_irq so we
don't have inconsistencies.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Andi Kleen <ak@suse.de>
---
 arch/x86_64/kernel/io_apic.c | 42 +++++++++++++++++++++++++++++++-----
 arch/x86_64/kernel/smpboot.c |  7 +++++-
 include/asm-x86_64/hw_irq.h  |  2 ++
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index 0e89ae7e7b22..fe429e5d6b29 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -63,7 +63,7 @@ int timer_over_8254 __initdata = 1;
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
 static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
+DEFINE_SPINLOCK(vector_lock);
 
 /*
  * # of IRQ routing registers
@@ -618,6 +618,9 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 
 	BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
 
+	/* Only try and allocate irqs on cpus that are online */
+	cpus_and(mask, mask, cpu_online_map);
+
 	if (irq_vector[irq] > 0)
 		old_vector = irq_vector[irq];
 	if (old_vector > 0) {
@@ -627,11 +630,12 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	}
 
 	for_each_cpu_mask(cpu, mask) {
-		cpumask_t domain;
+		cpumask_t domain, new_mask;
 		int new_cpu;
 		int vector, offset;
 
 		domain = vector_allocation_domain(cpu);
+		cpus_and(new_mask, domain, cpu_online_map);
 
 		vector = current_vector;
 		offset = current_offset;
@@ -646,18 +650,20 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 			continue;
 		if (vector == IA32_SYSCALL_VECTOR)
 			goto next;
-		for_each_cpu_mask(new_cpu, domain)
+		for_each_cpu_mask(new_cpu, new_mask)
 			if (per_cpu(vector_irq, new_cpu)[vector] != -1)
 				goto next;
 		/* Found one! */
 		current_vector = vector;
 		current_offset = offset;
 		if (old_vector >= 0) {
+			cpumask_t old_mask;
 			int old_cpu;
-			for_each_cpu_mask(old_cpu, irq_domain[irq])
+			cpus_and(old_mask, irq_domain[irq], cpu_online_map);
+			for_each_cpu_mask(old_cpu, old_mask)
 				per_cpu(vector_irq, old_cpu)[old_vector] = -1;
 		}
-		for_each_cpu_mask(new_cpu, domain)
+		for_each_cpu_mask(new_cpu, new_mask)
 			per_cpu(vector_irq, new_cpu)[vector] = irq;
 		irq_vector[irq] = vector;
 		irq_domain[irq] = domain;
@@ -678,6 +684,32 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
 	return vector;
 }
 
+void __setup_vector_irq(int cpu)
+{
+	/*
+	 * Initialize vector_irq on a new cpu.
+	 * This function must be called with vector_lock held.
+	 */
+	int irq, vector;
+
+	/* Mark the inuse vectors: every irq whose domain includes this cpu */
+	for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
+		if (!cpu_isset(cpu, irq_domain[irq]))
+			continue;
+		vector = irq_vector[irq];
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors: drop entries whose irq does not target this cpu */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq < 0)
+			continue;
+		if (!cpu_isset(cpu, irq_domain[irq]))
+			per_cpu(vector_irq, cpu)[vector] = -1;
+	}
+}
+
+
 extern void (*interrupt[NR_IRQS])(void);
 
 static struct irq_chip ioapic_chip;
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 7b7a6870288a..62c2e747af58 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -581,12 +581,16 @@ void __cpuinit start_secondary(void)
 	 * smp_call_function().
 	 */
 	lock_ipi_call_lock();
+	spin_lock(&vector_lock);
 
+	/* Setup the per cpu irq handling data structures */
+	__setup_vector_irq(smp_processor_id());
 	/*
 	 * Allow the master to continue.
 	 */
 	cpu_set(smp_processor_id(), cpu_online_map);
 	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
+	spin_unlock(&vector_lock);
 	unlock_ipi_call_lock();
 
 	cpu_idle();
@@ -799,7 +803,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid)
 				cpu, node);
 	}
 
-
 	alternatives_smp_switch(1);
 
 	c_idle.idle = get_idle_for_cpu(cpu);
@@ -1246,8 +1249,10 @@ int __cpu_disable(void)
 	local_irq_disable();
 	remove_siblinginfo(cpu);
 
+	spin_lock(&vector_lock);
 	/* It's now safe to remove this processor from the online map */
 	cpu_clear(cpu, cpu_online_map);
+	spin_unlock(&vector_lock);
 	remove_cpu_from_maps();
 	fixup_irqs(cpu_online_map);
 	return 0;
diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h
index 792dd52fcd70..179cce755aa7 100644
--- a/include/asm-x86_64/hw_irq.h
+++ b/include/asm-x86_64/hw_irq.h
@@ -76,6 +76,8 @@
 #ifndef __ASSEMBLY__
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
+extern void __setup_vector_irq(int cpu);
+extern spinlock_t vector_lock;
 
 /*
  * Various low-level irq details needed by irq.c, process.c,