Patch from Manfred Spraul <manfred@colorfullife.com>

init_irq calls request_irq while holding the ide_lock spinlock.  This can
deadlock, because request_irq can sleep under some circumstances - e.g.  for
the allocation of the /proc/irq entries, or for randomness management
structures.

The attached patch modifies init_irq and calls request_irq without the
spinlock held.  It also fixes a potential race between choose_drive and
init_irq.

The patch is against 2.5.64-ac4, it should apply to 2.5.64 with some offsets.




 25-akpm/drivers/ide/ide-io.c    |    4 -
 25-akpm/drivers/ide/ide-probe.c |  114 +++++++++++++++++++++++++---------------
 25-akpm/drivers/ide/ide.c       |   66 ++++++++++++++++-------
 25-akpm/include/linux/ide.h     |   13 ++++
 4 files changed, 136 insertions(+), 61 deletions(-)

diff -puN drivers/ide/ide.c~ide_probe-iint_irq-fix drivers/ide/ide.c
--- 25/drivers/ide/ide.c~ide_probe-iint_irq-fix	Mon Mar 17 14:30:35 2003
+++ 25-akpm/drivers/ide/ide.c	Mon Mar 17 14:30:35 2003
@@ -177,6 +177,7 @@ static int idebus_parameter;	/* holds th
 static int system_bus_speed;	/* holds what we think is VESA/PCI bus speed */
 static int initializing;	/* set while initializing built-in drivers */
 
+DECLARE_MUTEX(ide_cfg_sem);
 spinlock_t ide_lock __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 
 static int ide_scan_direction; /* THIS was formerly 2.2.x pci=reverse */
@@ -580,17 +581,19 @@ extern void init_hwif_data(unsigned int 
  
 void ide_unregister (unsigned int index)
 {
-	ide_drive_t *drive, *d;
+	ide_drive_t *drive;
 	ide_hwif_t *hwif, *g;
 	ide_hwgroup_t *hwgroup;
 	int irq_count = 0, unit, i;
-	unsigned long flags;
 	ide_hwif_t old_hwif;
 
 	if (index >= MAX_HWIFS)
 		BUG();
 		
-	spin_lock_irqsave(&ide_lock, flags);
+	BUG_ON(in_interrupt());
+	BUG_ON(irqs_disabled());
+	down(&ide_cfg_sem);
+	spin_lock_irq(&ide_lock);
 	hwif = &ide_hwifs[index];
 	if (!hwif->present)
 		goto abort;
@@ -605,7 +608,7 @@ void ide_unregister (unsigned int index)
 	}
 	hwif->present = 0;
 	
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irq(&ide_lock);
 
 	for (unit = 0; unit < MAX_DRIVES; ++unit) {
 		drive = &hwif->drives[unit];
@@ -618,7 +621,6 @@ void ide_unregister (unsigned int index)
 #ifdef CONFIG_PROC_FS
 	destroy_proc_ide_drives(hwif);
 #endif
-	spin_lock_irqsave(&ide_lock, flags);
 	hwgroup = hwif->hwgroup;
 
 	/*
@@ -633,6 +635,7 @@ void ide_unregister (unsigned int index)
 	if (irq_count == 1)
 		free_irq(hwif->irq, hwgroup);
 
+	spin_lock_irq(&ide_lock);
 	/*
 	 * Note that we only release the standard ports,
 	 * and do not even try to handle any extra ports
@@ -644,7 +647,6 @@ void ide_unregister (unsigned int index)
 	 * Remove us from the hwgroup, and free
 	 * the hwgroup if we were the only member
 	 */
-	d = hwgroup->drive;
 	for (i = 0; i < MAX_DRIVES; ++i) {
 		drive = &hwif->drives[i];
 		if (drive->de) {
@@ -653,11 +655,23 @@ void ide_unregister (unsigned int index)
 		}
 		if (!drive->present)
 			continue;
-		while (hwgroup->drive->next != drive)
-			hwgroup->drive = hwgroup->drive->next;
-		hwgroup->drive->next = drive->next;
-		if (hwgroup->drive == drive)
+		if (drive == drive->next) {
+			/* special case: last drive from hwgroup. */
+			BUG_ON(hwgroup->drive != drive);
 			hwgroup->drive = NULL;
+		} else {
+			ide_drive_t *walk;
+
+			walk = hwgroup->drive;
+			while (walk->next != drive)
+				walk = walk->next;
+			walk->next = drive->next;
+			if (hwgroup->drive == drive) {
+				hwgroup->drive = drive->next;
+				hwgroup->hwif = HWIF(hwgroup->drive);
+			}
+		}
+		BUG_ON(hwgroup->drive == drive);
 		if (drive->id != NULL) {
 			kfree(drive->id);
 			drive->id = NULL;
@@ -665,15 +679,28 @@ void ide_unregister (unsigned int index)
 		drive->present = 0;
 		blk_cleanup_queue(&drive->queue);
 	}
-	if (d->present)
-		hwgroup->drive = d;
-	while (hwgroup->hwif->next != hwif)
-		hwgroup->hwif = hwgroup->hwif->next;
-	hwgroup->hwif->next = hwif->next;
-	if (hwgroup->hwif == hwif)
+	if (hwif->next == hwif) {
 		kfree(hwgroup);
-	else
-		hwgroup->hwif = HWIF(hwgroup->drive);
+		BUG_ON(hwgroup->hwif != hwif);
+	} else {
+		/* There is another interface in hwgroup.
+		 * Unlink us, and set hwgroup->drive and ->hwif to
+		 * something sane.
+		 */
+		g = hwgroup->hwif;
+		while (g->next != hwif)
+			g = g->next;
+		g->next = hwif->next;
+		if (hwgroup->hwif == hwif) {
+			/* Chose a random hwif for hwgroup->hwif.
+			 * It's guaranteed that there are no drives
+			 * left in the hwgroup.
+			 */
+			BUG_ON(hwgroup->drive != NULL);
+			hwgroup->hwif = g;
+		}
+		BUG_ON(hwgroup->hwif == hwif);
+	}
 
 #if !defined(CONFIG_DMA_NONPCI)
 	if (hwif->dma_base) {
@@ -813,7 +840,8 @@ void ide_unregister (unsigned int index)
 
 	hwif->hwif_data			= old_hwif.hwif_data;
 abort:
-	spin_unlock_irqrestore(&ide_lock, flags);
+	spin_unlock_irq(&ide_lock);
+	up(&ide_cfg_sem);
 }
 
 EXPORT_SYMBOL(ide_unregister);
diff -puN drivers/ide/ide-io.c~ide_probe-iint_irq-fix drivers/ide/ide-io.c
--- 25/drivers/ide/ide-io.c~ide_probe-iint_irq-fix	Mon Mar 17 14:30:35 2003
+++ 25-akpm/drivers/ide/ide-io.c	Mon Mar 17 14:30:35 2003
@@ -742,8 +742,8 @@ void ide_do_request (ide_hwgroup_t *hwgr
 	/* for atari only: POSSIBLY BROKEN HERE(?) */
 	ide_get_lock(ide_intr, hwgroup);
 
-	/* necessary paranoia: ensure IRQs are masked on local CPU */
-	local_irq_disable();
+	/* caller must own ide_lock */
+	BUG_ON(!irqs_disabled());
 
 	while (!hwgroup->busy) {
 		hwgroup->busy = 1;
diff -puN drivers/ide/ide-probe.c~ide_probe-iint_irq-fix drivers/ide/ide-probe.c
--- 25/drivers/ide/ide-probe.c~ide_probe-iint_irq-fix	Mon Mar 17 14:30:35 2003
+++ 25-akpm/drivers/ide/ide-probe.c	Mon Mar 17 14:30:35 2003
@@ -1009,7 +1009,13 @@ static void ide_init_queue(ide_drive_t *
 
 	/* This is a driver limit and could be eliminated. */
 	blk_queue_max_phys_segments(q, PRD_ENTRIES);
+}
 
+/*
+ * Setup the drive for request handling.
+ */
+static void ide_init_drive(ide_drive_t *drive)
+{
 	ide_toggle_bounce(drive, 1);
 
 #ifdef CONFIG_BLK_DEV_IDE_TCQ_DEFAULT
@@ -1032,16 +1038,14 @@ static void ide_init_queue(ide_drive_t *
  */
 static int init_irq (ide_hwif_t *hwif)
 {
-	unsigned long flags;
 	unsigned int index;
-	ide_hwgroup_t *hwgroup, *new_hwgroup;
+	ide_hwgroup_t *hwgroup;
 	ide_hwif_t *match = NULL;
 
-	/* Allocate the buffer and potentially sleep first */
-	new_hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL);
-	
-	spin_lock_irqsave(&ide_lock, flags);
 
+	BUG_ON(in_interrupt());
+	BUG_ON(irqs_disabled());	
+	down(&ide_cfg_sem);
 	hwif->hwgroup = NULL;
 #if MAX_HWIFS > 1
 	/*
@@ -1073,14 +1077,28 @@ static int init_irq (ide_hwif_t *hwif)
 	 */
 	if (match) {
 		hwgroup = match->hwgroup;
-		if(new_hwgroup)
-			kfree(new_hwgroup);
+		hwif->hwgroup = hwgroup;
+		/*
+		 * Link us into the hwgroup.
+		 * This must be done early, do ensure that unexpected_intr
+		 * can find the hwif and prevent irq storms.
+		 * No drives are attached to the new hwif, choose_drive
+		 * can't do anything stupid (yet).
+		 * Add ourself as the 2nd entry to the hwgroup->hwif
+		 * linked list, the first entry is the hwif that owns
+		 * hwgroup->handler - do not change that.
+		 */
+		spin_lock_irq(&ide_lock);
+		hwif->next = hwgroup->hwif->next;
+		hwgroup->hwif->next = hwif;
+		spin_unlock_irq(&ide_lock);
 	} else {
-		hwgroup = new_hwgroup;
-		if (!hwgroup) {
-			spin_unlock_irqrestore(&ide_lock, flags);
-			return 1;
-		}
+		hwgroup = kmalloc(sizeof(ide_hwgroup_t),GFP_KERNEL);
+		if (!hwgroup)
+	       		goto out_up;
+
+		hwif->hwgroup = hwgroup;
+
 		memset(hwgroup, 0, sizeof(ide_hwgroup_t));
 		hwgroup->hwif     = hwif->next = hwif;
 		hwgroup->rq       = NULL;
@@ -1112,44 +1130,35 @@ static int init_irq (ide_hwif_t *hwif)
 			/* clear nIEN */
 			hwif->OUTB(0x08, hwif->io_ports[IDE_CONTROL_OFFSET]);
 
-		if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup)) {
-			if (!match)
-				kfree(hwgroup);
-			spin_unlock_irqrestore(&ide_lock, flags);
-			return 1;
-		}
+		if (request_irq(hwif->irq,&ide_intr,sa,hwif->name,hwgroup))
+	       		goto out_unlink;
 	}
 
 	/*
-	 * Everything is okay, so link us into the hwgroup
+	 * Link any new drives into the hwgroup, allocate
+	 * the block device queue and initialize the drive.
+	 * Note that ide_init_drive sends commands to the new
+	 * drive.
 	 */
-	hwif->hwgroup = hwgroup;
-	hwif->next = hwgroup->hwif->next;
-	hwgroup->hwif->next = hwif;
-
 	for (index = 0; index < MAX_DRIVES; ++index) {
 		ide_drive_t *drive = &hwif->drives[index];
 		if (!drive->present)
 			continue;
-		if (!hwgroup->drive)
-			hwgroup->drive = drive;
-		drive->next = hwgroup->drive->next;
-		hwgroup->drive->next = drive;
-		spin_unlock_irqrestore(&ide_lock, flags);
 		ide_init_queue(drive);
-		spin_lock_irqsave(&ide_lock, flags);
-	}
-
-	if (!hwgroup->hwif) {
-		hwgroup->hwif = HWIF(hwgroup->drive);
-#ifdef DEBUG
-		printk("%s : Adding missed hwif to hwgroup!!\n", hwif->name);
-#endif
+		spin_lock_irq(&ide_lock);
+		if (!hwgroup->drive) {
+			/* first drive for hwgroup. */
+			drive->next = drive;
+			hwgroup->drive = drive;
+			hwgroup->hwif = HWIF(hwgroup->drive);
+		} else {
+			drive->next = hwgroup->drive->next;
+			hwgroup->drive->next = drive;
+		}
+		spin_unlock_irq(&ide_lock);
+		ide_init_drive(drive);
 	}
 
-	/* all CPUs; safe now that hwif->hwgroup is set up */
-	spin_unlock_irqrestore(&ide_lock, flags);
-
 #if !defined(__mc68000__) && !defined(CONFIG_APUS) && !defined(__sparc__)
 	printk("%s at 0x%03lx-0x%03lx,0x%03lx on irq %d", hwif->name,
 		hwif->io_ports[IDE_DATA_OFFSET],
@@ -1168,6 +1177,30 @@ static int init_irq (ide_hwif_t *hwif)
 		printk(" (%sed with %s)",
 			hwif->sharing_irq ? "shar" : "serializ", match->name);
 	printk("\n");
+	up(&ide_cfg_sem);
+	return 0;
+out_unlink:
+	spin_lock_irq(&ide_lock);
+	if (hwif->next == hwif) {
+		BUG_ON(match);
+		BUG_ON(hwgroup->hwif != hwif);
+		kfree(hwgroup);
+	} else {
+		ide_hwif_t *g;
+		g = hwgroup->hwif;
+		while (g->next != hwif)
+			g = g->next;
+		g->next = hwif->next;
+		if (hwgroup->hwif == hwif) {
+			/* Impossible. */
+			printk(KERN_ERR "Duh. Uninitialized hwif listed as active hwif.\n");
+			hwgroup->hwif = g;
+		}
+		BUG_ON(hwgroup->hwif == hwif);
+	}
+	spin_unlock_irq(&ide_lock);
+out_up:
+	up(&ide_cfg_sem);
 	return 0;
 }
 
@@ -1340,6 +1373,7 @@ EXPORT_SYMBOL(hwif_init);
 void export_ide_init_queue (ide_drive_t *drive)
 {
 	ide_init_queue(drive);
+	ide_init_drive(drive);
 }
 
 EXPORT_SYMBOL(export_ide_init_queue);
diff -puN include/linux/ide.h~ide_probe-iint_irq-fix include/linux/ide.h
--- 25/include/linux/ide.h~ide_probe-iint_irq-fix	Mon Mar 17 14:30:35 2003
+++ 25-akpm/include/linux/ide.h	Mon Mar 17 14:30:35 2003
@@ -1730,6 +1730,19 @@ extern void ide_toggle_bounce(ide_drive_
 extern int ide_set_xfer_rate(ide_drive_t *drive, u8 rate);
 
 extern spinlock_t ide_lock;
+extern struct semaphore ide_cfg_sem;
+/*
+ * Structure locking:
+ *
+ * ide_cfg_sem and ide_lock together protect changes to
+ * ide_hwif_t->{next,hwgroup}
+ * ide_drive_t->next
+ *
+ * ide_hwgroup_t->busy: ide_lock
+ * ide_hwgroup_t->hwif: ide_lock
+ * ide_hwif_t->mate: constant, no locking
+ * ide_drive_t->hwif: constant, no locking
+ */
 
 #define local_irq_set(flags)	do { local_save_flags((flags)); local_irq_enable(); } while (0)
 

_