From: Nick Piggin <piggin@cyberone.com.au>

John Hawkes described this problem to me:

There *is* a small problem in this area, though, that SuSE avoids. 
"jiffies" gets updated by cpu0.  The other CPUs may, over time, get out of
sync (and they're initialized on ia64 to start out being out of sync), so
it's no guarantee that every CPU will wake up from its timer interrupt and
see a "jiffies" value that is guaranteed to be last_jiffies+1.  Sometimes
the jiffies value may be unchanged since the last wakeup.  Sometimes the
jiffies value may have incremented by 2 (or more, especially if cpu0's
interrupts are disabled for long stretches of time).  So an algorithm that
says, "I'll call load_balance() only when jiffies is *exactly* N" is going
to fail on occasion, either by calling load_balance() too often or not
often enough.  ***

I fixed this by adding a last_balance field to struct sched_domain, and
working off that.



---

 25-akpm/include/linux/sched.h |    4 ++++
 25-akpm/kernel/sched.c        |   16 ++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff -puN include/linux/sched.h~sched-no-drop-balance include/linux/sched.h
--- 25/include/linux/sched.h~sched-no-drop-balance	Wed Feb  4 12:30:04 2004
+++ 25-akpm/include/linux/sched.h	Wed Feb  4 12:30:04 2004
@@ -528,6 +528,7 @@ struct sched_domain {
 	int flags;			/* See SD_FLAG_* */
 
 	/* Runtime fields. */
+	unsigned long last_balance;	/* init to jiffies. units in jiffies */
 	unsigned int balance_interval;	/* initialise to 1. units in ms. */
 	unsigned int nr_balance_failed; /* initialise to 0 */
 };
@@ -544,6 +545,7 @@ struct sched_domain {
 	.cache_hot_time		= 0,			\
 	.cache_nice_tries	= 0,			\
 	.flags			= SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE | SD_FLAG_WAKE,\
+	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 }
@@ -560,6 +562,7 @@ struct sched_domain {
 	.cache_hot_time		= (5*1000000),		\
 	.cache_nice_tries	= 2,			\
 	.flags			= SD_FLAG_FASTMIGRATE | SD_FLAG_NEWIDLE,\
+	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 }
@@ -577,6 +580,7 @@ struct sched_domain {
 	.cache_hot_time		= (5*1000000),		\
 	.cache_nice_tries	= 1,			\
 	.flags			= SD_FLAG_EXEC,		\
+	.last_balance		= jiffies,		\
 	.balance_interval	= 1,			\
 	.nr_balance_failed	= 0,			\
 }
diff -puN kernel/sched.c~sched-no-drop-balance kernel/sched.c
--- 25/kernel/sched.c~sched-no-drop-balance	Wed Feb  4 12:30:04 2004
+++ 25-akpm/kernel/sched.c	Wed Feb  4 12:30:04 2004
@@ -1717,26 +1717,26 @@ static void rebalance_tick(int this_cpu,
 
 	/* Run through all this CPU's domains */
 	do {
-		int modulo;
+		unsigned long interval;
 
 		if (unlikely(!domain->groups))
 			break;
 
-		modulo = domain->balance_interval;
-
+		interval = domain->balance_interval;
 		if (idle != IDLE)
-			modulo *= domain->busy_factor;
+			interval *= domain->busy_factor;
 
 		/* scale ms to jiffies */
-		modulo = modulo * HZ / 1000;
-		if (modulo == 0)
-			modulo = 1;
+		interval = interval * HZ / 1000;
+		if (unlikely(interval == 0))
+			interval = 1;
 
-		if (!(j % modulo)) {
+		if (j - domain->last_balance >= interval) {
 			if (load_balance(this_cpu, this_rq, domain, idle)) {
 				/* We've pulled tasks over so no longer idle */
 				idle = NOT_IDLE;
 			}
+			domain->last_balance += interval;
 		}
 
 		domain = domain->parent;

_