From: Con Kolivas <kernel@kolivas.org>

Changes:

Reverted the child penalty to 95 as new changes help this from hurting

Changed the logic behind loss of interactive credits to those that burn off 
all their sleep_avg

Now all tasks get proportionately more sleep as their relative bonus drops 
off. This has the effect of detecting a change from a cpu burner to an 
interactive task more rapidly as in O10. 

The _major_ change in this patch is that tasks on uninterruptible sleep do not 
earn any sleep avg during that sleep; it is not voluntary sleep so they should 
not get it. This has the effect of stopping cpu hogs from gaining dynamic 
priority during periods of heavy I/O. Very good for the jerks you may 
see in X or audio skips when you start a whole swag of disk intensive cpu hogs 
(eg make -j large number). I've simply dropped all their sleep_avg, but 
weighting it may be more appropriate. This has the side effect that pure
disk tasks (eg cp) have relatively low priority which is why weighting may
be better. We shall see.

Please test this one extensively. It should _not_ affect I/O throughput per 
se, but I'd like to see some of the I/O benchmarks on this. I do not want to 
have detrimental effects elsewhere.

patch-O12.3-O13int applies on top of 2.6.0-test2-mm4 that has been 
patched with O12.3int and is available on my site, and a full patch
against 2.6.0-test2 called patch-test2-O13int is here:

http://kernel.kolivas.org/2.5




 kernel/sched.c |   67 +++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 42 insertions(+), 25 deletions(-)

diff -puN kernel/sched.c~o13int kernel/sched.c
--- 25/kernel/sched.c~o13int	2003-08-30 15:41:49.000000000 -0700
+++ 25-akpm/kernel/sched.c	2003-08-30 15:41:49.000000000 -0700
@@ -78,7 +78,7 @@
 #define MAX_TIMESLICE		(200 * HZ / 1000)
 #define TIMESLICE_GRANULARITY	(HZ/40 ?: 1)
 #define ON_RUNQUEUE_WEIGHT	30
-#define CHILD_PENALTY		90
+#define CHILD_PENALTY		95
 #define PARENT_PENALTY		100
 #define EXIT_WEIGHT		3
 #define PRIO_BONUS_RATIO	25
@@ -364,6 +364,9 @@ static void recalc_task_prio(task_t *p, 
 	unsigned long long __sleep_time = now - p->timestamp;
 	unsigned long sleep_time;
 
+	if (!p->sleep_avg)
+		p->interactive_credit--;
+
 	if (__sleep_time > NS_MAX_SLEEP_AVG)
 		sleep_time = NS_MAX_SLEEP_AVG;
 	else
@@ -383,17 +386,19 @@ static void recalc_task_prio(task_t *p, 
 					JIFFIES_TO_NS(JUST_INTERACTIVE_SLEEP(p));
 		else {
 			/*
-			 * Tasks with interactive credits get boosted more
-			 * rapidly if their bonus has dropped off. Other
-			 * tasks are limited to one timeslice worth of
-			 * sleep avg.
+			 * The lower the sleep avg a task has the more
+			 * rapidly it will rise with sleep time. Tasks
+			 * without interactive_credit are limited to
+			 * one timeslice worth of sleep avg bonus.
 			 */
-			if (p->interactive_credit > 0)
-				sleep_time *= (MAX_BONUS + 1 -
+			sleep_time *= (MAX_BONUS + 1 -
 					(NS_TO_JIFFIES(p->sleep_avg) *
 					MAX_BONUS / MAX_SLEEP_AVG));
-			else if (sleep_time > JIFFIES_TO_NS(task_timeslice(p)))
-				sleep_time = JIFFIES_TO_NS(task_timeslice(p));
+
+			if (p->interactive_credit < 0 &&
+				sleep_time > JIFFIES_TO_NS(task_timeslice(p)))
+					sleep_time =
+						JIFFIES_TO_NS(task_timeslice(p));
 
 			/*
 			 * This code gives a bonus to interactive tasks.
@@ -434,20 +439,26 @@ static inline void activate_task(task_t 
 	recalc_task_prio(p, now);
 
 	/*
-	 * Tasks which were woken up by interrupts (ie. hw events)
-	 * are most likely of interactive nature. So we give them
-	 * the credit of extending their sleep time to the period
-	 * of time they spend on the runqueue, waiting for execution
-	 * on a CPU, first time around:
+	 * This checks to make sure it's not an uninterruptible task
+	 * that is now waking up.
 	 */
-	if (in_interrupt())
-		p->activated = 2;
-	else
-	/*
-	 * Normal first-time wakeups get a credit too for on-runqueue time,
-	 * but it will be weighted down:
-	 */
-		p->activated = 1;
+	if (!p->activated){
+		/*
+		 * Tasks which were woken up by interrupts (ie. hw events)
+		 * are most likely of interactive nature. So we give them
+		 * the credit of extending their sleep time to the period
+		 * of time they spend on the runqueue, waiting for execution
+		 * on a CPU, first time around:
+		 */
+		if (in_interrupt())
+			p->activated = 2;
+		else
+		/*
+		 * Normal first-time wakeups get a credit too for on-runqueue
+		 * time, but it will be weighted down:
+		 */
+			p->activated = 1;
+	}
 
 	p->timestamp = now;
 
@@ -571,8 +582,15 @@ repeat_lock_task:
 				task_rq_unlock(rq, &flags);
 				goto repeat_lock_task;
 			}
-			if (old_state == TASK_UNINTERRUPTIBLE)
+			if (old_state == TASK_UNINTERRUPTIBLE){
+				/*
+				 * Tasks on involuntary sleep don't earn
+				 * sleep_avg
+				 */
 				rq->nr_uninterruptible--;
+				p->timestamp = sched_clock();
+				p->activated = -1;
+			}
 			if (sync)
 				__activate_task(p, rq);
 			else {
@@ -1347,7 +1365,6 @@ void scheduler_tick(int user_ticks, int 
 		p->prio = effective_prio(p);
 		p->time_slice = task_timeslice(p);
 		p->first_time_slice = 0;
-		p->interactive_credit--;
 
 		if (!rq->expired_timestamp)
 			rq->expired_timestamp = jiffies;
@@ -1482,7 +1499,7 @@ pick_next_task:
 	queue = array->queue + idx;
 	next = list_entry(queue->next, task_t, run_list);
 
-	if (next->activated && next->interactive_credit > 0) {
+	if (next->activated > 0) {
 		unsigned long long delta = now - next->timestamp;
 
 		if (next->activated == 1)

_