diff -urNp a/kernel/sched.c b/kernel/sched.c --- a/kernel/sched.c Wed Aug 21 13:18:04 2002 +++ b/kernel/sched.c Wed Aug 21 13:39:09 2002 @@ -571,6 +571,175 @@ static inline unsigned int double_lock_b return nr_running; } +#define HISTORY_RING_SIZE 25 +/* one load balancing history record, written by lb_ring_add() */ +struct lb_hist_entry { + unsigned long time; /* value of jiffies when the entry was recorded */ + int pid; /* stolen task (0 if none) */ + int busiest_cpu; /* busiest RQ */ +}; +/* load balancing history ring; lb_ring_add() reuses the HISTORY_RING_SIZE slots cyclically */ +struct lb_hist_ring { + int curr; /* index of the most recently written slot in data[] */ + struct lb_hist_entry data[HISTORY_RING_SIZE]; +} ____cacheline_aligned; +/* one load balancing history ring per CPU, indexed by CPU number */ +struct lb_hist_ring lb_ring[NR_CPUS]; + +/* one initial load balancing decision record, written by ilb_ring_add() */ +struct ilb_hist_entry { + unsigned long time; /* value of jiffies when the entry was recorded */ + int pid; /* pid handed to ilb_ring_add() */ + int node; /* selected homenode */ + int load[NR_NODES]; /* node loads at decision time */ +}; +/* initial load balancing history ring; slots are reused cyclically */ +struct ilb_hist_ring { + int curr; /* index of the most recently written slot in data[] */ + struct ilb_hist_entry data[HISTORY_RING_SIZE]; +} ____cacheline_aligned; +/* one initial load balancing history ring per CPU, indexed by CPU number */ +struct ilb_hist_ring ilb_ring[NR_CPUS]; + +/* Record one load balancing event in the ring of cpu: take the slot after curr (wrapping at HISTORY_RING_SIZE, so the oldest entry is overwritten), store jiffies, pid and busiest_cpu in it, then make it the current slot. */ +void lb_ring_add(int cpu, int pid, int busiest_cpu) +{ + int next=(lb_ring[cpu].curr + 1 ) % HISTORY_RING_SIZE; + + lb_ring[cpu].data[next].time = jiffies; + lb_ring[cpu].data[next].pid = pid; + lb_ring[cpu].data[next].busiest_cpu = busiest_cpu; + lb_ring[cpu].curr = next; +} + +/* Record one initial load balancing decision in the ring of cpu: the slot after curr (wrapping at HISTORY_RING_SIZE) receives jiffies, pid and the selected node. */ +void ilb_ring_add(int cpu, int pid, int node, int *load) +{ + int i, next=(ilb_ring[cpu].curr + 1 ) % HISTORY_RING_SIZE; + + ilb_ring[cpu].data[next].time = jiffies; + ilb_ring[cpu].data[next].pid = pid; + ilb_ring[cpu].data[next].node = node; + /* NOTE(review): the text is damaged from this loop header onwards ("icount"). The rest of ilb_ring_add() and most of a following read_proc function appear to have been lost between a '<' and a '>' during extraction; restore them from the pristine patch before applying. */ for (i=0; icount) len = count; + if (len<0) len = 0; + return len; +} + +/* read_proc handler for the initial load balancing history ring of one CPU; the CPU number is taken from the data argument and the text is formatted into page */ +int ilb_ring_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i, j, len, entry; + char *buff=page; + int cpu=(int)data; /* CPU number that init_sched_proc() stored in the proc entry's data field */ + 
buff += sprintf(buff," tick pid node node_loads\n"); + entry = ilb_ring[cpu].curr; + /* NOTE(review): the loop below is damaged ("icount"); the code that printed the ring entries and computed len appears to have been lost during extraction - restore from the pristine patch. */ for (i=0; icount) len = count; + if (len<0) len = 0; + return len; +} + +/* read_proc handler for the runqueue of one CPU (CPU number taken from data): prints pid and comm of the current task, then nr_running and nr_uninterruptible. Sets *eof when the formatted text ends within off+count and returns the formatted length minus off, clamped to the range 0..count. */ +int rq_load_read_proc(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int i, len; + runqueue_t *rq; + char *buff=page; + int cpu=(int)data; + + rq=cpu_rq(cpu); + buff += sprintf(buff,"cpu %d : ",cpu); + buff += sprintf(buff,"curr: %d %s\n",rq->curr->pid,rq->curr->comm); + buff += sprintf(buff,"running uninter nr_homenode\n"); + buff += sprintf(buff,"%7d %7d",rq->nr_running,rq->nr_uninterruptible); + /* NOTE(review): loop header and sprintf call damaged during extraction ("inr_homenode"); presumably this printed rq->nr_homenode[i] for every node - confirm against the pristine patch. */ for (i=0; inr_homenode[i]); + buff += sprintf(buff,"\n"); + + len = buff-page; /* number of bytes formatted into page */ + if (len <= off+count) *eof = 1; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +#include +/* Create the "sched" directory under proc_root with the subdirectories "history" and "load", and hook the read_proc handlers defined above onto the entries created there; each entry's data field carries the loop index i. NOTE(review): the header name of the #include directive above was lost during extraction, presumably <linux/proc_fs.h> - confirm. */ +void init_sched_proc(void) +{ + int i; + char name[12]; + struct proc_dir_entry *p, *hist, *sched, *load; + + sched = proc_mkdir("sched",&proc_root); + hist = proc_mkdir("history",sched); + /* NOTE(review): loop header and creation of the lb_ring_read_proc entry are damaged ("iread_proc") - restore from the pristine patch. */ for (i=0; iread_proc = lb_ring_read_proc; + p->data = (long)i; + } + sprintf(name,"ilb%02d",i); + p = create_proc_entry(name,S_IRUGO,hist); + if (p) { + p->read_proc = ilb_ring_read_proc; + p->data = (long)i; + } + } + load = proc_mkdir("load",sched); + /* NOTE(review): loop header and creation of the rq_load_read_proc entry are damaged ("iread_proc") - restore from the pristine patch. */ for (i=0; iread_proc = rq_load_read_proc; + p->data = (long)i; + } + } +} + /* * Calculate load of a CPU pool, store results in data[][NR_CPUS]. * Return the index of the most loaded runqueue. 
@@ -867,6 +1036,7 @@ static void load_balance(runqueue_t *thi if (!next) goto out_unlock; + lb_ring_add(smp_processor_id(), next->pid, next->cpu); array = next->array; /* @@ -1729,7 +1899,7 @@ static int sched_best_cpu(struct task_st */ static int sched_best_node(struct task_struct *p, int flag) { - int n, best_node=0, min_load, pool_load, min_pool=p->node; + int n, best_node=0, min_load, min_pool=p->node; int pool, load[NR_NODES]; unsigned long mask = p->cpus_allowed & cpu_online_map; @@ -1755,13 +1925,14 @@ static int sched_best_node(struct task_s min_load = 100000000; for (n = 0; n < numpools; n++) { pool = (best_node + n) % numpools; - pool_load = (100*load[pool])/pool_nr_cpus[pool]; - if ((pool_load < min_load) && (pool_mask[pool] & mask)) { - min_load = pool_load; + load[pool] = (100*load[pool])/pool_nr_cpus[pool]; + if ((load[pool] < min_load) && (pool_mask[pool] & mask)) { + min_load = load[pool]; min_pool = pool; } } atomic_set(&sched_node, min_pool); + ilb_ring_add(smp_processor_id(), p->pid, min_pool, load); return min_pool; } @@ -1958,6 +2129,7 @@ void bld_pools(void) find_node_levels(numpools); init_pool_weight(); init_pool_delay(); + init_sched_proc(); } void set_task_node(task_t *p, int node)