2007-07-10 00:51:58 +08:00
|
|
|
/*
|
|
|
|
* Real-Time Scheduling Class (mapped to the SCHED_FIFO and SCHED_RR
|
|
|
|
* policies)
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Update the current task's runtime statistics. Skip current tasks that
|
|
|
|
* are not in our scheduling class.
|
|
|
|
*/
|
2007-08-09 17:16:48 +08:00
|
|
|
static inline void update_curr_rt(struct rq *rq)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
|
|
|
struct task_struct *curr = rq->curr;
|
|
|
|
u64 delta_exec;
|
|
|
|
|
|
|
|
if (!task_has_rt_policy(curr))
|
|
|
|
return;
|
|
|
|
|
2007-08-09 17:16:47 +08:00
|
|
|
delta_exec = rq->clock - curr->se.exec_start;
|
2007-07-10 00:51:58 +08:00
|
|
|
if (unlikely((s64)delta_exec < 0))
|
|
|
|
delta_exec = 0;
|
2007-08-02 23:41:40 +08:00
|
|
|
|
|
|
|
schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
|
2007-07-10 00:51:58 +08:00
|
|
|
|
|
|
|
curr->se.sum_exec_runtime += delta_exec;
|
2007-08-09 17:16:47 +08:00
|
|
|
curr->se.exec_start = rq->clock;
|
2007-07-10 00:51:58 +08:00
|
|
|
}
|
|
|
|
|
2007-08-09 17:16:48 +08:00
|
|
|
static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
|
|
|
|
list_add_tail(&p->run_list, array->queue + p->prio);
|
|
|
|
__set_bit(p->prio, array->bitmap);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Adding/removing a task to/from a priority array:
|
|
|
|
*/
|
2007-08-09 17:16:48 +08:00
|
|
|
static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
|
2007-08-09 17:16:48 +08:00
|
|
|
update_curr_rt(rq);
|
2007-07-10 00:51:58 +08:00
|
|
|
|
|
|
|
list_del(&p->run_list);
|
|
|
|
if (list_empty(array->queue + p->prio))
|
|
|
|
__clear_bit(p->prio, array->bitmap);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Put task to the end of the run list without the overhead of dequeue
|
|
|
|
* followed by enqueue.
|
|
|
|
*/
|
|
|
|
static void requeue_task_rt(struct rq *rq, struct task_struct *p)
|
|
|
|
{
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
|
|
|
|
list_move_tail(&p->run_list, array->queue + p->prio);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2007-10-15 23:00:08 +08:00
|
|
|
yield_task_rt(struct rq *rq)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
2007-10-15 23:00:08 +08:00
|
|
|
requeue_task_rt(rq, rq->curr);
|
2007-07-10 00:51:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Preempt the current task with a newly woken task if needed:
|
|
|
|
*/
|
|
|
|
static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
|
|
|
|
{
|
|
|
|
if (p->prio < rq->curr->prio)
|
|
|
|
resched_task(rq->curr);
|
|
|
|
}
|
|
|
|
|
2007-08-09 17:16:48 +08:00
|
|
|
static struct task_struct *pick_next_task_rt(struct rq *rq)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
struct task_struct *next;
|
|
|
|
struct list_head *queue;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
idx = sched_find_first_bit(array->bitmap);
|
|
|
|
if (idx >= MAX_RT_PRIO)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
queue = array->queue + idx;
|
|
|
|
next = list_entry(queue->next, struct task_struct, run_list);
|
|
|
|
|
2007-08-09 17:16:47 +08:00
|
|
|
next->se.exec_start = rq->clock;
|
2007-07-10 00:51:58 +08:00
|
|
|
|
|
|
|
return next;
|
|
|
|
}
|
|
|
|
|
2007-08-09 17:16:49 +08:00
|
|
|
static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
2007-08-09 17:16:48 +08:00
|
|
|
update_curr_rt(rq);
|
2007-07-10 00:51:58 +08:00
|
|
|
p->se.exec_start = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Load-balancing iterator. Note: while the runqueue stays locked
|
|
|
|
* during the whole iteration, the current task might be
|
|
|
|
* dequeued so the iterator has to be dequeue-safe. Here we
|
|
|
|
* achieve that by always pre-iterating before returning
|
|
|
|
* the current task:
|
|
|
|
*/
|
|
|
|
static struct task_struct *load_balance_start_rt(void *arg)
|
|
|
|
{
|
|
|
|
struct rq *rq = arg;
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
struct list_head *head, *curr;
|
|
|
|
struct task_struct *p;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
idx = sched_find_first_bit(array->bitmap);
|
|
|
|
if (idx >= MAX_RT_PRIO)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
head = array->queue + idx;
|
|
|
|
curr = head->prev;
|
|
|
|
|
|
|
|
p = list_entry(curr, struct task_struct, run_list);
|
|
|
|
|
|
|
|
curr = curr->prev;
|
|
|
|
|
|
|
|
rq->rt.rt_load_balance_idx = idx;
|
|
|
|
rq->rt.rt_load_balance_head = head;
|
|
|
|
rq->rt.rt_load_balance_curr = curr;
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct task_struct *load_balance_next_rt(void *arg)
|
|
|
|
{
|
|
|
|
struct rq *rq = arg;
|
|
|
|
struct rt_prio_array *array = &rq->rt.active;
|
|
|
|
struct list_head *head, *curr;
|
|
|
|
struct task_struct *p;
|
|
|
|
int idx;
|
|
|
|
|
|
|
|
idx = rq->rt.rt_load_balance_idx;
|
|
|
|
head = rq->rt.rt_load_balance_head;
|
|
|
|
curr = rq->rt.rt_load_balance_curr;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we arrived back to the head again then
|
|
|
|
* iterate to the next queue (if any):
|
|
|
|
*/
|
|
|
|
if (unlikely(head == curr)) {
|
|
|
|
int next_idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
|
|
|
|
|
|
|
|
if (next_idx >= MAX_RT_PRIO)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
idx = next_idx;
|
|
|
|
head = array->queue + idx;
|
|
|
|
curr = head->prev;
|
|
|
|
|
|
|
|
rq->rt.rt_load_balance_idx = idx;
|
|
|
|
rq->rt.rt_load_balance_head = head;
|
|
|
|
}
|
|
|
|
|
|
|
|
p = list_entry(curr, struct task_struct, run_list);
|
|
|
|
|
|
|
|
curr = curr->prev;
|
|
|
|
|
|
|
|
rq->rt.rt_load_balance_curr = curr;
|
|
|
|
|
|
|
|
return p;
|
|
|
|
}
|
|
|
|
|
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:
1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.
The first of these capabilities is used in two places, load_balance()
and load_balance_idle(), and in both of these cases the return value of
move_tasks() is used purely to decide if tasks/load were moved and no
notice of the actual number of tasks moved is taken.
The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.
This multiplexing of move_tasks() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()). However, the new modular
design of the new CFS scheduler allows a simpler solution to be adopted
and this patch addresses that solution by:
1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.
One of the consequences of these changes is that neither move_one_task()
nor the new move_tasks() cares how many tasks sched_class.load_balance()
moves and this enables its interface to be simplified by returning the
amount of load moved as its result and removing the load_moved pointer
from the argument list. This helps simplify the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
Further simplification, e.g. changes to balance_tasks(), are possible
but (slightly) complicated by the special needs of load_balance_fair()
so I've left them to a later patch (if this one gets accepted).
NB Since move_tasks() gets called with two run queue locks held even
small reductions in overhead are worthwhile.
[ mingo@elte.hu ]
this change also reduces code size nicely:
text data bss dec hex filename
39216 3618 24 42858 a76a sched.o.before
39173 3618 24 42815 a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2007-08-09 17:16:46 +08:00
|
|
|
static unsigned long
|
2007-07-10 00:51:58 +08:00
|
|
|
load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
|
|
|
|
unsigned long max_nr_move, unsigned long max_load_move,
|
|
|
|
struct sched_domain *sd, enum cpu_idle_type idle,
|
2007-08-09 17:16:46 +08:00
|
|
|
int *all_pinned, int *this_best_prio)
|
2007-07-10 00:51:58 +08:00
|
|
|
{
|
|
|
|
int nr_moved;
|
|
|
|
struct rq_iterator rt_rq_iterator;
|
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:
1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.
The first of these capabilities is used in two places, load_balance()
and load_balance_idle(), and in both of these cases the return value of
move_tasks() is used purely to decide if tasks/load were moved and no
notice of the actual number of tasks moved is taken.
The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.
This multiplexing of sched_task() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()). However, the new modular
design of the new CFS scheduler allows a simpler solution to be adopted
and this patch addresses that solution by:
1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.
One of the consequences of these changes is that neither move_one_task()
or the new move_tasks() care how many tasks sched_class.load_balance()
moves and this enables its interface to be simplified by returning the
amount of load moved as its result and removing the load_moved pointer
from the argument list. This helps simplify the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
Further simplification, e.g. changes to balance_tasks(), are possible
but (slightly) complicated by the special needs of load_balance_fair()
so I've left them to a later patch (if this one gets accepted).
NB Since move_tasks() gets called with two run queue locks held even
small reductions in overhead are worthwhile.
[ mingo@elte.hu ]
this change also reduces code size nicely:
text data bss dec hex filename
39216 3618 24 42858 a76a sched.o.before
39173 3618 24 42815 a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2007-08-09 17:16:46 +08:00
|
|
|
unsigned long load_moved;
|
2007-07-10 00:51:58 +08:00
|
|
|
|
|
|
|
rt_rq_iterator.start = load_balance_start_rt;
|
|
|
|
rt_rq_iterator.next = load_balance_next_rt;
|
|
|
|
/* pass 'busiest' rq argument into
|
|
|
|
* load_balance_[start|next]_rt iterators
|
|
|
|
*/
|
|
|
|
rt_rq_iterator.arg = busiest;
|
|
|
|
|
|
|
|
nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
|
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:
1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.
The first of these capabilities is used in two places, load_balance()
and load_balance_idle(), and in both of these cases the return value of
move_tasks() is used purely to decide if tasks/load were moved and no
notice of the actual number of tasks moved is taken.
The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.
This multiplexing of sched_task() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()). However, the new modular
design of the new CFS scheduler allows a simpler solution to be adopted
and this patch addresses that solution by:
1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.
One of the consequences of these changes is that neither move_one_task()
or the new move_tasks() care how many tasks sched_class.load_balance()
moves and this enables its interface to be simplified by returning the
amount of load moved as its result and removing the load_moved pointer
from the argument list. This helps simplify the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
Further simplification, e.g. changes to balance_tasks(), are possible
but (slightly) complicated by the special needs of load_balance_fair()
so I've left them to a later patch (if this one gets accepted).
NB Since move_tasks() gets called with two run queue locks held even
small reductions in overhead are worthwhile.
[ mingo@elte.hu ]
this change also reduces code size nicely:
text data bss dec hex filename
39216 3618 24 42858 a76a sched.o.before
39173 3618 24 42815 a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2007-08-09 17:16:46 +08:00
|
|
|
max_load_move, sd, idle, all_pinned, &load_moved,
|
2007-08-09 17:16:46 +08:00
|
|
|
this_best_prio, &rt_rq_iterator);
|
2007-07-10 00:51:58 +08:00
|
|
|
|
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:
1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.
The first of these capabilities is used in two places, load_balance()
and load_balance_idle(), and in both of these cases the return value of
move_tasks() is used purely to decide if tasks/load were moved and no
notice of the actual number of tasks moved is taken.
The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.
This multiplexing of sched_task() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative, one
function to move specified load and one to move a single task, would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()). However, the new modular
design of the new CFS scheduler allows a simpler solution to be adopted
and this patch addresses that solution by:
1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.
One of the consequences of these changes is that neither move_one_task()
or the new move_tasks() care how many tasks sched_class.load_balance()
moves and this enables its interface to be simplified by returning the
amount of load moved as its result and removing the load_moved pointer
from the argument list. This helps simplify the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
Further simplification, e.g. changes to balance_tasks(), are possible
but (slightly) complicated by the special needs of load_balance_fair()
so I've left them to a later patch (if this one gets accepted).
NB Since move_tasks() gets called with two run queue locks held even
small reductions in overhead are worthwhile.
[ mingo@elte.hu ]
this change also reduces code size nicely:
text data bss dec hex filename
39216 3618 24 42858 a76a sched.o.before
39173 3618 24 42815 a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
2007-08-09 17:16:46 +08:00
|
|
|
return load_moved;
|
2007-07-10 00:51:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static void task_tick_rt(struct rq *rq, struct task_struct *p)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* RR tasks need a special form of timeslice management.
|
|
|
|
* FIFO tasks have no timeslices.
|
|
|
|
*/
|
|
|
|
if (p->policy != SCHED_RR)
|
|
|
|
return;
|
|
|
|
|
|
|
|
if (--p->time_slice)
|
|
|
|
return;
|
|
|
|
|
|
|
|
p->time_slice = static_prio_timeslice(p->static_prio);
|
|
|
|
|
2007-08-25 02:39:10 +08:00
|
|
|
/*
|
|
|
|
* Requeue to the end of queue if we are not the only element
|
|
|
|
* on the queue:
|
|
|
|
*/
|
|
|
|
if (p->run_list.prev != p->run_list.next) {
|
|
|
|
requeue_task_rt(rq, p);
|
|
|
|
set_tsk_need_resched(p);
|
|
|
|
}
|
2007-07-10 00:51:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Method table wiring the RT functions defined in this file into the
 * generic sched_class interface.
 */
static struct sched_class rt_sched_class __read_mostly = {
	/* queue management */
	.enqueue_task		= enqueue_task_rt,
	.dequeue_task		= dequeue_task_rt,
	.yield_task		= yield_task_rt,

	/* wakeup preemption */
	.check_preempt_curr	= check_preempt_curr_rt,

	/* task selection */
	.pick_next_task		= pick_next_task_rt,
	.put_prev_task		= put_prev_task_rt,

	/* SMP load balancing */
	.load_balance		= load_balance_rt,

	/* timer tick */
	.task_tick		= task_tick_rt,
};
|