Commit aa731ee1 authored by Pavel Emelyanov

core: Support task scheduler policies and priorities

No magic here, just fetch the info using getpriority and sched_getxxx calls.
The good news is that the mentioned syscalls take a pid as an argument and
work with it, i.e. there is no need for parasite help here.

Restore is split into a prep stage -- copying sched bits from the image into
the restorer args -- and the restore itself. This is done to avoid restoring
tasks' info with IDLE priority ;) To keep the restorer from failing, the sched
bits are validated for sanity at the prep stage.

Minimal sanity test is also there.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 33c0add2
......@@ -18,6 +18,9 @@
#include <sys/sendfile.h>
#include <sys/mman.h>
#include <sched.h>
#include <sys/resource.h>
#include <linux/major.h>
#include "protobuf.h"
......@@ -98,6 +101,54 @@ err:
return ret;
}
/*
 * Collect the scheduler state of task @pid into the thread core entry @tc.
 *
 * Stores the scheduling policy, the static priority (for SCHED_RR and
 * SCHED_FIFO only) and the nice value. Everything is queried by pid with
 * sched_getscheduler(), sched_getparam() and getpriority().
 *
 * Returns 0 on success, -1 if any of the queries fails.
 */
static int dump_sched_info(int pid, ThreadCoreEntry *tc)
{
	int ret;
	struct sched_param sp;

	BUILD_BUG_ON(SCHED_OTHER != 0); /* default in proto message */

	ret = sched_getscheduler(pid);
	if (ret < 0) {
		pr_perror("Can't get sched policy for %d", pid);
		return -1;
	}

	pr_info("%d has %d sched policy\n", pid, ret);
	tc->has_sched_policy = true;
	tc->sched_policy = ret;

	/* Only the RT policies carry a static priority worth saving */
	if ((ret == SCHED_RR) || (ret == SCHED_FIFO)) {
		ret = sched_getparam(pid, &sp);
		if (ret < 0) {
			pr_perror("Can't get sched param for %d", pid);
			return -1;
		}

		pr_info("\tdumping %d prio for %d\n", sp.sched_priority, pid);
		tc->has_sched_prio = true;
		tc->sched_prio = sp.sched_priority;
	}

	/*
	 * The nice is ignored for RT sched policies, but is stored
	 * in kernel. Thus we have to take it with us in the image.
	 *
	 * -1 is a legitimate nice value, so an error is reported only
	 * when the call returns -1 *and* errno was set; a non-zero errno
	 * alongside any other return value is not a failure.
	 */
	errno = 0;
	ret = getpriority(PRIO_PROCESS, pid);
	if (ret == -1 && errno) {
		pr_perror("Can't get nice for %d", pid);
		return -1;
	}

	pr_info("\tdumping %d nice for %d\n", ret, pid);
	tc->has_sched_nice = true;
	tc->sched_nice = ret;

	return 0;
}
struct cr_fdset *glob_fdset;
static int collect_fds(pid_t pid, struct parasite_drain_fd *dfds)
......@@ -880,6 +931,10 @@ static int dump_task_core_all(pid_t pid, const struct proc_pid_stat *stat,
core->tc->task_state = TASK_ALIVE;
core->tc->exit_code = 0;
ret = dump_sched_info(pid, core->thread_core);
if (ret)
goto err_free;
ret = pb_write_one(fd_core, core, PB_CORE);
if (ret < 0) {
pr_info("ERROR\n");
......@@ -1277,6 +1332,10 @@ static int dump_task_thread(struct parasite_ctl *parasite_ctl, struct pid *tid)
pr_info("%d: tid_address=%p\n", pid, taddr);
core->thread_info->clear_tid_addr = (u64) taddr;
ret = dump_sched_info(pid, core->thread_core);
if (ret)
goto err_free;
pr_info("OK\n");
fd_core = open_image(CR_FD_CORE, O_DUMP, tid->virt);
......
......@@ -1258,6 +1258,45 @@ static int remap_restorer_blob(void *addr)
return 0;
}
/*
 * Sanity-check scheduler parameters before they are handed to the restorer.
 *
 * Returns non-zero when @sp is consistent and 0 otherwise:
 *  - nice must lie within [-20, 19];
 *  - SCHED_RR and SCHED_FIFO need a priority within [1, 99];
 *  - SCHED_IDLE, SCHED_OTHER and SCHED_BATCH need priority 0;
 *  - any other policy is rejected.
 */
static int validate_sched_parm(struct rst_sched_param *sp)
{
	int nice_ok = (sp->nice >= -20) && (sp->nice <= 19);

	if (!nice_ok)
		return 0;

	if (sp->policy == SCHED_RR || sp->policy == SCHED_FIFO)
		return (sp->prio >= 1) && (sp->prio <= 99);

	if (sp->policy == SCHED_IDLE ||
	    sp->policy == SCHED_OTHER ||
	    sp->policy == SCHED_BATCH)
		return sp->prio == 0;

	/* Unknown policy */
	return 0;
}
/*
 * Fill the restorer's scheduler parameters @sp from the image entry @tc.
 *
 * When the image carries no sched policy, @sp is set to the defaults
 * (SCHED_OTHER, nice 0, prio 0). Otherwise the values are copied from the
 * image and checked with validate_sched_parm().
 *
 * Returns 0 on success, -1 if the image values are inconsistent.
 */
static int prep_sched_info(struct rst_sched_param *sp, ThreadCoreEntry *tc)
{
	if (!tc->has_sched_policy) {
		sp->policy = SCHED_OTHER;
		sp->nice = 0;
		/* Keep the whole struct initialised, not just the fields read today */
		sp->prio = 0;
		return 0;
	}

	sp->policy = tc->sched_policy;
	sp->nice = tc->sched_nice;
	sp->prio = tc->sched_prio;

	if (!validate_sched_parm(sp)) {
		pr_err("Inconsistent sched params received (%d.%d.%d)\n",
				sp->policy, sp->nice, sp->prio);
		return -1;
	}

	return 0;
}
static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_vmas, int nr_vmas)
{
long restore_task_vma_len;
......@@ -1427,6 +1466,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
task_args->has_futex = true;
task_args->futex_rla = core->thread_core->futex_rla;
task_args->futex_rla_len = core->thread_core->futex_rla_len;
ret = prep_sched_info(&task_args->sp, core->thread_core);
if (ret)
goto err;
}
/* No longer need it */
......@@ -1494,6 +1537,10 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core, struct list_head *tgt_v
thread_args[i].has_futex = true;
thread_args[i].futex_rla = core->thread_core->futex_rla;
thread_args[i].futex_rla_len = core->thread_core->futex_rla_len;
ret = prep_sched_info(&thread_args[i].sp, core->thread_core);
if (ret)
goto err;
}
core_entry__free_unpacked(core, NULL);
......
......@@ -56,6 +56,12 @@ struct restore_mem_zone {
/* Yields @heap cast to the type of @ptr. */
#define first_on_heap(ptr, heap) ((typeof(ptr))heap)
/* Yields the address right after the object @prev points to, cast to the type of @ptr. */
#define next_on_heap(ptr, prev) ((typeof(ptr))((long)(prev) + sizeof(*(prev))))
/*
 * Scheduler settings passed to the restorer: filled in by
 * prep_sched_info() and applied by restore_sched_info().
 */
struct rst_sched_param {
int policy; /* SCHED_* policy value */
int nice; /* nice value; validate_sched_parm() accepts -20..19 */
int prio; /* static priority: 1..99 for SCHED_RR/SCHED_FIFO, 0 for the other accepted policies */
};
/* Make sure it's pow2 in size */
struct thread_restore_args {
struct restore_mem_zone mem_zone;
......@@ -68,6 +74,8 @@ struct thread_restore_args {
bool has_futex;
u64 futex_rla;
u32 futex_rla_len;
struct rst_sched_param sp;
} __aligned(sizeof(long));
struct task_restore_core_args {
......@@ -113,6 +121,8 @@ struct task_restore_core_args {
int *rst_tcp_socks;
int rst_tcp_socks_size;
struct rst_sched_param sp;
} __aligned(sizeof(long));
struct pt_regs {
......
......@@ -10,6 +10,7 @@
#include <sys/time.h>
#include <arpa/inet.h>
#include <sched.h>
#include "types.h"
......
......@@ -54,6 +54,8 @@ __NR_setfsgid 123 sys_setfsgid (int fsgid)
__NR_getsid 124 sys_getsid (void)
__NR_capset 126 sys_capset (struct cap_header *h, struct cap_data *d)
__NR_personality 135 sys_personality (unsigned int personality)
__NR_setpriority 141 sys_setpriority (int which, int who, int nice)
__NR_sched_setscheduler 144 sys_sched_setscheduler (int pid, int policy, struct sched_param *p)
__NR_prctl 157 sys_prctl (int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5)
__NR_arch_prctl 158 sys_arch_prctl (int option, unsigned long addr)
__NR_mount 165 sys_mount (char *dev_nmae, char *dir_name, char *type, unsigned long flags, void *data)
......
......@@ -69,6 +69,9 @@ message thread_info_x86 {
// Image entry behind core->thread_core in the dump and restore code.
message thread_core_entry {
required uint64 futex_rla = 1;
required uint32 futex_rla_len = 2;
// Nice value as returned by getpriority(); dumped for every policy.
optional sint32 sched_nice = 3;
// Scheduling policy as returned by sched_getscheduler(); when absent,
// the restore side falls back to SCHED_OTHER with nice 0.
optional uint32 sched_policy = 4;
// Static priority; dumped only for SCHED_RR and SCHED_FIFO tasks.
optional uint32 sched_prio = 5;
}
message core_entry {
......
......@@ -127,6 +127,21 @@ static void restore_creds(CredsEntry *ce)
sys_capset(&hdr, data);
}
/*
 * Apply the scheduler parameters @p to the calling task.
 *
 * Nothing is done for the default combination (SCHED_OTHER with nice 0).
 * Otherwise the nice value is set first and then the policy together with
 * its static priority.
 *
 * The restore is best-effort: a failing syscall is logged, not propagated.
 */
static void restore_sched_info(struct rst_sched_param *p)
{
	struct sched_param parm;
	int ret;

	if ((p->policy == SCHED_OTHER) && (p->nice == 0))
		return;

	pr_info("Restoring scheduler params %d.%d.%d\n",
			p->policy, p->nice, p->prio);

	ret = (int)sys_setpriority(PRIO_PROCESS, 0, p->nice);
	if (ret < 0)
		pr_info("Can't restore nice %d (ret %d)\n", p->nice, ret);

	parm.sched_priority = p->prio;
	ret = (int)sys_sched_setscheduler(0, p->policy, &parm);
	if (ret < 0)
		pr_info("Can't restore sched policy %d prio %d (ret %d)\n",
				p->policy, p->prio, ret);
}
/*
* Threads restoration via sigreturn. Note it's locked
* routine and calls for unlock at the end.
......@@ -152,6 +167,8 @@ long __export_restore_thread(struct thread_restore_args *args)
}
}
restore_sched_info(&args->sp);
rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;
#define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = args->gpregs.d
......@@ -442,6 +459,8 @@ long __export_restore_task(struct task_restore_core_args *args)
}
}
restore_sched_info(&args->sp);
/*
* We need to prepare a valid sigframe here, so
* after sigreturn the kernel will pick up the
......
......@@ -19,6 +19,7 @@ static/write_read02
static/write_read10
static/wait00
static/vdso00
static/sched_prio00
static/file_shared
static/timers
static/futex
......
......@@ -19,6 +19,7 @@ TST_NOFILE = \
mprotect00 \
timers \
unbound_sock \
sched_prio00 \
socket_listen \
socket_udp \
socket6_udp \
......
#include <errno.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <signal.h>
#include <string.h>
#include <sched.h>
#include "zdtmtst.h"
/* Test description and author strings (presumably read by the zdtm harness -- confirm). */
const char *test_doc = "Check sched prios to be preserved";
const char *test_author = "Pavel Emelyanov <xemul@parallels.com>";
/* Number of child tasks forked and checked by main(). */
#define NRTASKS 4
/*
 * Body of every forked child: sleep in an endless loop. The children are
 * terminated from outside with SIGKILL (see kill_all()).
 */
static int do_nothing(void)
{
	for (;;)
		sleep(10);

	/* Not reached */
	return -1;
}
/* Send SIGKILL to the first @n tasks listed in @pid, in array order. */
static void kill_all(int *pid, int n)
{
	int idx = 0;

	while (idx < n) {
		kill(pid[idx], SIGKILL);
		idx++;
	}
}
/*
 * Fork NRTASKS children, give the first three a nice value and the last one
 * the SCHED_RR policy with a static priority, then (after the test signal
 * arrives) check that every child still has the parameters it was given.
 *
 * Returns -1 if the children can't be set up, 0 otherwise; the check result
 * itself is reported with pass()/fail().
 */
int main(int argc, char ** argv)
{
	int pid[NRTASKS], i, parm[NRTASKS], ret;

	test_init(argc, argv);

	/* first 3 -- normal, parm[] is the nice value */
	parm[0] = -20;
	parm[1] = 19;
	parm[2] = 1;
	/* next 1 -- RR, parm[] is the static priority */
	parm[3] = 3;

	for (i = 0; i < NRTASKS; i++) {
		pid[i] = fork();
		if (pid[i] < 0) {
			err("Can't fork task %d", i);
			/* pid[i] is not a task, kill only the earlier ones */
			kill_all(pid, i);
			return -1;
		}

		if (!pid[i])
			return do_nothing();

		if (i < 3) {
			if (setpriority(PRIO_PROCESS, pid[i], parm[i])) {
				err("Can't set prio %d", i);
				/* child i is already running, take it down too */
				kill_all(pid, i + 1);
				return -1;
			}
		} else {
			struct sched_param p;

			p.sched_priority = parm[i];
			if (sched_setscheduler(pid[i], SCHED_RR, &p)) {
				err("Can't set policy %d", i);
				/* child i is already running, take it down too */
				kill_all(pid, i + 1);
				return -1;
			}
		}
	}

	test_daemon();
	test_waitsig();

	for (i = 0; i < NRTASKS; i++) {
		if (i < 3) {
			/* -1 is a valid nice value, so errors are told apart via errno */
			errno = 0;
			ret = getpriority(PRIO_PROCESS, pid[i]);
			if (errno) {
				fail("No prio for task %d", i);
				break;
			}

			if (ret != parm[i]) {
				fail("Broken nice for %d", i);
				break;
			}
		} else {
			struct sched_param p;

			ret = sched_getscheduler(pid[i]);
			if (ret != SCHED_RR) {
				fail("Broken/No policy for %d", i);
				break;
			}

			ret = sched_getparam(pid[i], &p);
			if (ret < 0 || p.sched_priority != parm[i]) {
				fail("Broken prio for %d", i);
				break;
			}
		}
	}

	/* The loop ran to completion only if no check failed */
	if (i == NRTASKS)
		pass();

	kill_all(pid, NRTASKS);
	return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment