Commit beb158a6 authored by Pavel Emelyanov's avatar Pavel Emelyanov Committed by Cyrill Gorcunov

cr: Task creds support

Dumping is simple. Everything except secbits can be read from proc;
secbits are obtained from the parasite.

Restoring is a bit tricky -- when you change anything in the kernel's
cred struct, the kernel performs sophisticated checks and can change
more than was requested, so the creds restoration procedure
is carefully commented step by step.

Another thing to mention is that creds are restored after everything
else, i.e. right before performing the final threads sync and sigreturns.
This is done to avoid potential problems with insufficient caps for
restoring other stuff (e.g. CAP_DAC_OVERRIDE or a zero euid is most
likely required for opening any image file and for writing the notorious
/proc/sys/kernel/ns_last_pid control, which in turn is done up until the
very last moment).
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
parent d846d108
......@@ -432,6 +432,48 @@ err:
return ret;
}
/*
 * Dump the credentials of task @pid into the CR_FD_CREDS image of @fds.
 *
 * The uids, gids and capability sets are taken from what
 * parse_pid_status() fills in for @pid_dir; the securebits value is
 * taken from @misc (the parasite reports it there).
 *
 * Returns 0 on success or a negative value if parsing the status
 * or writing the image fails.
 */
static int dump_task_creds(pid_t pid, int pid_dir,
		struct parasite_dump_misc *misc, struct cr_fdset *fds)
{
	int ret, i;
	struct proc_status_creds cr;
	struct creds_entry ce;

	pr_info("\n");
	pr_info("Dumping creds for (pid: %d)\n", pid);
	pr_info("----------------------------------------\n");

	ret = parse_pid_status(pid_dir, &cr);
	if (ret < 0)
		return ret;

	/* Index 0..3 of uids/gids maps to real, effective, saved and fs ids */
	ce.uid   = cr.uids[0];
	ce.gid   = cr.gids[0];
	ce.euid  = cr.uids[1];
	ce.egid  = cr.gids[1];
	ce.suid  = cr.uids[2];
	ce.sgid  = cr.gids[2];
	ce.fsuid = cr.uids[3];
	ce.fsgid = cr.gids[3];

	/* The word-by-word copy below relies on both arrays having equal size */
	BUILD_BUG_ON(CR_CAP_SIZE != PROC_CAP_SIZE);

	for (i = 0; i < CR_CAP_SIZE; i++) {
		ce.cap_inh[i] = cr.cap_inh[i];
		ce.cap_prm[i] = cr.cap_prm[i];
		ce.cap_eff[i] = cr.cap_eff[i];
		ce.cap_bnd[i] = cr.cap_bnd[i];
	}

	ce.secbits = misc->secbits;

	ret = write_img(fds->fds[CR_FD_CREDS], &ce);
	if (ret < 0)
		return ret;

	return 0;
}
/*
 * Field-copy helpers: both copy member @e from @src into @dst.
 *
 * assign_reg   -- plain assignment, with the value cast to the type of
 *                 the destination member.
 * assign_array -- memcpy() of sizeof(dst.e) bytes, for array members.
 *
 * NOTE(review): the arguments are not parenthesized, so pass plain
 * lvalues only.
 */
#define assign_reg(dst, src, e) dst.e = (__typeof__(dst.e))src.e
#define assign_array(dst, src, e) memcpy(&dst.e, &src.e, sizeof(dst.e))
......@@ -1249,6 +1291,12 @@ static int dump_one_task(struct pstree_item *item, struct cr_fdset *cr_fdset)
goto err;
}
ret = dump_task_creds(pid, pid_dir, &misc, cr_fdset);
if (ret) {
pr_err("Dump creds (pid: %d) failed with %d\n", pid, ret);
goto err;
}
ret = finalize_core(pid, &vma_area_list, cr_fdset);
if (ret) {
pr_err("Finalizing core (pid: %d) failed with %d\n", pid, ret);
......
......@@ -1591,6 +1591,23 @@ static int prepare_itimers(int pid, struct task_restore_core_args *args)
return ret;
}
/*
 * Read the creds image of task @pid into @args->creds so that the
 * restorer can apply it later.
 *
 * Returns 0 when read_img() reports a positive result, -1 otherwise;
 * a failure to open the image is returned as is.
 */
static int prepare_creds(int pid, struct task_restore_core_args *args)
{
	int img_fd, status;

	img_fd = open_image_ro(CR_FD_CREDS, pid);
	if (img_fd < 0)
		return img_fd;

	status = read_img(img_fd, &args->creds);
	close(img_fd);

	/* XXX -- validate creds here? */

	if (status > 0)
		return 0;

	return -1;
}
static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
{
long restore_code_len, restore_task_vma_len;
......@@ -1813,6 +1830,10 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
if (ret < 0)
goto err;
ret = prepare_creds(pid, task_args);
if (ret < 0)
goto err;
cr_mutex_init(&task_args->rst_lock);
if (pstree_entry.nr_threads) {
......
......@@ -249,6 +249,39 @@ out:
pr_img_tail(CR_FD_ITIMERS);
}
/*
 * Print one capability set as a single hex number: "<name>: " followed
 * by the CR_CAP_SIZE words of @v, highest-indexed word first.
 */
static void show_cap(char *name, u32 *v)
{
	int word = CR_CAP_SIZE;

	pr_info("%s: ", name);

	while (word-- > 0)
		pr_info("%08x", v[word]);

	pr_info("\n");
}
static void show_creds(int fd)
{
struct creds_entry ce;
pr_img_head(CR_FD_CREDS);
if (read_img(fd, &ce) < 0)
goto out;
pr_info("uid %u euid %u suid %u fsuid %u\n",
ce.uid, ce.euid, ce.suid, ce.fsuid);
pr_info("gid %u egid %u sgid %u fsgid %u\n",
ce.gid, ce.egid, ce.sgid, ce.fsgid);
show_cap("Inh", ce.cap_inh);
show_cap("Eff", ce.cap_eff);
show_cap("Prm", ce.cap_prm);
show_cap("Bnd", ce.cap_bnd);
pr_info("secbits: %x\n", ce.secbits);
out:
pr_img_tail(CR_FD_CREDS);
}
static int show_pstree(int fd_pstree, struct list_head *collect)
{
struct pstree_entry e;
......@@ -477,6 +510,9 @@ static int cr_parse_file(struct cr_options *opts)
case UTSNS_MAGIC:
show_utsns(fd);
break;
case CREDS_MAGIC:
show_creds(fd);
break;
default:
pr_err("Unknown magic %x on %s\n", magic, opts->show_dump_file);
goto err;
......@@ -556,6 +592,8 @@ static int cr_show_all(unsigned long pid, struct cr_options *opts)
show_itimers(cr_fdset->fds[CR_FD_ITIMERS]);
show_creds(cr_fdset->fds[CR_FD_CREDS]);
close_cr_fdset(&cr_fdset);
if (opts->leader_only)
......
......@@ -98,6 +98,12 @@ struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX] = {
.magic = ITIMERS_MAGIC,
},
/* creds */
[CR_FD_CREDS] = {
.fmt = FMT_FNAME_CREDS,
.magic = CREDS_MAGIC,
},
/* UTS namespace */
[CR_FD_UTSNS] = {
.fmt = FMT_FNAME_UTSNS,
......
......@@ -29,6 +29,7 @@ enum {
CR_FD_UNIXSK,
CR_FD_INETSK,
CR_FD_ITIMERS,
CR_FD_CREDS,
/*
* Global entries
......@@ -76,6 +77,7 @@ extern struct cr_fd_desc_tmpl fdset_template[CR_FD_MAX];
#define FMT_FNAME_UNIXSK "unixsk-%d.img"
#define FMT_FNAME_INETSK "inetsk-%d.img"
#define FMT_FNAME_ITIMERS "itimers-%d.img"
#define FMT_FNAME_CREDS "creds-%d.img"
#define FMT_FNAME_UTSNS "utsns-%d.img"
extern int get_image_path(char *path, int size, const char *fmt, int pid);
......@@ -104,7 +106,8 @@ struct cr_fdset {
CR_FD_DESC_USE(CR_FD_SIGACT) |\
CR_FD_DESC_USE(CR_FD_UNIXSK) |\
CR_FD_DESC_USE(CR_FD_INETSK) |\
CR_FD_DESC_USE(CR_FD_ITIMERS) )
CR_FD_DESC_USE(CR_FD_ITIMERS) |\
CR_FD_DESC_USE(CR_FD_CREDS) )
#define CR_FD_DESC_NS (\
CR_FD_DESC_USE(CR_FD_UTSNS) )
#define CR_FD_DESC_NONE (0)
......
......@@ -20,6 +20,7 @@
#define INETSK_MAGIC 0x56443851 /* Pereslavl */
#define ITIMERS_MAGIC 0x57464056 /* Kostroma */
#define UTSNS_MAGIC 0x54473203 /* Smolensk */
#define CREDS_MAGIC 0x54023547 /* Kozelsk */
#define PIPEFS_MAGIC 0x50495045
......@@ -142,6 +143,26 @@ struct itimer_entry {
u64 vusec;
} __packed;
/* Number of 32-bit words in each capability set stored in the image */
#define CR_CAP_SIZE 2
/*
 * Task credentials as written to the creds image.
 *
 * The struct is packed and written/read as a whole, so the order and
 * size of the fields define the image layout.
 */
struct creds_entry {
/* Real, effective, saved and filesystem user/group ids */
u32 uid;
u32 gid;
u32 euid;
u32 egid;
u32 suid;
u32 sgid;
u32 fsuid;
u32 fsgid;
/*
 * Inheritable, permitted, effective and bounding capability sets.
 * Bit i of word b stands for capability number i + b * 32.
 */
u32 cap_inh[CR_CAP_SIZE];
u32 cap_prm[CR_CAP_SIZE];
u32 cap_eff[CR_CAP_SIZE];
u32 cap_bnd[CR_CAP_SIZE];
/* Securebits value as reported by prctl(PR_GET_SECUREBITS) */
u32 secbits;
} __packed;
#define HEADER_VERSION 1
#define HEADER_ARCH_X86_64 1
......
......@@ -81,6 +81,7 @@ struct parasite_dump_pages_args {
/* Arguments/results block of the parasite's dump_misc() command */
struct parasite_dump_misc {
/* Completion status set by the parasite */
parasite_status_t status;
/* Result of prctl(PR_GET_SECUREBITS) as seen from inside the task */
unsigned int secbits;
};
/*
......
......@@ -79,6 +79,8 @@ struct task_restore_core_args {
rt_sigaction_t sigchld_act;
struct itimerval itimers[3];
struct creds_entry creds;
} __aligned(sizeof(long));
struct pt_regs {
......
......@@ -352,6 +352,8 @@ static int dump_misc(struct parasite_dump_misc *args)
{
parasite_status_t *st = &args->status;
args->secbits = sys_prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
SET_PARASITE_STATUS(st, 0, 0);
return 0;
}
......
......@@ -41,6 +41,78 @@ static void sigchld_handler(int signal, siginfo_t *siginfo, void *data)
sys_exit(1);
}
/*
 * Apply the credentials described by @ce to the calling task.
 *
 * A NULL @ce means "nothing to restore" and is silently ignored.
 *
 * The order of the steps below matters: each one relies on
 * privileges that a later step may take away.
 *
 * NOTE(review): the results of the syscalls are not checked here;
 * the function has no way to report a failure to its callers.
 */
static void restore_creds(struct creds_entry *ce)
{
	int b, i;
	struct cap_header hdr;
	struct cap_data data[_LINUX_CAPABILITY_U32S_3];

	/*
	 * We're still root here and thus can do it without failures.
	 */

	if (ce == NULL)
		return;

	/*
	 * First -- set the SECURE_NO_SETUID_FIXUP bit not to
	 * lose caps bits when changing xids.
	 */

	sys_prctl(PR_SET_SECUREBITS, 1 << SECURE_NO_SETUID_FIXUP, 0, 0, 0);

	/*
	 * Second -- restore xids. Since we still have the CAP_SETUID
	 * capability nothing should fail. But call the setfsXid last
	 * to override the setresXid settings.
	 */

	sys_setresuid(ce->uid, ce->euid, ce->suid);
	sys_setfsuid(ce->fsuid);
	sys_setresgid(ce->gid, ce->egid, ce->sgid);
	sys_setfsgid(ce->fsgid);

	/*
	 * Third -- restore securebits. We don't need them in any
	 * special state any longer.
	 */

	sys_prctl(PR_SET_SECUREBITS, ce->secbits, 0, 0, 0);

	/*
	 * Fourth -- trim bset. This can only be done while
	 * having the CAP_SETPCAP capability.
	 */

	for (b = 0; b < CR_CAP_SIZE; b++) {
		for (i = 0; i < 32; i++) {
			/*
			 * Use an unsigned one: shifting a signed 1 by 31
			 * would overflow int, which is undefined.
			 */
			if (ce->cap_bnd[b] & (1U << i))
				/* already set */
				continue;

			sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0);
		}
	}

	/*
	 * Fifth -- restore caps. Nothing but cap bits are changed
	 * at this stage, so just do it.
	 */

	hdr.version = _LINUX_CAPABILITY_VERSION_3;
	hdr.pid = 0;

	/* data[] is filled by CR_CAP_SIZE iterations, sizes must match */
	BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE);

	for (i = 0; i < CR_CAP_SIZE; i++) {
		data[i].eff = ce->cap_eff[i];
		data[i].prm = ce->cap_prm[i];
		data[i].inh = ce->cap_inh[i];
	}

	sys_capset(&hdr, data);
}
/*
* Threads restoration via sigreturn. Note it's locked
* routine and calls for unlock at the end.
......@@ -117,6 +189,14 @@ long restore_thread(struct thread_restore_args *args)
cr_mutex_unlock(args->rst_lock);
/*
* FIXME -- threads do not share creds, but it looks like
* nobody tries to mess with this crap. That said we should
* pass the master thread creds here
*/
restore_creds(NULL);
new_sp = (long)rt_sigframe + 8;
asm volatile(
"movq %0, %%rax \n"
......@@ -537,6 +617,13 @@ long restore_task(struct task_restore_core_args *args)
sys_close(fd);
}
/*
* Restore creds late to avoid potential problems with
* insufficient caps for restoring this or that before
*/
restore_creds(&args->creds);
task_entry = task_get_entry(args->task_entries, my_pid);
cr_wait_dec(&args->task_entries->nr_in_progress);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment