Commit eb5d8442 authored by Cyrill Gorcunov, committed by Pavel Emelyanov

creds: restore -- Implement per-thread restore of credentials

Because the creds parameters are to be passed into the pie/restorer
code but are read before the thread_restore_args and task_restore_args
structures are allocated, we need a small trick: prepare the
creds in several stages

 - collect all creds data into separate private memory blobs
 - once all memory needed for the restorer is allocated, we relocate
   the pointers in these blocks and set
   thread_restore_args::creds_args to the appropriate
   address
 - the restorer works as usual and sets up the creds parameters as before

v2:
 - fix addressing in positioning of rst_ memory (I had accidentally
   zapped the pointers and, when sending the patches, forgot to merge the
   changes back; so while I had the series successfully restoring containers
   with different creds, the series as sent would not have worked once
   merged. All changes are now merged as appropriate)

 - drop module's global @cap_last_cap from pie/restorer.c
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Tycho Andersen <tycho.andersen@canonical.com>
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
parent 212e2105
...@@ -2492,73 +2492,6 @@ static inline int verify_cap_size(CredsEntry *ce) ...@@ -2492,73 +2492,6 @@ static inline int verify_cap_size(CredsEntry *ce)
(ce->n_cap_prm == CR_CAP_SIZE) && (ce->n_cap_bnd == CR_CAP_SIZE)); (ce->n_cap_prm == CR_CAP_SIZE) && (ce->n_cap_bnd == CR_CAP_SIZE));
} }
/*
 * Load the CredsEntry stored in the creds image that belongs to @pid.
 *
 * The entry is returned only when it was read without error, its four
 * capability arrays all have CR_CAP_SIZE elements (verify_cap_size())
 * and may_restore() accepts it.  In every other case the entry is
 * released here and NULL is returned.
 */
static CredsEntry *read_creds(int pid)
{
	CredsEntry *entry = NULL;
	struct cr_img *creds_img;
	int rc;

	creds_img = open_image(CR_FD_CREDS, O_RSTR, pid);
	if (creds_img == NULL)
		return NULL;

	/* The image is not needed once the single entry has been read. */
	rc = pb_read_one(creds_img, &entry, PB_CREDS);
	close_image(creds_img);
	if (rc < 0)
		goto drop;

	if (!verify_cap_size(entry)) {
		pr_err("Caps size mismatch %d %d %d %d\n",
		       (int)entry->n_cap_inh, (int)entry->n_cap_eff,
		       (int)entry->n_cap_prm, (int)entry->n_cap_bnd);
		goto drop;
	}

	if (may_restore(entry))
		return entry;

drop:
	/* Common failure exit: hand the entry back to protobuf and bail. */
	creds_entry__free_unpacked(entry, NULL);
	return NULL;
}
/*
 * Copy the credentials from @ce into the restorer arguments @args and
 * apply the supplementary groups to the current process.
 *
 * The capability words are duplicated into the arrays embedded in @args
 * and the pointers inside args->creds are redirected to those copies.
 *
 * Returns 0 on success, in which case @ce has been released.  Returns -1
 * if setgroups() fails; @ce is not released on that path.
 */
static int prepare_creds(CredsEntry *ce, struct task_restore_args *args)
{
	/* Shallow copy first; the capability pointers are fixed up below. */
	args->creds = *ce;

	memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
	memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
	memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
	memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));

	args->creds.cap_inh = args->cap_inh;
	args->creds.cap_eff = args->cap_eff;
	args->creds.cap_prm = args->cap_prm;
	args->creds.cap_bnd = args->cap_bnd;

	args->cap_last_cap = kdat.last_cap;

	/*
	 * We can set supplementary groups here. This won't affect any
	 * permission checks for us (we're still root) and will not be
	 * reset by subsequent creds changes in restorer.
	 */
	BUILD_BUG_ON(sizeof(*ce->groups) != sizeof(gid_t));
	if (setgroups(ce->n_groups, ce->groups) < 0) {
		pr_perror("Can't set supplementary groups");
		return -1;
	}

	creds_entry__free_unpacked(ce, NULL);

	/* XXX -- validate creds here? */
	return 0;
}
static int prepare_mm(pid_t pid, struct task_restore_args *args) static int prepare_mm(pid_t pid, struct task_restore_args *args)
{ {
int exe_fd, i, ret = -1; int exe_fd, i, ret = -1;
...@@ -2855,6 +2788,175 @@ out: ...@@ -2855,6 +2788,175 @@ out:
extern void __gcov_flush(void) __attribute__((weak)); extern void __gcov_flush(void) __attribute__((weak));
void __gcov_flush(void) {} void __gcov_flush(void) {}
/*
 * Attach the creds record found at position *@creds_pos_next to
 * @thread_args and advance *@creds_pos_next to the next record.
 *
 * The record and the lsm_profile/groups pointers stored in it are
 * resolved through rst_mem_remap_ptr() from their saved positions.
 * A position of 0 means there is no record: nothing is touched.
 */
static void rst_reloc_creds(struct thread_restore_args *thread_args,
			    unsigned long *creds_pos_next)
{
	unsigned long pos = *creds_pos_next;
	struct thread_creds_args *creds;

	if (unlikely(pos == 0))
		return;

	creds = rst_mem_remap_ptr(pos, RM_PRIVATE);

	/* Only pointers that were actually set need to be re-resolved. */
	if (creds->lsm_profile)
		creds->lsm_profile = rst_mem_remap_ptr(creds->mem_lsm_profile_pos, RM_PRIVATE);

	if (creds->groups)
		creds->groups = rst_mem_remap_ptr(creds->mem_groups_pos, RM_PRIVATE);

	thread_args->creds_args = creds;
	*creds_pos_next = creds->mem_pos_next;
}
/*
 * Build a thread_creds_args record for @ce in RM_PRIVATE rst memory.
 *
 * The record holds a copy of @ce (with its pointer members cleared),
 * copies of the four capability arrays, the rendered LSM profile string
 * and the supplementary groups.  The profile and the groups are stored
 * in their own RM_PRIVATE allocations; the positions of those
 * allocations are remembered in the record so the pointers can be
 * re-resolved later with rst_mem_remap_ptr().
 *
 * @prev: record prepared for the previous thread, or NULL.  When set,
 *        its mem_pos_next is pointed at the new record.
 * @ce:   credentials to copy; not released here.
 *
 * Returns the new record, or ERR_PTR(-EINVAL) / ERR_PTR(-ENOMEM).
 *
 * NOTE(review): if rst_mem_alloc() may move the RM_PRIVATE area when it
 * grows, @args and @prev could be stale after the later allocations in
 * this function -- confirm against the rst_mem_alloc() implementation.
 */
static struct thread_creds_args *
rst_prep_creds_args(struct thread_creds_args *prev, CredsEntry *ce)
{
	unsigned long this_pos = rst_mem_cpos(RM_PRIVATE);
	struct thread_creds_args *args;

	if (!verify_cap_size(ce)) {
		pr_err("Caps size mismatch %d %d %d %d\n",
		       (int)ce->n_cap_inh, (int)ce->n_cap_eff,
		       (int)ce->n_cap_prm, (int)ce->n_cap_bnd);
		return ERR_PTR(-EINVAL);
	}

	if (!may_restore(ce))
		return ERR_PTR(-EINVAL);

	args = rst_mem_alloc(sizeof(*args), RM_PRIVATE);
	if (!args)
		return ERR_PTR(-ENOMEM);

	args->cap_last_cap = kdat.last_cap;
	memcpy(&args->creds, ce, sizeof(args->creds));

	/*
	 * Start from "no profile".  This also covers the case where
	 * opts.lsm_supplied is set but the effective profile is NULL,
	 * which would otherwise leave both fields unset while
	 * rst_reloc_creds() tests args->lsm_profile.
	 */
	args->lsm_profile = NULL;
	args->mem_lsm_profile_pos = 0;

	if (ce->lsm_profile || opts.lsm_supplied) {
		char *rendered, *profile;

		/* A profile given on the command line overrides the image. */
		profile = ce->lsm_profile;
		if (opts.lsm_supplied)
			profile = opts.lsm_profile;

		if (validate_lsm(profile) < 0)
			return ERR_PTR(-EINVAL);

		if (profile) {
			size_t lsm_profile_len;

			if (render_lsm_profile(profile, &rendered))
				return ERR_PTR(-EINVAL);

			args->mem_lsm_profile_pos = rst_mem_cpos(RM_PRIVATE);
			lsm_profile_len = strlen(rendered);
			args->lsm_profile = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
			if (!args->lsm_profile) {
				xfree(rendered);
				return ERR_PTR(-ENOMEM);
			}

			/*
			 * Copy the terminating NUL as well: strncpy() bounded
			 * by the string length would leave the buffer
			 * unterminated.
			 */
			memcpy(args->lsm_profile, rendered, lsm_profile_len + 1);
			xfree(rendered);
		}
	}

	/*
	 * Zap fields which we cant use.
	 */
	args->creds.cap_inh = NULL;
	args->creds.cap_eff = NULL;
	args->creds.cap_prm = NULL;
	args->creds.cap_bnd = NULL;
	args->creds.groups = NULL;
	args->creds.lsm_profile = NULL;

	memcpy(args->cap_inh, ce->cap_inh, sizeof(args->cap_inh));
	memcpy(args->cap_eff, ce->cap_eff, sizeof(args->cap_eff));
	memcpy(args->cap_prm, ce->cap_prm, sizeof(args->cap_prm));
	memcpy(args->cap_bnd, ce->cap_bnd, sizeof(args->cap_bnd));

	if (ce->n_groups) {
		args->mem_groups_pos = rst_mem_cpos(RM_PRIVATE);
		args->groups = rst_mem_alloc(ce->n_groups * sizeof(u32), RM_PRIVATE);
		if (!args->groups)
			return ERR_PTR(-ENOMEM);
		memcpy(args->groups, ce->groups, ce->n_groups * sizeof(u32));
	} else {
		args->groups = NULL;
		args->mem_groups_pos = 0;
	}

	/* Link the new record behind @prev; 0 marks the end of the chain. */
	args->mem_pos_next = 0;
	if (prev)
		prev->mem_pos_next = this_pos;

	return args;
}
/*
 * Old image format: read the single CredsEntry from the creds image of
 * @pid and prepare one thread_creds_args record from it.
 *
 * Returns 0 on success, -ENOENT if the image cannot be opened or holds
 * no entry, the negative value from pb_read_one() on a read error, or
 * the error encoded by rst_prep_creds_args().
 */
static int rst_prep_creds_from_img(pid_t pid)
{
	CredsEntry *ce = NULL;
	struct cr_img *img;
	int ret;

	img = open_image(CR_FD_CREDS, O_RSTR, pid);
	if (!img)
		return -ENOENT;

	ret = pb_read_one(img, &ce, PB_CREDS);
	close_image(img);

	if (ret > 0) {
		struct thread_creds_args *args;

		args = rst_prep_creds_args(NULL, ce);
		if (IS_ERR(args))
			ret = PTR_ERR(args);
		else
			ret = 0;
	} else if (ret == 0) {
		/*
		 * Nothing was read, so no record has been prepared.
		 * Reporting success would let the caller treat unrelated
		 * rst memory as a creds record.
		 */
		pr_err("No creds entry in image for %d\n", pid);
		ret = -ENOENT;
	}

	/* @ce is still NULL when the read failed; don't hand NULL to protobuf. */
	if (ce)
		creds_entry__free_unpacked(ce, NULL);
	return ret;
}
/*
 * Prepare the creds records for all threads of the current task and
 * report in *@creds_pos the RM_PRIVATE position where the first record
 * starts (0 when there are no creds at all).
 *
 * Returns 0 on success or a negative error code.
 */
static int rst_prep_creds(pid_t pid, CoreEntry *core, unsigned long *creds_pos)
{
	struct thread_creds_args *last = NULL;
	size_t t;

	/*
	 * This is _really_ very old image
	 * format where @thread_core were not
	 * present. It means we don't have
	 * creds either, just ignore and exit
	 * early.
	 */
	if (unlikely(!core->thread_core)) {
		*creds_pos = 0;
		return 0;
	}

	*creds_pos = rst_mem_cpos(RM_PRIVATE);

	/*
	 * Old format: one Creds per task carried in own image file.
	 */
	if (!core->thread_core->creds)
		return rst_prep_creds_from_img(pid);

	/* One record per thread, each linked behind the previous one. */
	t = 0;
	while (t < current->nr_threads) {
		last = rst_prep_creds_args(last, current->core[t]->thread_core->creds);
		if (IS_ERR(last))
			return PTR_ERR(last);
		t++;
	}

	return 0;
}
static int sigreturn_restore(pid_t pid, CoreEntry *core) static int sigreturn_restore(pid_t pid, CoreEntry *core)
{ {
void *mem = MAP_FAILED; void *mem = MAP_FAILED;
...@@ -2882,10 +2984,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -2882,10 +2984,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
unsigned long aio_rings; unsigned long aio_rings;
MmEntry *mm = rsti(current)->mm; MmEntry *mm = rsti(current)->mm;
char *lsm = NULL;
int lsm_profile_len = 0;
unsigned long lsm_pos = 0;
int n_seccomp_filters = 0; int n_seccomp_filters = 0;
unsigned long seccomp_filter_pos = 0; unsigned long seccomp_filter_pos = 0;
...@@ -2893,7 +2991,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -2893,7 +2991,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
struct vm_area_list *vmas = &rsti(current)->vmas; struct vm_area_list *vmas = &rsti(current)->vmas;
int i; int i;
CredsEntry *creds; unsigned long creds_pos = 0;
unsigned long creds_pos_next;
pr_info("Restore via sigreturn\n"); pr_info("Restore via sigreturn\n");
...@@ -2956,6 +3055,13 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -2956,6 +3055,13 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
if (rst_timerfd_prep()) if (rst_timerfd_prep())
goto err_nv; goto err_nv;
/*
* Read creds info for every thread and allocate memory
* needed so we can use this data inside restorer.
*/
if (rst_prep_creds(pid, core, &creds_pos))
goto err_nv;
/* /*
* We're about to search for free VM area and inject the restorer blob * We're about to search for free VM area and inject the restorer blob
* into it. No irrelevent mmaps/mremaps beyond this point, otherwise * into it. No irrelevent mmaps/mremaps beyond this point, otherwise
...@@ -2966,45 +3072,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -2966,45 +3072,9 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
if (ret < 0) if (ret < 0)
goto err; goto err;
creds = read_creds(pid);
if (!creds)
goto err;
if (creds->lsm_profile || opts.lsm_supplied) {
char *rendered, *profile;
int ret;
profile = creds->lsm_profile;
if (opts.lsm_supplied)
profile = opts.lsm_profile;
if (validate_lsm(profile) < 0)
return -1;
if (profile) {
ret = render_lsm_profile(profile, &rendered);
if (ret < 0) {
goto err_nv;
}
lsm_pos = rst_mem_cpos(RM_PRIVATE);
lsm_profile_len = strlen(rendered);
lsm = rst_mem_alloc(lsm_profile_len + 1, RM_PRIVATE);
if (!lsm) {
xfree(rendered);
goto err_nv;
}
strncpy(lsm, rendered, lsm_profile_len);
xfree(rendered);
}
}
if (seccomp_filters_get_rst_pos(core, &n_seccomp_filters, &seccomp_filter_pos) < 0) if (seccomp_filters_get_rst_pos(core, &n_seccomp_filters, &seccomp_filter_pos) < 0)
goto err; goto err;
rst_mem_size = rst_mem_lock(); rst_mem_size = rst_mem_lock();
restore_bootstrap_len = restorer_len + args_len + rst_mem_size; restore_bootstrap_len = restorer_len + args_len + rst_mem_size;
...@@ -3080,10 +3150,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -3080,10 +3150,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
goto err; goto err;
} }
ret = prepare_creds(creds, task_args);
if (ret < 0)
goto err;
/* /*
* Get a reference to shared memory area which is * Get a reference to shared memory area which is
* used to signal if shmem restoration complete * used to signal if shmem restoration complete
...@@ -3134,11 +3200,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -3134,11 +3200,6 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
if (core->tc->has_seccomp_mode) if (core->tc->has_seccomp_mode)
task_args->seccomp_mode = core->tc->seccomp_mode; task_args->seccomp_mode = core->tc->seccomp_mode;
if (lsm)
task_args->creds.lsm_profile = rst_mem_remap_ptr(lsm_pos, RM_PRIVATE);
else
task_args->creds.lsm_profile = NULL;
/* /*
* Arguments for task restoration. * Arguments for task restoration.
*/ */
...@@ -3155,6 +3216,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -3155,6 +3216,7 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
/* /*
* Fill up per-thread data. * Fill up per-thread data.
*/ */
creds_pos_next = creds_pos;
for (i = 0; i < current->nr_threads; i++) { for (i = 0; i < current->nr_threads; i++) {
CoreEntry *tcore; CoreEntry *tcore;
struct rt_sigframe *sigframe; struct rt_sigframe *sigframe;
...@@ -3189,6 +3251,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core) ...@@ -3189,6 +3251,8 @@ static int sigreturn_restore(pid_t pid, CoreEntry *core)
thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr; thread_args[i].clear_tid_addr = CORE_THREAD_ARCH_INFO(tcore)->clear_tid_addr;
core_get_tls(tcore, &thread_args[i].tls); core_get_tls(tcore, &thread_args[i].tls);
rst_reloc_creds(&thread_args[i], &creds_pos_next);
if (tcore->thread_core) { if (tcore->thread_core) {
thread_args[i].has_futex = true; thread_args[i].has_futex = true;
thread_args[i].futex_rla = tcore->thread_core->futex_rla; thread_args[i].futex_rla = tcore->thread_core->futex_rla;
......
...@@ -72,6 +72,26 @@ struct task_restore_core_args; ...@@ -72,6 +72,26 @@ struct task_restore_core_args;
* simpler, force both _args alignment be 64 bytes. * simpler, force both _args alignment be 64 bytes.
*/ */
/*
 * Per-thread credentials handed to the restorer.
 *
 * A record is filled by rst_prep_creds_args() inside RM_PRIVATE rst
 * memory and attached to thread_restore_args::creds_args by
 * rst_reloc_creds(). Records of one task are chained through
 * @mem_pos_next.
 */
struct thread_creds_args {
/*
 * Copy of the image entry. Its pointer members (cap_*, groups,
 * lsm_profile) are set to NULL by rst_prep_creds_args(); use the
 * members below instead.
 */
CredsEntry creds;
/* Taken from kdat.last_cap; upper bound of the bounding-set drop loop in restore_creds(). */
unsigned int cap_last_cap;
/* Capability words copied from the image entry. */
u32 cap_inh[CR_CAP_SIZE];
u32 cap_prm[CR_CAP_SIZE];
u32 cap_eff[CR_CAP_SIZE];
u32 cap_bnd[CR_CAP_SIZE];
/* NOTE(review): not written by rst_prep_creds_args() in the visible code -- confirm whether it is used. */
unsigned int secbits;
/* Rendered LSM profile string, or NULL when there is none. */
char *lsm_profile;
/* Copy of the supplementary group IDs (creds.n_groups entries), or NULL when there are none. */
unsigned int *groups;
/* rst memory positions of @lsm_profile and @groups, used to re-resolve those pointers via rst_mem_remap_ptr(). */
unsigned long mem_lsm_profile_pos;
unsigned long mem_groups_pos;
/* rst memory position of the next thread's record; 0 ends the chain. */
unsigned long mem_pos_next;
};
struct thread_restore_args { struct thread_restore_args {
struct restore_mem_zone mem_zone; struct restore_mem_zone mem_zone;
...@@ -93,6 +113,8 @@ struct thread_restore_args { ...@@ -93,6 +113,8 @@ struct thread_restore_args {
unsigned int siginfo_n; unsigned int siginfo_n;
int pdeath_sig; int pdeath_sig;
struct thread_creds_args *creds_args;
} __aligned(64); } __aligned(64);
struct task_restore_args { struct task_restore_args {
...@@ -153,13 +175,6 @@ struct task_restore_args { ...@@ -153,13 +175,6 @@ struct task_restore_args {
struct itimerval itimers[3]; struct itimerval itimers[3];
CredsEntry creds;
u32 cap_inh[CR_CAP_SIZE];
u32 cap_prm[CR_CAP_SIZE];
u32 cap_eff[CR_CAP_SIZE];
u32 cap_bnd[CR_CAP_SIZE];
u32 cap_last_cap;
MmEntry mm; MmEntry mm;
auxv_t mm_saved_auxv[AT_VECTOR_SIZE]; auxv_t mm_saved_auxv[AT_VECTOR_SIZE];
u32 mm_saved_auxv_size; u32 mm_saved_auxv_size;
......
...@@ -51,7 +51,6 @@ ...@@ -51,7 +51,6 @@
static struct task_entries *task_entries; static struct task_entries *task_entries;
static futex_t thread_inprogress; static futex_t thread_inprogress;
static int cap_last_cap;
static pid_t *helpers; static pid_t *helpers;
static int n_helpers; static int n_helpers;
static pid_t *zombies; static pid_t *zombies;
...@@ -121,8 +120,9 @@ static int lsm_set_label(char *label, int procfd) ...@@ -121,8 +120,9 @@ static int lsm_set_label(char *label, int procfd)
return 0; return 0;
} }
static int restore_creds(CredsEntry *ce, int procfd) static int restore_creds(struct thread_creds_args *args, int procfd)
{ {
CredsEntry *ce = &args->creds;
int b, i, ret; int b, i, ret;
struct cap_header hdr; struct cap_header hdr;
struct cap_data data[_LINUX_CAPABILITY_U32S_3]; struct cap_data data[_LINUX_CAPABILITY_U32S_3];
...@@ -131,6 +131,17 @@ static int restore_creds(CredsEntry *ce, int procfd) ...@@ -131,6 +131,17 @@ static int restore_creds(CredsEntry *ce, int procfd)
* We're still root here and thus can do it without failures. * We're still root here and thus can do it without failures.
*/ */
/*
* Setup supplementary group IDs early.
*/
if (args->groups) {
ret = sys_setgroups(ce->n_groups, args->groups);
if (ret) {
pr_err("Can't setup supplementary group IDs: %d\n", ret);
return -1;
}
}
/* /*
* First -- set the SECURE_NO_SETUID_FIXUP bit not to * First -- set the SECURE_NO_SETUID_FIXUP bit not to
* lose caps bits when changing xids. * lose caps bits when changing xids.
...@@ -190,9 +201,9 @@ static int restore_creds(CredsEntry *ce, int procfd) ...@@ -190,9 +201,9 @@ static int restore_creds(CredsEntry *ce, int procfd)
for (b = 0; b < CR_CAP_SIZE; b++) { for (b = 0; b < CR_CAP_SIZE; b++) {
for (i = 0; i < 32; i++) { for (i = 0; i < 32; i++) {
if (b * 32 + i > cap_last_cap) if (b * 32 + i > args->cap_last_cap)
break; break;
if (ce->cap_bnd[b] & (1 << i)) if (args->cap_bnd[b] & (1 << i))
/* already set */ /* already set */
continue; continue;
ret = sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0); ret = sys_prctl(PR_CAPBSET_DROP, i + b * 32, 0, 0, 0);
...@@ -215,9 +226,9 @@ static int restore_creds(CredsEntry *ce, int procfd) ...@@ -215,9 +226,9 @@ static int restore_creds(CredsEntry *ce, int procfd)
BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE); BUILD_BUG_ON(_LINUX_CAPABILITY_U32S_3 != CR_CAP_SIZE);
for (i = 0; i < CR_CAP_SIZE; i++) { for (i = 0; i < CR_CAP_SIZE; i++) {
data[i].eff = ce->cap_eff[i]; data[i].eff = args->cap_eff[i];
data[i].prm = ce->cap_prm[i]; data[i].prm = args->cap_prm[i];
data[i].inh = ce->cap_inh[i]; data[i].inh = args->cap_inh[i];
} }
ret = sys_capset(&hdr, data); ret = sys_capset(&hdr, data);
...@@ -226,9 +237,8 @@ static int restore_creds(CredsEntry *ce, int procfd) ...@@ -226,9 +237,8 @@ static int restore_creds(CredsEntry *ce, int procfd)
return -1; return -1;
} }
if (lsm_set_label(ce->lsm_profile, procfd) < 0) if (lsm_set_label(args->lsm_profile, procfd) < 0)
return -1; return -1;
return 0; return 0;
} }
...@@ -443,7 +453,7 @@ long __export_restore_thread(struct thread_restore_args *args) ...@@ -443,7 +453,7 @@ long __export_restore_thread(struct thread_restore_args *args)
if (restore_thread_common(rt_sigframe, args)) if (restore_thread_common(rt_sigframe, args))
goto core_restore_end; goto core_restore_end;
ret = restore_creds(&args->ta->creds, args->ta->proc_fd); ret = restore_creds(args->creds_args, args->ta->proc_fd);
if (ret) if (ret)
goto core_restore_end; goto core_restore_end;
...@@ -884,8 +894,6 @@ long __export_restore_task(struct task_restore_args *args) ...@@ -884,8 +894,6 @@ long __export_restore_task(struct task_restore_args *args)
log_set_fd(args->logfd); log_set_fd(args->logfd);
log_set_loglevel(args->loglevel); log_set_loglevel(args->loglevel);
cap_last_cap = args->cap_last_cap;
pr_info("Switched to the restorer %d\n", my_pid); pr_info("Switched to the restorer %d\n", my_pid);
if (vdso_do_park(&args->vdso_sym_rt, args->vdso_rt_parked_at, vdso_rt_size)) if (vdso_do_park(&args->vdso_sym_rt, args->vdso_rt_parked_at, vdso_rt_size))
...@@ -1267,7 +1275,7 @@ long __export_restore_task(struct task_restore_args *args) ...@@ -1267,7 +1275,7 @@ long __export_restore_task(struct task_restore_args *args)
* turning off TCP repair is CAP_SYS_NED_ADMIN protected, * turning off TCP repair is CAP_SYS_NED_ADMIN protected,
* thus restore* creds _after_ all of the above. * thus restore* creds _after_ all of the above.
*/ */
ret = restore_creds(&args->creds, args->proc_fd); ret = restore_creds(args->t->creds_args, args->proc_fd);
ret = ret || restore_dumpable_flag(&args->mm); ret = ret || restore_dumpable_flag(&args->mm);
ret = ret || restore_pdeath_sig(args->t); ret = ret || restore_pdeath_sig(args->t);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment