Commit 6068d10c authored by Dmitry Safonov, committed by Andrei Vagin

core/x86: add compatible 32 register set

Introduce user_regs_struct32.
The other changes mainly rework existing code to use the new
union of register sets.

For protobuf images - reuse user_x86_regs_entry for both compatible
and native tasks, with an enum at the beginning that describes the
register set type. That's better and simpler than introducing a new
32-bit register set for compatible tasks. I first tried to do this
with the oneof keyword:
https://github.com/0x7f454c46/criu/commit/499c93ae0e2b8ffb8c562f309bb046d77d6b07c0
But protobuf supports the oneof keyword only since the recent version
2.6.0, so I tried to rework it into an enum + 2 register sets:
https://github.com/0x7f454c46/criu/commit/aab4489bd4e0b1360b6e05614c2fce3ff2a52eb7

But that did not work either, because the restorer pie takes gpregs as
a thread_restore_args parameter, and UserRegsEntry shouldn't contain
pointers, but structure objects. This could be fixed by redefining
UserRegsEntry not as a typedef for UserX86RegsEntry, but as a structure
containing the needed objects, and then handling it properly in the
restorer - but that's more complicated than reusing user_x86_regs_entry.
Signed-off-by: Dmitry Safonov <dsafonov@virtuozzo.com>
Signed-off-by: Pavel Emelyanov <xemul@virtuozzo.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent d94b9f30
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
/* /*
* Injected syscall instruction * Injected syscall instruction
*/ */
/* FIXME: 32-bit syscalls */
const char code_syscall[] = { const char code_syscall[] = {
0x0f, 0x05, /* syscall */ 0x0f, 0x05, /* syscall */
0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */ 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc /* int 3, ... */
...@@ -40,17 +41,21 @@ static inline __always_unused void __check_code_syscall(void) ...@@ -40,17 +41,21 @@ static inline __always_unused void __check_code_syscall(void)
BUILD_BUG_ON(!is_log2(sizeof(code_syscall))); BUILD_BUG_ON(!is_log2(sizeof(code_syscall)));
} }
/*
* regs must be inited when calling this function from original context
*/
void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs) void parasite_setup_regs(unsigned long new_ip, void *stack, user_regs_struct_t *regs)
{ {
regs->ip = new_ip; set_user_reg(regs, ip, new_ip);
if (stack) if (stack)
regs->sp = (unsigned long) stack; set_user_reg(regs, sp, (unsigned long) stack);
/* Avoid end of syscall processing */ /* Avoid end of syscall processing */
regs->orig_ax = -1; set_user_reg(regs, orig_ax, -1);
/* Make sure flags are in known state */ /* Make sure flags are in known state */
regs->flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF); set_user_reg(regs, flags, get_user_reg(regs, flags) &
~(X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_IF));
} }
int arch_task_compatible(pid_t pid) int arch_task_compatible(pid_t pid)
...@@ -58,14 +63,18 @@ int arch_task_compatible(pid_t pid) ...@@ -58,14 +63,18 @@ int arch_task_compatible(pid_t pid)
unsigned long cs, ds; unsigned long cs, ds;
errno = 0; errno = 0;
cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, cs), 0); /*
* Offset of register must be from 64-bit set even for
* compatible tasks. Fix this to support native i386 tasks
*/
cs = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, cs), 0);
if (errno != 0) { if (errno != 0) {
pr_perror("Can't get CS register for %d", pid); pr_perror("Can't get CS register for %d", pid);
return -1; return -1;
} }
errno = 0; errno = 0;
ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct_t, ds), 0); ds = ptrace(PTRACE_PEEKUSER, pid, offsetof(user_regs_struct64, ds), 0);
if (errno != 0) { if (errno != 0) {
pr_perror("Can't get DS register for %d", pid); pr_perror("Can't get DS register for %d", pid);
return -1; return -1;
...@@ -79,6 +88,7 @@ bool arch_can_dump_task(struct parasite_ctl *ctl) ...@@ -79,6 +88,7 @@ bool arch_can_dump_task(struct parasite_ctl *ctl)
{ {
pid_t pid = ctl->rpid; pid_t pid = ctl->rpid;
/* FIXME: remove it */
if (arch_task_compatible(pid)) { if (arch_task_compatible(pid)) {
pr_err("Can't dump task %d running in 32-bit mode\n", pid); pr_err("Can't dump task %d running in 32-bit mode\n", pid);
return false; return false;
...@@ -98,23 +108,40 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret, ...@@ -98,23 +108,40 @@ int syscall_seized(struct parasite_ctl *ctl, int nr, unsigned long *ret,
user_regs_struct_t regs = ctl->orig.regs; user_regs_struct_t regs = ctl->orig.regs;
int err; int err;
regs.ax = (unsigned long)nr; if (regs.is_native) {
regs.di = arg1; user_regs_struct64 *r = &regs.native;
regs.si = arg2;
regs.dx = arg3; r->ax = (uint64_t)nr;
regs.r10 = arg4; r->di = arg1;
regs.r8 = arg5; r->si = arg2;
regs.r9 = arg6; r->dx = arg3;
r->r10 = arg4;
r->r8 = arg5;
r->r9 = arg6;
} else {
user_regs_struct32 *r = &regs.compat;
r->ax = (uint32_t)nr;
r->bx = arg1;
r->cx = arg2;
r->dx = arg3;
r->si = arg4;
r->di = arg5;
r->bp = arg6;
}
err = __parasite_execute_syscall(ctl, &regs, code_syscall); err = __parasite_execute_syscall(ctl, &regs, code_syscall);
*ret = regs.ax; *ret = get_user_reg(&regs, ax);
return err; return err;
} }
static int save_task_regs(CoreEntry *core, static int save_task_regs(CoreEntry *core,
user_regs_struct_t *regs, user_fpregs_struct_t *fpregs); user_regs_struct_t *regs, user_fpregs_struct_t *fpregs);
/*
 * Read register @name from @pregs as a signed quantity.
 *
 * The value is taken from the set selected by is_native and cast to the
 * signed type of matching width (int64_t for native, int32_t for compat),
 * so that a compat register holding e.g. 0xffffffff compares as -1.
 */
#define get_signed_user_reg(pregs, name)			\
	(!((pregs)->is_native)					\
		? (int32_t)((pregs)->compat.name)		\
		: (int64_t)((pregs)->native.name))
int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core) int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
{ {
user_fpregs_struct_t xsave = { }, *xs = NULL; user_fpregs_struct_t xsave = { }, *xs = NULL;
...@@ -125,18 +152,18 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core) ...@@ -125,18 +152,18 @@ int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core)
pr_info("Dumping GP/FPU registers for %d\n", pid); pr_info("Dumping GP/FPU registers for %d\n", pid);
/* Did we come from a system call? */ /* Did we come from a system call? */
if ((int)regs.orig_ax >= 0) { if (get_signed_user_reg(&regs, orig_ax) >= 0) {
/* Restart the system call */ /* Restart the system call */
switch ((long)(int)regs.ax) { switch (get_signed_user_reg(&regs, ax)) {
case -ERESTARTNOHAND: case -ERESTARTNOHAND:
case -ERESTARTSYS: case -ERESTARTSYS:
case -ERESTARTNOINTR: case -ERESTARTNOINTR:
regs.ax = regs.orig_ax; set_user_reg(&regs, ax, get_user_reg(&regs, orig_ax));
regs.ip -= 2; set_user_reg(&regs, ip, get_user_reg(&regs, ip) - 2);
break; break;
case -ERESTART_RESTARTBLOCK: case -ERESTART_RESTARTBLOCK:
pr_warn("Will restore %d with interrupted system call\n", pid); pr_warn("Will restore %d with interrupted system call\n", pid);
regs.ax = -EINTR; set_user_reg(&regs, ax, -EINTR);
break; break;
} }
} }
...@@ -180,61 +207,83 @@ static int save_task_regs(CoreEntry *core, ...@@ -180,61 +207,83 @@ static int save_task_regs(CoreEntry *core,
{ {
UserX86RegsEntry *gpregs = core->thread_info->gpregs; UserX86RegsEntry *gpregs = core->thread_info->gpregs;
#define assign_reg(dst, src, e) do { dst->e = (__typeof__(dst->e))(src)->e; } while (0) #define assign_reg(dst, src, e) do { dst->e = (__typeof__(dst->e))src.e; } while (0)
#define assign_array(dst, src, e) memcpy(dst->e, &(src)->e, sizeof((src)->e)) #define assign_array(dst, src, e) memcpy(dst->e, &src.e, sizeof(src.e))
assign_reg(gpregs, regs, r15); if (regs.is_native) {
assign_reg(gpregs, regs, r14); assign_reg(gpregs, regs->native, r15);
assign_reg(gpregs, regs, r13); assign_reg(gpregs, regs->native, r14);
assign_reg(gpregs, regs, r12); assign_reg(gpregs, regs->native, r13);
assign_reg(gpregs, regs, bp); assign_reg(gpregs, regs->native, r12);
assign_reg(gpregs, regs, bx); assign_reg(gpregs, regs->native, bp);
assign_reg(gpregs, regs, r11); assign_reg(gpregs, regs->native, bx);
assign_reg(gpregs, regs, r10); assign_reg(gpregs, regs->native, r11);
assign_reg(gpregs, regs, r9); assign_reg(gpregs, regs->native, r10);
assign_reg(gpregs, regs, r8); assign_reg(gpregs, regs->native, r9);
assign_reg(gpregs, regs, ax); assign_reg(gpregs, regs->native, r8);
assign_reg(gpregs, regs, cx); assign_reg(gpregs, regs->native, ax);
assign_reg(gpregs, regs, dx); assign_reg(gpregs, regs->native, cx);
assign_reg(gpregs, regs, si); assign_reg(gpregs, regs->native, dx);
assign_reg(gpregs, regs, di); assign_reg(gpregs, regs->native, si);
assign_reg(gpregs, regs, orig_ax); assign_reg(gpregs, regs->native, di);
assign_reg(gpregs, regs, ip); assign_reg(gpregs, regs->native, orig_ax);
assign_reg(gpregs, regs, cs); assign_reg(gpregs, regs->native, ip);
assign_reg(gpregs, regs, flags); assign_reg(gpregs, regs->native, cs);
assign_reg(gpregs, regs, sp); assign_reg(gpregs, regs->native, flags);
assign_reg(gpregs, regs, ss); assign_reg(gpregs, regs->native, sp);
assign_reg(gpregs, regs, fs_base); assign_reg(gpregs, regs->native, ss);
assign_reg(gpregs, regs, gs_base); assign_reg(gpregs, regs->native, fs_base);
assign_reg(gpregs, regs, ds); assign_reg(gpregs, regs->native, gs_base);
assign_reg(gpregs, regs, es); assign_reg(gpregs, regs->native, ds);
assign_reg(gpregs, regs, fs); assign_reg(gpregs, regs->native, es);
assign_reg(gpregs, regs, gs); assign_reg(gpregs, regs->native, fs);
assign_reg(gpregs, regs->native, gs);
gpregs->mode = USER_X86_REGS_MODE__NATIVE;
} else {
assign_reg(gpregs, regs->compat, bx);
assign_reg(gpregs, regs->compat, cx);
assign_reg(gpregs, regs->compat, dx);
assign_reg(gpregs, regs->compat, si);
assign_reg(gpregs, regs->compat, di);
assign_reg(gpregs, regs->compat, bp);
assign_reg(gpregs, regs->compat, ax);
assign_reg(gpregs, regs->compat, ds);
assign_reg(gpregs, regs->compat, es);
assign_reg(gpregs, regs->compat, fs);
assign_reg(gpregs, regs->compat, gs);
assign_reg(gpregs, regs->compat, orig_ax);
assign_reg(gpregs, regs->compat, ip);
assign_reg(gpregs, regs->compat, cs);
assign_reg(gpregs, regs->compat, flags);
assign_reg(gpregs, regs->compat, sp);
assign_reg(gpregs, regs->compat, ss);
gpregs->mode = USER_X86_REGS_MODE__COMPAT;
}
if (!fpregs) if (!fpregs)
return 0; return 0;
assign_reg(core->thread_info->fpregs, &fpregs->i387, cwd); assign_reg(core->thread_info->fpregs, fpregs->i387, cwd);
assign_reg(core->thread_info->fpregs, &fpregs->i387, swd); assign_reg(core->thread_info->fpregs, fpregs->i387, swd);
assign_reg(core->thread_info->fpregs, &fpregs->i387, twd); assign_reg(core->thread_info->fpregs, fpregs->i387, twd);
assign_reg(core->thread_info->fpregs, &fpregs->i387, fop); assign_reg(core->thread_info->fpregs, fpregs->i387, fop);
assign_reg(core->thread_info->fpregs, &fpregs->i387, rip); assign_reg(core->thread_info->fpregs, fpregs->i387, rip);
assign_reg(core->thread_info->fpregs, &fpregs->i387, rdp); assign_reg(core->thread_info->fpregs, fpregs->i387, rdp);
assign_reg(core->thread_info->fpregs, &fpregs->i387, mxcsr); assign_reg(core->thread_info->fpregs, fpregs->i387, mxcsr);
assign_reg(core->thread_info->fpregs, &fpregs->i387, mxcsr_mask); assign_reg(core->thread_info->fpregs, fpregs->i387, mxcsr_mask);
/* Make sure we have enough space */ /* Make sure we have enough space */
BUG_ON(core->thread_info->fpregs->n_st_space != ARRAY_SIZE(fpregs->i387.st_space)); BUG_ON(core->thread_info->fpregs->n_st_space != ARRAY_SIZE(fpregs->i387.st_space));
BUG_ON(core->thread_info->fpregs->n_xmm_space != ARRAY_SIZE(fpregs->i387.xmm_space)); BUG_ON(core->thread_info->fpregs->n_xmm_space != ARRAY_SIZE(fpregs->i387.xmm_space));
assign_array(core->thread_info->fpregs, &fpregs->i387, st_space); assign_array(core->thread_info->fpregs, fpregs->i387, st_space);
assign_array(core->thread_info->fpregs, &fpregs->i387, xmm_space); assign_array(core->thread_info->fpregs, fpregs->i387, xmm_space);
if (cpu_has_feature(X86_FEATURE_OSXSAVE)) { if (cpu_has_feature(X86_FEATURE_OSXSAVE)) {
BUG_ON(core->thread_info->fpregs->xsave->n_ymmh_space != ARRAY_SIZE(fpregs->ymmh.ymmh_space)); BUG_ON(core->thread_info->fpregs->xsave->n_ymmh_space != ARRAY_SIZE(fpregs->ymmh.ymmh_space));
assign_reg(core->thread_info->fpregs->xsave, &fpregs->xsave_hdr, xstate_bv); assign_reg(core->thread_info->fpregs->xsave, fpregs->xsave_hdr, xstate_bv);
assign_array(core->thread_info->fpregs->xsave, &fpregs->ymmh, ymmh_space); assign_array(core->thread_info->fpregs->xsave, fpregs->ymmh, ymmh_space);
} }
#undef assign_reg #undef assign_reg
...@@ -243,6 +292,44 @@ static int save_task_regs(CoreEntry *core, ...@@ -243,6 +292,44 @@ static int save_task_regs(CoreEntry *core,
return 0; return 0;
} }
/*
 * Fetch the general-purpose registers of task @pid with PTRACE_GETREGSET.
 *
 * The kernel is offered a buffer sized for the 64-bit register set; the
 * length it reports back in iov_len is compared against the sizes of the
 * native and compat sets, and regs->is_native is set accordingly.
 *
 * Returns the ptrace() result if the call fails or if the reported length
 * matches one of the two sets; returns -1 (after logging) if the length
 * matches neither. On ptrace() failure @regs->is_native is left untouched.
 */
int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs)
{
	struct iovec iov;
	int ret;

	iov.iov_base = &regs->native;
	iov.iov_len = sizeof(user_regs_struct64);

	ret = ptrace(PTRACE_GETREGSET, pid, NT_PRSTATUS, &iov);
	/*
	 * iov_len is only meaningful as a "bytes written" count after a
	 * successful call, so do not classify the task on failure.
	 */
	if (ret < 0)
		return ret;

	if (iov.iov_len == sizeof(regs->native)) {
		regs->is_native = true;
		return ret;
	}
	if (iov.iov_len == sizeof(regs->compat)) {
		regs->is_native = false;
		return ret;
	}

	pr_err("PTRACE_GETREGSET read %zu bytes for pid %d, but native/compat regs sizes are %zu/%zu bytes\n",
			iov.iov_len, pid,
			sizeof(regs->native), sizeof(regs->compat));
	return -1;
}
/*
 * Write the registers in @regs back into task @pid with PTRACE_SETREGSET.
 *
 * The buffer handed to the kernel is the native (64-bit) set when
 * regs->is_native is set and the compat (32-bit) set otherwise.
 *
 * Returns the ptrace() result.
 */
int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs)
{
	/* Start from the compat view and widen it for native tasks. */
	struct iovec iov = {
		.iov_base	= &regs->compat,
		.iov_len	= sizeof(user_regs_struct32),
	};

	if (regs->is_native) {
		iov.iov_base = &regs->native;
		iov.iov_len = sizeof(user_regs_struct64);
	}

	return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
}
int arch_alloc_thread_info(CoreEntry *core) int arch_alloc_thread_info(CoreEntry *core)
{ {
size_t sz; size_t sz;
...@@ -477,6 +564,12 @@ void *mmap_seized(struct parasite_ctl *ctl, ...@@ -477,6 +564,12 @@ void *mmap_seized(struct parasite_ctl *ctl,
int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r) int restore_gpregs(struct rt_sigframe *f, UserX86RegsEntry *r)
{ {
/* FIXME: rt_sigcontext for compatible tasks */
if (r->gpregs_case != USER_X86_REGS_CASE_T__NATIVE) {
pr_err("Can't prepare rt_sigframe for compatible task restore\n");
return -1;
}
#define CPREG1(d) f->uc.uc_mcontext.d = r->d #define CPREG1(d) f->uc.uc_mcontext.d = r->d
#define CPREG2(d, s) f->uc.uc_mcontext.d = r->s #define CPREG2(d, s) f->uc.uc_mcontext.d = r->s
......
...@@ -5,6 +5,9 @@ extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core); ...@@ -5,6 +5,9 @@ extern int get_task_regs(pid_t pid, user_regs_struct_t regs, CoreEntry *core);
extern int arch_alloc_thread_info(CoreEntry *core); extern int arch_alloc_thread_info(CoreEntry *core);
extern void arch_free_thread_info(CoreEntry *core); extern void arch_free_thread_info(CoreEntry *core);
#define ARCH_HAS_GET_REGS
extern int ptrace_get_regs(pid_t pid, user_regs_struct_t *regs);
extern int ptrace_set_regs(pid_t pid, user_regs_struct_t *regs);
#define core_put_tls(core, tls) #define core_put_tls(core, tls)
......
...@@ -11,34 +11,78 @@ ...@@ -11,34 +11,78 @@
#define SIGMAX_OLD 31 #define SIGMAX_OLD 31
typedef struct { typedef struct {
unsigned long r15; uint64_t r15;
unsigned long r14; uint64_t r14;
unsigned long r13; uint64_t r13;
unsigned long r12; uint64_t r12;
unsigned long bp; uint64_t bp;
unsigned long bx; uint64_t bx;
unsigned long r11; uint64_t r11;
unsigned long r10; uint64_t r10;
unsigned long r9; uint64_t r9;
unsigned long r8; uint64_t r8;
unsigned long ax; uint64_t ax;
unsigned long cx; uint64_t cx;
unsigned long dx; uint64_t dx;
unsigned long si; uint64_t si;
unsigned long di; uint64_t di;
unsigned long orig_ax; uint64_t orig_ax;
unsigned long ip; uint64_t ip;
unsigned long cs; uint64_t cs;
unsigned long flags; uint64_t flags;
unsigned long sp; uint64_t sp;
unsigned long ss; uint64_t ss;
unsigned long fs_base; uint64_t fs_base;
unsigned long gs_base; uint64_t gs_base;
unsigned long ds; uint64_t ds;
unsigned long es; uint64_t es;
unsigned long fs; uint64_t fs;
unsigned long gs; uint64_t gs;
} user_regs_struct64;
/*
 * 32-bit general-purpose register set; it is the `compat` member of the
 * user_regs_struct_t union on 64-bit builds and the `native` member on
 * 32-bit builds.
 *
 * ptrace_get_regs() distinguishes this set from the 64-bit one purely by
 * comparing the length returned by PTRACE_GETREGSET with sizeof() of each
 * structure, so fields must not be added, removed or resized here.
 * NOTE(review): the field order presumably mirrors the kernel's i386
 * user_regs_struct layout - confirm before reordering anything.
 */
typedef struct {
uint32_t bx;
uint32_t cx;
uint32_t dx;
uint32_t si;
uint32_t di;
uint32_t bp;
uint32_t ax;
uint32_t ds;
uint32_t es;
uint32_t fs;
uint32_t gs;
uint32_t orig_ax;
uint32_t ip;
uint32_t cs;
uint32_t flags;
uint32_t sp;
uint32_t ss;
} user_regs_struct32;
#ifdef CONFIG_X86_64
typedef struct {
union {
user_regs_struct64 native;
user_regs_struct32 compat;
};
bool is_native;
} user_regs_struct_t; } user_regs_struct_t;
/*
 * Read register @name from whichever set @pregs currently holds: the
 * compat set when is_native is clear, the native set otherwise. The
 * expression has the wider of the two field types.
 */
#define get_user_reg(pregs, name)				\
	(!((pregs)->is_native)					\
		? ((pregs)->compat.name)			\
		: ((pregs)->native.name))
/*
 * Store @val into register @name of whichever set @pregs currently holds
 * (native when is_native is set, compat otherwise). The expression yields
 * the stored value.
 *
 * @val is parenthesized so that any expression passed by the caller is
 * evaluated as a unit before the assignment.
 */
#define set_user_reg(pregs, name, val)	(((pregs)->is_native) ?	\
		((pregs)->native.name = (val)) :		\
		((pregs)->compat.name = (val)))
#else
typedef struct {
union {
user_regs_struct32 native;
};
} user_regs_struct_t;
#define get_user_reg(pregs, name) ((pregs)->native.name)
/*
 * 32-bit build: there is only one register set, so store @val straight
 * into native.@name. @val is parenthesized so that any expression passed
 * by the caller is evaluated as a unit before the assignment.
 */
#define set_user_reg(pregs, name, val) ((pregs)->native.name = (val))
#endif
#if 0 #if 0
typedef struct { typedef struct {
...@@ -74,9 +118,9 @@ static inline unsigned long task_size(void) { return TASK_SIZE; } ...@@ -74,9 +118,9 @@ static inline unsigned long task_size(void) { return TASK_SIZE; }
typedef uint64_t auxv_t; typedef uint64_t auxv_t;
typedef uint32_t tls_t; typedef uint32_t tls_t;
#define REG_RES(regs) ((regs).ax) #define REG_RES(regs) get_user_reg(&regs, ax)
#define REG_IP(regs) ((regs).ip) #define REG_IP(regs) get_user_reg(&regs, ip)
#define REG_SYSCALL_NR(regs) ((regs).orig_ax) #define REG_SYSCALL_NR(regs) get_user_reg(&regs, orig_ax)
#define AT_VECTOR_SIZE 44 #define AT_VECTOR_SIZE 44
......
...@@ -69,6 +69,7 @@ unsigned long get_exec_start(struct vm_area_list *vmas) ...@@ -69,6 +69,7 @@ unsigned long get_exec_start(struct vm_area_list *vmas)
return 0; return 0;
} }
#ifndef ARCH_HAS_GET_REGS
static inline int ptrace_get_regs(int pid, user_regs_struct_t *regs) static inline int ptrace_get_regs(int pid, user_regs_struct_t *regs)
{ {
struct iovec iov; struct iovec iov;
...@@ -86,6 +87,7 @@ static inline int ptrace_set_regs(int pid, user_regs_struct_t *regs) ...@@ -86,6 +87,7 @@ static inline int ptrace_set_regs(int pid, user_regs_struct_t *regs)
iov.iov_len = sizeof(user_regs_struct_t); iov.iov_len = sizeof(user_regs_struct_t);
return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov); return ptrace(PTRACE_SETREGSET, pid, NT_PRSTATUS, &iov);
} }
#endif
static int get_thread_ctx(int pid, struct thread_ctx *ctx) static int get_thread_ctx(int pid, struct thread_ctx *ctx)
{ {
......
...@@ -2,6 +2,12 @@ syntax = "proto2"; ...@@ -2,6 +2,12 @@ syntax = "proto2";
import "opts.proto"; import "opts.proto";
/*
 * Tells which register set a user_x86_regs_entry describes; it is the
 * type of that message's gpregs_case field, whose declared default is
 * NATIVE.
 */
enum user_x86_regs_case_t {
NATIVE = 1; /* 64-bit register set */
COMPAT = 2; /* 32-bit (compatible task) register set */
}
/* Reusing entry for both 64 and 32 bits register sets */
message user_x86_regs_entry { message user_x86_regs_entry {
required uint64 r15 = 1; required uint64 r15 = 1;
required uint64 r14 = 2; required uint64 r14 = 2;
...@@ -30,6 +36,7 @@ message user_x86_regs_entry { ...@@ -30,6 +36,7 @@ message user_x86_regs_entry {
required uint64 es = 25; required uint64 es = 25;
required uint64 fs = 26; required uint64 fs = 26;
required uint64 gs = 27; required uint64 gs = 27;
optional user_x86_regs_case_t gpregs_case = 28 [default = NATIVE];
} }
message user_x86_xsave_entry { message user_x86_xsave_entry {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment