Commit a0956172 authored by Cyrill Gorcunov

restore: Simplify sigreturn code

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
parent 11eb2ca4
@@ -1219,7 +1219,7 @@ static int restore_all_tasks(pid_t pid)
         return restore_root_task(path, pstree_fd);
 }
-static long restorer_vma_hint(pid_t pid, struct list_head *self_vma_list, long vma_len)
+static long restorer_get_vma_hint(pid_t pid, struct list_head *self_vma_list, long vma_len)
 {
         struct vma_area *vma_area;
         long prev_vma_end, hint;
@@ -1284,27 +1284,29 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
 {
         long restore_task_code_len, restore_task_vma_len;
         long restore_thread_code_len, restore_thread_vma_len;
         void *exec_mem = MAP_FAILED;
         void *restore_thread_exec_start;
         void *restore_task_exec_start;
-        long args_offset, new_sp, hint;
+        long new_sp, exec_mem_hint;
         long ret;
-        struct task_restore_core_args *args;
+        struct task_restore_core_args *task_args;
+        struct thread_restore_args *thread_args;
         char path[64];
         LIST_HEAD(self_vma_list);
         struct vma_area *vma_area;
-        int fd_vmas = -1;
+        int fd_self_vmas = -1;
+        int fd_core = -1;
         int num;
-        struct thread_restore_args *thread_args;
         struct pstree_entry pstree_entry;
         int *fd_core_threads;
         int fd_pstree = -1;
-        RLOCK_T(rlock);
         restore_task_code_len = 0;
         restore_task_vma_len = 0;
         restore_thread_code_len = 0;
@@ -1313,9 +1315,8 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
         if (parse_maps(getpid(), &self_vma_list, false))
                 goto err;
-        pr_info_vma_list(&self_vma_list);
+        /* pr_info_vma_list(&self_vma_list); */
-        BUILD_BUG_ON(sizeof(struct task_restore_core_args) > RESTORE_ARGS_SIZE);
         BUILD_BUG_ON(sizeof(struct task_restore_core_args) & 1);
         BUILD_BUG_ON(sizeof(struct thread_restore_args) & 1);
@@ -1326,17 +1327,24 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
                 goto err;
         }
+        snprintf(path, sizeof(path), FMT_FNAME_CORE_OUT, pid);
+        fd_core = open(path, O_RDONLY, CR_FD_PERM);
+        if (fd_core < 0) {
+                pr_perror("Can't open %s\n", path);
+                goto err;
+        }
         snprintf(path, sizeof(path), FMT_FNAME_VMAS, getpid());
         unlink(path);
-        fd_vmas = open(path, O_CREAT | O_WRONLY, CR_FD_PERM);
-        if (fd_vmas < 0) {
+        fd_self_vmas = open(path, O_CREAT | O_RDWR, CR_FD_PERM);
+        if (fd_self_vmas < 0) {
                 pr_perror("Can't open %s\n", path);
                 goto err;
         }
         num = 0;
         list_for_each_entry(vma_area, &self_vma_list, list) {
-                ret = write(fd_vmas, &vma_area->vma, sizeof(vma_area->vma));
+                ret = write(fd_self_vmas, &vma_area->vma, sizeof(vma_area->vma));
                 if (ret != sizeof(vma_area->vma)) {
                         pr_perror("\nUnable to write vma entry (%li written)\n", num);
                         goto err;
@@ -1344,17 +1352,12 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
                 num++;
         }
-        close_safe(&fd_vmas);
         free_mappings(&self_vma_list);
-        restore_task_code_len = restore_task(RESTORE_CMD__GET_SELF_LEN) - (long)restore_task;
+        restore_task_code_len = restore_task(RESTORE_CMD__GET_SELF_LEN, NULL) - (long)restore_task;
         restore_task_code_len = round_up(restore_task_code_len, 16);
-        args_offset = restore_task(RESTORE_CMD__GET_ARG_OFFSET) - (long)restore_task;
-        restore_task_vma_len = round_up(restore_task_code_len + RESTORE_STACK_SIZE + RESTORE_STACK_FRAME, PAGE_SIZE);
-        restore_thread_code_len = restore_thread(RESTORE_CMD__GET_SELF_LEN, NULL) - (long)restore_thread;
-        restore_thread_code_len = round_up(restore_thread_code_len, 16);
+        restore_task_vma_len = round_up(restore_task_code_len + sizeof(*task_args), PAGE_SIZE);
         /*
          * Thread statistics
@@ -1381,11 +1384,12 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
         /*
          * Compute how many memory we will need
          * to restore all threads, every thread
-         * requires own stack and heap, it's about
-         * 40K per thread.
+         * requires own stack and heap, it's ~40K
+         * per thread.
          */
-        BUILD_BUG_ON(sizeof(*thread_args) & 1);
+        restore_thread_code_len = restore_thread(RESTORE_CMD__GET_SELF_LEN, NULL) - (long)restore_thread;
+        restore_thread_code_len = round_up(restore_thread_code_len, 16);
         restore_thread_vma_len = sizeof(*thread_args) * pstree_entry.nr_threads;
         restore_thread_vma_len = round_up(restore_thread_vma_len, 16);
@@ -1397,17 +1401,22 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
                 break;
         }
-        hint = restorer_vma_hint(pid, &self_vma_list,
-                                 restore_task_vma_len + restore_thread_vma_len);
-        if (hint == -1) {
+        exec_mem_hint = restorer_get_vma_hint(pid, &self_vma_list,
+                                              restore_task_vma_len +
+                                              restore_thread_vma_len);
+        if (exec_mem_hint == -1) {
                 pr_err("No suitable area for task_restore bootstrap (%dK)\n",
                        restore_task_vma_len + restore_thread_vma_len);
                 goto err;
-        } else
-                pr_info("Found bootstrap VMA hint at: %lx\n", hint);
+        } else {
+                pr_info("Found bootstrap VMA hint at: %lx (needs ~%dK)\n",
+                        exec_mem_hint,
+                        (restore_task_vma_len + restore_thread_vma_len) >> 10);
+        }
         /* VMA we need to run task_restore code */
-        exec_mem = mmap((void *)hint, restore_task_vma_len + restore_thread_vma_len,
+        exec_mem = mmap((void *)exec_mem_hint,
+                        restore_task_vma_len + restore_thread_vma_len,
                         PROT_READ | PROT_WRITE | PROT_EXEC,
                         MAP_PRIVATE | MAP_ANON, 0, 0);
         if (exec_mem == MAP_FAILED) {
@@ -1416,76 +1425,44 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
         }
         /*
-         * Prepare a stack for the task_restore. It's a bit
-         * tricky -- since compiler generates function
-         * prologue we need to manually tune up stack
-         * value.
+         * Prepare a memory map for restorer. Note a thread space
+         * might be completely unused so it's here just for convenience.
          */
-        memzero(exec_mem, RESTORE_STACK_SIZE + RESTORE_STACK_FRAME);
-        restore_task_exec_start = exec_mem + RESTORE_STACK_SIZE + RESTORE_STACK_FRAME;
-        /* Restorer content at the new location */
-        memcpy(restore_task_exec_start, &restore_task, restore_task_code_len);
-        /*
-         * Adjust stack with red-zone area.
-         */
-        new_sp = (long)exec_mem + RESTORE_STACK_SIZE - RESTORE_STACK_REDZONE;
-        /*
-         * Thread restorer will be there.
-         */
-        restore_thread_exec_start = (void *)((long)exec_mem + restore_task_vma_len);
-        memcpy(restore_thread_exec_start, &restore_thread, restore_thread_code_len);
-        /*
-         * Pass arguments and run a command.
-         */
-        args = (struct task_restore_core_args *)(restore_task_exec_start + args_offset);
-        args->rt_sigframe = (void *)((long)exec_mem + RESTORE_STACK_SIZE + RESTORE_STACK_FRAME - RESTORE_STACK_REDZONE);
-        args->self_entry = exec_mem;
-        args->self_size = restore_task_vma_len;
-        args->lock = &rlock;
-        args->pid = pid;
-        strcpy(args->self_vmas_path, path);
-        snprintf(path, sizeof(path), FMT_FNAME_CORE_OUT, pid);
-        strcpy(args->core_path, path);
-        pr_info("restore_task_vma_len: %li restore_task_code_len: %li\n"
-                "exec_mem: %p restore_task_exec_start: %p new_sp: %p\n"
-                "args: %p args->rt_sigframe: %p\n"
-                "args->self_entry: %p args->self_size: %p\n"
-                "args->self_vmas_path: %p args->core_path: %p\n"
-                "args_offset: %li\n",
-                restore_task_vma_len, restore_task_code_len,
-                exec_mem, restore_task_exec_start, new_sp, args,
-                args->rt_sigframe, args->self_entry, args->self_size,
-                args->self_vmas_path, args->core_path,
-                args_offset);
+        restore_task_exec_start = exec_mem;
+        restore_thread_exec_start = restore_task_exec_start + restore_task_vma_len;
+        task_args = restore_task_exec_start + restore_task_code_len;
+        thread_args = restore_thread_exec_start + restore_thread_code_len;
+        memzero_p(task_args);
+        memzero_p(thread_args);
+        /*
+         * Code at a new place.
+         */
+        memcpy(restore_task_exec_start, &restore_task, restore_task_code_len);
+        memcpy(restore_thread_exec_start, &restore_thread, restore_thread_code_len);
+        /*
+         * Adjust stack.
+         */
+        new_sp = RESTORE_ALIGN_STACK((long)task_args->mem_zone.stack, sizeof(task_args->mem_zone.stack));
+        /*
+         * Arguments for task restoration.
+         */
+        task_args->pid = pid;
+        task_args->fd_core = fd_core;
+        task_args->fd_self_vmas = fd_self_vmas;
         if (pstree_entry.nr_threads) {
                 int i;
                 /*
                  * Now prepare run-time data for threads restore.
                  */
-                thread_args = (struct thread_restore_args *)
-                                ((long)restore_thread_exec_start +
-                                 (long)restore_thread_code_len);
-                args->nr_threads = (long)pstree_entry.nr_threads;
-                args->clone_restore_fn = (void *)restore_thread_exec_start;
-                args->thread_args = thread_args;
-                pr_info("args->nr_threads: %li\n"
-                        "args->clone_restore_fn: %p\n"
-                        "args->thread_args: %p\n",
-                        args->nr_threads,
-                        args->clone_restore_fn,
-                        args->thread_args);
+                task_args->nr_threads = pstree_entry.nr_threads;
+                task_args->clone_restore_fn = (void *)restore_thread_exec_start;
+                task_args->thread_args = thread_args;
                 /*
                  * Fill up per-thread data.
@@ -1494,8 +1471,7 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
                 for (i = 0; i < pstree_entry.nr_threads; i++) {
                         read_ptr_safe(fd_pstree, &thread_args[i].pid, err);
-                        thread_args[i].lock = args->lock;
+                        /* Core files are to be opened */
                         snprintf(path, sizeof(path), FMT_FNAME_CORE, thread_args[i].pid);
                         thread_args[i].fd_core = open(path, O_RDONLY, CR_FD_PERM);
                         if (thread_args[i].fd_core < 0) {
@@ -1503,15 +1479,26 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
                                 goto err;
                         }
-                        pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p lock %8p\n",
-                                i, (long)thread_args[i].stack,
-                                thread_args[i].heap,
-                                thread_args[i].rt_sigframe,
-                                thread_args[i].lock);
+                        pr_info("Thread %4d stack %8p heap %8p rt_sigframe %8p\n",
+                                i, (long)thread_args[i].mem_zone.stack,
+                                thread_args[i].mem_zone.heap,
+                                thread_args[i].mem_zone.rt_sigframe);
                 }
         }
+        pr_info("task_args: %p\n"
+                "task_args->pid: %d\n"
+                "task_args->fd_core: %d\n"
+                "task_args->fd_self_vmas: %d\n"
+                "task_args->nr_threads: %d\n"
+                "task_args->clone_restore_fn: %p\n"
+                "task_args->thread_args: %p\n",
+                task_args, task_args->pid,
+                task_args->fd_core, task_args->fd_self_vmas,
+                task_args->nr_threads, task_args->clone_restore_fn,
+                task_args->thread_args);
         close_safe(&fd_pstree);
         /*
@@ -1521,17 +1508,21 @@ static void sigreturn_restore(pid_t pstree_pid, pid_t pid)
         asm volatile(
                 "movq %0, %%rbx \n"
                 "movq %1, %%rax \n"
+                "movq %2, %%rsi \n"
                 "movl $"__stringify(RESTORE_CMD__RESTORE_CORE)", %%edi \n"
                 "movq %%rbx, %%rsp \n"
                 "callq *%%rax \n"
                 :
-                : "g"(new_sp), "g"(restore_task_exec_start)
-                : "rsp", "rdi", "rbx", "rax", "memory");
+                : "g"(new_sp),
+                  "g"(restore_task_exec_start),
+                  "g"(task_args)
+                : "rsp", "rdi", "rsi", "rbx", "rax", "memory");
 err:
         free_mappings(&self_vma_list);
         close_safe(&fd_pstree);
-        close_safe(&fd_vmas);
+        close_safe(&fd_core);
+        close_safe(&fd_self_vmas);
         if (exec_mem != MAP_FAILED)
                 munmap(exec_mem, restore_task_vma_len + restore_thread_vma_len);
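A note on the call-site change above: the trampoline now hands the argument block to the relocated restorer in %rsi instead of leaving it embedded inside the restorer's own code. The sketch below is only an illustration of that calling convention (the helper name call_restorer is invented here); the stack switch itself has no C equivalent, which is exactly why the commit keeps the inline asm.

/* Illustration of the new convention: cmd in %edi, task_args in %rsi,
 * relocated restore_task() address in %rax.  Hypothetical helper, not
 * part of the commit. */
struct task_restore_core_args;
typedef long (*task_restore_fcall_t)(long cmd, struct task_restore_core_args *args);

#define RESTORE_CMD__RESTORE_CORE 2   /* value from the new restorer.h */

static long call_restorer(void *restore_task_exec_start,
                          struct task_restore_core_args *task_args)
{
        /* The real code first loads new_sp into %rsp, then performs this call. */
        task_restore_fcall_t fn = (task_restore_fcall_t)restore_task_exec_start;
        return fn(RESTORE_CMD__RESTORE_CORE, task_args);
}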
@@ -12,64 +12,63 @@
 # error Only x86-64 is supported
 #endif
+struct task_restore_core_args;
+struct thread_restore_args;
+extern long restore_task(long cmd, struct task_restore_core_args *args);
+extern long restore_thread(long cmd, struct thread_restore_args *args);
+typedef long (*task_restore_fcall_t) (long cmd, struct task_restore_core_args *args);
+typedef long (*thread_restore_fcall_t) (long cmd, struct thread_restore_args *args);
+#define RESTORE_CMD__NONE              0
+#define RESTORE_CMD__GET_SELF_LEN      1
+#define RESTORE_CMD__RESTORE_CORE      2
+#define RESTORE_CMD__RESTORE_THREAD    3
 /*
- * These must! be power of two values.
+ * These *must* be power of two values.
  */
 #define RESTORE_ARGS_SIZE              (512)
 #define RESTORE_STACK_REDZONE          (128)
-#define RESTORE_STACK_FRAME            (16 << 10)
-#define RESTORE_THREAD_STACK_SIZE      (16 << 10)
-#define RESTORE_THREAD_HEAP_SIZE       (16 << 10)
+#define RESTORE_STACK_SIGFRAME         (16 << 10)
 #define RESTORE_STACK_SIZE             (32 << 10)
+#define RESTORE_HEAP_SIZE              (16 << 10)
-#define RESTORE_CMD__NONE              0
-#define RESTORE_CMD__GET_ARG_OFFSET    1
-#define RESTORE_CMD__GET_SELF_LEN      2
-#define RESTORE_CMD__PR_ARG_STRING     3
-#define RESTORE_CMD__RESTORE_CORE      4
-#define RESTORE_CMD__RESTORE_THREAD    5
+#define RESTORE_ALIGN_STACK(start, size) \
+        (ALIGN((start) + (size) - sizeof(long), sizeof(long)))
-#define ABI_RED_ZONE 128
+struct restore_mem_zone {
+        u8 redzone[RESTORE_STACK_REDZONE];
+        u8 stack[RESTORE_STACK_SIZE];
+        u8 rt_sigframe[RESTORE_STACK_SIGFRAME];
+        u8 heap[RESTORE_HEAP_SIZE];
+} __aligned(sizeof(long));
-#define align_sigframe(sp) round_down(sp, 16) - 8
+#define first_on_heap(ptr, heap) ((typeof(ptr))heap)
+#define next_on_heap(ptr, prev) ((typeof(ptr))((long)(prev) + sizeof(*(prev))))
-typedef u32 rlock_t;
-#define RLOCK_T(v) rlock_t v __aligned(sizeof(u32)) = 0
 /* Make sure it's pow2 in size */
 struct thread_restore_args {
-        u32 pid;
-        u32 fd_core;
-        rlock_t *lock;
-        u8 stack[RESTORE_THREAD_STACK_SIZE];
-        union {
-                struct core_entry core_entry;
-                u8 heap[RESTORE_THREAD_HEAP_SIZE];
-        } __aligned(sizeof(long));
-        u8 rt_sigframe[RESTORE_STACK_FRAME];
-};
-extern long restore_task(long cmd);
-extern long restore_thread(long cmd, struct thread_restore_args *args);
-typedef long (*task_restore_fcall_t) (long cmd);
-typedef long (*thread_restore_fcall_t) (long cmd, struct thread_restore_args *args);
+        struct restore_mem_zone mem_zone;
+        int pid;
+        int fd_core;
+} __aligned(sizeof(long));
 struct task_restore_core_args {
-        void *self_entry;               /* restorer placed at */
-        void *rt_sigframe;              /* sigframe placed at */
-        long self_size;                 /* size for restorer granted */
-        char core_path[64];
-        char self_vmas_path[64];
-        u32 pid;
-        rlock_t *lock;
+        struct restore_mem_zone mem_zone;
+        int pid;                        /* task pid */
+        int fd_core;                    /* opened core file */
+        int fd_self_vmas;               /* opened file with running VMAs to unmap */
+        bool restore_threads;           /* if to restore threads */
-        /* threads restoration specifics */
+        /* threads restoration */
+        int nr_threads;                 /* number of threads */
         thread_restore_fcall_t clone_restore_fn;        /* helper address for clone() call */
-        long nr_threads;                /* number of threads */
         struct thread_restore_args *thread_args;        /* array of thread arguments */
-};
+} __aligned(sizeof(long));
 struct pt_regs {
         unsigned long r15;
@@ -207,6 +206,8 @@ static void always_inline write_hex_n(unsigned long num)
         unsigned char c;
         int i;
+        c = 'x';
+        sys_write(1, &c, 1);
         for (i = sizeof(long)/sizeof(char) - 1; i >= 0; i--) {
                 c = (s[i] & 0xf0) >> 4;
                 add_ord(c);
@@ -221,29 +222,4 @@ static void always_inline write_hex_n(unsigned long num)
         sys_write(1, &c, 1);
 }
-static always_inline void r_lock(rlock_t *v)
-{
-        while (*v) {
-                asm volatile("lfence");
-                asm volatile("pause");
-        }
-        (*v)++;
-        asm volatile("sfence");
-}
-static always_inline void r_unlock(rlock_t *v)
-{
-        (*v)--;
-        asm volatile("sfence");
-}
-static always_inline void r_wait_unlock(rlock_t *v)
-{
-        while (*v) {
-                asm volatile("lfence");
-                asm volatile("pause");
-        }
-}
 #endif /* CR_RESTORER_H__ */
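The new RESTORE_ALIGN_STACK() macro replaces the old red-zone arithmetic: it yields the topmost long-aligned slot inside a mem_zone stack array, which becomes the %rsp value for the relocated restorer. A small worked example follows; it assumes ALIGN() is the usual round-up-to-power-of-two helper from the project's headers, and the address is invented for illustration.

/* Worked example for RESTORE_ALIGN_STACK(); ALIGN() is assumed to round up. */
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((unsigned long)(a) - 1))
#define RESTORE_ALIGN_STACK(start, size) \
        (ALIGN((start) + (size) - sizeof(long), sizeof(long)))

int main(void)
{
        unsigned long stack = 0x7f0000001000UL; /* hypothetical &mem_zone.stack */
        unsigned long size  = 32 << 10;         /* RESTORE_STACK_SIZE */

        /* Prints 0x7f0000008ff8: the last 8-byte slot of the stack area,
         * i.e. the value loaded into %rsp before calling the restorer. */
        printf("new_sp = %#lx\n", RESTORE_ALIGN_STACK(stack, size));
        return 0;
}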
@@ -17,17 +17,9 @@
 #include "crtools.h"
 #include "restorer.h"
-#define lea_args_off(to, label) \
-        do { \
-                asm volatile( \
-                        "leaq " #label "(%%rip), %%rax \n" \
-                        "movq %%rax, %0 \n" \
-                        : "=m"(to) \
-                        : \
-                        : "memory"); \
-        } while (0)
+/*
+ * Threads restoration via sigreturn.
+ */
 long restore_thread(long cmd, struct thread_restore_args *args)
 {
         long ret = -1;
@@ -36,14 +28,10 @@ long restore_thread(long cmd, struct thread_restore_args *args)
         case RESTORE_CMD__RESTORE_THREAD:
         {
                 struct core_entry *core_entry;
-                struct user_fpregs_entry *fpregs;
-                struct user_regs_entry *gpregs;
                 struct rt_sigframe *rt_sigframe;
                 unsigned long new_sp, fsgs_base;
-                core_entry = &args->core_entry;
+                core_entry = (struct core_entry *)&args->mem_zone.heap;
                 sys_lseek(args->fd_core, MAGIC_OFFSET, SEEK_SET);
                 ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
@@ -52,9 +40,10 @@ long restore_thread(long cmd, struct thread_restore_args *args)
                         goto core_restore_end;
                 }
+                /* We're to close it! */
                 sys_close(args->fd_core);
-                rt_sigframe = (void *)args->rt_sigframe + 8;
+                rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;
 #define CPREGT1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->u.arch.gpregs.d
 #define CPREGT2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->u.arch.gpregs.s
@@ -97,8 +86,6 @@ long restore_thread(long cmd, struct thread_restore_args *args)
                         goto core_restore_end;
                 }
-                //r_unlock(args->lock);
                 new_sp = (long)rt_sigframe + 8;
                 asm volatile(
                         "movq %0, %%rax \n"
@@ -143,37 +130,14 @@ self_len_start:
         goto self_len_end;
 }
-long restore_task(long cmd)
+/*
+ * The main routine to restore task via sigreturn.
+ */
+long restore_task(long cmd, struct task_restore_core_args *args)
 {
         long ret = -1;
-        asm volatile(
-                "jmp 1f \n"
-                "restore_args__: \n"
-                ".skip "__stringify(RESTORE_ARGS_SIZE)",0 \n"
-                "1: \n"
-                :
-                :
-                : "memory");
-#define restore_lea_args_off(to) \
-        lea_args_off(to, restore_args__)
         switch (cmd) {
-        case RESTORE_CMD__PR_ARG_STRING:
-        {
-                char *str = NULL;
-                restore_lea_args_off(str);
-                write_string(str);
-                ret = 0;
-        }
-        break;
-        case RESTORE_CMD__GET_ARG_OFFSET:
-                restore_lea_args_off(ret);
-                break;
         case RESTORE_CMD__GET_SELF_LEN:
                 goto self_len_start;
@@ -188,88 +152,73 @@ self_len_end:
          */
         case RESTORE_CMD__RESTORE_CORE:
         {
-                struct task_restore_core_args *args;
-                int fd_core, fd_thread;
-                int fd_self_vmas;
-                struct core_entry core_entry;
-                struct vma_entry vma_entry;
+                struct core_entry *core_entry;
+                struct vma_entry *vma_entry;
                 u64 va;
-                struct user_fpregs_entry *fpregs;
-                struct user_regs_entry *gpregs;
                 struct rt_sigframe *rt_sigframe;
                 unsigned long new_sp, fsgs_base;
-                restore_lea_args_off(args);
-                write_string_n(args->core_path);
-                write_string_n(args->self_vmas_path);
-                fd_core = sys_open(args->core_path, O_RDONLY, CR_FD_PERM);
-                if (fd_core < 0) {
-                        write_hex_n(__LINE__);
-                        goto core_restore_end;
-                }
-                sys_lseek(fd_core, MAGIC_OFFSET, SEEK_SET);
-                ret = sys_read(fd_core, &core_entry, sizeof(core_entry));
-                if (ret != sizeof(core_entry)) {
-                        write_hex_n(__LINE__);
-                        goto core_restore_end;
-                }
-                fd_self_vmas = sys_open(args->self_vmas_path, O_RDONLY, CR_FD_PERM);
-                if (fd_self_vmas < 0) {
+                core_entry = first_on_heap(core_entry, args->mem_zone.heap);
+                vma_entry = next_on_heap(vma_entry, core_entry);
+#if 0
+                write_hex_n((long)args);
+                write_hex_n((long)args->mem_zone.heap);
+                write_hex_n((long)core_entry);
+                write_hex_n((long)vma_entry);
+#endif
+                sys_lseek(args->fd_core, MAGIC_OFFSET, SEEK_SET);
+                ret = sys_read(args->fd_core, core_entry, sizeof(*core_entry));
+                if (ret != sizeof(*core_entry)) {
                         write_hex_n(__LINE__);
                         goto core_restore_end;
                 }
                 /* Note no magic constant on fd_self_vmas */
-                sys_lseek(fd_self_vmas, 0, SEEK_SET);
+                ret = sys_lseek(args->fd_self_vmas, 0, SEEK_SET);
                 while (1) {
-                        ret = sys_read(fd_self_vmas, &vma_entry, sizeof(vma_entry));
+                        ret = sys_read(args->fd_self_vmas, vma_entry, sizeof(*vma_entry));
                         if (!ret)
                                 break;
-                        if (ret != sizeof(vma_entry)) {
+                        if (ret != sizeof(*vma_entry)) {
                                 write_hex_n(__LINE__);
                                 write_hex_n(ret);
                                 goto core_restore_end;
                         }
-                        if (!vma_entry_is(&vma_entry, VMA_AREA_REGULAR))
+                        if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
                                 continue;
-                        if (sys_munmap((void *)vma_entry.start, vma_entry_len(&vma_entry))) {
+                        if (sys_munmap((void *)vma_entry->start, vma_entry_len(vma_entry))) {
                                 write_hex_n(__LINE__);
                                 goto core_restore_end;
                         }
                 }
-                sys_close(fd_self_vmas);
-                sys_unlink(args->self_vmas_path);
+                sys_close(args->fd_self_vmas);
                 /*
                  * OK, lets try to map new one.
                  */
-                sys_lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
+                sys_lseek(args->fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
                 while (1) {
-                        ret = sys_read(fd_core, &vma_entry, sizeof(vma_entry));
+                        ret = sys_read(args->fd_core, vma_entry, sizeof(*vma_entry));
                         if (!ret)
                                 break;
-                        if (ret != sizeof(vma_entry)) {
+                        if (ret != sizeof(*vma_entry)) {
                                 write_hex_n(__LINE__);
                                 write_hex_n(ret);
                                 goto core_restore_end;
                         }
-                        if (final_vma_entry(&vma_entry))
+                        if (final_vma_entry(vma_entry))
                                 break;
-                        if (vma_entry_is(&vma_entry, VMA_AREA_VDSO)) {
+                        if (vma_entry_is(vma_entry, VMA_AREA_VDSO)) {
                                 ret = sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SETUP_VDSO_AT,
-                                                vma_entry.start, 0, 0);
+                                                vma_entry->start, 0, 0);
                                 if (ret) {
                                         write_hex_n(__LINE__);
                                         write_hex_n(ret);
@@ -278,7 +227,7 @@ self_len_end:
                                 continue;
                         }
-                        if (!vma_entry_is(&vma_entry, VMA_AREA_REGULAR))
+                        if (!vma_entry_is(vma_entry, VMA_AREA_REGULAR))
                                 continue;
                         /*
@@ -287,9 +236,9 @@ self_len_end:
                          * MAP_ANONYMOUS should be eliminated so fd would
                          * be taken into account by a kernel.
                          */
-                        if (vma_entry_is(&vma_entry, VMA_ANON_SHARED)) {
-                                if (vma_entry.fd != -1UL)
-                                        vma_entry.flags &= ~MAP_ANONYMOUS;
+                        if (vma_entry_is(vma_entry, VMA_ANON_SHARED)) {
+                                if (vma_entry->fd != -1UL)
+                                        vma_entry->flags &= ~MAP_ANONYMOUS;
                         }
                         /*
@@ -297,34 +246,34 @@ self_len_end:
                          * writable since we're going to restore page
                          * contents.
                          */
-                        va = sys_mmap((void *)vma_entry.start,
-                                      vma_entry_len(&vma_entry),
-                                      vma_entry.prot | PROT_WRITE,
-                                      vma_entry.flags | MAP_FIXED,
-                                      vma_entry.fd,
-                                      vma_entry.pgoff);
-                        if (va != vma_entry.start) {
+                        va = sys_mmap((void *)vma_entry->start,
+                                      vma_entry_len(vma_entry),
+                                      vma_entry->prot | PROT_WRITE,
+                                      vma_entry->flags | MAP_FIXED,
+                                      vma_entry->fd,
+                                      vma_entry->pgoff);
+                        if (va != vma_entry->start) {
                                 write_hex_n(__LINE__);
-                                write_hex_n(vma_entry.start);
-                                write_hex_n(vma_entry.end);
-                                write_hex_n(vma_entry.prot);
-                                write_hex_n(vma_entry.flags);
-                                write_hex_n(vma_entry.fd);
-                                write_hex_n(vma_entry.pgoff);
+                                write_hex_n(vma_entry->start);
+                                write_hex_n(vma_entry->end);
+                                write_hex_n(vma_entry->prot);
+                                write_hex_n(vma_entry->flags);
+                                write_hex_n(vma_entry->fd);
+                                write_hex_n(vma_entry->pgoff);
                                 write_hex_n(va);
                                 goto core_restore_end;
                         }
-                        if (vma_entry.fd != -1UL)
-                                sys_close(vma_entry.fd);
+                        if (vma_entry->fd != -1UL)
+                                sys_close(vma_entry->fd);
                 }
                 /*
                  * Read page contents.
                  */
                 while (1) {
-                        ret = sys_read(fd_core, &va, sizeof(va));
+                        ret = sys_read(args->fd_core, &va, sizeof(va));
                         if (!ret)
                                 break;
                         if (ret != sizeof(va)) {
@@ -335,7 +284,7 @@ self_len_end:
                         if (final_page_va(va))
                                 break;
-                        ret = sys_read(fd_core, (void *)va, PAGE_SIZE);
+                        ret = sys_read(args->fd_core, (void *)va, PAGE_SIZE);
                         if (ret != PAGE_SIZE) {
                                 write_hex_n(__LINE__);
                                 write_hex_n(ret);
@@ -347,44 +296,51 @@ self_len_end:
                  * Walk though all VMAs again to drop PROT_WRITE
                  * if it was not there.
                  */
-                sys_lseek(fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
+                sys_lseek(args->fd_core, GET_FILE_OFF_AFTER(struct core_entry), SEEK_SET);
                 while (1) {
-                        ret = sys_read(fd_core, &vma_entry, sizeof(vma_entry));
+                        ret = sys_read(args->fd_core, vma_entry, sizeof(*vma_entry));
                         if (!ret)
                                 break;
-                        if (ret != sizeof(vma_entry)) {
+                        if (ret != sizeof(*vma_entry)) {
                                 write_hex_n(__LINE__);
                                 write_hex_n(ret);
                                 goto core_restore_end;
                         }
-                        if (final_vma_entry(&vma_entry))
+                        if (final_vma_entry(vma_entry))
                                 break;
-                        if (!(vma_entry_is(&vma_entry, VMA_AREA_REGULAR)))
+                        if (!(vma_entry_is(vma_entry, VMA_AREA_REGULAR)))
                                 continue;
-                        if (vma_entry.prot & PROT_WRITE)
+                        if (vma_entry->prot & PROT_WRITE)
                                 continue;
-                        sys_mprotect(vma_entry.start,
-                                     vma_entry_len(&vma_entry),
-                                     vma_entry.prot);
+                        sys_mprotect(vma_entry->start,
+                                     vma_entry_len(vma_entry),
+                                     vma_entry->prot);
                 }
-                sys_close(fd_core);
+                sys_close(args->fd_core);
                 /*
                  * Tune up the task fields.
                  */
-                sys_prctl(PR_SET_NAME, (long)core_entry.task_comm, 0, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_CODE, (long)core_entry.mm_start_code, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_END_CODE, (long)core_entry.mm_end_code, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_DATA, (long)core_entry.mm_start_data, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_END_DATA, (long)core_entry.mm_end_data, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_STACK, (long)core_entry.mm_start_stack, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_BRK, (long)core_entry.mm_start_brk, 0, 0);
-                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_BRK, (long)core_entry.mm_brk, 0, 0);
+                sys_prctl(PR_SET_NAME, (long)core_entry->task_comm, 0, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_CODE,
+                          (long)core_entry->mm_start_code, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_END_CODE,
+                          (long)core_entry->mm_end_code, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_DATA,
+                          (long)core_entry->mm_start_data, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_END_DATA,
+                          (long)core_entry->mm_end_data, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_STACK,
+                          (long)core_entry->mm_start_stack, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_START_BRK,
+                          (long)core_entry->mm_start_brk, 0, 0);
+                sys_prctl(PR_CKPT_CTL, PR_CKPT_CTL_SET_MM_BRK,
+                          (long)core_entry->mm_brk, 0, 0);
                 /*
                  * We need to prepare a valid sigframe here, so
@@ -392,10 +348,10 @@ self_len_end:
                  * registers from the frame, set them up and
                  * finally pass execution to the new IP.
                  */
-                rt_sigframe = args->rt_sigframe - sizeof(*rt_sigframe);
-#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = core_entry.u.arch.gpregs.d
-#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry.u.arch.gpregs.s
+                rt_sigframe = (void *)args->mem_zone.rt_sigframe + 8;
+#define CPREG1(d) rt_sigframe->uc.uc_mcontext.d = core_entry->u.arch.gpregs.d
+#define CPREG2(d,s) rt_sigframe->uc.uc_mcontext.d = core_entry->u.arch.gpregs.s
                 CPREG1(r8);
                 CPREG1(r9);
@@ -419,7 +375,7 @@ self_len_end:
                 CPREG1(gs);
                 CPREG1(fs);
-                fsgs_base = core_entry.u.arch.gpregs.fs_base;
+                fsgs_base = core_entry->u.arch.gpregs.fs_base;
                 ret = sys_arch_prctl(ARCH_SET_FS, (void *)fsgs_base);
                 if (ret) {
                         write_hex_n(__LINE__);
@@ -427,7 +383,7 @@ self_len_end:
                         goto core_restore_end;
                 }
-                fsgs_base = core_entry.u.arch.gpregs.gs_base;
+                fsgs_base = core_entry->u.arch.gpregs.gs_base;
                 ret = sys_arch_prctl(ARCH_SET_GS, (void *)fsgs_base);
                 if (ret) {
                         write_hex_n(__LINE__);
@@ -438,7 +394,7 @@ self_len_end:
                 /*
                  * Blocked signals.
                  */
-                rt_sigframe->uc.uc_sigmask.sig[0] = core_entry.task_sigset;
+                rt_sigframe->uc.uc_sigmask.sig[0] = core_entry->task_sigset;
                 /*
                  * Threads restoration. This requires some more comments. This
@@ -446,11 +402,9 @@ self_len_end:
                  * memory map, prepared by a caller code.
                  *
                  * | <-- low addresses                            high addresses --> |
-                 * +------------------------------------------------+-----------------------+
-                 * | own stack | rt_sigframe space | this proc body | thread restore zone   |
-                 * +------------------------------------------------+-----------------------+
-                 * %sp->|                            call %rip ->|
-                 *             params->|
+                 * +-------------------------------------------------------+-----------------------+
+                 * | this proc body | own stack | heap | rt_sigframe space | thread restore zone   |
+                 * +-------------------------------------------------------+-----------------------+
                  *
                  * where each thread restore zone is the following
                  *
@@ -458,10 +412,6 @@ self_len_end:
                  * +--------------------------------------------------------------------------+
                  * | thread restore proc | thread1 stack | thread1 heap | thread1 rt_sigframe |
                  * +--------------------------------------------------------------------------+
-                 * |<- call %rip                   %sp ->|              |
-                 *                           params->|   |              |
-                 *                                       |<- heap       |
-                 *                                       |<- frame
                  */
                 if (args->nr_threads) {
@@ -478,12 +428,9 @@ self_len_end:
                                 if (thread_args[i].pid == args->pid)
                                         continue;
-                                new_sp = (long)thread_args[i].stack +
-                                         sizeof(thread_args[i].stack) -
-                                         ABI_RED_ZONE;
-                                /* Threads will unlock it */
-                                //r_lock(args->lock);
+                                new_sp =
+                                        RESTORE_ALIGN_STACK((long)thread_args[i].mem_zone.stack,
+                                                            sizeof(thread_args[i].mem_zone.stack));
                                 /*
                                  * To achieve functionality like libc's clone()
@@ -535,10 +482,8 @@ self_len_end:
                         }
                 }
-                //r_lock(args->lock);
                 /*
-                 * sigframe is on stack.
+                 * Sigframe stack.
                  */
                 new_sp = (long)rt_sigframe + 8;
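To make the new single-VMA layout concrete, the sketch below spells out the pointer arithmetic that the commit adds to sigreturn_restore(): restorer code goes first, its argument block right after the code, and the thread zone follows the task zone. The formulas are the ones from the diff; the concrete sizes and the base address are invented for the example.

/* Layout sketch for the bootstrap VMA; sizes below are hypothetical. */
#include <stdio.h>

int main(void)
{
        unsigned long exec_mem                = 0x7f4a00000000UL; /* hypothetical mmap() result at exec_mem_hint */
        unsigned long restore_task_code_len   = 0x500;            /* rounded up to 16 */
        unsigned long restore_task_vma_len    = 0x12000;          /* code + sizeof(*task_args), page aligned */
        unsigned long restore_thread_code_len = 0x300;            /* rounded up to 16 */

        unsigned long restore_task_exec_start   = exec_mem;
        unsigned long restore_thread_exec_start = restore_task_exec_start + restore_task_vma_len;
        unsigned long task_args                 = restore_task_exec_start + restore_task_code_len;
        unsigned long thread_args               = restore_thread_exec_start + restore_thread_code_len;

        printf("restore_task code   at %#lx\n", restore_task_exec_start);
        printf("task_args           at %#lx\n", task_args);
        printf("restore_thread code at %#lx\n", restore_thread_exec_start);
        printf("thread_args[]       at %#lx\n", thread_args);
        return 0;
}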