Commit 8b122598 authored by Cyrill Gorcunov

Zap xemul directory

No need for samples anymore
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
parent a896bfd6
From f7e9d28188e7e2fd0f13f2696f29f20d784cb8fd Mon Sep 17 00:00:00 2001
From: root <root@ovzept.sw.ru>
Date: Fri, 3 Jun 2011 18:16:10 +0400
Subject: [PATCH] Image dumping via proc file
---
fs/proc/Kconfig | 8
fs/proc/Makefile | 1
fs/proc/base.c | 3
fs/proc/img_dump.c | 397 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/binfmt_img.h | 87 +++++++++
include/linux/proc_fs.h | 2
6 files changed, 498 insertions(+)
create mode 100644 fs/proc/img_dump.c
create mode 100644 include/linux/binfmt_img.h
Index: linux-2.6.git/fs/proc/Kconfig
===================================================================
--- linux-2.6.git.orig/fs/proc/Kconfig
+++ linux-2.6.git/fs/proc/Kconfig
@@ -67,3 +67,11 @@ config PROC_PAGE_MONITOR
/proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap,
/proc/kpagecount, and /proc/kpageflags. Disabling these
interfaces will reduce the size of the kernel by approximately 4kb.
+
+config PROC_IMG
+ default y
+ depends on PROC_FS
+ bool "Enable /proc/<pid>/dump file"
+ help
+ Say Y here if you want to be able to produce checkpoint-restore images
+ for tasks via proc
Index: linux-2.6.git/fs/proc/Makefile
===================================================================
--- linux-2.6.git.orig/fs/proc/Makefile
+++ linux-2.6.git/fs/proc/Makefile
@@ -28,3 +28,4 @@ proc-$(CONFIG_PROC_VMCORE) += vmcore.o
proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
+proc-$(CONFIG_PROC_IMG) += img_dump.o
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -2983,6 +2983,9 @@ static const struct pid_entry tgid_base_
#endif
INF("cmdline", S_IRUGO, proc_pid_cmdline),
ONE("stat", S_IRUGO, proc_tgid_stat),
+#ifdef CONFIG_PROC_IMG
+ REG("dump", S_IRUSR|S_IWUSR, proc_pid_dump_operations),
+#endif
ONE("statm", S_IRUGO, proc_pid_statm),
REG("maps", S_IRUGO, proc_maps_operations),
#ifdef CONFIG_NUMA
Index: linux-2.6.git/fs/proc/img_dump.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/proc/img_dump.c
@@ -0,0 +1,397 @@
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/uaccess.h>
+#include <linux/binfmt_img.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/highmem.h>
+#include <linux/types.h>
+#include "internal.h"
+
+static int img_dump_buffer(char __user *ubuf, size_t size, void *buf, int len, int pos)
+{
+ int ret;
+ static size_t dumped = 0;
+
+ len -= pos;
+ if (len > size)
+ len = size;
+
+ ret = copy_to_user(ubuf, buf + pos, len);
+ if (ret)
+ return -EFAULT;
+
+ dumped += len;
+ return len;
+}
+
+static int img_dump_header(char __user *buf, size_t size, int pos)
+{
+ struct binfmt_img_header hdr;
+
+ hdr.magic = BINFMT_IMG_MAGIC;
+ hdr.version = BINFMT_IMG_VERS_0;
+
+ return img_dump_buffer(buf, size, &hdr, sizeof(hdr), pos);
+}
+
+static __u16 encode_segment(unsigned short seg)
+{
+ if (seg == 0)
+ return CKPT_X86_SEG_NULL;
+ BUG_ON((seg & 3) != 3);
+
+ if (seg == __USER_CS)
+ return CKPT_X86_SEG_USER64_CS;
+ if (seg == __USER_DS)
+ return CKPT_X86_SEG_USER64_DS;
+#ifdef CONFIG_COMPAT
+ if (seg == __USER32_CS)
+ return CKPT_X86_SEG_USER32_CS;
+ if (seg == __USER32_DS)
+ return CKPT_X86_SEG_USER32_DS;
+#endif
+
+ if (seg & 4)
+ return CKPT_X86_SEG_LDT | (seg >> 3);
+
+ seg >>= 3;
+ if (GDT_ENTRY_TLS_MIN <= seg && seg <= GDT_ENTRY_TLS_MAX)
+ return CKPT_X86_SEG_TLS | (seg - GDT_ENTRY_TLS_MIN);
+
+ printk(KERN_ERR "c/r: (decode) bad segment %#hx\n", seg);
+ BUG();
+}
+
+static __u64 encode_tls(struct desc_struct *d)
+{
+ return ((__u64)d->a << 32) + d->b;
+}
+
+static int img_dump_regs(struct task_struct *p, char __user *buf, size_t size, int pos)
+{
+ struct binfmt_regs_image regi;
+ struct pt_regs *regs;
+ int i;
+
+ regs = task_pt_regs(p);
+
+ regi.r15 = regs->r15;
+ regi.r14 = regs->r14;
+ regi.r13 = regs->r13;
+ regi.r12 = regs->r12;
+ regi.r11 = regs->r11;
+ regi.r10 = regs->r10;
+ regi.r9 = regs->r9;
+ regi.r8 = regs->r8;
+ regi.ax = regs->ax;
+ regi.orig_ax = regs->orig_ax;
+ regi.bx = regs->bx;
+ regi.cx = regs->cx;
+ regi.dx = regs->dx;
+ regi.si = regs->si;
+ regi.di = regs->di;
+ regi.ip = regs->ip;
+ regi.flags = regs->flags;
+ regi.bp = regs->bp;
+ regi.sp = regs->sp;
+
+ /* segments */
+ regi.gsindex = encode_segment(p->thread.gsindex);
+ regi.fsindex = encode_segment(p->thread.fsindex);
+ regi.cs = encode_segment(regs->cs);
+ regi.ss = encode_segment(regs->ss);
+ regi.ds = encode_segment(p->thread.ds);
+ regi.es = encode_segment(p->thread.es);
+
+ BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ regi.tls[i] = encode_tls(&p->thread.tls_array[i]);
+
+ if (p->thread.gsindex)
+ regi.gs = 0;
+ else
+ regi.gs = p->thread.gs;
+
+ if (p->thread.fsindex)
+ regi.fs = 0;
+ else
+ regi.fs = p->thread.fs;
+
+ return img_dump_buffer(buf, size, &regi, sizeof(regi), pos);
+}
+
+static int img_dump_mm(struct mm_struct *mm, char __user *buf, size_t size, int pos)
+{
+ struct binfmt_mm_image mmi;
+
+ mmi.flags = mm->flags;
+ mmi.def_flags = mm->def_flags;
+ mmi.start_code = mm->start_code;
+ mmi.end_code = mm->end_code;
+ mmi.start_data = mm->start_data;
+ mmi.end_data = mm->end_data;
+ mmi.start_brk = mm->start_brk;
+ mmi.brk = mm->brk;
+ mmi.start_stack = mm->start_stack;
+ mmi.arg_start = mm->arg_start;
+ mmi.arg_end = mm->arg_end;
+ mmi.env_start = mm->env_start;
+ mmi.env_end = mm->env_end;
+ mmi.exe_fd = 0;
+
+ return img_dump_buffer(buf, size, &mmi, sizeof(mmi), pos);
+}
+
+static int img_dump_vma(struct vm_area_struct *vma, char __user *buf, size_t size, int pos)
+{
+ struct binfmt_vma_image vmai;
+
+ if (vma == NULL) {
+ memset(&vmai, 0, sizeof(vmai));
+ goto dumpit;
+ }
+
+ printk("Dumping vma %016lx-%016lx %p/%p\n", vma->vm_start, vma->vm_end, vma, vma->vm_mm);
+
+ vmai.fd = 0;
+ vmai.prot = 0;
+ if (vma->vm_flags & VM_READ)
+ vmai.prot |= PROT_READ;
+ if (vma->vm_flags & VM_WRITE)
+ vmai.prot |= PROT_WRITE;
+ if (vma->vm_flags & VM_EXEC)
+ vmai.prot |= PROT_EXEC;
+
+ vmai.flags = 0;
+ if (vma->vm_file == NULL)
+ vmai.flags |= MAP_ANONYMOUS;
+ if (vma->vm_flags & VM_MAYSHARE)
+ vmai.flags |= MAP_SHARED;
+ else
+ vmai.flags |= MAP_PRIVATE;
+
+ vmai.start = vma->vm_start;
+ vmai.end = vma->vm_end;
+ vmai.pgoff = vma->vm_pgoff;
+
+dumpit:
+ return img_dump_buffer(buf, size, &vmai, sizeof(vmai), pos);
+}
+
+static int img_dump_page(unsigned long addr, void *data, char __user *buf, size_t size, int pos)
+{
+ struct binfmt_page_image pgi;
+ int ret = 0, tmp;
+
+ pgi.vaddr = addr;
+
+ if (pos < sizeof(pgi)) {
+ tmp = img_dump_buffer(buf, size, &pgi, sizeof(pgi), pos);
+ if (tmp < 0)
+ return tmp;
+
+ ret = tmp;
+ if (size <= ret)
+ return ret;
+
+ buf += ret;
+ size -= ret;
+ pos = 0;
+ } else
+ pos -= sizeof(pgi);
+
+ tmp = img_dump_buffer(buf, size, data, PAGE_SIZE, pos);
+ if (tmp < 0)
+ return tmp;
+
+ return ret + tmp;
+}
+
+static inline int is_private_vma(struct vm_area_struct *vma)
+{
+ if (vma->vm_file == NULL)
+ return 1;
+ if (!(vma->vm_flags & VM_SHARED))
+ return 1;
+ return 0;
+}
+
+static ssize_t do_produce_dump(struct task_struct *p, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ size_t img_pos = 0, img_ppos;
+ size_t produced = 0;
+ int len;
+ loff_t pos = *ppos;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ /* Emits header, regs, mm, VMA list (zero record last), pages (zero record last); *ppos is the resume offset. */
+#define move_pos(); do { \
+ buf += len; \
+ produced += len;\
+ size -= len; \
+ pos += len; \
+ } while (0)
+
+#define seek_pos(__size); do { \
+ img_ppos = img_pos; \
+ img_pos += (__size); \
+ } while (0)
+
+ /* header */
+ seek_pos(sizeof(struct binfmt_img_header));
+ if (pos < img_pos) {
+ len = img_dump_header(buf, size, pos - img_ppos);
+ if (len < 0)
+ goto err;
+
+ move_pos();
+ if (size == 0)
+ goto out;
+ }
+
+ /* registers */
+ seek_pos(sizeof(struct binfmt_regs_image));
+ if (pos < img_pos) {
+ len = img_dump_regs(p, buf, size, pos - img_ppos);
+ if (len < 0)
+ goto err;
+
+ move_pos();
+ if (size == 0)
+ goto out;
+ }
+
+ /* memory */
+ mm = get_task_mm(p);
+ if (mm == NULL)
+ return -EACCES;
+
+ down_read(&mm->mmap_sem);
+
+ seek_pos(sizeof(struct binfmt_mm_image));
+ if (pos < img_pos) {
+ len = img_dump_mm(mm, buf, size, pos - img_ppos);
+ if (len < 0)
+ goto err_mm;
+
+ move_pos();
+ if (size == 0)
+ goto out_mm;
+ }
+
+ vma = mm->mmap;
+ while (1) {
+ seek_pos(sizeof(struct binfmt_vma_image));
+ if (pos < img_pos) {
+ len = img_dump_vma(vma, buf, size, pos - img_ppos);
+ if (len < 0)
+ goto err_mm;
+
+ move_pos();
+ if (size == 0)
+ goto out_mm;
+ }
+
+ if (vma == NULL)
+ break;
+
+ vma = vma->vm_next;
+ }
+
+ for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) {
+ /* slow and stupid */
+ unsigned long addr;
+ struct page *page;
+ void *pg_data;
+
+ if (!is_private_vma(vma))
+ continue;
+
+ for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
+ page = follow_page(vma, addr, FOLL_FORCE | FOLL_DUMP | FOLL_GET);
+ if (page == NULL)
+ continue;
+ if (IS_ERR(page)) /* huh? */
+ continue;
+
+ seek_pos(sizeof(struct binfmt_page_image) + PAGE_SIZE);
+ if (pos < img_pos) {
+ pg_data = kmap(page);
+ len = img_dump_page(addr, pg_data, buf, size, pos - img_ppos);
+ kunmap(page);
+
+ if (len < 0) {
+ put_page(page);
+ goto err_mm;
+ }
+
+ move_pos();
+ if (size == 0) {
+ put_page(page);
+ goto out_mm;
+ }
+ }
+
+ put_page(page);
+ }
+ }
+
+ seek_pos(sizeof(struct binfmt_page_image));
+ if (pos < img_pos) {
+ struct binfmt_page_image zero;
+
+ memset(&zero, 0, sizeof(zero));
+ len = img_dump_buffer(buf, size, &zero, sizeof(zero), pos - img_ppos);
+ if (len < 0)
+ goto err_mm; /* mmap_sem and the mm reference are still held here */
+
+ move_pos();
+ }
+
+out_mm:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+out:
+ *ppos = pos;
+ return produced;
+
+err_mm:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+err:
+ return len;
+}
+
+static ssize_t img_dump_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
+{
+ struct task_struct *p;
+ ssize_t ret = -EINVAL; /* returned when the task is not stopped */
+
+ p = get_proc_task(file->f_dentry->d_inode);
+ if (p == NULL)
+ return -ESRCH;
+
+ if (p->state & TASK_STOPPED) /* a dump is produced only for a stopped task */
+ ret = do_produce_dump(p, buf, size, ppos);
+
+ put_task_struct(p); /* drop the get_proc_task() reference on every path */
+ return ret;
+}
+
+static int img_dump_open(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+static int img_dump_release(struct inode *inode, struct file *filp)
+{
+ return 0;
+}
+
+const struct file_operations proc_pid_dump_operations = {
+ .open = img_dump_open,
+ .read = img_dump_read,
+ .release = img_dump_release,
+};
Index: linux-2.6.git/include/linux/binfmt_img.h
===================================================================
--- /dev/null
+++ linux-2.6.git/include/linux/binfmt_img.h
@@ -0,0 +1,87 @@
+#ifndef __BINFMT_IMG_H__
+#define __BINFMT_IMG_H__
+
+#include <linux/types.h>
+
+struct binfmt_img_header {
+ __u32 magic;
+ __u32 version;
+};
+
+#define CKPT_TLS_ENTRIES 3
+
+struct binfmt_regs_image {
+ __u64 r15;
+ __u64 r14;
+ __u64 r13;
+ __u64 r12;
+ __u64 r11;
+ __u64 r10;
+ __u64 r9;
+ __u64 r8;
+ __u64 ax;
+ __u64 orig_ax;
+ __u64 bx;
+ __u64 cx;
+ __u64 dx;
+ __u64 si;
+ __u64 di;
+ __u64 ip;
+ __u64 flags;
+ __u64 bp;
+ __u64 sp;
+
+ __u64 gs;
+ __u64 fs;
+ __u64 tls[CKPT_TLS_ENTRIES];
+ __u16 gsindex;
+ __u16 fsindex;
+ __u16 cs;
+ __u16 ss;
+ __u16 ds;
+ __u16 es;
+};
+
+#define CKPT_X86_SEG_NULL 0
+#define CKPT_X86_SEG_USER32_CS 1
+#define CKPT_X86_SEG_USER32_DS 2
+#define CKPT_X86_SEG_USER64_CS 3
+#define CKPT_X86_SEG_USER64_DS 4
+#define CKPT_X86_SEG_TLS 0x4000
+#define CKPT_X86_SEG_LDT 0x8000
+
+struct binfmt_mm_image {
+ __u64 flags;
+ __u64 def_flags;
+ __u64 start_code;
+ __u64 end_code;
+ __u64 start_data;
+ __u64 end_data;
+ __u64 start_brk;
+ __u64 brk;
+ __u64 start_stack;
+ __u64 arg_start;
+ __u64 arg_end;
+ __u64 env_start;
+ __u64 env_end;
+ __u32 exe_fd;
+};
+
+struct binfmt_vma_image {
+ __u32 prot;
+ __u32 flags;
+ __u32 pad;
+ __u32 fd;
+ __u64 start;
+ __u64 end;
+ __u64 pgoff;
+};
+
+struct binfmt_page_image {
+ __u64 vaddr;
+};
+
+#define BINFMT_IMG_MAGIC 0xa75b8d43
+#define BINFMT_IMG_VERS_0 0x00000100
+
+#endif
Index: linux-2.6.git/include/linux/proc_fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/proc_fs.h
+++ linux-2.6.git/include/linux/proc_fs.h
@@ -102,6 +102,8 @@ struct vmcore {
#ifdef CONFIG_PROC_FS
+extern const struct file_operations proc_pid_dump_operations;
+
extern void proc_root_init(void);
void proc_flush_task(struct task_struct *task);
From 0f8e07457aa91e9461665440ca258eb9f93bf2f9 Mon Sep 17 00:00:00 2001
From: root <root@ovzept.sw.ru>
Date: Fri, 3 Jun 2011 18:16:43 +0400
Subject: [PATCH] Images execution binfmt handler
---
fs/Kconfig.binfmt | 6 +
fs/Makefile | 1 +
fs/binfmt_img.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 331 insertions(+), 0 deletions(-)
create mode 100644 fs/binfmt_img.c
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7..0b2f48e 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -161,3 +161,9 @@ config BINFMT_MISC
You may say M here for module support and later load the module when
you have use for it; the module is called binfmt_misc. If you
don't know what to answer at this point, say Y.
+
+config BINFMT_IMG
+ tristate "Kernel support for IMG binaries"
+ depends on X86
+ help
+ Say M/Y here to enable support for checkpoint-restore images execution
diff --git a/fs/Makefile b/fs/Makefile
index fb68c2b..8221719 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
+obj-$(CONFIG_BINFMT_IMG) += binfmt_img.o
# binfmt_script is always there
obj-y += binfmt_script.o
diff --git a/fs/binfmt_img.c b/fs/binfmt_img.c
new file mode 100644
index 0000000..9b09797
--- /dev/null
+++ b/fs/binfmt_img.c
@@ -0,0 +1,324 @@
+#include <linux/binfmt_img.h>
+#include <linux/module.h>
+#include <linux/binfmts.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+/*
+ * The binary handler to save and restore a single task state
+ */
+
+static int img_check_header(void *buf)
+{
+ struct binfmt_img_header *hdr = buf;
+
+ if (hdr->magic != BINFMT_IMG_MAGIC)
+ return -ENOEXEC;
+
+ if (hdr->version != BINFMT_IMG_VERS_0)
+ return -EINVAL;
+
+ return sizeof(*hdr);
+}
+
+static unsigned short decode_segment(__u16 seg)
+{
+ if (seg == CKPT_X86_SEG_NULL)
+ return 0;
+
+ if (seg == CKPT_X86_SEG_USER64_CS)
+ return __USER_CS;
+ if (seg == CKPT_X86_SEG_USER64_DS)
+ return __USER_DS;
+#ifdef CONFIG_COMPAT
+ if (seg == CKPT_X86_SEG_USER32_CS)
+ return __USER32_CS;
+ if (seg == CKPT_X86_SEG_USER32_DS)
+ return __USER32_DS;
+#endif
+
+ if (seg & CKPT_X86_SEG_TLS) {
+ seg &= ~CKPT_X86_SEG_TLS;
+ return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;
+ }
+ if (seg & CKPT_X86_SEG_LDT) {
+ seg &= ~CKPT_X86_SEG_LDT;
+ return (seg << 3) | 7;
+ }
+ BUG();
+}
+
+static void decode_tls(struct desc_struct *d, __u64 val)
+{
+ d->a = (unsigned int)(val >> 32);
+ d->b = (unsigned int)(val & 0xFFFFFFFF);
+}
+
+static int img_restore_regs(struct linux_binprm *bprm, loff_t off, struct pt_regs *regs)
+{
+ int ret, i;
+ struct binfmt_regs_image regi;
+ struct thread_struct *th = &current->thread;
+ unsigned short seg;
+
+ ret = kernel_read(bprm->file, off, (char *)&regi, sizeof(regi));
+ if (ret != sizeof(regi))
+ return -EIO;
+
+ regs->r15 = regi.r15;
+ regs->r14 = regi.r14;
+ regs->r13 = regi.r13;
+ regs->r12 = regi.r12;
+ regs->r11 = regi.r11;
+ regs->r10 = regi.r10;
+ regs->r9 = regi.r9;
+ regs->r8 = regi.r8;
+ regs->ax = regi.ax;
+ regs->orig_ax = regi.orig_ax;
+ regs->bx = regi.bx;
+ regs->cx = regi.cx;
+ regs->dx = regi.dx;
+ regs->si = regi.si;
+ regs->di = regi.di;
+ regs->ip = regi.ip;
+ regs->flags = regi.flags;
+ regs->bp = regi.bp;
+ regs->sp = regi.sp;
+
+ regs->cs = decode_segment(regi.cs);
+ regs->ss = decode_segment(regi.ss);
+
+ th->usersp = regi.sp;
+ th->ds = decode_segment(regi.ds);
+ th->es = decode_segment(regi.es);
+ th->fsindex = decode_segment(regi.fsindex);
+ th->gsindex = decode_segment(regi.gsindex);
+
+ th->fs = regi.fs;
+ th->gs = regi.gs;
+
+ BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ decode_tls(&th->tls_array[i], regi.tls[i]);
+
+ load_TLS(th, smp_processor_id());
+
+ seg = th->fsindex;
+ loadsegment(fs, seg);
+ savesegment(fs, seg);
+ if (seg != th->fsindex) {
+ printk("ERROR saving fs selector want %x, has %x\n",
+ (unsigned int)th->fsindex, (unsigned int)seg);
+ return -EFAULT;
+ }
+
+ if (th->fs)
+ wrmsrl(MSR_FS_BASE, th->fs);
+ load_gs_index(th->gsindex);
+ if (th->gs)
+ wrmsrl(MSR_KERNEL_GS_BASE, th->gs);
+
+ return sizeof(regi);
+}
+
+static int img_restore_mm(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct binfmt_mm_image mmi;
+ struct mm_struct *mm = current->mm;
+
+ ret = kernel_read(bprm->file, off, (char *)&mmi, sizeof(mmi));
+ if (ret != sizeof(mmi))
+ return -EIO;
+
+ mm->flags = mmi.flags;
+ mm->def_flags = mmi.def_flags;
+ mm->start_code = mmi.start_code;
+ mm->end_code = mmi.end_code;
+ mm->start_data = mmi.start_data;
+ mm->end_data = mmi.end_data;
+ mm->start_brk = mmi.start_brk;
+ mm->brk = mmi.brk;
+ mm->start_stack = mmi.start_stack;
+ mm->arg_start = mmi.arg_start;
+ mm->arg_end = mmi.arg_end;
+ mm->env_start = mmi.env_start;
+ mm->env_end = mmi.env_end;
+
+ if (mmi.exe_fd != 0) {
+ struct file *f;
+
+ f = fget(mmi.exe_fd);
+ if (f == NULL)
+ return -EBADF;
+
+ fput(mm->exe_file);
+ mm->exe_file = f;
+ }
+
+ return sizeof(mmi);
+}
+
+static int img_restore_vmas(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+ int len = 0;
+
+ do_munmap(mm, 0, TASK_SIZE);
+
+ while (1) {
+ struct binfmt_vma_image vmai;
+ unsigned long addr;
+ struct file *file = NULL;
+
+ len += sizeof(vmai);
+
+ ret = kernel_read(bprm->file, off, (char *)&vmai, sizeof(vmai));
+ if (ret != sizeof(vmai))
+ return -EIO;
+
+ if (vmai.start == 0 && vmai.end == 0)
+ break;
+
+ if (vmai.fd != 0) {
+ file = fget(vmai.fd);
+ if (file == NULL)
+ return -EBADF;
+ } else
+ vmai.flags |= MAP_ANONYMOUS;
+
+ if (vmai.start <= mm->start_stack && vmai.end >= mm->start_stack)
+ vmai.flags |= MAP_GROWSDOWN;
+
+ addr = do_mmap_pgoff(file, vmai.start, vmai.end - vmai.start,
+ vmai.prot, vmai.flags | MAP_FIXED, vmai.pgoff);
+
+ if (vmai.fd) {
+ fput(file);
+ do_close(vmai.fd);
+ }
+
+ if ((long)addr < 0 || (addr != vmai.start))
+ return -ENXIO;
+
+ off += sizeof(vmai);
+ }
+
+ return len;
+}
+
+static int img_restore_pages(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+ int len = 0;
+
+ while (1) {
+ struct binfmt_page_image pgi;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *pg_data;
+
+ ret = kernel_read(bprm->file, off, (char *)&pgi, sizeof(pgi));
+ if (ret != sizeof(pgi))
+ return -EIO;
+
+ len += sizeof(pgi);
+ if (pgi.vaddr == 0)
+ break;
+
+ vma = find_vma(mm, pgi.vaddr);
+ if (vma == NULL)
+ return -ESRCH;
+
+ ret = get_user_pages(current, current->mm, (unsigned long)pgi.vaddr,
+ 1, 1, 1, &page, NULL);
+ if (ret != 1)
+ return -EFAULT;
+
+ pg_data = kmap(page);
+ ret = kernel_read(bprm->file, off + sizeof(pgi), pg_data, PAGE_SIZE);
+ kunmap(page);
+ put_page(page);
+
+ if (ret != PAGE_SIZE)
+ return -EFAULT;
+
+ len += PAGE_SIZE;
+ off += sizeof(pgi) + PAGE_SIZE;
+ }
+
+ return len;
+}
+
+static int img_restore_mem(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ loff_t len = off;
+
+ ret = img_restore_mm(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_vmas(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_pages(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ return len;
+
+}
+
+static int img_load_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+{
+ int ret;
+ loff_t len = 0;
+
+ ret = img_check_header(bprm->buf);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_regs(bprm, len, regs);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_mem(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static struct linux_binfmt img_binfmt = {
+ .module = THIS_MODULE,
+ .load_binary = img_load_binary,
+};
+
+static __init int img_binfmt_init(void)
+{
+ return register_binfmt(&img_binfmt);
+}
+
+static __exit void img_binfmt_exit(void)
+{
+ unregister_binfmt(&img_binfmt);
+}
+
+module_init(img_binfmt_init);
+module_exit(img_binfmt_exit);
+MODULE_LICENSE("GPL");
--
1.5.5.6
/*
 * On-disk record layouts of a checkpoint/restore image.
 * Every record is declared __packed so the compiler inserts no padding.
 */
#ifndef __BINFMT_IMG_H__
#define __BINFMT_IMG_H__
#include <linux/types.h>
/* GCC-style attribute: lay members out back to back, without padding. */
#define __packed __attribute__((packed))
/* Leading record; magic and version are matched against the constants below. */
struct binfmt_img_header {
__u32 magic;
__u32 version;
__u16 arch;
__u16 flags;
} __packed;
/* Number of TLS descriptors stored per task. */
#define CKPT_TLS_ENTRIES 3
/*
 * Saved register state. The union with dummy[32] fixes the record size at
 * 32 * 8 = 256 bytes, which is larger than the named fields in r need.
 */
struct binfmt_regs_image {
union {
struct {
__u64 r15;
__u64 r14;
__u64 r13;
__u64 r12;
__u64 r11;
__u64 r10;
__u64 r9;
__u64 r8;
__u64 ax;
__u64 orig_ax;
__u64 bx;
__u64 cx;
__u64 dx;
__u64 si;
__u64 di;
__u64 ip;
__u64 flags;
__u64 bp;
__u64 sp;
__u64 gs;
__u64 fs;
__u64 tls[CKPT_TLS_ENTRIES];
/* The six selectors below hold CKPT_X86_SEG_* encoded values. */
__u16 gsindex;
__u16 fsindex;
__u16 cs;
__u16 ss;
__u16 ds;
__u16 es;
} r;
__u64 dummy[32];
};
} __packed;
/*
 * Encoded segment selector values. NULL and the four USER* values are
 * used as-is; TLS and LDT are flag bits OR-ed with an entry index.
 */
#define CKPT_X86_SEG_NULL 0
#define CKPT_X86_SEG_USER32_CS 1
#define CKPT_X86_SEG_USER32_DS 2
#define CKPT_X86_SEG_USER64_CS 3
#define CKPT_X86_SEG_USER64_DS 4
#define CKPT_X86_SEG_TLS 0x4000
#define CKPT_X86_SEG_LDT 0x8000
/* Address-space layout values copied field by field from the task's mm. */
struct binfmt_mm_image {
__u64 flags;
__u64 def_flags;
__u64 start_code;
__u64 end_code;
__u64 start_data;
__u64 end_data;
__u64 start_brk;
__u64 brk;
__u64 start_stack;
__u64 arg_start;
__u64 arg_end;
__u64 env_start;
__u64 env_end;
__u32 exe_fd;
} __packed;
/*
 * One memory mapping: prot and flags carry PROT_* and MAP_* bits, start/end
 * the address range. A record with start == 0 and end == 0 ends the list.
 */
struct binfmt_vma_image {
__u32 prot;
__u32 flags;
__u32 pad;
__u32 fd;
__u64 start;
__u64 end;
__u64 pgoff;
} __packed;
/*
 * Prefix of one dumped page: the page's virtual address. The page contents
 * follow the record in the stream; vaddr == 0 ends the page list.
 */
struct binfmt_page_image {
__u64 vaddr;
} __packed;
/* Expected values of binfmt_img_header.magic and .version. */
#define BINFMT_IMG_MAGIC 0xa75b8d43
#define BINFMT_IMG_VERS_0 0x00000100
#endif
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <linux/kdev_t.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <linux/types.h>
#include "img_structs.h"
static int fdinfo_img;
static int pages_img;
static int core_img;
static int shmem_img;
static int pipes_img;
#define PIPEFS_MAGIC 0x50495045
/*
 * Create one "<kind>-<pid>.img" file in the current directory and, when
 * @magic is non-NULL, write that 32-bit magic as its first four bytes.
 *
 * O_EXCL makes creation fail if the file already exists.
 * Returns the open descriptor, or -1 after printing a diagnostic.
 */
static int open_img_file(const char *kind, int pid, const __u32 *magic)
{
	char name[64];
	int fd;

	snprintf(name, sizeof(name), "%s-%d.img", kind, pid);
	fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (fd < 0) {
		fprintf(stderr, "Can't open %s: %s\n", kind, strerror(errno));
		return -1;
	}

	if (magic != NULL &&
	    write(fd, magic, sizeof(*magic)) != (ssize_t)sizeof(*magic)) {
		fprintf(stderr, "Can't write %s magic: %s\n", kind, strerror(errno));
		close(fd);
		return -1;
	}

	return fd;
}

/*
 * Open the five per-task image files (fdinfo, pages, core, shmem, pipes)
 * and store their descriptors in the file-scope *_img variables.
 *
 * All files except the core image start with a type magic; the core image
 * is left empty here and is filled later by dump_task_state().
 *
 * Returns 0 on success, 1 on the first failure. Files opened before the
 * failure stay open.
 */
static int prep_img_files(int pid)
{
	__u32 type;

	type = FDINFO_MAGIC;
	fdinfo_img = open_img_file("fdinfo", pid, &type);
	if (fdinfo_img < 0)
		return 1;

	/* The original reported a "shmem" failure here by mistake. */
	type = PAGES_MAGIC;
	pages_img = open_img_file("pages", pid, &type);
	if (pages_img < 0)
		return 1;

	core_img = open_img_file("core", pid, NULL);
	if (core_img < 0)
		return 1;

	type = SHMEM_MAGIC;
	shmem_img = open_img_file("shmem", pid, &type);
	if (shmem_img < 0)
		return 1;

	type = PIPES_MAGIC;
	pipes_img = open_img_file("pipes", pid, &type);
	if (pipes_img < 0)
		return 1;

	return 0;
}
/*
 * Called on the failure paths of dump_one_task(). Presumably meant to
 * remove the image files created for @pid, but it is not implemented:
 * the body is empty, so partially written images stay on disk.
 */
static void kill_imgfiles(int pid)
{
/* FIXME */
}
/*
 * Send SIGSTOP to @pid.
 * Returns the result of kill(): 0 on success, -1 on failure.
 */
static int stop_task(int pid)
{
	int rc = kill(pid, SIGSTOP);

	return rc;
}
/*
 * Send SIGCONT to @pid. A failure is only reported on stderr.
 */
static void continue_task(int pid)
{
	if (kill(pid, SIGCONT) == 0)
		return;

	perror("Can't cont task");
}
static char big_tmp_str[PATH_MAX];
/*
 * Read the file position and open flags of descriptor @fd (a decimal
 * string) of task @pid from /proc/<pid>/fdinfo/<fd>.
 *
 * The text is read into the shared big_tmp_str scratch buffer and parsed
 * as "pos:\t<decimal>\nflags:\t<octal>\n".
 *
 * Returns 0 and fills *pos and *flags on success; returns 1 if the file
 * cannot be opened, read or parsed.
 */
static int read_fd_params(int pid, char *fd, unsigned long *pos, unsigned int *flags)
{
	char fd_str[128];
	long long fpos;
	ssize_t len;
	int ifd;

	snprintf(fd_str, sizeof(fd_str), "/proc/%d/fdinfo/%s", pid, fd);
	printf("\tGetting fdinfo for fd %s\n", fd);

	ifd = open(fd_str, O_RDONLY);
	if (ifd < 0) {
		perror("Can't open fdinfo");
		return 1;
	}

	/* Leave room for the terminator sscanf() needs. */
	len = read(ifd, big_tmp_str, sizeof(big_tmp_str) - 1);
	if (len < 0) {
		perror("Can't read fdinfo");
		close(ifd);
		return 1;
	}
	close(ifd);
	big_tmp_str[len] = '\0';

	/* Parse into a long long so the conversion matches its argument type. */
	if (sscanf(big_tmp_str, "pos:\t%lld\nflags:\t%o\n", &fpos, flags) != 2) {
		fprintf(stderr, "Can't parse fdinfo for fd %s\n", fd);
		return 1;
	}

	*pos = (unsigned long)fpos;
	return 0;
}
/*
 * Append one fdinfo record for a regular file to the fdinfo image.
 *
 * @type:    record type stored in the entry
 * @fd_name: identifier stored in the entry's addr field (callers pass a
 *           descriptor number or a mapping start address)
 * @lfd:     local descriptor referring to the file; its path is resolved
 *           through /proc/self/fd/<lfd>
 * @lclose:  when non-zero this function closes @lfd, on failure as well
 * @pos, @flags: file position and open flags stored in the entry
 *
 * The record is a struct fdinfo_entry followed by the path bytes, without
 * a terminator. The path is built in the shared big_tmp_str buffer.
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_one_reg_file(int type, unsigned long fd_name, int lfd,
		int lclose, unsigned long pos, unsigned int flags)
{
	char fd_str[128];
	struct fdinfo_entry e;
	int len;

	snprintf(fd_str, sizeof(fd_str), "/proc/self/fd/%d", lfd);
	len = readlink(fd_str, big_tmp_str, sizeof(big_tmp_str) - 1);
	if (len < 0) {
		perror("Can't readlink fd");
		/* The caller handed over ownership, so do not leak it. */
		if (lclose)
			close(lfd);
		return 1;
	}
	big_tmp_str[len] = '\0';

	printf("\tDumping path for %lx fd via self %d [%s]\n", fd_name, lfd, big_tmp_str);

	if (lclose)
		close(lfd);

	e.type = type;
	e.addr = fd_name;
	e.len = len;
	e.pos = pos;
	e.flags = flags;

	if (write(fdinfo_img, &e, sizeof(e)) != (ssize_t)sizeof(e) ||
	    write(fdinfo_img, big_tmp_str, len) != len) {
		perror("Can't write fdinfo entry");
		return 1;
	}

	return 0;
}
#define MAX_PIPE_BUF_SIZE 1024 /* FIXME - this is not so */
#define SPLICE_F_NONBLOCK 0x2
/*
 * Write pipe record @e, followed by the data currently queued in the pipe
 * behind @lfd, to the pipes image.
 *
 * The data is duplicated with tee() into a temporary pipe, so the original
 * pipe keeps its contents, and then spliced into the image. At most
 * MAX_PIPE_BUF_SIZE bytes are captured. An empty pipe (EAGAIN from the
 * non-blocking tee) is recorded with e->bytes == 0.
 *
 * Returns 0 on success, 1 on failure. The temporary pipe is closed on
 * every path.
 */
static int dump_pipe_and_data(int lfd, struct pipes_entry *e)
{
	int steal_pipe[2];
	int ret;
	int err = 1;

	printf("\tDumping data from pipe %x\n", e->pipeid);

	if (pipe(steal_pipe) < 0) {
		perror("Can't create pipe for stealing data");
		return 1;
	}

	ret = tee(lfd, steal_pipe[1], MAX_PIPE_BUF_SIZE, SPLICE_F_NONBLOCK);
	if (ret < 0) {
		if (errno != EAGAIN) {
			perror("Can't pick pipe data");
			goto out;
		}
		ret = 0;
	}

	e->bytes = ret;
	if (write(pipes_img, e, sizeof(*e)) != (ssize_t)sizeof(*e)) {
		perror("Can't write pipe entry");
		goto out;
	}

	if (ret) {
		ret = splice(steal_pipe[0], NULL, pipes_img, NULL, ret, 0);
		if (ret < 0) {
			perror("Can't push pipe data");
			goto out;
		}
	}

	err = 0;
out:
	close(steal_pipe[0]);
	close(steal_pipe[1]);
	return err;
}
/*
 * Record one pipe end of the dumped task.
 *
 * @fd is the descriptor number inside the task, @lfd our local descriptor
 * for the same pipe, @id the pipe identifier and @flags its open flags.
 * When O_WRONLY is set in @flags only the bare entry (bytes == 0) is
 * written; otherwise the entry and queued data go through
 * dump_pipe_and_data().
 */
static int dump_one_pipe(int fd, int lfd, unsigned int id, unsigned int flags)
{
	struct pipes_entry e;

	printf("\tDumping pipe %d/%x flags %x\n", fd, id, flags);

	e.fd = fd;
	e.pipeid = id;
	e.flags = flags;

	if (!(flags & O_WRONLY))
		return dump_pipe_and_data(lfd, &e);

	e.bytes = 0;
	write(pipes_img, &e, sizeof(e));
	return 0;
}
/*
 * Dump one open descriptor of the target task.
 *
 * @dir is a descriptor for the task's /proc/<pid>/fd directory, @fd_name
 * the entry name inside it (the descriptor number as text), @pos and
 * @flags the values to store in the dumped record.
 *
 * Regular files go to dump_one_reg_file(), pipefs FIFOs to
 * dump_one_pipe(). Descriptors 0-3 of any other type are skipped and
 * reported as success; anything else is an error.
 *
 * Returns 0 on success or skip, 1 on failure. The descriptor opened here
 * is closed on every path, except that for regular files the close is
 * delegated to dump_one_reg_file() (lclose = 1).
 */
static int dump_one_fd(int dir, char *fd_name, unsigned long pos, unsigned int flags)
{
	static const struct {
		const char *name;
		const char *what;
	} skipped[] = {
		{ "0", "stdin" },
		{ "1", "stdout" },
		{ "2", "stderr" },
		{ "3", "tty" },
	};
	struct stat st_buf;
	struct statfs stfs_buf;
	size_t i;
	int fd;
	int ret = 1;

	printf("\tDumping fd %s\n", fd_name);

	fd = openat(dir, fd_name, O_RDONLY);
	if (fd == -1) {
		printf("Tried to openat %d/%d %s\n", getpid(), dir, fd_name);
		perror("Can't open fd");
		return 1;
	}

	if (fstat(fd, &st_buf) < 0) {
		perror("Can't stat one");
		goto out_close;
	}

	/* The callee owns fd from here on. */
	if (S_ISREG(st_buf.st_mode))
		return dump_one_reg_file(FDINFO_FD, atoi(fd_name), fd, 1, pos, flags);

	if (S_ISFIFO(st_buf.st_mode)) {
		if (fstatfs(fd, &stfs_buf) < 0) {
			perror("Can't statfs one");
			goto out_close;
		}
		if (stfs_buf.f_type == PIPEFS_MAGIC) {
			ret = dump_one_pipe(atoi(fd_name), fd, st_buf.st_ino, flags);
			goto out_close;
		}
	}

	for (i = 0; i < sizeof(skipped) / sizeof(skipped[0]); i++) {
		if (strcmp(fd_name, skipped[i].name) == 0) {
			printf("\tSkipping %s\n", skipped[i].what);
			ret = 0;
			goto out_close;
		}
	}

	fprintf(stderr, "Can't dump file %s of that type [%x]\n", fd_name, st_buf.st_mode);

out_close:
	close(fd);
	return ret;
}
/*
 * Dump every open descriptor of task @pid by walking /proc/<pid>/fd.
 *
 * Entries whose name starts with '.' are skipped. For each remaining
 * entry the position and flags are read with read_fd_params() and the
 * descriptor is dumped with dump_one_fd().
 *
 * Returns 0 on success, -1 if the directory cannot be opened, 1 if any
 * descriptor fails. The directory stream is closed on every path.
 */
static int dump_task_files(int pid)
{
	char pid_fd_dir[64];
	DIR *fd_dir;
	struct dirent *de;
	unsigned long pos;
	unsigned int flags;
	int ret = 0;

	printf("Dumping open files for %d\n", pid);

	snprintf(pid_fd_dir, sizeof(pid_fd_dir), "/proc/%d/fd", pid);
	fd_dir = opendir(pid_fd_dir);
	if (fd_dir == NULL) {
		perror("Can't open fd dir");
		return -1;
	}

	while ((de = readdir(fd_dir)) != NULL) {
		if (de->d_name[0] == '.')
			continue;

		if (read_fd_params(pid, de->d_name, &pos, &flags) ||
		    dump_one_fd(dirfd(fd_dir), de->d_name, pos, flags)) {
			ret = 1;
			break;
		}
	}

	closedir(fd_dir);
	return ret;
}
#define PAGE_SIZE 4096
#define PAGE_RSS 0x1
/*
 * Parse a run of hexadecimal digits (no "0x" prefix, either case) at the
 * start of @str.
 *
 * Returns the parsed value, 0 when @str does not start with a hex digit.
 * When @end is non-NULL it receives a pointer to the first character that
 * is not a hex digit. Overflow is not detected.
 */
static unsigned long rawhex(char *str, char **end)
{
	unsigned long val = 0;
	char *p;

	for (p = str; ; p++) {
		char c = *p;
		unsigned int digit;

		if (c >= '0' && c <= '9')
			digit = c - '0';
		else if (c >= 'a' && c <= 'f')
			digit = c - 'a' + 0xA;
		else if (c >= 'A' && c <= 'F')
			digit = c - 'A' + 0xA;
		else
			break;

		val = (val << 4) + digit;
	}

	if (end)
		*end = p;
	return val;
}
/*
 * Extract the offset and length of a mapping from one /proc/<pid>/maps
 * line of the form "<start>-<end> <perms> <offset> ...".
 *
 * On success *pgoff receives the offset field and *len receives
 * end - start. Both must be multiples of PAGE_SIZE.
 *
 * Any malformed input makes the program print "BUG" and exit(1); the
 * function never returns an error to the caller.
 */
static void map_desc_parm(char *desc, unsigned long *pgoff, unsigned long *len)
{
	char *s;
	unsigned long start, end;

	start = rawhex(desc, &s);
	if (*s != '-') {
		goto bug;
	}

	end = rawhex(s + 1, &s);
	if (*s != ' ') {
		goto bug;
	}

	/* Skip the permissions field; a line without it is malformed. */
	s = strchr(s + 1, ' ');
	if (s == NULL) {
		goto bug;
	}

	*pgoff = rawhex(s + 1, &s);
	if (*s != ' ') {
		goto bug;
	}

	if (start > end) {
		goto bug;
	}

	*len = end - start;
	if (*len % PAGE_SIZE) {
		goto bug;
	}
	if (*pgoff % PAGE_SIZE) {
		goto bug;
	}
	return;

bug:
	fprintf(stderr, "BUG\n");
	exit(1);
}
/*
 * Write the resident pages of a file-backed mapping to the pages image.
 *
 * @lfd:   descriptor of the backing object
 * @start: virtual address of the mapping in the dumped task; used only to
 *         compute the address stored with each page
 * @pgoff: offset into @lfd at which to map
 * @len:   length of the mapping in bytes
 *
 * The object is mapped privately and read-only, mincore() reports which
 * pages are resident, and each resident page is written as an 8-byte
 * virtual address followed by PAGE_SIZE bytes of data.
 *
 * Returns 0 on success, 1 on failure. The temporary mapping and the
 * residency vector are released on every path.
 */
static int dump_map_pages(int lfd, unsigned long start, unsigned long pgoff, unsigned long len)
{
	size_t nrpages, pfn;
	unsigned char *mem;
	unsigned char *mc;
	void *map;
	int ret = 1;

	printf("\t\tDumping pages start %lx len %lx off %lx\n", start, len, pgoff);

	map = mmap(NULL, len, PROT_READ, MAP_FILE | MAP_PRIVATE, lfd, pgoff);
	if (map == MAP_FAILED) {
		perror("Can't map");
		return 1;
	}
	mem = map;

	nrpages = len / PAGE_SIZE;
	mc = malloc(nrpages);
	if (mc == NULL) {
		perror("Can't allocate mincore vector");
		goto out_unmap;
	}

	if (mincore(map, len, mc)) {
		perror("Can't mincore mapping");
		goto out_free;
	}

	for (pfn = 0; pfn < nrpages; pfn++) {
		__u64 vaddr;

		if (!(mc[pfn] & PAGE_RSS))
			continue;

		/* size_t arithmetic: the offset must not wrap at 4 GiB. */
		vaddr = start + pfn * PAGE_SIZE;
		if (write(pages_img, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr) ||
		    write(pages_img, mem + pfn * PAGE_SIZE, PAGE_SIZE) != PAGE_SIZE) {
			perror("Can't write page");
			goto out_free;
		}
	}

	ret = 0;
out_free:
	free(mc);
out_unmap:
	munmap(map, len);
	return ret;
}
/*
 * Anonymous private mappings are not dumped by this function; it only
 * logs that the mapping starting at @start is skipped. Always returns 0.
 */
static int dump_anon_private_map(char *start)
{
	const int ok = 0;

	printf("\tSkipping anon private mapping at %s\n", start);

	return ok;
}
/*
 * Dump an anonymous shared mapping.
 *
 * @_start: mapping start address as a hex string
 * @mdesc:  the full maps line, used for offset and length
 * @lfd:    descriptor of the object backing the mapping; always closed
 *          before returning
 * @st:     stat of @lfd; st_ino is recorded as the shmem id
 *
 * Writes a struct shmem_entry to the shmem image and the resident pages
 * to the pages image. Returns 0 on success, 1 on failure.
 */
static int dump_anon_shared_map(char *_start, char *mdesc, int lfd, struct stat *st)
{
	unsigned long pgoff, len;
	unsigned long start;
	struct shmem_entry e;
	int ret = 1;

	map_desc_parm(mdesc, &pgoff, &len);
	start = rawhex(_start, NULL);

	e.start = start;
	e.end = start + len;
	e.shmid = st->st_ino;
	if (write(shmem_img, &e, sizeof(e)) != (ssize_t)sizeof(e)) {
		perror("Can't write shmem entry");
		goto out;
	}

	if (dump_map_pages(lfd, start, pgoff, len))
		goto out;

	ret = 0;
out:
	close(lfd);
	return ret;
}
/*
 * File-backed shared mappings are not dumped; log the skip and release
 * @lfd. @mdesc is unused. Always returns 0.
 */
static int dump_file_shared_map(char *start, char *mdesc, int lfd)
{
	(void)mdesc;

	printf("\tSkipping file shared mapping at %s\n", start);
	close(lfd);

	return 0;
}
/*
 * Dump a file-backed private mapping by recording the path of its
 * backing file as an FDINFO_MAP entry keyed by the mapping start address.
 *
 * @_start: mapping start address as a hex string
 * @mdesc:  the full maps line; parsed only for validation
 *          (map_desc_parm() exits the program on a malformed line)
 * @lfd:    descriptor of the backing file; always closed before returning
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_file_private_map(char *_start, char *mdesc, int lfd)
{
	unsigned long pgoff, len;
	unsigned long start;
	int ret;

	map_desc_parm(mdesc, &pgoff, &len);
	start = rawhex(_start, NULL);

	/* lclose = 0: the descriptor is released below, on failure as well. */
	ret = dump_one_reg_file(FDINFO_MAP, start, lfd, 0, 0, O_RDONLY) ? 1 : 0;

	close(lfd);
	return ret;
}
/*
 * Dump one mapping described by @mdesc, a line of /proc/<pid>/maps
 * ("<start>-<end> <perms> ...").
 *
 * The mapping's backing object is opened by its start address inside
 * @mfd_dir. ENOENT there means the mapping has no backing object and is
 * treated as anonymous private. Otherwise the handler is chosen from the
 * device major of the backing file and the 'p'/'s' permission character.
 *
 * Returns 0 on success, 1 on failure. The descriptor opened here is
 * either handed to one of the dump_*_map() helpers or closed before
 * returning.
 */
static int dump_one_mapping(char *mdesc, DIR *mfd_dir)
{
	char *flags, *dash;
	char map_start[32];
	size_t start_len;
	int lfd;
	struct stat st_buf;

	/* Validate the line shape before indexing into it. */
	dash = strchr(mdesc, '-');
	flags = strchr(mdesc, ' ');
	if (dash == NULL || flags == NULL || dash > flags ||
	    strlen(flags + 1) < 4) {
		fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
		return 1;
	}
	flags++;

	start_len = (size_t)(dash - mdesc);
	if (start_len >= sizeof(map_start)) {
		fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
		return 1;
	}
	memcpy(map_start, mdesc, start_len);
	map_start[start_len] = '\0';

	printf("\tDumping %s\n", map_start);

	lfd = openat(dirfd(mfd_dir), map_start, O_RDONLY);
	if (lfd == -1) {
		if (errno != ENOENT) {
			perror("Can't open mapping");
			return 1;
		}
		if (flags[3] != 'p') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			return 1;
		}
		return dump_anon_private_map(map_start);
	}

	if (fstat(lfd, &st_buf) < 0) {
		perror("Can't stat mapping!");
		goto err_close;
	}

	if (!S_ISREG(st_buf.st_mode)) {
		/* errno is not set by this check, so perror() would mislead. */
		fprintf(stderr, "Can't handle non-regular mapping\n");
		goto err_close;
	}

	if (MAJOR(st_buf.st_dev) == 0) {
		if (flags[3] != 's') {
			fprintf(stderr, "Bogus mapping [%s]\n", mdesc);
			goto err_close;
		}
		/* FIXME - this can be tmpfs visible file mapping */
		return dump_anon_shared_map(map_start, mdesc, lfd, &st_buf);
	}

	if (flags[3] == 'p')
		return dump_file_private_map(map_start, mdesc, lfd);

	return dump_file_shared_map(map_start, mdesc, lfd);

err_close:
	close(lfd);
	return 1;
}
/*
 * Dump all mappings of task @pid: every line of /proc/<pid>/maps is
 * passed to dump_one_mapping() together with the /proc/<pid>/mfd
 * directory stream.
 *
 * Lines are read into the shared big_tmp_str buffer.
 *
 * Returns 0 on success, -1 if the mfd directory cannot be opened, 1 on
 * any other failure. Both the directory and the maps stream are closed on
 * every path.
 */
static int dump_task_ext_mm(int pid)
{
	char path[64];
	DIR *mfd_dir;
	FILE *maps;
	int ret = 1;

	printf("Dumping mappings for %d\n", pid);

	snprintf(path, sizeof(path), "/proc/%d/mfd", pid);
	mfd_dir = opendir(path);
	if (mfd_dir == NULL) {
		perror("Can't open mfd dir");
		return -1;
	}

	snprintf(path, sizeof(path), "/proc/%d/maps", pid);
	maps = fopen(path, "r");
	if (maps == NULL) {
		perror("Can't open maps file");
		goto out_dir;
	}

	while (fgets(big_tmp_str, sizeof(big_tmp_str), maps) != NULL) {
		if (dump_one_mapping(big_tmp_str, mfd_dir))
			goto out_maps;
	}

	ret = 0;
out_maps:
	fclose(maps);
out_dir:
	closedir(mfd_dir);
	return ret;
}
/*
 * Copy the kernel-produced state image of task @pid, read from
 * /proc/<pid>/kstate_dump, into the core image file.
 *
 * The stream is copied through a 4096-byte anonymous mapping until read()
 * reports end of file. Short writes are retried.
 *
 * Returns 0 on success, 1 on failure. The dump descriptor and the bounce
 * buffer are released on every path.
 */
static int dump_task_state(int pid)
{
	char path[64];
	char *mem;
	void *map;
	int dump_fd;
	int ret = 1;

	printf("Dumping task image for %d\n", pid);

	snprintf(path, sizeof(path), "/proc/%d/kstate_dump", pid);
	dump_fd = open(path, O_RDONLY);
	if (dump_fd < 0) {
		perror("Can't open dump file");
		return 1;
	}

	/* fd is -1: some implementations require that with MAP_ANON. */
	map = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
	if (map == MAP_FAILED) {
		perror("Can't get mem");
		goto out_close;
	}
	mem = map;

	while (1) {
		ssize_t r, w;

		r = read(dump_fd, mem, 4096);
		if (r == 0) {
			ret = 0;
			break;
		}
		if (r < 0) {
			perror("Can't read dump file");
			break;
		}

		for (w = 0; w < r; ) {
			ssize_t n = write(core_img, mem + w, r - w);

			if (n <= 0) {
				perror("Can't write core");
				goto out_unmap;
			}
			w += n;
		}
	}

out_unmap:
	munmap(map, 4096);
out_close:
	close(dump_fd);
	return ret;
}
/*
 * Dump a single task: files, mappings, then kernel state.
 *
 * @pid:  task to dump
 * @stop: when non-zero the task is stopped before and continued after
 *        the dump
 *
 * Image files are removed with kill_imgfiles() if anything after
 * prep_img_files() fails. Returns 0 on success, 1 on failure.
 */
static int dump_one_task(int pid, int stop)
{
	int failed;

	printf("Dumping task %d\n", pid);

	if (prep_img_files(pid))
		return 1;

	if (stop && stop_task(pid)) {
		kill_imgfiles(pid);
		return 1;
	}

	/* short-circuit keeps the original order and stops at first failure */
	failed = dump_task_files(pid) ||
		 dump_task_ext_mm(pid) ||
		 dump_task_state(pid);

	if (stop)
		continue_task(pid);

	if (failed) {
		kill_imgfiles(pid);
		return 1;
	}

	printf("Dump is complete\n");
	return 0;
}
/* Descriptor of the pstree image opened by dump_all_tasks() */
static int pstree_fd;
/* Scratch buffer shared by the dumper for building paths and reading lines */
static char big_tmp_str[4096];
/* Pids collected by dump_pid_and_children() and the number of them */
static int *pids, nr_pids;
/*
 * Read the "Children:" line from /proc/<pid>/status.
 *
 * Returns a malloc()ed copy of the text following the "Children:" tag
 * and one separator character, with the last character of the line
 * replaced by a space. Returns NULL if the file can't be opened, the
 * line is missing or memory can't be allocated. The caller frees the
 * result.
 */
static char *get_children_pids(int pid)
{
	FILE *f;
	size_t len;
	char *ret = NULL, *tmp;

	sprintf(big_tmp_str, "/proc/%d/status", pid);
	f = fopen(big_tmp_str, "r");
	if (f == NULL)
		return NULL;

	while (fgets(big_tmp_str, sizeof(big_tmp_str), f) != NULL) {
		if (strncmp(big_tmp_str, "Children:", 9))
			continue;

		/* skip the tag, and the separator after it only if it exists */
		tmp = big_tmp_str + 9;
		if (*tmp != '\0')
			tmp++;

		len = strlen(tmp);
		ret = malloc(len + 1);
		if (ret == NULL) {
			perror("Can't allocate children list");
			break;
		}

		memcpy(ret, tmp, len + 1);
		if (len)
			ret[len - 1] = ' ';
		break;
	}

	fclose(f);
	return ret;
}
static int dump_pid_and_children(int pid)
{
struct pstree_entry e;
char *chlist, *tmp, *tmp2;
printf("\tReading %d children list\n", pid);
chlist = get_children_pids(pid);
if (chlist == NULL)
return 1;
printf("\t%d has children %s\n", pid, chlist);
e.pid = pid;
e.nr_children = 0;
pids = realloc(pids, (nr_pids + 1) * sizeof(int));
pids[nr_pids++] = e.pid;
tmp = chlist;
while ((tmp = strchr(tmp, ' ')) != NULL) {
tmp++;
e.nr_children++;
}
write(pstree_fd, &e, sizeof(e));
tmp = chlist;
while (1) {
__u32 cpid;
cpid = strtol(tmp, &tmp, 10);
if (cpid == 0)
break;
if (*tmp != ' ') {
fprintf(stderr, "Error in string with children!\n");
return 1;
}
write(pstree_fd, &cpid, sizeof(cpid));
tmp++;
}
tmp = chlist;
while ((tmp2 = strchr(tmp, ' ')) != NULL) {
*tmp2 = '\0';
if (dump_pid_and_children(atoi(tmp)))
return 1;
tmp = tmp2 + 1;
}
free(chlist);
return 0;
}
static int __dump_all_tasks(void)
{
int i, pid;
printf("Dumping tasks' images for");
for (i = 0; i < nr_pids; i++)
printf(" %d", pids[i]);
printf("\n");
printf("Stopping tasks\n");
for (i = 0; i < nr_pids; i++)
if (stop_task(pids[i]))
goto err;
for (i = 0; i < nr_pids; i++) {
if (dump_one_task(pids[i], 0))
goto err;
}
printf("Resuming tasks\n");
for (i = 0; i < nr_pids; i++)
continue_task(pids[i]);
return 0;
err:
for (i = 0; i < nr_pids; i++)
continue_task(pids[i]);
return 1;
}
/*
 * Dump the process tree rooted at @pid.
 *
 * Creates pstree-<pid>.img (fails if it already exists), writes the
 * magic and the tree via dump_pid_and_children(), then dumps every
 * collected task. The pstree descriptor is closed on all paths.
 *
 * Returns 0 on success, 1 on failure.
 */
static int dump_all_tasks(int pid)
{
	__u32 type;
	int ret;

	pids = NULL;
	nr_pids = 0;

	printf("Dumping process tree, start from %d\n", pid);

	sprintf(big_tmp_str, "pstree-%d.img", pid);
	pstree_fd = open(big_tmp_str, O_WRONLY | O_CREAT | O_EXCL, 0600);
	if (pstree_fd < 0) {
		perror("Can't create pstree");
		return 1;
	}

	type = PSTREE_MAGIC;
	if (write(pstree_fd, &type, sizeof(type)) != (ssize_t)sizeof(type)) {
		perror("Can't write pstree magic");
		close(pstree_fd);
		return 1;
	}

	ret = dump_pid_and_children(pid);
	close(pstree_fd);
	if (ret)
		return 1;

	return __dump_all_tasks();
}
/*
 * Dumper entry point: "-p <pid>" dumps one task (stopping it first),
 * "-t <pid>" dumps the whole tree starting at <pid>. Anything else
 * prints the usage line and returns 1.
 */
int main(int argc, char **argv)
{
	if (argc == 3 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'p':
			return dump_one_task(atoi(argv[2]), 1);
		case 't':
			return dump_all_tasks(atoi(argv[2]));
		default:
			break;
		}
	}

	printf("Usage: %s (-p|-t) <pid>\n", argv[0]);
	return 1;
}
#include <stdio.h>
#include <unistd.h>
#include <signal.h>
#include <dirent.h>
#include <string.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include <linux/kdev_t.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <sys/sendfile.h>
#define PAGE_SIZE 4096
#include <linux/types.h>
#include "img_structs.h"
#include "binfmt_img.h"
/*
 * A file opened for a file-backed mapping: the mapping start address
 * and the descriptor opened for it, linked into a singly linked list.
 */
struct fmap_fd {
unsigned long start;
int fd;
struct fmap_fd *next;
};
/* Head of the list; open_fmap() pushes entries, pop_fmap_fd() removes them */
static struct fmap_fd *fmap_fds;
/*
 * One shared memory region collected from the shmem images.
 * start/end/id come from the image entry; pid is the lowest pid seen
 * for the region (see try_to_add_shmem); real_pid stays 0 until
 * shmem_update_real_pid() fills it in.
 */
struct shmem_info {
unsigned long start;
unsigned long end;
unsigned long id;
int pid;
int real_pid;
};
/* Table of regions and its fill count; the table is mapped in prepare_shared() */
static struct shmem_info *shmems;
static int nr_shmems;
/*
 * One pipe collected from the pipes images.
 * pid is the lowest pid seen for the pipe id (see try_to_add_pipe);
 * real_pid, read_fd and write_fd are filled in by create_pipe();
 * users counts the image entries referring to the pipe and is
 * decremented by attach_pipe().
 */
struct pipes_info {
unsigned int id;
int pid;
int real_pid;
int read_fd;
int write_fd;
int users;
};
/* Table of pipes and its fill count; the table is mapped in prepare_shared() */
static struct pipes_info *pipes;
static int nr_pipes;
/* Forward declaration: do_child() calls it before its definition */
static int restore_task_with_children(int my_pid, char *pstree_path);
/*
 * Clone flag passed by fork_with_pid() together with a pointer to the
 * wanted pid.
 * NOTE(review): not a mainline flag; presumably provided by the kernel
 * patch this tool accompanies -- confirm.
 */
#define CLONE_CHILD_USEPID 0x02000000
static void show_saved_shmems(void)
{
int i;
printf("\tSaved shmems:\n");
for (i = 0; i < nr_shmems; i++)
printf("\t\t%016lx %lx %d\n", shmems[i].start, shmems[i].id, shmems[i].pid);
}
static void show_saved_pipes(void)
{
int i;
printf("\tSaved pipes:\n");
for (i = 0; i < nr_pipes; i++)
printf("\t\t%x -> %d\n", pipes[i].id, pipes[i].pid);
}
/*
 * Find the collected shmem region with the given id whose
 * [start, end] range (both ends inclusive) contains @addr.
 * Returns NULL when there is none.
 */
static struct shmem_info *search_shmem(unsigned long addr, unsigned long id)
{
	int idx;

	for (idx = 0; idx < nr_shmems; idx++) {
		struct shmem_info *cand = &shmems[idx];

		if (addr < cand->start || addr > cand->end)
			continue;
		if (cand->id != id)
			continue;
		return cand;
	}

	return NULL;
}
/* Find the collected pipe with the given id; NULL when not found. */
static struct pipes_info *search_pipes(unsigned int pipeid)
{
	int idx = 0;

	while (idx < nr_pipes) {
		if (pipes[idx].id == pipeid)
			return &pipes[idx];
		idx++;
	}

	return NULL;
}
static void shmem_update_real_pid(int vpid, int rpid)
{
int i;
for (i = 0; i < nr_shmems; i++)
if (shmems[i].pid == vpid)
shmems[i].real_pid = rpid;
}
/*
 * Wait until the region's real_pid is set, then open
 * /proc/<real_pid>/mfd/0x<start>, retrying with a growing delay while
 * the file does not exist yet.
 *
 * Returns the opened descriptor (any value >= 0, including 0) or -1
 * when open() fails with something other than ENOENT.
 */
static int shmem_wait_and_open(struct shmem_info *si)
{
	/* FIXME - not good */
	char path[128];
	unsigned long time = 1000;

	sleep(1);
	while (si->real_pid == 0)
		usleep(time);

	sprintf(path, "/proc/%d/mfd/0x%lx", si->real_pid, si->start);
	while (1) {
		int ret;

		ret = open(path, O_RDWR);
		/* descriptor 0 is a valid result too */
		if (ret >= 0)
			return ret;

		if (errno != ENOENT) {
			perror(" Can't stat shmem");
			return -1;
		}

		printf("Waiting for [%s] to appear\n", path);
		if (time < 20000000)
			time <<= 1;
		usleep(time);
	}
}
static int try_to_add_shmem(int pid, struct shmem_entry *e)
{
int i;
for (i = 0; i < nr_shmems; i++) {
if (shmems[i].start != e->start || shmems[i].id != e->shmid)
continue;
if (shmems[i].end != e->end) {
printf("Bogus shmem\n");
return 1;
}
if (shmems[i].pid > pid)
shmems[i].pid = pid;
return 0;
}
if ((nr_shmems + 1) * sizeof(struct shmem_info) >= 4096) {
printf("OOM storing shmems\n");
return 1;
}
shmems[nr_shmems].start = e->start;
shmems[nr_shmems].end = e->end;
shmems[nr_shmems].id = e->shmid;
shmems[nr_shmems].pid = pid;
shmems[nr_shmems].real_pid = 0;
nr_shmems++;
return 0;
}
static int try_to_add_pipe(int pid, struct pipes_entry *e, int p_fd)
{
int i;
for (i = 0; i < nr_pipes; i++) {
if (pipes[i].id != e->pipeid)
continue;
if (pipes[i].pid > pid)
pipes[i].pid = pid;
pipes[i].users++;
return 0;
}
if ((nr_pipes + 1) * sizeof(struct pipes_info) >= 4096) {
printf("OOM storing pipes\n");
return 1;
}
pipes[nr_pipes].id = e->pipeid;
pipes[nr_pipes].pid = pid;
pipes[nr_pipes].real_pid = 0;
pipes[nr_pipes].read_fd = 0;
pipes[nr_pipes].write_fd = 0;
pipes[nr_pipes].users = 1;
nr_pipes++;
return 0;
}
/*
 * Read shmem-<pid>.img and register every entry with
 * try_to_add_shmem().
 *
 * The magic read is checked, a truncated entry is reported separately
 * from a read error, and the descriptor is closed on every path.
 *
 * Returns 0 on success, 1 on failure.
 */
static int prepare_shmem_pid(int pid)
{
	char path[64];
	int sh_fd;
	int ret = 1;
	__u32 type = 0;

	sprintf(path, "shmem-%d.img", pid);
	sh_fd = open(path, O_RDONLY);
	if (sh_fd < 0) {
		perror("Can't open shmem info");
		return 1;
	}

	if (read(sh_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != SHMEM_MAGIC) {
		/* errno is meaningless for a wrong magic, so no perror() */
		fprintf(stderr, "Bad shmem magic\n");
		goto out;
	}

	while (1) {
		struct shmem_entry e;
		ssize_t n;

		n = read(sh_fd, &e, sizeof(e));
		if (n == 0)
			break;
		if (n != (ssize_t)sizeof(e)) {
			if (n < 0)
				perror("Can't read shmem entry");
			else
				fprintf(stderr, "Can't read shmem entry: truncated image\n");
			goto out;
		}

		if (try_to_add_shmem(pid, &e))
			goto out;
	}

	ret = 0;
out:
	close(sh_fd);
	return ret;
}
/*
 * Read pipes-<pid>.img and register every entry with
 * try_to_add_pipe(). The pipe data following each entry (e.bytes
 * bytes) is skipped.
 *
 * The descriptor is closed on every path.
 * Returns 0 on success, 1 on failure.
 */
static int prepare_pipes_pid(int pid)
{
	char path[64];
	int p_fd;
	int ret = 1;
	__u32 type = 0;

	sprintf(path, "pipes-%d.img", pid);
	p_fd = open(path, O_RDONLY);
	if (p_fd < 0) {
		perror("Can't open pipes image");
		return 1;
	}

	if (read(p_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != PIPES_MAGIC) {
		/* errno is meaningless for a wrong magic, so no perror() */
		fprintf(stderr, "Bad pipes magic\n");
		goto out;
	}

	while (1) {
		struct pipes_entry e;
		ssize_t n;

		n = read(p_fd, &e, sizeof(e));
		if (n == 0)
			break;
		if (n != (ssize_t)sizeof(e)) {
			/* report errno first, before anything can overwrite it */
			if (n < 0)
				perror("Can't read pipes entry");
			fprintf(stderr, "Read pipes for %s failed %d of %d read\n",
				path, (int)n, (int)sizeof(e));
			goto out;
		}

		if (try_to_add_pipe(pid, &e, p_fd))
			goto out;

		lseek(p_fd, e.bytes, SEEK_CUR);
	}

	ret = 0;
out:
	close(p_fd);
	return ret;
}
static int prepare_shared(int ps_fd)
{
printf("Preparing info about shared resources\n");
nr_shmems = 0;
shmems = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
if (shmems == MAP_FAILED) {
perror("Can't map shmems");
return 1;
}
pipes = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON, 0, 0);
if (pipes == MAP_FAILED) {
perror("Can't map pipes");
return 1;
}
while (1) {
struct pstree_entry e;
int ret;
ret = read(ps_fd, &e, sizeof(e));
if (ret == 0)
break;
if (ret != sizeof(e)) {
perror("Can't read ps");
return 1;
}
if (prepare_shmem_pid(e.pid))
return 1;
if (prepare_pipes_pid(e.pid))
return 1;
lseek(ps_fd, e.nr_children * sizeof(__u32), SEEK_CUR);
}
lseek(ps_fd, sizeof(__u32), SEEK_SET);
show_saved_shmems();
show_saved_pipes();
return 0;
}
/*
 * Unlink and return the first fmap_fds entry whose start equals
 * @start; NULL when there is none. The lookup result is also printed.
 */
static struct fmap_fd *pop_fmap_fd(unsigned long start)
{
	struct fmap_fd **link = &fmap_fds;

	printf("Looking for %lx : ", start);

	while (*link != NULL) {
		struct fmap_fd *cur = *link;

		if (cur->start == start) {
			*link = cur->next;
			printf("found\n");
			return cur;
		}
		link = &cur->next;
	}

	printf("not found\n");
	return NULL;
}
/*
 * Read the fe->len bytes of path that follow the entry in @fd, open
 * that path with fe->flags and seek the new descriptor to fe->pos.
 *
 * Returns the new descriptor, or -1 on a short read or open failure.
 */
static int open_fe_fd(struct fdinfo_entry *fe, int fd)
{
	char path[PATH_MAX];
	int opened;

	if (read(fd, path, fe->len) != fe->len) {
		fprintf(stderr, "Error reading path");
		return -1;
	}
	path[fe->len] = '\0';

	opened = open(path, fe->flags);
	if (opened >= 0) {
		lseek(opened, fe->pos, SEEK_SET);
		return opened;
	}

	perror("Can't open file");
	return -1;
}
/*
 * Make @new_fd refer to what @old_fd refers to.
 *
 * When the numbers differ, @old_fd is duplicated onto @new_fd and then
 * closed. Returns @new_fd, or the negative dup2() result on failure
 * (in which case @old_fd stays open).
 */
static int reopen_fd(int old_fd, int new_fd)
{
	int rc;

	if (old_fd == new_fd)
		return new_fd;

	rc = dup2(old_fd, new_fd);
	if (rc < 0)
		return rc;

	close(old_fd);
	return new_fd;
}
/*
 * Restore one regular file descriptor described by @fe.
 *
 * If the image descriptor *@cfd occupies the number the file must end
 * up on, it is duplicated first and *@cfd is updated to the copy. The
 * file is then opened and moved onto descriptor (int)fe->addr.
 *
 * Returns 0 on success, 1 on failure.
 */
static int open_fd(int pid, struct fdinfo_entry *fe, int *cfd)
{
	int wanted = (int)fe->addr;
	int opened;

	if (*cfd == wanted) {
		int moved = dup(*cfd);

		if (moved < 0) {
			perror("Can't dup file");
			return 1;
		}

		printf("%s: Dup for %d\n", __func__, moved);
		*cfd = moved;
	}

	opened = open_fe_fd(fe, *cfd);
	if (opened < 0)
		return 1;

	if (reopen_fd(opened, wanted) < 0) {
		perror("Can't dup");
		return 1;
	}

	return 0;
}
/*
 * Open the file behind a file mapping described by @fe and remember
 * the (start address, descriptor) pair at the head of fmap_fds.
 *
 * Returns 0 on success, 1 if the file can't be opened or the list
 * node can't be allocated (the opened file is closed in that case).
 */
static int open_fmap(int pid, struct fdinfo_entry *fe, int fd)
{
	int tmp;
	struct fmap_fd *new;

	tmp = open_fe_fd(fe, fd);
	if (tmp < 0)
		return 1;

	printf("%d:\t\tWill map %lx to %d\n", pid, (unsigned long)fe->addr, tmp);

	new = malloc(sizeof(*new));
	if (new == NULL) {
		perror("Can't allocate file mapping entry");
		close(tmp);
		return 1;
	}

	new->start = fe->addr;
	new->fd = tmp;
	new->next = fmap_fds;
	fmap_fds = new;

	return 0;
}
/*
 * Restore the files listed in fdinfo-<pid>.img.
 *
 * FDINFO_FD entries are reopened on their original descriptor numbers
 * (open_fd() may move the image descriptor out of the way, hence the
 * pointer), FDINFO_MAP entries are opened and queued for the vma fixup.
 * The image descriptor is closed on every path.
 *
 * Returns 0 on success, 1 on failure.
 */
static int prepare_fds(int pid)
{
	__u32 mag = 0;
	char path[64];
	int fdinfo_fd;
	int ret = 1;

	printf("%d: Opening files\n", pid);

	sprintf(path, "fdinfo-%d.img", pid);
	fdinfo_fd = open(path, O_RDONLY);
	if (fdinfo_fd < 0) {
		perror("Can't open fdinfo");
		return 1;
	}

	if (read(fdinfo_fd, &mag, sizeof(mag)) != (ssize_t)sizeof(mag) ||
	    mag != FDINFO_MAGIC) {
		fprintf(stderr, "Bad file\n");
		goto out;
	}

	while (1) {
		ssize_t n;
		struct fdinfo_entry fe;

		n = read(fdinfo_fd, &fe, sizeof(fe));
		if (n == 0) {
			ret = 0;
			break;
		}

		if (n < 0) {
			perror("Can't read file");
			break;
		}

		if (n != (ssize_t)sizeof(fe)) {
			fprintf(stderr, "Error reading\n");
			break;
		}

		printf("\t%d: Got fd for %lx type %d namelen %d\n", pid,
		       (unsigned long)fe.addr, fe.type, fe.len);

		if (fe.type == FDINFO_FD) {
			if (open_fd(pid, &fe, &fdinfo_fd))
				break;
		} else if (fe.type == FDINFO_MAP) {
			if (open_fmap(pid, &fe, fdinfo_fd))
				break;
		} else {
			fprintf(stderr, "Some bullshit in a file\n");
			break;
		}
	}

out:
	close(fdinfo_fd);
	return ret;
}
/*
 * Per-task record of a shmem region: its address range and id, as read
 * from this task's shmem image by prepare_shmem().
 */
struct shmem_to_id {
unsigned long addr;
unsigned long end;
unsigned long id;
struct shmem_to_id *next;
};
/* Head of the per-task list; save_shmem_id() pushes new entries here */
static struct shmem_to_id *my_shmem_ids;
/*
 * Return the id of the first entry in my_shmem_ids whose [addr, end]
 * range (both ends inclusive) contains @addr, or 0 when none does.
 */
static unsigned long find_shmem_id(unsigned long addr)
{
	struct shmem_to_id *cur = my_shmem_ids;

	while (cur != NULL) {
		if (addr >= cur->addr && addr <= cur->end)
			return cur->id;
		cur = cur->next;
	}

	return 0;
}
/*
 * Push a copy of the region described by @e onto my_shmem_ids.
 *
 * The function cannot report failure to its caller, so an allocation
 * failure terminates the process with exit(1) rather than writing
 * through a NULL pointer.
 */
static void save_shmem_id(struct shmem_entry *e)
{
	struct shmem_to_id *si;

	si = malloc(sizeof(*si));
	if (si == NULL) {
		perror("Can't allocate shmem id");
		exit(1);
	}

	si->addr = e->start;
	si->end = e->end;
	si->id = e->shmid;
	si->next = my_shmem_ids;
	my_shmem_ids = si;
}
/*
 * Read shmem-<pid>.img and remember every region in the per-task list
 * via save_shmem_id().
 *
 * The magic read is checked and the descriptor is closed on every
 * path. Returns 0 on success, 1 on failure.
 */
static int prepare_shmem(int pid)
{
	char path[64];
	int sh_fd;
	int ret = 1;
	__u32 type = 0;

	sprintf(path, "shmem-%d.img", pid);
	sh_fd = open(path, O_RDONLY);
	if (sh_fd < 0) {
		perror("Can't open shmem info");
		return 1;
	}

	if (read(sh_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != SHMEM_MAGIC) {
		/* errno is meaningless for a wrong magic, so no perror() */
		fprintf(stderr, "Bad shmem magic\n");
		goto out;
	}

	while (1) {
		struct shmem_entry e;
		ssize_t n;

		n = read(sh_fd, &e, sizeof(e));
		if (n == 0)
			break;
		if (n != (ssize_t)sizeof(e)) {
			if (n < 0)
				perror("Can't read shmem entry");
			else
				fprintf(stderr, "Can't read shmem entry: truncated image\n");
			goto out;
		}

		save_shmem_id(&e);
	}

	ret = 0;
out:
	close(sh_fd);
	return ret;
}
/*
 * If a file was opened for the vma starting at vi->start, store its
 * descriptor in vi->fd and rewrite the vma record just read from @fd
 * in place.
 *
 * Returns 0 when nothing had to be done or the rewrite succeeded,
 * 1 if the write failed. The list node is freed in both cases.
 */
static int try_fixup_file_map(int pid, struct binfmt_vma_image *vi, int fd)
{
	struct fmap_fd *fmfd;
	int ret = 0;

	fmfd = pop_fmap_fd(vi->start);
	if (fmfd == NULL)
		return 0;

	printf("%d: Fixing %lx vma to %d fd\n", pid, vi->start, fmfd->fd);

	/* step back over the record the caller has just read */
	lseek(fd, -(off_t)sizeof(*vi), SEEK_CUR);
	printf("%d: Wrote %d\n", pid, fmfd->fd);

	vi->fd = fmfd->fd;
	if (write(fd, vi, sizeof(*vi)) != (ssize_t)sizeof(*vi)) {
		perror("Can't write img");
		ret = 1;
	}

	free(fmfd);
	return ret;
}
/*
 * If the vma starting at vi->start is a shmem region owned by another
 * task, wait for the owner's mapping file, open it, store the
 * descriptor in vi->fd and rewrite the vma record in @fd in place.
 *
 * Returns 0 when the vma is not shared, is owned by @pid, or was fixed
 * up; 1 when the region is unknown, can't be opened, or the rewrite
 * fails.
 */
static int try_fixup_shared_map(int pid, struct binfmt_vma_image *vi, int fd)
{
	struct shmem_info *si;
	unsigned long id;

	id = find_shmem_id(vi->start);
	if (id == 0)
		return 0;

	si = search_shmem(vi->start, id);
	printf("%d: Search for %016lx shmem %p/%d\n", pid, vi->start, si, si ? si->pid : -1);

	if (si == NULL) {
		fprintf(stderr, "Can't find my shmem %016lx\n", vi->start);
		return 1;
	}

	if (si->pid != pid) {
		int sh_fd;

		sh_fd = shmem_wait_and_open(si);
		printf("%d: Fixing %lx vma to %lx/%d shmem -> %d\n", pid, vi->start, si->id, si->pid, sh_fd);
		/* the result of the open is what has to be checked here */
		if (sh_fd < 0) {
			perror("Can't open shmem");
			return 1;
		}

		/* step back over the record the caller has just read */
		lseek(fd, -(off_t)sizeof(*vi), SEEK_CUR);
		vi->fd = sh_fd;
		if (write(fd, vi, sizeof(*vi)) != (ssize_t)sizeof(*vi)) {
			perror("Can't write img");
			return 1;
		}
	}

	return 0;
}
/*
 * Walk the vma records of the image in @fd, starting right after the
 * header, registers and mm sections, and apply the file and shmem
 * descriptor fixups to each. The walk ends at the record whose start
 * and end are both zero.
 *
 * Returns 0 on success, 1 on a read or fixup failure.
 */
static int fixup_vma_fds(int pid, int fd)
{
	/* long, so that it matches the %li conversion below */
	long offset =
		sizeof(struct binfmt_img_header) +
		sizeof(struct binfmt_regs_image) +
		sizeof(struct binfmt_mm_image);

	printf("Seek for: %li bytes\n", offset);
	lseek(fd, offset, SEEK_SET);

	while (1) {
		struct binfmt_vma_image vi;

		if (read(fd, &vi, sizeof(vi)) != (ssize_t)sizeof(vi)) {
			perror("Can't read");
			return 1;
		}

		if (vi.start == 0 && vi.end == 0)
			return 0;

		printf("%d: Fixing %016lx-%016lx %016lx vma\n", pid, vi.start, vi.end, vi.pgoff);

		if (try_fixup_file_map(pid, &vi, fd))
			return 1;

		if (try_fixup_shared_map(pid, &vi, fd))
			return 1;
	}
}
/*
 * Decide whether the page at @vaddr goes into this task's image.
 *
 * Pages outside any shmem region known to this task are always
 * restored. Pages inside one are restored only when @pid is the
 * region's owner in the global table.
 *
 * NOTE(review): a region known to the task but missing from the global
 * table used to cause a NULL dereference; such pages are now restored.
 * Confirm this is the wanted fallback.
 */
static inline int should_restore_page(int pid, unsigned long vaddr)
{
	struct shmem_info *si;
	unsigned long id;

	id = find_shmem_id(vaddr);
	if (id == 0)
		return 1;

	si = search_shmem(vaddr, id);
	if (si == NULL)
		return 1;

	return si->pid == pid;
}
/*
 * Append the pages from pages-<pid>.img to the image in @fd.
 *
 * The image must end with a page record whose address is zero; new
 * records are written over it and a fresh zero address is written at
 * the end. Pages for which should_restore_page() says no are skipped.
 *
 * Every read, write and sendfile is checked, and the pages descriptor
 * is closed on every path. Returns 0 on success, 1 on failure.
 */
static int fixup_pages_data(int pid, int fd)
{
	char path[128];
	int shfd;
	int ret = 1;
	__u32 mag = 0;
	__u64 vaddr = 0;

	sprintf(path, "pages-%d.img", pid);
	shfd = open(path, O_RDONLY);
	if (shfd < 0) {
		perror("Can't open shmem image");
		return 1;
	}

	if (read(shfd, &mag, sizeof(mag)) != (ssize_t)sizeof(mag) ||
	    mag != PAGES_MAGIC) {
		fprintf(stderr, "Bad shmem image\n");
		goto out;
	}

	/* the last record of the image must be the zero terminator */
	lseek(fd, -(off_t)sizeof(struct binfmt_page_image), SEEK_END);
	if (read(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
		perror("Can't read image terminator");
		goto out;
	}
	if (vaddr != 0) {
		printf("SHIT %lx\n", (unsigned long)vaddr);
		goto out;
	}

	/* start overwriting at the terminator */
	lseek(fd, -(off_t)sizeof(struct binfmt_page_image), SEEK_END);

	while (1) {
		ssize_t n;

		n = read(shfd, &vaddr, sizeof(vaddr));
		if (n == 0)
			break;
		if (n != (ssize_t)sizeof(vaddr)) {
			perror("Can't read vaddr");
			goto out;
		}

		if (vaddr == 0)
			break;

		if (!should_restore_page(pid, vaddr)) {
			lseek(shfd, PAGE_SIZE, SEEK_CUR);
			continue;
		}

		if (write(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
			perror("Can't write vaddr");
			goto out;
		}
		if (sendfile(fd, shfd, NULL, PAGE_SIZE) != PAGE_SIZE) {
			perror("Can't copy page");
			goto out;
		}
	}

	vaddr = 0;
	if (write(fd, &vaddr, sizeof(vaddr)) != (ssize_t)sizeof(vaddr)) {
		perror("Can't write image terminator");
		goto out;
	}

	ret = 0;
out:
	close(shfd);
	return ret;
}
/*
 * Fix up vma descriptors and page data of the image in @fd.
 *
 * The function takes ownership of @fd: it is closed before returning,
 * on failure as well as on success.
 *
 * Returns 0 on success, 1 on failure.
 */
static int prepare_image_maps(int fd, int pid)
{
	int ret;

	printf("%d: Fixing maps before executing image\n", pid);

	ret = fixup_vma_fds(pid, fd) || fixup_pages_data(pid, fd);

	close(fd);
	return ret ? 1 : 0;
}
/*
 * Copy core-<pid>.img to core-<pid>.img.out, fix the copy up and
 * execute it.
 *
 * The copy loops until the whole file is transferred, since a single
 * sendfile() may move fewer bytes than asked. Descriptors opened here
 * are closed on the failure paths of this function; the new image
 * descriptor is handed over to prepare_image_maps().
 *
 * Returns 1 on failure; on success execl() does not return.
 */
static int execute_image(int pid)
{
	char path[128];
	int fd, fd_new;
	struct stat buf;
	off_t left;

	sprintf(path, "core-%d.img", pid);
	fd = open(path, O_RDONLY);
	if (fd < 0) {
		perror("Can't open exec image");
		return 1;
	}

	if (fstat(fd, &buf)) {
		perror("Can't stat");
		close(fd);
		return 1;
	}

	sprintf(path, "core-%d.img.out", pid);
	fd_new = open(path, O_RDWR | O_CREAT | O_EXCL, 0700);
	if (fd_new < 0) {
		perror("Can't open new image");
		close(fd);
		return 1;
	}

	printf("%d: Preparing execution image (%li bytes)\n", pid, (long)buf.st_size);

	for (left = buf.st_size; left > 0; ) {
		ssize_t n;

		n = sendfile(fd_new, fd, NULL, left);
		if (n <= 0) {
			if (n < 0)
				perror("Can't copy exec image");
			else
				fprintf(stderr, "Can't copy exec image: unexpected end of file\n");
			close(fd);
			close(fd_new);
			return 1;
		}
		left -= n;
	}
	close(fd);

	if (fchmod(fd_new, 0700)) {
		perror("Can't prepare exec image");
		close(fd_new);
		return 1;
	}

	/* prepare_image_maps() is responsible for fd_new from here on */
	if (prepare_image_maps(fd_new, pid))
		return 1;

	sync();
	printf("%d/%d EXEC IMAGE\n", pid, getpid());
	/* the variadic sentinel must be a pointer, not a bare NULL */
	return execl(path, path, (char *)NULL);
}
static int create_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi, int pipes_fd)
{
int pfd[2], tmp;
unsigned long time = 1000;
printf("\t%d: Creating pipe %x\n", pid, e->pipeid);
if (pipe(pfd) < 0) {
perror("Can't create pipe");
return 1;
}
if (e->bytes) {
printf("\t%d: Splicing data to %d\n", pid, pfd[1]);
tmp = splice(pipes_fd, NULL, pfd[1], NULL, e->bytes, 0);
if (tmp != e->bytes) {
fprintf(stderr, "Wanted to restore %ld bytes, but got %ld\n",
e->bytes, tmp);
if (tmp < 0)
perror("Error splicing data");
return 1;
}
}
pi->read_fd = pfd[0];
pi->write_fd = pfd[1];
pi->real_pid = getpid();
printf("\t%d: Done, waiting for others on %d pid with r:%d w:%d\n",
pid, pi->real_pid, pfd[0], pfd[1]);
while (1) {
if (pi->users == 1) /* only I left */
break;
printf("\t%d: Waiting for %x pipe to attach (%d users left)\n",
pid, e->pipeid, pi->users - 1);
if (time < 20000000)
time <<= 1;
usleep(time);
}
printf("\t%d: All is ok - reopening pipe for %d\n", pid, e->fd);
if (e->flags & O_WRONLY) {
close(pfd[0]);
tmp = reopen_fd(pfd[1], e->fd);
} else {
close(pfd[1]);
tmp = reopen_fd(pfd[0], e->fd);
}
if (tmp < 0) {
perror("Can't dup pipe fd");
return 1;
}
return 0;
}
/*
 * Attach to a pipe created by another task.
 *
 * Waits until the creator has published its real pid, opens the
 * matching end through /proc/<real_pid>/fd/<n>, drops one user from
 * the shared counter and moves the descriptor onto e->fd.
 *
 * The counter lives in memory shared between the restored processes,
 * so it is decremented atomically: a plain decrement can lose an
 * update when two tasks attach at the same time, leaving the creator
 * waiting forever.
 *
 * Returns 0 on success, 1 on failure.
 */
static int attach_pipe(int pid, struct pipes_entry *e, struct pipes_info *pi)
{
	char path[128];
	int tmp, fd;

	printf("\t%d: Wating for pipe %x to appear\n", pid, e->pipeid);

	while (pi->real_pid == 0)
		usleep(1000);

	if (e->flags & O_WRONLY)
		tmp = pi->write_fd;
	else
		tmp = pi->read_fd;

	sprintf(path, "/proc/%d/fd/%d", pi->real_pid, tmp);
	printf("\t%d: Attaching pipe %s\n", pid, path);

	fd = open(path, e->flags);
	if (fd < 0) {
		perror("Can't attach pipe");
		return 1;
	}

	printf("\t%d: Done, reopening for %d\n", pid, e->fd);
	__sync_fetch_and_sub(&pi->users, 1);

	tmp = reopen_fd(fd, e->fd);
	if (tmp < 0) {
		perror("Can't dup to attach pipe");
		/* reopen_fd() leaves the source open when it fails */
		close(fd);
		return 1;
	}

	return 0;
}
/*
 * Restore one pipe end described by @e.
 *
 * If the image descriptor sits on the number the pipe end needs, it is
 * duplicated and *@pipes_fd is updated. The owner task (pi->pid equal
 * to @pid) creates the pipe, every other task attaches to it.
 *
 * Returns 0 on success, 1 on failure.
 */
static int open_pipe(int pid, struct pipes_entry *e, int *pipes_fd)
{
	struct pipes_info *pi;

	printf("\t%d: Opening pipe %x on fd %d\n", pid, e->pipeid, e->fd);

	if (e->fd == *pipes_fd) {
		int moved = dup(*pipes_fd);

		if (moved < 0) {
			perror("Can't dup file");
			return 1;
		}
		*pipes_fd = moved;
	}

	pi = search_pipes(e->pipeid);
	if (pi == NULL) {
		fprintf(stderr, "BUG: can't find my pipe %x\n", e->pipeid);
		return 1;
	}

	if (pi->pid != pid)
		return attach_pipe(pid, e, pi);

	return create_pipe(pid, e, pi, *pipes_fd);
}
/*
 * Restore every pipe end listed in pipes-<pid>.img via open_pipe().
 *
 * open_pipe() may move the image descriptor to another number, so the
 * current value is the one closed on exit, on all paths.
 *
 * Returns 0 on success, 1 on failure.
 */
static int prepare_pipes(int pid)
{
	char path[64];
	int pipes_fd;
	int ret = 1;
	__u32 type = 0;

	printf("%d: Opening pipes\n", pid);

	sprintf(path, "pipes-%d.img", pid);
	pipes_fd = open(path, O_RDONLY);
	if (pipes_fd < 0) {
		perror("Can't open pipes img");
		return 1;
	}

	if (read(pipes_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != PIPES_MAGIC) {
		/* errno is meaningless for a wrong magic, so no perror() */
		fprintf(stderr, "Bad pipes file\n");
		goto out;
	}

	while (1) {
		struct pipes_entry e;
		ssize_t n;

		n = read(pipes_fd, &e, sizeof(e));
		if (n == 0) {
			ret = 0;
			break;
		}
		if (n != (ssize_t)sizeof(e)) {
			if (n < 0)
				perror("Bad pipes entry");
			else
				fprintf(stderr, "Bad pipes entry\n");
			break;
		}

		if (open_pipe(pid, &e, &pipes_fd))
			break;
	}

out:
	close(pipes_fd);
	return ret;
}
/*
 * Restore pipes, files and shmem bookkeeping for @pid, in that order,
 * then execute its image. Returns 1 as soon as one step fails,
 * otherwise whatever execute_image() returns.
 */
static int restore_one_task(int pid)
{
	printf("%d: Restoring resources\n", pid);

	if (prepare_pipes(pid) || prepare_fds(pid) || prepare_shmem(pid))
		return 1;

	return execute_image(pid);
}
/*
 * Entry point handed to clone(): @arg is the pstree image path, and
 * the task restores itself under the pid reported by getpid().
 */
static int do_child(void *arg)
{
	char *pstree_path = arg;
	int my_pid = getpid();

	return restore_task_with_children(my_pid, pstree_path);
}
/*
 * Start a child running do_child() with @pstree_path as its argument.
 *
 * A fresh 4-page stack is mapped for the child, and the address of
 * @pid is passed as the last clone() argument together with the
 * CLONE_CHILD_USEPID flag.
 * NOTE(review): presumably the patched kernel uses that value as the
 * pid of the new task -- confirm against the kernel side.
 *
 * Returns the clone() result, or -1 if the stack can't be mapped.
 */
static inline int fork_with_pid(int pid, char *pstree_path)
{
int ret = 0;
void *stack;
stack = mmap(0, 4 * 4096, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANON | MAP_GROWSDOWN, 0, 0);
if (stack == MAP_FAILED)
return -1;
/* clone() is given the top of the mapping (note MAP_GROWSDOWN above) */
stack += 4 * 4096;
ret = clone(do_child, stack, SIGCHLD | CLONE_CHILD_USEPID, pstree_path, NULL, NULL, &pid);
return ret;
}
/*
 * Restore the task @my_pid: find its entry in the pstree image, start
 * a child for every pid listed after it, then restore this task's own
 * resources and execute its image.
 *
 * Errors while reading the tree or forking terminate the process with
 * exit(1). Otherwise the result of restore_one_task() is returned.
 */
static int restore_task_with_children(int my_pid, char *pstree_path)
{
	int *child_pids;
	int fd, ret, i;
	struct pstree_entry e;

	printf("%d: Starting restore\n", my_pid);

	fd = open(pstree_path, O_RDONLY);
	if (fd < 0) {
		perror("Can't reopen pstree image");
		exit(1);
	}

	/* skip the magic */
	lseek(fd, sizeof(__u32), SEEK_SET);
	while (1) {
		ret = read(fd, &e, sizeof(e));
		if (ret != sizeof(e)) {
			fprintf(stderr, "%d: Read returned %d\n", my_pid, ret);
			if (ret < 0)
				perror("Can't read pstree");
			exit(1);
		}

		if (e.pid != my_pid) {
			/* not ours: skip this entry's children list */
			lseek(fd, e.nr_children * sizeof(__u32), SEEK_CUR);
			continue;
		}

		break;
	}

	if (e.nr_children > 0) {
		i = e.nr_children * sizeof(int);
		child_pids = malloc(i);
		if (child_pids == NULL) {
			perror("Can't allocate children pids");
			exit(1);
		}

		ret = read(fd, child_pids, i);
		if (ret != i) {
			perror("Can't read children pids");
			exit(1);
		}

		close(fd);

		printf("%d: Restoring %d children:\n", my_pid, e.nr_children);
		for (i = 0; i < e.nr_children; i++) {
			printf("\tFork %d from %d\n", child_pids[i], my_pid);
			ret = fork_with_pid(child_pids[i], pstree_path);
			if (ret < 0) {
				perror("Can't fork kid");
				exit(1);
			}
		}

		free(child_pids);
	} else
		close(fd);

	shmem_update_real_pid(my_pid, getpid());

	return restore_one_task(my_pid);
}
static int restore_root_task(char *pstree_path, int fd)
{
struct pstree_entry e;
int ret;
ret = read(fd, &e, sizeof(e));
if (ret != sizeof(e)) {
perror("Can't read root pstree entry");
return 1;
}
close(fd);
printf("Forking root with %d pid\n", e.pid);
ret = fork_with_pid(e.pid, pstree_path);
if (ret < 0) {
perror("Can't fork root");
return 1;
}
wait(NULL);
return 0;
}
/*
 * Restore the whole tree recorded in pstree-<pid>.img.
 *
 * @pid is the textual pid used in the image file name. The magic read
 * is checked, and the descriptor is closed when the magic is bad or
 * the shared tables can't be prepared. After that the descriptor is
 * handed over to restore_root_task().
 *
 * Returns 0 on success, 1 on failure.
 */
static int restore_all_tasks(char *pid)
{
	char path[128];
	int pstree_fd;
	__u32 type = 0;

	sprintf(path, "pstree-%s.img", pid);
	pstree_fd = open(path, O_RDONLY);
	if (pstree_fd < 0) {
		perror("Can't open pstree image");
		return 1;
	}

	if (read(pstree_fd, &type, sizeof(type)) != (ssize_t)sizeof(type) ||
	    type != PSTREE_MAGIC) {
		/* errno is meaningless for a wrong magic, so no perror() */
		fprintf(stderr, "Bad pstree magic\n");
		close(pstree_fd);
		return 1;
	}

	if (prepare_shared(pstree_fd)) {
		close(pstree_fd);
		return 1;
	}

	return restore_root_task(path, pstree_fd);
}
/*
 * Restorer entry point: "-p <pid>" restores one task, "-t <pid>"
 * restores the whole tree. Anything else prints the usage line and
 * returns 1.
 */
int main(int argc, char **argv)
{
	if (argc == 3 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'p':
			return restore_one_task(atoi(argv[2]));
		case 't':
			return restore_all_tasks(argv[2]);
		default:
			break;
		}
	}

	printf("Usage: %s (-t|-p) <pid>\n", argv[0]);
	return 1;
}
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <linux/types.h>
#include <string.h>
#include "img_structs.h"
#include "binfmt_img.h"
/*
 * Print every entry of an fdinfo image: each fixed-size entry is
 * followed by e.len bytes of path.
 *
 * Values are cast to the type their conversion expects. A short read
 * is reported as a truncated image instead of via a stale errno.
 *
 * Returns 0 at end of file, 1 on a read error or an unknown entry
 * type.
 */
static int show_fdinfo(int fd)
{
	char data[1024];
	struct fdinfo_entry e;

	while (1) {
		ssize_t ret;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			break;
		if (ret != (ssize_t)sizeof(e))
			goto bad_read;

		/* e.len is a single byte, so the path always fits into data[] */
		ret = read(fd, data, e.len);
		if (ret != e.len)
			goto bad_read;

		data[e.len] = '\0';
		switch (e.type) {
		case FDINFO_FD:
			printf("fd %d [%s] pos %lx flags %o\n", (int)e.addr, data,
			       (unsigned long)e.pos, (unsigned int)e.flags);
			break;
		case FDINFO_MAP:
			printf("map %lx [%s] flags %o\n", (unsigned long)e.addr, data,
			       (unsigned int)e.flags);
			break;
		default:
			fprintf(stderr, "Unknown fdinfo entry type %d\n", e.type);
			return 1;
		}

		continue;

bad_read:
		if (ret < 0)
			perror("Can't read");
		else
			fprintf(stderr, "Can't read: truncated image\n");
		return 1;
	}

	return 0;
}
#define PAGE_SIZE 4096
/*
 * Print a one-line summary (address, first and last word) for every
 * page record in @fd. Each record is an 8-byte address followed by
 * PAGE_SIZE bytes of data; an address of zero or end of file ends the
 * list.
 *
 * Returns 0 on success, 1 when a record is truncated or unreadable.
 */
static int show_mem(int fd)
{
	__u64 vaddr;
	unsigned int data[2];

	while (1) {
		ssize_t n;

		n = read(fd, &vaddr, sizeof(vaddr));
		if (n == 0)
			break;
		if (n != (ssize_t)sizeof(vaddr))
			goto truncated;
		if (vaddr == 0)
			break;

		if (read(fd, &data[0], sizeof(data[0])) != (ssize_t)sizeof(data[0]))
			goto truncated;
		/* jump to the last word of the page */
		lseek(fd, PAGE_SIZE - 2 * sizeof(unsigned int), SEEK_CUR);
		if (read(fd, &data[1], sizeof(data[1])) != (ssize_t)sizeof(data[1]))
			goto truncated;

		printf("\tpage 0x%lx [%x...%x]\n", (unsigned long)vaddr, data[0], data[1]);
	}

	return 0;

truncated:
	fprintf(stderr, "Can't read page record\n");
	return 1;
}
/* Show a pages image: the page records are printed by show_mem(). */
static int show_pages(int fd)
{
	int status = show_mem(fd);

	return status;
}
/*
 * Print every entry of a shmem image as "start-end id".
 *
 * All three fields are 64-bit, so they are cast to unsigned long and
 * printed with %lx; the id used to be passed to a plain %x conversion.
 *
 * Returns 0 at end of file, 1 on a read failure.
 */
static int show_shmem(int fd)
{
	ssize_t r;
	struct shmem_entry e;

	while (1) {
		r = read(fd, &e, sizeof(e));
		if (r == 0)
			return 0;
		if (r != (ssize_t)sizeof(e)) {
			if (r < 0)
				perror("Can't read shmem entry");
			else
				fprintf(stderr, "Can't read shmem entry: truncated image\n");
			return 1;
		}

		printf("%016lx-%016lx %016lx\n", (unsigned long)e.start,
		       (unsigned long)e.end, (unsigned long)e.shmid);
	}
}
/*
 * Map a saved segment value to a short name. Exact matches are tried
 * first, then the TLS and LDT bits, in that order.
 */
static char *segval(__u16 seg)
{
	char *name;

	switch (seg) {
	case CKPT_X86_SEG_NULL:
		name = "nul";
		break;
	case CKPT_X86_SEG_USER32_CS:
		name = "cs32";
		break;
	case CKPT_X86_SEG_USER32_DS:
		name = "ds32";
		break;
	case CKPT_X86_SEG_USER64_CS:
		name = "cs64";
		break;
	case CKPT_X86_SEG_USER64_DS:
		name = "ds64";
		break;
	default:
		if (seg & CKPT_X86_SEG_TLS)
			name = "tls";
		else if (seg & CKPT_X86_SEG_LDT)
			name = "ldt";
		else
			name = "[unknown]";
		break;
	}

	return name;
}
/* Print one register of ri.r as "\t<name>: <16 hex digits>" */
#define SHOW_REG(field)	printf("\t" #field ": %016lx\n", ri.r.field)
/* Print one segment of ri.r as "\t<name>: <segval() name>" */
#define SHOW_SEG(field)	printf("\t" #field ": %s\n", segval(ri.r.field))
/*
 * Read the registers section from @fd and print it.
 * Returns 0 on success, 1 if the section can't be read in full.
 */
static int show_regs(int fd)
{
	struct binfmt_regs_image ri;

	if (read(fd, &ri, sizeof(ri)) != sizeof(ri)) {
		perror("Can't read registers from image");
		return 1;
	}

	printf("Registers:\n");

	SHOW_REG(r15);
	SHOW_REG(r14);
	SHOW_REG(r13);
	SHOW_REG(r12);
	SHOW_REG(r11);
	SHOW_REG(r10);
	SHOW_REG(r9);
	SHOW_REG(r8);
	SHOW_REG(ax);
	SHOW_REG(orig_ax);
	SHOW_REG(bx);
	SHOW_REG(cx);
	SHOW_REG(dx);
	SHOW_REG(si);
	SHOW_REG(di);
	SHOW_REG(ip);
	SHOW_REG(flags);
	SHOW_REG(bp);
	SHOW_REG(sp);
	SHOW_REG(gs);
	SHOW_REG(fs);

	SHOW_SEG(gsindex);
	SHOW_SEG(fsindex);
	SHOW_SEG(cs);
	SHOW_SEG(ss);
	SHOW_SEG(ds);
	SHOW_SEG(es);

	/* the tls lines have no colon after the label */
	printf("\ttls0 %016lx\n", ri.r.tls[0]);
	printf("\ttls1 %016lx\n", ri.r.tls[1]);
	printf("\ttls2 %016lx\n", ri.r.tls[2]);

	return 0;
}
#undef SHOW_REG
#undef SHOW_SEG
/* Print one field of mi as "\t<name>: <16 hex digits>" */
#define SHOW_MM(field)	printf("\t" #field ": %016lx\n", mi.field)
/*
 * Read the mm section from @fd, print it and hand the start of the
 * stack back through @stack.
 * Returns 0 on success, 1 if the section can't be read in full.
 */
static int show_mm(int fd, unsigned long *stack)
{
	struct binfmt_mm_image mi;

	if (read(fd, &mi, sizeof(mi)) != sizeof(mi)) {
		perror("Can't read mm from image");
		return 1;
	}

	printf("MM:\n");

	SHOW_MM(flags);
	SHOW_MM(def_flags);
	SHOW_MM(start_code);
	SHOW_MM(end_code);
	SHOW_MM(start_data);
	SHOW_MM(end_data);
	SHOW_MM(start_brk);
	SHOW_MM(brk);
	SHOW_MM(start_stack);
	SHOW_MM(arg_start);
	SHOW_MM(arg_end);
	SHOW_MM(env_start);
	SHOW_MM(env_end);

	*stack = mi.start_stack;

	return 0;
}
#undef SHOW_MM
/*
 * Print the vma records from @fd up to the record whose start and end
 * are both zero. The vma whose range contains @stack is tagged
 * "[stack]".
 *
 * Returns 0 at the terminating record, 1 on a short read.
 */
static int show_vmas(int fd, unsigned long stack)
{
	struct binfmt_vma_image vi;

	printf("VMAs:\n");

	for (;;) {
		char *note;

		if (read(fd, &vi, sizeof(vi)) != sizeof(vi)) {
			perror("Can't read vma from image");
			return 1;
		}

		if (vi.start == 0 && vi.end == 0)
			break;

		note = (vi.start <= stack && vi.end >= stack) ? "[stack]" : "";

		printf("\t%016lx-%016lx file %d %016lx prot %x flags %x %s\n",
		       vi.start, vi.end, vi.fd, vi.pgoff,
		       vi.prot, vi.flags, note);
	}

	return 0;
}
/* Print the "Pages:" heading followed by the page records in @fd. */
static int show_privmem(int fd)
{
	int status;

	printf("Pages:\n");
	status = show_mem(fd);

	return status;
}
/*
 * Show a core image: check the version word, skip the pad word, then
 * print registers, mm, vmas and pages in that order.
 *
 * Returns 0 on success, 1 on an unsupported version or when any
 * section fails to print.
 */
static int show_core(int fd)
{
	__u32 version = 0;
	unsigned long stack;

	read(fd, &version, 4);
	if (version != BINFMT_IMG_VERS_0) {
		printf("Unsupported version %d\n", version);
		return 1;
	}

	/* the pad */
	read(fd, &version, 4);

	printf("Showing version 0\n");

	/* evaluated left to right, stopping at the first failure */
	if (show_regs(fd) ||
	    show_mm(fd, &stack) ||
	    show_vmas(fd, stack) ||
	    show_privmem(fd))
		return 1;

	return 0;
}
/*
 * Print every pstree entry as "<pid>: <child> <child> ...".
 *
 * The children buffer is allocated per entry only when there are
 * children, checked, and freed again once the entry has been printed.
 *
 * Returns 0 at end of file, 1 on a read or allocation failure.
 */
static int show_pstree(int fd)
{
	struct pstree_entry e;

	while (1) {
		ssize_t ret;
		size_t len;
		__u32 i;
		__u32 *ch = NULL;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			return 0;
		if (ret != (ssize_t)sizeof(e)) {
			perror("Can't read processes entry");
			return 1;
		}

		printf("%d:", e.pid);

		len = (size_t)e.nr_children * sizeof(__u32);
		if (len) {
			ch = malloc(len);
			if (ch == NULL) {
				perror("Can't allocate children list");
				return 1;
			}

			ret = read(fd, ch, len);
			if (ret < 0 || (size_t)ret != len) {
				perror("Can't read children list");
				free(ch);
				return 1;
			}
		}

		for (i = 0; i < e.nr_children; i++)
			printf(" %d", ch[i]);
		printf("\n");

		free(ch);
	}
}
static int show_pipes(int fd)
{
struct pipes_entry e;
int ret;
char buf[17];
while (1) {
ret = read(fd, &e, sizeof(e));
if (ret == 0)
break;
if (ret != sizeof(e)) {
perror("Can't read pipe entry");
return 1;
}
printf("%d: %lx %o %d ", e.fd, e.pipeid, e.flags, e.bytes);
if (e.flags & O_WRONLY) {
printf("\n");
if (e.bytes) {
printf("Bogus pipe\n");
return 1;
}
continue;
}
memset(buf, 0, sizeof(buf));
ret = e.bytes;
if (ret > 16)
ret = 16;
read(fd, buf, ret);
printf("\t[%s", buf);
if (ret < e.bytes)
printf("...");
printf("]\n");
lseek(fd, e.bytes - ret, SEEK_CUR);
}
return 0;
}
/*
 * Viewer entry point: open the image named by argv[1], read its 4-byte
 * magic and dispatch to the matching show_*() routine.
 *
 * Returns the result of that routine, or 1 when the argument is
 * missing, the file can't be opened or read, or the magic is unknown.
 */
int main(int argc, char **argv)
{
	__u32 type = 0;
	int fd;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <image-file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("Can't open");
		return 1;
	}

	if (read(fd, &type, sizeof(type)) != (ssize_t)sizeof(type)) {
		fprintf(stderr, "Can't read image magic\n");
		return 1;
	}

	if (type == FDINFO_MAGIC)
		return show_fdinfo(fd);

	if (type == PAGES_MAGIC)
		return show_pages(fd);

	if (type == SHMEM_MAGIC)
		return show_shmem(fd);

	if (type == PSTREE_MAGIC)
		return show_pstree(fd);

	if (type == PIPES_MAGIC)
		return show_pipes(fd);

	if (type == BINFMT_IMG_MAGIC)
		return show_core(fd);

	printf("Unknown file type 0x%x\n", type);
	return 1;
}
/* Magic stored in the first 4 bytes of an fdinfo image */
#define FDINFO_MAGIC 0x01010101
/*
 * One fdinfo record; it is followed in the image by len bytes of file
 * path. flags and pos are the open flags and file position used when
 * the file is reopened. addr is the descriptor number for FDINFO_FD
 * and the mapping start address for FDINFO_MAP.
 */
struct fdinfo_entry {
__u8 type;
__u8 len;
__u16 flags;
__u32 pos;
__u64 addr;
};
/* Values of fdinfo_entry.type */
#define FDINFO_FD 1
#define FDINFO_MAP 2
/* Magic stored in the first 4 bytes of a pages image */
#define PAGES_MAGIC 0x20202020
/* Magic stored in the first 4 bytes of a shmem image */
#define SHMEM_MAGIC 0x03300330
/* One shared memory region: address range and id */
struct shmem_entry {
__u64 start;
__u64 end;
__u64 shmid;
};
/* Magic stored in the first 4 bytes of a pstree image */
#define PSTREE_MAGIC 0x40044004
/* One task of the tree; followed in the image by nr_children 32-bit child pids */
struct pstree_entry {
__u32 pid;
__u32 nr_children;
};
/* Magic stored in the first 4 bytes of a pipes image */
#define PIPES_MAGIC 0x05055050
/*
 * One pipe end: the descriptor number it must be restored on, the pipe
 * id, its open flags, and the number of bytes of pipe data that follow
 * the record in the image.
 */
struct pipes_entry {
__u32 fd;
__u32 pipeid;
__u32 flags;
__u32 bytes;
};
Previous version of C/R -- it uses an in-kernel dumper and restorer.
It is kept here for reference only and is not used by crtools itself.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment