Commit 8b122598 authored by Cyrill Gorcunov

Zap xemul directory

No need for samples anymore
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
parent a896bfd6
This diff is collapsed.
From 0f8e07457aa91e9461665440ca258eb9f93bf2f9 Mon Sep 17 00:00:00 2001
From: root <root@ovzept.sw.ru>
Date: Fri, 3 Jun 2011 18:16:43 +0400
Subject: [PATCH] Images execution binfmt handler
---
fs/Kconfig.binfmt | 6 +
fs/Makefile | 1 +
fs/binfmt_img.c | 324 +++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 331 insertions(+), 0 deletions(-)
create mode 100644 fs/binfmt_img.c
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index 79e2ca7..0b2f48e 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -161,3 +161,9 @@ config BINFMT_MISC
You may say M here for module support and later load the module when
you have use for it; the module is called binfmt_misc. If you
don't know what to answer at this point, say Y.
+
+config BINFMT_IMG
+ tristate "Kernel support for IMG binaries"
+ depends on X86
+ help
+ Say M/Y here to enable support for checkpoint-restore images execution
diff --git a/fs/Makefile b/fs/Makefile
index fb68c2b..8221719 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -33,6 +33,7 @@ obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o
obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o
+obj-$(CONFIG_BINFMT_IMG) += binfmt_img.o
# binfmt_script is always there
obj-y += binfmt_script.o
diff --git a/fs/binfmt_img.c b/fs/binfmt_img.c
new file mode 100644
index 0000000..9b09797
--- /dev/null
+++ b/fs/binfmt_img.c
@@ -0,0 +1,324 @@
+#include <linux/binfmt_img.h>
+#include <linux/module.h>
+#include <linux/binfmts.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/highmem.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+/*
+ * The binary handler to save and restore a single task state
+ */
+
+static int img_check_header(void *buf)
+{
+ struct binfmt_img_header *hdr = buf;
+
+ if (hdr->magic != BINFMT_IMG_MAGIC)
+ return -ENOEXEC;
+
+ if (hdr->version != BINFMT_IMG_VERS_0)
+ return -EINVAL;
+
+ return sizeof(*hdr);
+}
+
+static unsigned short decode_segment(__u16 seg)
+{
+ if (seg == CKPT_X86_SEG_NULL)
+ return 0;
+
+ if (seg == CKPT_X86_SEG_USER64_CS)
+ return __USER_CS;
+ if (seg == CKPT_X86_SEG_USER64_DS)
+ return __USER_DS;
+#ifdef CONFIG_COMPAT
+ if (seg == CKPT_X86_SEG_USER32_CS)
+ return __USER32_CS;
+ if (seg == CKPT_X86_SEG_USER32_DS)
+ return __USER32_DS;
+#endif
+
+ if (seg & CKPT_X86_SEG_TLS) {
+ seg &= ~CKPT_X86_SEG_TLS;
+ return ((GDT_ENTRY_TLS_MIN + seg) << 3) | 3;
+ }
+ if (seg & CKPT_X86_SEG_LDT) {
+ seg &= ~CKPT_X86_SEG_LDT;
+ return (seg << 3) | 7;
+ }
+ BUG();
+}
+
+static void decode_tls(struct desc_struct *d, __u64 val)
+{
+ d->a = (unsigned int)(val >> 32);
+ d->b = (unsigned int)(val & 0xFFFFFFFF);
+}
+
+static int img_restore_regs(struct linux_binprm *bprm, loff_t off, struct pt_regs *regs)
+{
+ int ret, i;
+ struct binfmt_regs_image regi;
+ struct thread_struct *th = &current->thread;
+ unsigned short seg;
+
+ ret = kernel_read(bprm->file, off, (char *)&regi, sizeof(regi));
+ if (ret != sizeof(regi))
+ return -EIO;
+
+ regs->r15 = regi.r15;
+ regs->r14 = regi.r14;
+ regs->r13 = regi.r13;
+ regs->r12 = regi.r12;
+ regs->r11 = regi.r11;
+ regs->r10 = regi.r10;
+ regs->r9 = regi.r9;
+ regs->r8 = regi.r8;
+ regs->ax = regi.ax;
+ regs->orig_ax = regi.orig_ax;
+ regs->bx = regi.bx;
+ regs->cx = regi.cx;
+ regs->dx = regi.dx;
+ regs->si = regi.si;
+ regs->di = regi.di;
+ regs->ip = regi.ip;
+ regs->flags = regi.flags;
+ regs->bp = regi.bp;
+ regs->sp = regi.sp;
+
+ regs->cs = decode_segment(regi.cs);
+ regs->ss = decode_segment(regi.ss);
+
+ th->usersp = regi.sp;
+ th->ds = decode_segment(regi.ds);
+ th->es = decode_segment(regi.es);
+ th->fsindex = decode_segment(regi.fsindex);
+ th->gsindex = decode_segment(regi.gsindex);
+
+ th->fs = regi.fs;
+ th->gs = regi.gs;
+
+ BUILD_BUG_ON(GDT_ENTRY_TLS_ENTRIES != CKPT_TLS_ENTRIES);
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ decode_tls(&th->tls_array[i], regi.tls[i]);
+
+ load_TLS(th, smp_processor_id());
+
+ seg = th->fsindex;
+ loadsegment(fs, seg);
+ savesegment(fs, seg);
+ if (seg != th->fsindex) {
+ printk("ERROR saving fs selector want %x, has %x\n",
+ (unsigned int)th->fsindex, (unsigned int)seg);
+ return -EFAULT;
+ }
+
+ if (th->fs)
+ wrmsrl(MSR_FS_BASE, th->fs);
+ load_gs_index(th->gsindex);
+ if (th->gs)
+ wrmsrl(MSR_KERNEL_GS_BASE, th->gs);
+
+ return sizeof(regi);
+}
+
+static int img_restore_mm(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct binfmt_mm_image mmi;
+ struct mm_struct *mm = current->mm;
+
+ ret = kernel_read(bprm->file, off, (char *)&mmi, sizeof(mmi));
+ if (ret != sizeof(mmi))
+ return -EIO;
+
+ mm->flags = mmi.flags;
+ mm->def_flags = mmi.def_flags;
+ mm->start_code = mmi.start_code;
+ mm->end_code = mmi.end_code;
+ mm->start_data = mmi.start_data;
+ mm->end_data = mmi.end_data;
+ mm->start_brk = mmi.start_brk;
+ mm->brk = mmi.brk;
+ mm->start_stack = mmi.start_stack;
+ mm->arg_start = mmi.arg_start;
+ mm->arg_end = mmi.arg_end;
+ mm->env_start = mmi.env_start;
+ mm->env_end = mmi.env_end;
+
+ if (mmi.exe_fd != 0) {
+ struct file *f;
+
+ f = fget(mmi.exe_fd);
+ if (f == NULL)
+ return -EBADF;
+
+ fput(mm->exe_file);
+ mm->exe_file = f;
+ }
+
+ return sizeof(mmi);
+}
+
+static int img_restore_vmas(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+ int len = 0;
+
+ do_munmap(mm, 0, TASK_SIZE);
+
+ while (1) {
+ struct binfmt_vma_image vmai;
+ unsigned long addr;
+ struct file *file = NULL;
+
+ len += sizeof(vmai);
+
+ ret = kernel_read(bprm->file, off, (char *)&vmai, sizeof(vmai));
+ if (ret != sizeof(vmai))
+ return -EIO;
+
+ if (vmai.start == 0 && vmai.end == 0)
+ break;
+
+ if (vmai.fd != 0) {
+ file = fget(vmai.fd);
+ if (file == NULL)
+ return -EBADF;
+ } else
+ vmai.flags |= MAP_ANONYMOUS;
+
+ if (vmai.start <= mm->start_stack && vmai.end >= mm->start_stack)
+ vmai.flags |= MAP_GROWSDOWN;
+
+ addr = do_mmap_pgoff(file, vmai.start, vmai.end - vmai.start,
+ vmai.prot, vmai.flags | MAP_FIXED, vmai.pgoff);
+
+ if (vmai.fd) {
+ fput(file);
+ do_close(vmai.fd);
+ }
+
+ if ((long)addr < 0 || (addr != vmai.start))
+ return -ENXIO;
+
+ off += sizeof(vmai);
+ }
+
+ return len;
+}
+
+static int img_restore_pages(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ struct mm_struct *mm = current->mm;
+ int len = 0;
+
+ while (1) {
+ struct binfmt_page_image pgi;
+ struct vm_area_struct *vma;
+ struct page *page;
+ void *pg_data;
+
+ ret = kernel_read(bprm->file, off, (char *)&pgi, sizeof(pgi));
+ if (ret != sizeof(pgi))
+ return -EIO;
+
+ len += sizeof(pgi);
+ if (pgi.vaddr == 0)
+ break;
+
+ vma = find_vma(mm, pgi.vaddr);
+ if (vma == NULL)
+ return -ESRCH;
+
+ ret = get_user_pages(current, current->mm, (unsigned long)pgi.vaddr,
+ 1, 1, 1, &page, NULL);
+ if (ret != 1)
+ return -EFAULT;
+
+ pg_data = kmap(page);
+ ret = kernel_read(bprm->file, off + sizeof(pgi), pg_data, PAGE_SIZE);
+ kunmap(page);
+ put_page(page);
+
+ if (ret != PAGE_SIZE)
+ return -EFAULT;
+
+ len += PAGE_SIZE;
+ off += sizeof(pgi) + PAGE_SIZE;
+ }
+
+ return len;
+}
+
+static int img_restore_mem(struct linux_binprm *bprm, loff_t off)
+{
+ int ret;
+ loff_t len = off;
+
+ ret = img_restore_mm(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_vmas(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_pages(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ return len;
+
+}
+
+static int img_load_binary(struct linux_binprm * bprm, struct pt_regs * regs)
+{
+ int ret;
+ loff_t len = 0;
+
+ ret = img_check_header(bprm->buf);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_regs(bprm, len, regs);
+ if (ret < 0)
+ return ret;
+
+ len += ret;
+ ret = img_restore_mem(bprm, len);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static struct linux_binfmt img_binfmt = {
+ .module = THIS_MODULE,
+ .load_binary = img_load_binary,
+};
+
+static __init int img_binfmt_init(void)
+{
+ return register_binfmt(&img_binfmt);
+}
+
+static __exit void img_binfmt_exit(void)
+{
+ unregister_binfmt(&img_binfmt);
+}
+
+module_init(img_binfmt_init);
+module_exit(img_binfmt_exit);
+MODULE_LICENSE("GPL");
--
1.5.5.6
#ifndef __BINFMT_IMG_H__
#define __BINFMT_IMG_H__
#include <linux/types.h>
#define __packed __attribute__((packed))
/*
 * Header found at the very start of a core image.
 *
 * The structure is packed, so it occupies 4 + 4 + 2 + 2 = 12 bytes.
 */
struct binfmt_img_header {
__u32 magic; /* compared against BINFMT_IMG_MAGIC */
__u32 version; /* compared against BINFMT_IMG_VERS_0 */
__u16 arch; /* NOTE(review): not interpreted by any code visible here */
__u16 flags; /* NOTE(review): not interpreted by any code visible here */
} __packed;
#define CKPT_TLS_ENTRIES 3
/*
 * Saved register state of a task.
 *
 * The named fields live in the `r` member of a union whose other member is
 * an array of 32 __u64 values, so the record is always 32 * 8 = 256 bytes
 * long regardless of how many bytes the named fields use.
 */
struct binfmt_regs_image {
union {
struct {
__u64 r15;
__u64 r14;
__u64 r13;
__u64 r12;
__u64 r11;
__u64 r10;
__u64 r9;
__u64 r8;
__u64 ax;
__u64 orig_ax;
__u64 bx;
__u64 cx;
__u64 dx;
__u64 si;
__u64 di;
__u64 ip;
__u64 flags;
__u64 bp;
__u64 sp;
__u64 gs;
__u64 fs;
__u64 tls[CKPT_TLS_ENTRIES]; /* one 64-bit value per TLS slot */
/* The six selectors below hold CKPT_X86_SEG_* encodings. */
__u16 gsindex;
__u16 fsindex;
__u16 cs;
__u16 ss;
__u16 ds;
__u16 es;
} r;
__u64 dummy[32]; /* fixes the size of the union at 256 bytes */
};
} __packed;
#define CKPT_X86_SEG_NULL 0
#define CKPT_X86_SEG_USER32_CS 1
#define CKPT_X86_SEG_USER32_DS 2
#define CKPT_X86_SEG_USER64_CS 3
#define CKPT_X86_SEG_USER64_DS 4
#define CKPT_X86_SEG_TLS 0x4000
#define CKPT_X86_SEG_LDT 0x8000
/*
 * Saved memory-descriptor fields of a task.
 *
 * Thirteen 64-bit values followed by one 32-bit descriptor number; the
 * structure is packed, so there is no trailing padding.
 */
struct binfmt_mm_image {
__u64 flags;
__u64 def_flags;
__u64 start_code;
__u64 end_code;
__u64 start_data;
__u64 end_data;
__u64 start_brk;
__u64 brk;
__u64 start_stack; /* a VMA containing this address is reported as the stack */
__u64 arg_start;
__u64 arg_end;
__u64 env_start;
__u64 env_end;
__u32 exe_fd; /* presumably 0 means "no executable descriptor" -- confirm against the restorer */
} __packed;
/*
 * One saved memory mapping.
 *
 * Records are stored back to back; a record whose start and end are both
 * zero terminates the list.
 */
struct binfmt_vma_image {
__u32 prot;
__u32 flags;
__u32 pad; /* NOTE(review): not read by any code visible here */
__u32 fd; /* presumably 0 means an anonymous mapping -- confirm against the restorer */
__u64 start;
__u64 end;
__u64 pgoff;
} __packed;
/*
 * Header that precedes each saved page.
 *
 * A vaddr of zero terminates the page list; readers of this format expect
 * every other header to be followed by one page worth of data.
 */
struct binfmt_page_image {
__u64 vaddr;
} __packed;
#define BINFMT_IMG_MAGIC 0xa75b8d43
#define BINFMT_IMG_VERS_0 0x00000100
#endif
This diff is collapsed.
This diff is collapsed.
#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdlib.h>
#include <linux/types.h>
#include <string.h>
#include "img_structs.h"
#include "binfmt_img.h"
/*
 * Print every fdinfo record stored in the image open on @fd.
 *
 * Each record is a struct fdinfo_entry immediately followed by e.len bytes
 * of name data. Reading stops cleanly when end of file is hit on a record
 * boundary.
 *
 * Returns 0 on success, 1 on a failed/short read or an unknown record type.
 */
static int show_fdinfo(int fd)
{
	char data[1024];
	struct fdinfo_entry e;

	while (1) {
		int ret;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			break;
		if (ret != sizeof(e)) {
			perror("Can't read");
			return 1;
		}

		ret = read(fd, data, e.len);
		if (ret != e.len) {
			perror("Can't read");
			return 1;
		}
		/* The name is not terminated on disk; terminate it for %s. */
		data[e.len] = '\0';

		switch (e.type) {
		case FDINFO_FD:
			/*
			 * pos is only 32 bits wide, so it must not be passed
			 * to a %lx conversion.
			 */
			printf("fd %d [%s] pos %x flags %o\n", (int)e.addr, data,
			       (unsigned int)e.pos, (unsigned int)e.flags);
			break;
		case FDINFO_MAP:
			printf("map %llx [%s] flags %o\n",
			       (unsigned long long)e.addr, data,
			       (unsigned int)e.flags);
			break;
		default:
			fprintf(stderr, "Unknown fdinfo entry type %d\n", e.type);
			return 1;
		}
	}

	return 0;
}
#define PAGE_SIZE 4096

/*
 * Print a one-line summary of every page record stored on @fd.
 *
 * A record is a 64-bit virtual address followed by PAGE_SIZE bytes of data.
 * The list ends at end of file or at a record whose address is zero. Only
 * the first and the last 32-bit word of each page are shown.
 *
 * Returns 0 on success, 1 when a record is truncated or cannot be read.
 */
static int show_mem(int fd)
{
	__u64 vaddr;
	unsigned int data[2];

	while (1) {
		ssize_t ret;

		ret = read(fd, &vaddr, sizeof(vaddr));
		if (ret == 0)
			break;
		if (ret != (ssize_t)sizeof(vaddr)) {
			perror("Can't read page address");
			return 1;
		}
		if (vaddr == 0)
			break;

		/*
		 * Read the first word, jump to the last word of the page and
		 * read that one too: 4 + (PAGE_SIZE - 8) + 4 == PAGE_SIZE.
		 */
		if (read(fd, &data[0], sizeof(data[0])) != (ssize_t)sizeof(data[0]) ||
		    lseek(fd, PAGE_SIZE - 2 * sizeof(data[0]), SEEK_CUR) == (off_t)-1 ||
		    read(fd, &data[1], sizeof(data[1])) != (ssize_t)sizeof(data[1])) {
			perror("Can't read page data");
			return 1;
		}

		printf("\tpage 0x%lx [%x...%x]\n", (unsigned long)vaddr, data[0], data[1]);
	}

	return 0;
}
/*
 * Dump a pages image: the payload is a plain list of page records, so the
 * work is delegated to show_mem() and its status is handed back unchanged.
 */
static int show_pages(int fd)
{
	int status = show_mem(fd);

	return status;
}
/*
 * Print every shared-memory record stored on @fd as "start-end shmid".
 *
 * Returns 0 once end of file is reached on a record boundary, 1 on a failed
 * or short read.
 */
static int show_shmem(int fd)
{
	int r;
	struct shmem_entry e;

	while (1) {
		r = read(fd, &e, sizeof(e));
		if (r == 0)
			return 0;
		if (r != sizeof(e)) {
			perror("Can't read shmem entry");
			return 1;
		}

		/*
		 * All three fields are 64 bits wide; shmid in particular must
		 * not be handed to a plain %x conversion.
		 */
		printf("%016llx-%016llx %016llx\n",
		       (unsigned long long)e.start,
		       (unsigned long long)e.end,
		       (unsigned long long)e.shmid);
	}
}
/*
 * Translate a CKPT_X86_SEG_* selector encoding into a short printable name.
 *
 * Exact matches on the fixed encodings win; otherwise the TLS flag is tested
 * before the LDT flag. Anything else is reported as "[unknown]". The result
 * points at a string literal and must not be modified or freed.
 */
static char *segval(__u16 seg)
{
	if (seg == CKPT_X86_SEG_NULL)
		return "nul";
	if (seg == CKPT_X86_SEG_USER32_CS)
		return "cs32";
	if (seg == CKPT_X86_SEG_USER32_DS)
		return "ds32";
	if (seg == CKPT_X86_SEG_USER64_CS)
		return "cs64";
	if (seg == CKPT_X86_SEG_USER64_DS)
		return "ds64";

	if (seg & CKPT_X86_SEG_TLS)
		return "tls";

	return (seg & CKPT_X86_SEG_LDT) ? "ldt" : "[unknown]";
}
static int show_regs(int fd)
{
struct binfmt_regs_image ri;
if (read(fd, &ri, sizeof(ri)) != sizeof(ri)) {
perror("Can't read registers from image");
return 1;
}
printf("Registers:\n");
printf("\tr15: %016lx\n", ri.r.r15);
printf("\tr14: %016lx\n", ri.r.r14);
printf("\tr13: %016lx\n", ri.r.r13);
printf("\tr12: %016lx\n", ri.r.r12);
printf("\tr11: %016lx\n", ri.r.r11);
printf("\tr10: %016lx\n", ri.r.r10);
printf("\tr9: %016lx\n", ri.r.r9);
printf("\tr8: %016lx\n", ri.r.r8);
printf("\tax: %016lx\n", ri.r.ax);
printf("\torig_ax: %016lx\n", ri.r.orig_ax);
printf("\tbx: %016lx\n", ri.r.bx);
printf("\tcx: %016lx\n", ri.r.cx);
printf("\tdx: %016lx\n", ri.r.dx);
printf("\tsi: %016lx\n", ri.r.si);
printf("\tdi: %016lx\n", ri.r.di);
printf("\tip: %016lx\n", ri.r.ip);
printf("\tflags: %016lx\n", ri.r.flags);
printf("\tbp: %016lx\n", ri.r.bp);
printf("\tsp: %016lx\n", ri.r.sp);
printf("\tgs: %016lx\n", ri.r.gs);
printf("\tfs: %016lx\n", ri.r.fs);
printf("\tgsindex: %s\n", segval(ri.r.gsindex));
printf("\tfsindex: %s\n", segval(ri.r.fsindex));
printf("\tcs: %s\n", segval(ri.r.cs));
printf("\tss: %s\n", segval(ri.r.ss));
printf("\tds: %s\n", segval(ri.r.ds));
printf("\tes: %s\n", segval(ri.r.es));
printf("\ttls0 %016lx\n", ri.r.tls[0]);
printf("\ttls1 %016lx\n", ri.r.tls[1]);
printf("\ttls2 %016lx\n", ri.r.tls[2]);
return 0;
}
/*
 * Read one struct binfmt_mm_image from @fd and print its address fields.
 *
 * @stack receives the saved start_stack value so that the caller can mark
 * the stack mapping later on. It is written only on success.
 *
 * Returns 0 on success, 1 if the record cannot be read in full.
 */
static int show_mm(int fd, unsigned long *stack)
{
	struct binfmt_mm_image mi;
	size_t i;

	if (read(fd, &mi, sizeof(mi)) != (ssize_t)sizeof(mi)) {
		perror("Can't read mm from image");
		return 1;
	}

	/* Copy the values out of the packed record, in print order. */
	const struct {
		const char *name;
		__u64 val;
	} fields[] = {
		{ "flags", mi.flags },
		{ "def_flags", mi.def_flags },
		{ "start_code", mi.start_code },
		{ "end_code", mi.end_code },
		{ "start_data", mi.start_data },
		{ "end_data", mi.end_data },
		{ "start_brk", mi.start_brk },
		{ "brk", mi.brk },
		{ "start_stack", mi.start_stack },
		{ "arg_start", mi.arg_start },
		{ "arg_end", mi.arg_end },
		{ "env_start", mi.env_start },
		{ "env_end", mi.env_end },
	};

	printf("MM:\n");

	/* __u64 is printed through an explicit unsigned long long cast. */
	for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++)
		printf("\t%s: %016llx\n", fields[i].name,
		       (unsigned long long)fields[i].val);

	*stack = mi.start_stack;
	return 0;
}
/*
 * Print the list of saved mappings stored on @fd.
 *
 * Records are read until the terminator (start and end both zero) is seen.
 * A mapping whose range contains @stack is tagged with "[stack]".
 *
 * Returns 0 when the terminator is reached, 1 if a record cannot be read in
 * full (which includes hitting end of file before the terminator).
 */
static int show_vmas(int fd, unsigned long stack)
{
	struct binfmt_vma_image vi;

	printf("VMAs:\n");

	while (1) {
		const char *note = "";

		if (read(fd, &vi, sizeof(vi)) != (ssize_t)sizeof(vi)) {
			perror("Can't read vma from image");
			return 1;
		}

		if (vi.start == 0 && vi.end == 0)
			return 0;

		if (vi.start <= stack && vi.end >= stack)
			note = "[stack]";

		/* Cast every field to the exact type its conversion expects. */
		printf("\t%016llx-%016llx file %d %016llx prot %x flags %x %s\n",
		       (unsigned long long)vi.start, (unsigned long long)vi.end,
		       (int)vi.fd, (unsigned long long)vi.pgoff,
		       (unsigned int)vi.prot, (unsigned int)vi.flags, note);
	}
}
/*
 * Print the "Pages:" heading and then the page records that follow on @fd.
 * The status of show_mem() is returned as is.
 */
static int show_privmem(int fd)
{
	fputs("Pages:\n", stdout);

	return show_mem(fd);
}
/*
 * Dump a core image. The caller has already consumed the 4-byte magic, so
 * @fd is positioned at the version field of the header.
 *
 * After the version come 4 more header bytes that are skipped, followed by
 * the register record, the mm record, the mapping list and the page list.
 *
 * Returns 0 on success, 1 on a read failure, an unsupported version, or
 * when any of the section dumpers fails.
 */
static int show_core(int fd)
{
	__u32 version = 0;
	__u32 pad;
	unsigned long stack;

	if (read(fd, &version, sizeof(version)) != (ssize_t)sizeof(version)) {
		perror("Can't read image version");
		return 1;
	}
	if (version != BINFMT_IMG_VERS_0) {
		printf("Unsupported version %d\n", (int)version);
		return 1;
	}

	/* the pad */
	if (read(fd, &pad, sizeof(pad)) != (ssize_t)sizeof(pad)) {
		perror("Can't read image header");
		return 1;
	}

	printf("Showing version 0\n");

	/* Sections are stored in this order; stop at the first failure. */
	if (show_regs(fd) || show_mm(fd, &stack) ||
	    show_vmas(fd, stack) || show_privmem(fd))
		return 1;

	return 0;
}
/*
 * Print the process tree stored on @fd, one "pid: child child ..." line per
 * record.
 *
 * Each record is a struct pstree_entry followed by nr_children 32-bit
 * child pids. The temporary child list is released before moving on to the
 * next record.
 *
 * Returns 0 once end of file is reached on a record boundary, 1 on a read
 * or allocation failure.
 */
static int show_pstree(int fd)
{
	struct pstree_entry e;

	while (1) {
		__u32 *ch = NULL;
		size_t nr, bytes, i;
		ssize_t ret;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			return 0;
		if (ret != (ssize_t)sizeof(e)) {
			perror("Can't read processes entry");
			return 1;
		}

		printf("%d:", (int)e.pid);

		nr = e.nr_children;
		/* Refuse counts whose byte size would not fit in size_t. */
		if (nr > (size_t)-1 / sizeof(*ch)) {
			fprintf(stderr, "Children list is too big\n");
			return 1;
		}
		bytes = nr * sizeof(*ch);

		/* malloc(0) may legitimately return NULL, so skip it. */
		if (nr != 0) {
			ch = malloc(bytes);
			if (ch == NULL) {
				perror("Can't allocate children list");
				return 1;
			}

			ret = read(fd, ch, bytes);
			if (ret < 0 || (size_t)ret != bytes) {
				perror("Can't read children list");
				free(ch);
				return 1;
			}
		}

		for (i = 0; i < nr; i++)
			printf(" %d", (int)ch[i]);
		printf("\n");

		free(ch);
	}
}
/*
 * Print every pipe record stored on @fd.
 *
 * A record is a struct pipes_entry followed by e.bytes bytes of buffered
 * data. Write ends (O_WRONLY set in flags) must carry no data. For other
 * entries, up to 16 bytes of the data are shown as a string and the rest is
 * skipped.
 *
 * Returns 0 at end of file, 1 on a read/seek failure or a bogus record.
 */
static int show_pipes(int fd)
{
	struct pipes_entry e;
	char buf[17];

	while (1) {
		ssize_t ret;
		size_t shown;

		ret = read(fd, &e, sizeof(e));
		if (ret == 0)
			break;
		if (ret != (ssize_t)sizeof(e)) {
			perror("Can't read pipe entry");
			return 1;
		}

		/* pipeid is 32 bits wide, so it must not go through %lx. */
		printf("%d: %x %o %d ", (int)e.fd, (unsigned int)e.pipeid,
		       (unsigned int)e.flags, (int)e.bytes);

		if (e.flags & O_WRONLY) {
			printf("\n");
			if (e.bytes) {
				printf("Bogus pipe\n");
				return 1;
			}
			continue;
		}

		/*
		 * Clamp in unsigned arithmetic: a byte count of 2^31 or more
		 * must not turn into a negative length and bypass the limit.
		 * One byte of buf is kept for the terminating NUL.
		 */
		memset(buf, 0, sizeof(buf));
		shown = e.bytes;
		if (shown > sizeof(buf) - 1)
			shown = sizeof(buf) - 1;

		ret = read(fd, buf, shown);
		if (ret < 0 || (size_t)ret != shown) {
			perror("Can't read pipe data");
			return 1;
		}

		printf("\t[%s", buf);
		if (shown < e.bytes)
			printf("...");
		printf("]\n");

		/* Skip whatever part of the payload was not displayed. */
		if (lseek(fd, e.bytes - shown, SEEK_CUR) == (off_t)-1) {
			perror("Can't skip pipe data");
			return 1;
		}
	}

	return 0;
}
/*
 * Entry point: open the image named by argv[1], read its 4-byte magic and
 * hand the descriptor to the matching dumper.
 *
 * Returns the dumper's status, or 1 when the arguments are missing, the
 * file cannot be opened or read, or the magic is not recognised.
 */
int main(int argc, char **argv)
{
	__u32 type;
	int fd;
	int ret;

	if (argc < 2) {
		fprintf(stderr, "Usage: %s <image-file>\n",
			argc > 0 ? argv[0] : "show");
		return 1;
	}

	fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("Can't open");
		return 1;
	}

	/* Without a complete magic there is nothing to dispatch on. */
	if (read(fd, &type, sizeof(type)) != (ssize_t)sizeof(type)) {
		perror("Can't read magic");
		close(fd);
		return 1;
	}

	if (type == FDINFO_MAGIC)
		ret = show_fdinfo(fd);
	else if (type == PAGES_MAGIC)
		ret = show_pages(fd);
	else if (type == SHMEM_MAGIC)
		ret = show_shmem(fd);
	else if (type == PSTREE_MAGIC)
		ret = show_pstree(fd);
	else if (type == PIPES_MAGIC)
		ret = show_pipes(fd);
	else if (type == BINFMT_IMG_MAGIC)
		ret = show_core(fd);
	else {
		printf("Unknown file type 0x%x\n", type);
		ret = 1;
	}

	close(fd);
	return ret;
}
#define FDINFO_MAGIC 0x01010101
/*
 * One record of an fdinfo image (file magic FDINFO_MAGIC).
 *
 * The dump tool reads `len` bytes of name data right after each record.
 */
struct fdinfo_entry {
__u8 type; /* FDINFO_FD or FDINFO_MAP */
__u8 len; /* number of name bytes that follow the record */
__u16 flags;
__u32 pos;
__u64 addr; /* shown as a descriptor number for FDINFO_FD, as an address for FDINFO_MAP */
};
#define FDINFO_FD 1
#define FDINFO_MAP 2
#define PAGES_MAGIC 0x20202020
#define SHMEM_MAGIC 0x03300330
/*
 * One record of a shared-memory image (file magic SHMEM_MAGIC).
 * All three fields are 64 bits wide.
 */
struct shmem_entry {
__u64 start;
__u64 end;
__u64 shmid;
};
#define PSTREE_MAGIC 0x40044004
/*
 * One record of a process-tree image (file magic PSTREE_MAGIC).
 *
 * The dump tool reads nr_children 32-bit values right after each record and
 * prints them as the children of `pid`.
 */
struct pstree_entry {
__u32 pid;
__u32 nr_children; /* number of 32-bit child ids that follow */
};
#define PIPES_MAGIC 0x05055050
/*
 * One record of a pipes image (file magic PIPES_MAGIC).
 *
 * The dump tool expects `bytes` bytes of buffered pipe data after each
 * record, and treats a record with O_WRONLY set in `flags` and a non-zero
 * `bytes` as bogus.
 */
struct pipes_entry {
__u32 fd;
__u32 pipeid;
__u32 flags; /* tested against O_WRONLY by the dump tool */
__u32 bytes; /* length of the data that follows the record */
};
Previous version of C/R -- uses an in-kernel dumper/restorer.
It is kept here for reference only and is not used by crtools itself.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment