Commit b50ee4a1 authored by Cyrill Gorcunov's avatar Cyrill Gorcunov

Update kernel area

Signed-off-by: 's avatarCyrill Gorcunov <gorcunov@gmail.com>
parent 83c209ef
crtools
=======
An utility to to checkpoint/restore tasks.
A utility to checkpoint/restore tasks.
Some code snippets are borrowed from
......@@ -13,3 +13,20 @@ Some code snippets are borrowed from
Many thanks to these projects.
Licensed under GPLv2 (http://www.gnu.org/licenses/gpl-2.0.txt)
Kernel patching
===============
To have crtools up and running either
1) use patches from kernel/ directory
2) or clone git://github.com/cyrillos/linux-2.6.git
and switch to branch "crtools".
It's based on Linux
| commit 1ea6b8f48918282bdca0b32a34095504ee65bab5
| Author: Linus Torvalds <torvalds@linux-foundation.org>
| Date: Mon Nov 7 16:16:02 2011 -0800
|
| Linux 3.2-rc1
fs, proc: Make proc_get_link to use dentry instead of inode
From fc4504ee8f471ac1ac8162ec68e98f2c09d53411 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 14:57:10 +0400
Subject: [PATCH 1/4] fs, proc: Make proc_get_link to use dentry instead of
inode
This patch prepares the ground for the next "map_files"
patch which needs a name of a link file to analyse.
......@@ -16,11 +20,11 @@ CC: Andrew Morton <akpm@linux-foundation.org>
include/linux/proc_fs.h | 2 +-
2 files changed, 11 insertions(+), 11 deletions(-)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -165,9 +165,9 @@ static int get_task_root(struct task_str
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2db1bd3..93c81aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -165,9 +165,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
return result;
}
......@@ -32,7 +36,7 @@ Index: linux-2.6.git/fs/proc/base.c
int result = -ENOENT;
if (task) {
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *i
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
return result;
}
......@@ -44,7 +48,7 @@ Index: linux-2.6.git/fs/proc/base.c
int result = -ENOENT;
if (task) {
@@ -1580,13 +1580,13 @@ static const struct file_operations proc
@@ -1567,13 +1567,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
.release = single_release,
};
......@@ -60,7 +64,7 @@ Index: linux-2.6.git/fs/proc/base.c
if (!task)
return -ENOENT;
mm = get_task_mm(task);
@@ -1616,7 +1616,7 @@ static void *proc_pid_follow_link(struct
@@ -1603,7 +1603,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
if (!proc_fd_access_allowed(inode))
goto out;
......@@ -69,7 +73,7 @@ Index: linux-2.6.git/fs/proc/base.c
out:
return ERR_PTR(error);
}
@@ -1655,7 +1655,7 @@ static int proc_pid_readlink(struct dent
@@ -1642,7 +1642,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
if (!proc_fd_access_allowed(inode))
goto out;
......@@ -78,7 +82,7 @@ Index: linux-2.6.git/fs/proc/base.c
if (error)
goto out;
@@ -1959,9 +1959,9 @@ out_task:
@@ -1980,9 +1980,9 @@ out_task:
return rc;
}
......@@ -90,11 +94,11 @@ Index: linux-2.6.git/fs/proc/base.c
}
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
Index: linux-2.6.git/include/linux/proc_fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/proc_fs.h
+++ linux-2.6.git/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations u
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 643b96c..c3d11ff 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations;
extern const struct proc_ns_operations ipcns_operations;
union proc_op {
......@@ -103,3 +107,6 @@ Index: linux-2.6.git/include/linux/proc_fs.h
int (*proc_read)(struct task_struct *task, char *page);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
--
1.7.6.4
fs, proc: Introduce the /proc/<pid>/map_files/ directory v14
From d23bde31590a7679aa2be7960848b0fedd0ce032 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:58:01 +0400
Subject: [PATCH 2/4] fs, proc: Introduce the /proc/<pid>/map_files/ directory
v14
This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks
one for each mapping with file, the name of a symlink is "vma->vm_start-vma->vm_end",
......@@ -115,14 +117,14 @@ CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Machek <pavel@ucw.cz>
---
fs/proc/base.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 12 +
2 files changed, 357 insertions(+)
fs/proc/base.c | 345 ++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 12 ++
2 files changed, 357 insertions(+), 0 deletions(-)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 93c81aa..9b7a9cd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -83,6 +83,7 @@
#include <linux/pid_namespace.h>
#include <linux/fs_struct.h>
......@@ -140,7 +142,7 @@ Index: linux-2.6.git/fs/proc/base.c
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
@@ -2201,6 +2204,347 @@ static const struct file_operations proc
@@ -2217,6 +2220,347 @@ static const struct file_operations proc_fd_operations = {
};
/*
......@@ -488,7 +490,7 @@ Index: linux-2.6.git/fs/proc/base.c
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
@@ -2815,6 +3159,7 @@ static const struct inode_operations pro
@@ -2832,6 +3176,7 @@ static const struct inode_operations proc_task_inode_operations;
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
......@@ -496,11 +498,11 @@ Index: linux-2.6.git/fs/proc/base.c
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
Index: linux-2.6.git/include/linux/mm.h
===================================================================
--- linux-2.6.git.orig/include/linux/mm.h
+++ linux-2.6.git/include/linux/mm.h
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(st
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c..14159d3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
......@@ -519,3 +521,6 @@ Index: linux-2.6.git/include/linux/mm.h
#ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(unsigned long vm_flags);
#else
--
1.7.6.4
proc: Introduce the Children: line in /proc/<pid>/status
From 9e489dbc4f796b76adb4440ccf4888d934ede61d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:59:40 +0400
Subject: [PATCH 3/4] fs, proc: Introduce the Children: line in
/proc/<pid>/status
Although we can get the pids of some task's issue, this is just
Although we can get the pids of some task's issue, this is just
more convenient to have them this way.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
......@@ -10,13 +12,13 @@ Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/proc/array.c | 14 ++++++++++++++
1 file changed, 14 insertions(+)
1 files changed, 14 insertions(+), 0 deletions(-)
Index: linux-2.6.git/fs/proc/array.c
===================================================================
--- linux-2.6.git.orig/fs/proc/array.c
+++ linux-2.6.git/fs/proc/array.c
@@ -158,6 +158,18 @@ static inline const char *get_task_state
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd..8f33329 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -158,6 +158,18 @@ static inline const char *get_task_state(struct task_struct *tsk)
return *p;
}
......@@ -35,7 +37,7 @@ Index: linux-2.6.git/fs/proc/array.c
static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *p)
{
@@ -192,6 +204,8 @@ static inline void task_state(struct seq
@@ -192,6 +204,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns,
cred->uid, cred->euid, cred->suid, cred->fsuid,
cred->gid, cred->egid, cred->sgid, cred->fsgid);
......@@ -44,3 +46,6 @@ Index: linux-2.6.git/fs/proc/array.c
task_lock(p);
if (p->files)
fdt = files_fdtable(p->files);
--
1.7.6.4
fs, proc: Add start_data, end_data, start_brk members to /proc/$pid/stat
From e46fc1fa01faea36ad4c5608436f5900e66c9529 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 15:00:56 +0400
Subject: [PATCH 4/4] fs, proc: Add start_data, end_data, start_brk members to
/proc/$pid/stat
It helps to dump and restore these mm_struct members at checkpoint/restore time.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/proc/array.c | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
1 files changed, 5 insertions(+), 2 deletions(-)
Index: linux-2.6.git/fs/proc/array.c
===================================================================
--- linux-2.6.git.orig/fs/proc/array.c
+++ linux-2.6.git/fs/proc/array.c
@@ -478,7 +478,7 @@ static int do_task_stat(struct seq_file
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 8f33329..8248682 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -478,7 +478,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
......@@ -20,7 +24,7 @@ Index: linux-2.6.git/fs/proc/array.c
pid_nr_ns(pid, ns),
tcomm,
state,
@@ -525,7 +525,10 @@ static int do_task_stat(struct seq_file
@@ -525,7 +525,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
task->policy,
(unsigned long long)delayacct_blkio_ticks(task),
cputime_to_clock_t(gtime),
......@@ -32,3 +36,6 @@ Index: linux-2.6.git/fs/proc/array.c
if (mm)
mmput(mm);
return 0;
--
1.7.6.4
elf: Add support for loading ET_CKPT files
This patch adds the ability to run so-called "checkpoint" files by
extending the ELF file format, which includes
- new Elf file type ET_CKPT
- three additional program header types PT_CKPT_VMA, PT_CKPT_CORE
and PT_CKPT_PAGES.
PT_CKPT_VMA -- holds a 'vma_entry' structure, which describes the
memory area the kernel should map. It may also contain a file descriptor,
in which case the kernel maps the file provided. Usually such a file gets
opened by a user-space helper tool which prepares the 'vma_entry' structure
for the kernel.
PT_CKPT_CORE -- 'core_entry' structure (registers, tls, tasks specific
settings). The structure is defined as a 16K container which should be
enough for most cases. 8K of it is reserved for arch specific settings.
PT_CKPT_PAGES -- the set of all pages whose contents should be restored.
Apart from the ELF extension, flush_old_exec() has been split into two
functions -- the former flush_old_exec() and flush_exec_keep_thread().
The latter doesn't call de_thread(), which allows thread relationships
to be kept. Also an arch_setup_additional_pages_at() helper is added
to set up the vdso at a predefined address.
At the moment only the pure x86-64 architecture is supported.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
CC: Andrew Vagin <avagin@parallels.com>
CC: Pavel Emelyanov <xemul@parallels.com>
CC: James Bottomley <jbottomley@parallels.com>
CC: Glauber Costa <glommer@parallels.com>
CC: H. Peter Anvin <hpa@zytor.com>
CC: Ingo Molnar <mingo@elte.hu>
CC: Tejun Heo <tj@kernel.org>
CC: Dave Hansen <dave@linux.vnet.ibm.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Daniel Lezcano <dlezcano@fr.ibm.com>
CC: Alexey Dobriyan <adobriyan@gmail.com>
---
arch/x86/include/asm/elf.h | 3
arch/x86/include/asm/elf_ckpt.h | 80 ++++++++
arch/x86/kernel/Makefile | 2
arch/x86/kernel/elf_ckpt.c | 161 ++++++++++++++++++
arch/x86/vdso/vma.c | 22 ++
fs/Kconfig.binfmt | 11 +
fs/Makefile | 1
fs/binfmt_elf.c | 17 +
fs/binfmt_elf_ckpt.c | 356 ++++++++++++++++++++++++++++++++++++++++
fs/exec.c | 27 +--
include/linux/binfmts.h | 1
include/linux/elf_ckpt.h | 103 +++++++++++
12 files changed, 772 insertions(+), 12 deletions(-)
Index: linux-2.6.git/arch/x86/include/asm/elf.h
===================================================================
--- linux-2.6.git.orig/arch/x86/include/asm/elf.h
+++ linux-2.6.git/arch/x86/include/asm/elf.h
@@ -314,7 +314,8 @@ struct linux_binprm;
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
-
+extern int arch_setup_additional_pages_at(struct linux_binprm *bprm,
+ void *addr, int uses_interp);
extern int syscall32_setup_pages(struct linux_binprm *, int exstack);
#define compat_arch_setup_additional_pages syscall32_setup_pages
Index: linux-2.6.git/arch/x86/include/asm/elf_ckpt.h
===================================================================
--- /dev/null
+++ linux-2.6.git/arch/x86/include/asm/elf_ckpt.h
@@ -0,0 +1,80 @@
+#ifndef _LINUX_ELF_X86_CHECKPOINT_H
+#define _LINUX_ELF_X86_CHECKPOINT_H
+
+#include <linux/errno.h>
+
+#include <asm/types.h>
+#include <asm/ptrace.h>
+
+#define CKPT_GDT_ENTRY_TLS_ENTRIES 3
+
+/*
+ * General purpose register image as stored in a checkpoint file.
+ *
+ * load_elf_ckpt_arch() copies each member, by name, into a
+ * struct user_regs_struct and applies it with PTRACE_SETREGS, so the
+ * member names here have to match the ones in that structure.
+ * The structure is packed and built from fixed-width types only.
+ */
+struct user_regs_entry {
+ __u64 r15;
+ __u64 r14;
+ __u64 r13;
+ __u64 r12;
+ __u64 bp;
+ __u64 bx;
+ __u64 r11;
+ __u64 r10;
+ __u64 r9;
+ __u64 r8;
+ __u64 ax;
+ __u64 cx;
+ __u64 dx;
+ __u64 si;
+ __u64 di;
+ __u64 orig_ax;
+ __u64 ip;
+ __u64 cs;
+ __u64 flags;
+ __u64 sp;
+ __u64 ss;
+ __u64 fs_base;
+ __u64 gs_base;
+ __u64 ds;
+ __u64 es;
+ __u64 fs;
+ __u64 gs;
+} __packed;
+
+/*
+ * Raw image of one descriptor as a pair of 32-bit words.
+ * NOTE(review): presumably the checkpoint-file counterpart of the
+ * kernel's struct desc_struct (its 'a'/'b' halves) -- confirm.
+ */
+struct desc_struct_entry {
+ __u32 a;
+ __u32 b;
+} __packed;
+
+/*
+ * FPU/SSE register image as stored in a checkpoint file.
+ *
+ * When the saved task flags contain PF_USED_MATH, load_elf_ckpt_arch()
+ * copies every member except 'padding' into a struct user_i387_struct
+ * and applies it with PTRACE_SETFPREGS; member names therefore have to
+ * match that structure.
+ */
+struct user_fpregs_entry {
+ __u16 cwd;
+ __u16 swd;
+ __u16 twd;
+ __u16 fop;
+ __u64 rip;
+ __u64 rdp;
+ __u32 mxcsr;
+ __u32 mxcsr_mask;
+ __u32 st_space[32];
+ __u32 xmm_space[64];
+ __u32 padding[24];
+} __packed;
+
+/*
+ * x86 specific part of the checkpoint core entry: general purpose
+ * registers, FPU state and the TLS descriptors of the task.
+ *
+ * The TLS slots use the fixed-layout desc_struct_entry defined above
+ * rather than the kernel-internal struct desc_struct, so that this
+ * header is self-contained and the file layout does not depend on a
+ * kernel type.  Consumers only access the 'a' and 'b' words.
+ */
+struct ckpt_arch_entry {
+	struct user_regs_entry		gpregs;
+	struct user_fpregs_entry	fpregs;
+	struct desc_struct_entry	tls_array[CKPT_GDT_ENTRY_TLS_ENTRIES];
+};
+
+/* Only pointers to the core entry are needed in this header */
+struct core_entry;
+
+/*
+ * load_elf_ckpt_arch - apply the architecture specific part of a
+ * checkpoint core entry (registers and friends).
+ *
+ * With CONFIG_X86_64 the real implementation lives in
+ * arch/x86/kernel/elf_ckpt.c; any other configuration gets an inline
+ * stub that refuses the file with -ENOEXEC.
+ */
+#ifdef CONFIG_X86_64
+extern int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
+ struct core_entry *core_entry);
+#else
+static inline int
+load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
+ struct core_entry *core_entry)
+{
+ return -ENOEXEC;
+}
+#endif
+
+#endif /* _LINUX_ELF_X86_CHECKPOINT_H */
Index: linux-2.6.git/arch/x86/kernel/Makefile
===================================================================
--- linux-2.6.git.orig/arch/x86/kernel/Makefile
+++ linux-2.6.git/arch/x86/kernel/Makefile
@@ -99,6 +99,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o
obj-$(CONFIG_OF) += devicetree.o
+obj-$(CONFIG_BINFMT_ELF_CKPT) += elf_ckpt.o
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
Index: linux-2.6.git/arch/x86/kernel/elf_ckpt.c
===================================================================
--- /dev/null
+++ linux-2.6.git/arch/x86/kernel/elf_ckpt.c
@@ -0,0 +1,161 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/binfmts.h>
+#include <linux/string.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/personality.h>
+#include <linux/elfcore.h>
+#include <linux/init.h>
+#include <linux/highuid.h>
+#include <linux/compiler.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/security.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <linux/utsname.h>
+#include <linux/coredump.h>
+#include <linux/regset.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+#include <asm/page.h>
+#include <asm/prctl.h>
+#include <asm/proto.h>
+#include <asm/i387.h>
+
+#include <linux/elf_ckpt.h>
+#include <linux/flex_array.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+#ifdef CONFIG_X86_64
+
+#define cp_reg(d, s, r) d.r = s.r
+
+/*
+ * Hand a kernel-resident register block to arch_ptrace() on behalf of
+ * the current task.  The address limit is switched to KERNEL_DS for
+ * the duration of the call and put back afterwards.
+ */
+static int ckpt_ptrace_set_current(long request, void *kbuf)
+{
+	mm_segment_t saved_fs = get_fs();
+	int err;
+
+	set_fs(KERNEL_DS);
+	err = arch_ptrace(current, request, 0, (unsigned long)kbuf);
+	set_fs(saved_fs);
+
+	return err;
+}
+
+/*
+ * load_elf_ckpt_arch - restore x86-64 register state from a core entry.
+ * @tsk:        not referenced; everything is applied to 'current'
+ * @regs:       receives a copy of current's pt_regs once the general
+ *              purpose registers have been set
+ * @core_entry: checkpoint core entry, its 'arch' area is interpreted
+ *              as struct ckpt_arch_entry
+ *
+ * Returns -ENOEXEC if the header is not marked as x86-64, otherwise
+ * the result of the last arch_ptrace()/do_arch_prctl() call made.
+ */
+int load_elf_ckpt_arch(struct task_struct *tsk, struct pt_regs *regs,
+		       struct core_entry *core_entry)
+{
+	struct ckpt_arch_entry *arch = (struct ckpt_arch_entry *)core_entry->arch;
+	struct thread_struct *thread = &current->thread;
+	struct user_regs_struct gpregs;
+	struct user_i387_struct fpregs;
+	int idx, err;
+
+	/* Compile-time layout checks, no runtime cost */
+	BUILD_BUG_ON(CKPT_GDT_ENTRY_TLS_ENTRIES != GDT_ENTRY_TLS_ENTRIES);
+	BUILD_BUG_ON(sizeof(struct ckpt_arch_entry) > CKPT_ARCH_SIZE);
+
+	if (core_entry->header.arch != CKPT_HEADER_ARCH_X86_64) {
+		pr_err("elf-ckpt-x86: Unsupported or corrupted header\n");
+		return -ENOEXEC;
+	}
+
+	memset(&gpregs, 0, sizeof(gpregs));
+	memset(&fpregs, 0, sizeof(fpregs));
+
+	/* Translate the saved general purpose registers member by member */
+	cp_reg(gpregs, arch->gpregs, r15);
+	cp_reg(gpregs, arch->gpregs, r14);
+	cp_reg(gpregs, arch->gpregs, r13);
+	cp_reg(gpregs, arch->gpregs, r12);
+	cp_reg(gpregs, arch->gpregs, bp);
+	cp_reg(gpregs, arch->gpregs, bx);
+	cp_reg(gpregs, arch->gpregs, r11);
+	cp_reg(gpregs, arch->gpregs, r10);
+	cp_reg(gpregs, arch->gpregs, r9);
+	cp_reg(gpregs, arch->gpregs, r8);
+	cp_reg(gpregs, arch->gpregs, ax);
+	cp_reg(gpregs, arch->gpregs, cx);
+	cp_reg(gpregs, arch->gpregs, dx);
+	cp_reg(gpregs, arch->gpregs, si);
+	cp_reg(gpregs, arch->gpregs, di);
+	cp_reg(gpregs, arch->gpregs, orig_ax);
+	cp_reg(gpregs, arch->gpregs, ip);
+	cp_reg(gpregs, arch->gpregs, cs);
+	cp_reg(gpregs, arch->gpregs, flags);
+	cp_reg(gpregs, arch->gpregs, sp);
+	cp_reg(gpregs, arch->gpregs, ss);
+	cp_reg(gpregs, arch->gpregs, fs_base);
+	cp_reg(gpregs, arch->gpregs, gs_base);
+	cp_reg(gpregs, arch->gpregs, ds);
+	cp_reg(gpregs, arch->gpregs, es);
+	cp_reg(gpregs, arch->gpregs, fs);
+	cp_reg(gpregs, arch->gpregs, gs);
+
+	err = ckpt_ptrace_set_current(PTRACE_SETREGS, &gpregs);
+	if (err)
+		return err;
+
+	/* Give the caller the frame that PTRACE_SETREGS just produced */
+	*regs = *task_pt_regs(current);
+
+	thread->usersp	= arch->gpregs.sp;
+	thread->ds	= arch->gpregs.ds;
+	thread->es	= arch->gpregs.es;
+	thread->fs	= arch->gpregs.fs;
+	thread->gs	= arch->gpregs.gs;
+
+	thread->fsindex	= thread->fs;
+	thread->gsindex	= thread->gs;
+
+	for (idx = 0; idx < GDT_ENTRY_TLS_ENTRIES; idx++) {
+		thread->tls_array[idx].a = arch->tls_array[idx].a;
+		thread->tls_array[idx].b = arch->tls_array[idx].b;
+	}
+
+	/* Segment bases are only touched when a non-zero value was saved */
+	if (arch->gpregs.fs_base) {
+		err = do_arch_prctl(current, ARCH_SET_FS, arch->gpregs.fs_base);
+		if (err)
+			return err;
+	}
+
+	if (arch->gpregs.gs_base) {
+		err = do_arch_prctl(current, ARCH_SET_GS, arch->gpregs.gs_base);
+		if (err)
+			return err;
+	}
+
+	/* No FPU state to restore unless the task had used math */
+	if (!(core_entry->task_flags & PF_USED_MATH))
+		return err;
+
+	cp_reg(fpregs, arch->fpregs, cwd);
+	cp_reg(fpregs, arch->fpregs, swd);
+	cp_reg(fpregs, arch->fpregs, twd);
+	cp_reg(fpregs, arch->fpregs, fop);
+	cp_reg(fpregs, arch->fpregs, rip);
+	cp_reg(fpregs, arch->fpregs, rdp);
+	cp_reg(fpregs, arch->fpregs, mxcsr);
+	cp_reg(fpregs, arch->fpregs, mxcsr_mask);
+
+	for (idx = 0; idx < ARRAY_SIZE(arch->fpregs.st_space); idx++)
+		cp_reg(fpregs, arch->fpregs, st_space[idx]);
+
+	for (idx = 0; idx < ARRAY_SIZE(arch->fpregs.xmm_space); idx++)
+		cp_reg(fpregs, arch->fpregs, xmm_space[idx]);
+
+	err = ckpt_ptrace_set_current(PTRACE_SETFPREGS, &fpregs);
+
+	return err;
+}
+
+#endif /* CONFIG_X86_64 */
Index: linux-2.6.git/arch/x86/vdso/vma.c
===================================================================
--- linux-2.6.git.orig/arch/x86/vdso/vma.c
+++ linux-2.6.git/arch/x86/vdso/vma.c
@@ -137,6 +137,28 @@ up_fail:
return ret;
}
+/*
+ * Install the vDSO of the current task at the caller supplied address
+ * @addr.  @bprm and @uses_interp are not referenced.
+ *
+ * Returns 0 when the vDSO is disabled or the mapping was installed,
+ * otherwise the error reported by install_special_mapping().
+ */
+int arch_setup_additional_pages_at(struct linux_binprm *bprm, void *addr, int uses_interp)
+{
+	struct mm_struct *mm = current->mm;
+	unsigned long vm_flags;
+	int err;
+
+	if (!vdso_enabled)
+		return 0;
+
+	vm_flags = VM_READ | VM_EXEC |
+		   VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+		   VM_ALWAYSDUMP;
+
+	down_write(&mm->mmap_sem);
+
+	/* Record the address first and roll it back if mapping fails */
+	mm->context.vdso = addr;
+	err = install_special_mapping(mm, (unsigned long)addr, vdso_size,
+				      vm_flags, vdso_pages);
+	if (err)
+		mm->context.vdso = NULL;
+
+	up_write(&mm->mmap_sem);
+
+	return err;
+}
+
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
Index: linux-2.6.git/fs/Kconfig.binfmt
===================================================================
--- linux-2.6.git.orig/fs/Kconfig.binfmt
+++ linux-2.6.git/fs/Kconfig.binfmt
@@ -23,6 +23,17 @@ config BINFMT_ELF
ld.so (check the file <file:Documentation/Changes> for location and
latest version).
+# Must be bool, not tristate: fs/binfmt_elf.c hooks the loader in under
+# "#ifdef CONFIG_BINFMT_ELF_CKPT" and calls load_elf_ckpt() directly,
+# so a modular (=m) build would leave the hook compiled out.
+config BINFMT_ELF_CKPT
+	bool "Kernel support for CKPT ELF binaries"
+	default n
+	depends on BINFMT_ELF && X86_64
+	help
+	  ELF CKPT (checkpoint) is an extension to ELF format to restore
+	  checkpointed processes. It's not confirmed yet and highly
+	  experimental.
+
+	  If unsure, say N.
+
config COMPAT_BINFMT_ELF
bool
depends on COMPAT && BINFMT_ELF
Index: linux-2.6.git/fs/Makefile
===================================================================
--- linux-2.6.git.orig/fs/Makefile
+++ linux-2.6.git/fs/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_BINFMT_MISC) += binfmt_misc
obj-y += binfmt_script.o
obj-$(CONFIG_BINFMT_ELF) += binfmt_elf.o
+obj-$(CONFIG_BINFMT_ELF_CKPT) += binfmt_elf_ckpt.o
obj-$(CONFIG_COMPAT_BINFMT_ELF) += compat_binfmt_elf.o
obj-$(CONFIG_BINFMT_ELF_FDPIC) += binfmt_elf_fdpic.o
obj-$(CONFIG_BINFMT_SOM) += binfmt_som.o
Index: linux-2.6.git/fs/binfmt_elf.c
===================================================================
--- linux-2.6.git.orig/fs/binfmt_elf.c
+++ linux-2.6.git/fs/binfmt_elf.c
@@ -30,6 +30,7 @@
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
+#include <linux/elf_ckpt.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
@@ -592,7 +593,11 @@ static int load_elf_binary(struct linux_
if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
goto out;
- if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
+ if (loc->elf_ex.e_type != ET_EXEC &&
+#ifdef CONFIG_BINFMT_ELF_CKPT
+ loc->elf_ex.e_type != ET_CKPT &&
+#endif
+ loc->elf_ex.e_type != ET_DYN)
goto out;
if (!elf_check_arch(&loc->elf_ex))
goto out;
@@ -619,6 +624,16 @@ static int load_elf_binary(struct linux_
goto out_free_ph;
}
+#ifdef CONFIG_BINFMT_ELF_CKPT
+ if (loc->elf_ex.e_type == ET_CKPT) {
+ retval = load_elf_ckpt(bprm, regs, &loc->elf_ex,
+ (struct elf_phdr *)elf_phdata);
+ if (!retval)
+ set_binfmt(&elf_format);
+ goto out_free_ph;
+ }
+#endif
+
elf_ppnt = elf_phdata;
elf_bss = 0;
elf_brk = 0;
Index: linux-2.6.git/fs/binfmt_elf_ckpt.c
===================================================================
--- /dev/null
+++ linux-2.6.git/fs/binfmt_elf_ckpt.c
@@ -0,0 +1,356 @@
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/binfmts.h>
+#include <linux/string.h>
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/personality.h>
+#include <linux/elfcore.h>
+#include <linux/init.h>
+#include <linux/highuid.h>
+#include <linux/compiler.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/security.h>
+#include <linux/random.h>
+#include <linux/elf.h>
+#include <linux/utsname.h>
+#include <linux/coredump.h>
+#include <linux/regset.h>
+
+#include <asm/uaccess.h>
+#include <asm/param.h>
+#include <asm/page.h>
+#include <asm/prctl.h>
+#include <asm/proto.h>
+#include <asm/i387.h>
+
+#include <linux/elf_ckpt.h>
+#include <asm/elf_ckpt.h>
+
+#include <linux/flex_array.h>
+#include <asm/tlbflush.h>
+#include <asm/desc.h>
+
+/*
+ * load_elf_ckpt - restore a task image from an ET_CKPT ELF file.
+ * @bprm:     binprm of the checkpoint file being executed
+ * @regs:     register frame, filled in by load_elf_ckpt_arch()
+ * @elf_ex:   ELF header of the file
+ * @elf_phdr: program header table with elf_ex->e_phnum entries
+ *
+ * The function
+ *  1) reads and validates the PT_CKPT_CORE entry,
+ *  2) flushes the old executable state while keeping the thread group,
+ *  3) collects all PT_CKPT_VMA entries and locates PT_CKPT_PAGES,
+ *  4) maps every regular VMA at its recorded address and installs the
+ *     vdso (if the architecture has one),
+ *  5) fills the pages listed in PT_CKPT_PAGES,
+ *  6) lets the architecture code restore registers.
+ *
+ * Returns 0 on success or a negative errno.  Once the old address
+ * space has been flushed there is nothing to return to, so any later
+ * failure additionally sends SIGKILL to the current task.
+ */
+int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
+		  struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
+{
+	struct elf_phdr *elf_phdr_pages;
+	struct flex_array *fa = NULL;
+	struct vma_entry *vma_entry_ptr;
+	int nr_vma_found, nr_vma_mapped;
+	struct vma_entry vma_entry;
+	struct file *file = NULL;
+	unsigned long map_addr;
+	unsigned int fd = 0;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	unsigned long vdso = -1UL;
+#endif
+
+	struct core_entry *core_entry = NULL;
+	unsigned long start_stack = -1UL;
+
+	int i, ret = -ENOEXEC;
+	loff_t off;
+
+	BUILD_BUG_ON(CKPT_TASK_COMM_LEN != TASK_COMM_LEN);
+	BUILD_BUG_ON(CKPT_PAGE_SIZE != PAGE_SIZE);
+	BUILD_BUG_ON(CKPT_CORE_SIZE != sizeof(*core_entry));
+
+	elf_phdr_pages = NULL;
+	nr_vma_found = 0;
+	nr_vma_mapped = 0;
+
+	/*
+	 * An early check for header version so if we fail here
+	 * we would not need to use flex array at all.
+	 */
+	for (i = 0; i < elf_ex->e_phnum; i++) {
+		if (elf_phdr[i].p_type != PT_CKPT_CORE)
+			continue;
+
+		core_entry = vmalloc(sizeof(*core_entry));
+		if (!core_entry) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		ret = kernel_read(bprm->file, elf_phdr[i].p_offset,
+				  (char *)core_entry, sizeof(*core_entry));
+		if (ret != sizeof(*core_entry)) {
+			pr_err("elf-ckpt: Can't read core_entry\n");
+			ret = -EIO;
+			goto out;
+		}
+
+		if (core_entry->header.version != CKPT_HEADER_VERSION) {
+			pr_err("elf-ckpt: Unsupported or corrupted header\n");
+			ret = -ENOEXEC;
+			goto out;
+		}
+
+		break;
+	}
+
+	if (i == elf_ex->e_phnum) {
+		pr_err("elf-ckpt: No header found\n");
+		ret = -ENOEXEC;
+		goto out;
+	}
+
+	/*
+	 * One slot per program header is an upper bound for the number
+	 * of VMA entries.  Both failure modes must bail out; the 'out'
+	 * label releases a partially set up array.
+	 */
+	fa = flex_array_alloc(sizeof(vma_entry), elf_ex->e_phnum, GFP_KERNEL);
+	if (!fa) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (flex_array_prealloc(fa, 0, elf_ex->e_phnum, GFP_KERNEL)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = flush_exec_keep_thread(bprm);
+	if (ret)
+		goto out;
+
+	/*
+	 * Point of no return: the old address space is gone, so every
+	 * failure from here on has to go through out_unmap, which kills
+	 * the task instead of returning into a destroyed image.
+	 */
+
+	current->flags &= ~PF_FORKNOEXEC;
+	current->mm->def_flags = 0;
+
+	/*
+	 * We don't care about parameters passed (such as argc, argv, env)
+	 * when executing a checkpoint file because we're going to
+	 * substitute everything anyway.
+	 */
+	down_write(&current->mm->mmap_sem);
+	do_munmap(current->mm, 0, TASK_SIZE);
+	up_write(&current->mm->mmap_sem);
+
+	SET_PERSONALITY(*elf_ex);
+
+	for (i = 0; i < elf_ex->e_phnum; i++) {
+
+		switch (elf_phdr[i].p_type) {
+		case PT_CKPT_VMA:
+			ret = kernel_read(bprm->file, elf_phdr[i].p_offset,
+					  (char *)&vma_entry, sizeof(vma_entry));
+			if (ret != sizeof(vma_entry)) {
+				pr_err("elf-ckpt: Can't read vma_entry\n");
+				ret = -EIO;
+				goto out_unmap;
+			}
+
+			/*
+			 * Store densely: the loops below walk the array
+			 * as 0..nr_vma_found-1, not by program header
+			 * index.  Slots were preallocated, so a failure
+			 * here would be a kernel bug.
+			 */
+			if (flex_array_put(fa, nr_vma_found, &vma_entry, GFP_KERNEL))
+				BUG();
+
+			/*
+			 * We need to know if there is executable stack.
+			 * NOTE(review): PROT_EXEC is tested against
+			 * 'flags' although the entry also carries a
+			 * 'prot' member -- confirm which one is meant.
+			 */
+			if (vma_entry.status & VMA_AREA_STACK) {
+				if (vma_entry.flags & PROT_EXEC)
+					current->personality |= READ_IMPLIES_EXEC;
+			}
+
+			nr_vma_found++;
+			break;
+		case PT_CKPT_PAGES:
+			elf_phdr_pages = &elf_phdr[i];
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Be sure it has the file structure we expected to see. */
+	if (!elf_phdr_pages || !nr_vma_found) {
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+
+	/*
+	 * VMA randomization still needs to be set (just in case if
+	 * the program we restore will exec() something else later).
+	 */
+	if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+		current->flags |= PF_RANDOMIZE;
+
+	/*
+	 * FIXME: Note it flushes signal handlers as well,
+	 * so we need to dump queued signals and restore
+	 * them here.
+	 */
+	setup_new_exec(bprm);
+
+	current->mm->free_area_cache = current->mm->mmap_base;
+	current->mm->cached_hole_size = 0;
+
+	for (i = 0; i < nr_vma_found; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+		if (vma_entry_ptr->status & VMA_AREA_VDSO)
+			vdso = vma_entry_ptr->start;
+#endif
+
+		if (vma_entry_ptr->status & VMA_AREA_STACK) {
+			/* Note if stack is VM_GROWSUP -- it should be reversed */
+			start_stack = vma_entry_ptr->start;
+		}
+
+		/* Anything special should be ignored */
+		if (!(vma_entry_ptr->status & VMA_AREA_REGULAR))
+			continue;
+
+		/* It's a file mmap'ed */
+		if (vma_entry_ptr->fd != -1) {
+			/* Remember the descriptor, it is closed after mapping */
+			fd = (unsigned int)vma_entry_ptr->fd;
+			file = fget(fd);
+			if (!file) {
+				ret = -EBADF;
+				goto out_unmap;
+			}
+		} else
+			file = NULL;
+
+		down_write(&current->mm->mmap_sem);
+		map_addr = do_mmap(file,
+				   vma_entry_ptr->start,
+				   vma_entry_ptr->end - vma_entry_ptr->start,
+				   vma_entry_ptr->prot,
+				   vma_entry_ptr->flags | MAP_FIXED,
+				   vma_entry_ptr->pgoff);
+		up_write(&current->mm->mmap_sem);
+
+		if (file) {
+			/* The mapping holds its own reference by now */
+			fput(file);
+			do_close(fd);
+		}
+
+		if ((unsigned long)(map_addr) >= TASK_SIZE) {
+			ret = IS_ERR((void *)map_addr) ? PTR_ERR((void*)map_addr) : -EINVAL;
+			goto out_unmap;
+		}
+
+		nr_vma_mapped++;
+	}
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	if (vdso == -1UL) {
+		pr_err("elf-ckpt: Can't find VDSO address\n");
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+#endif
+
+	if (start_stack == -1UL) {
+		pr_err("elf-ckpt: Can't find stack VMA\n");
+		ret = -ENOEXEC;
+		goto out_unmap;
+	}
+
+	/* The name it had before */
+	set_task_comm(current, core_entry->task_comm);
+
+	bprm->p = core_entry->mm_start_stack;
+
+	current->mm->start_code		= core_entry->mm_start_code;
+	current->mm->end_code		= core_entry->mm_end_code;
+	current->mm->start_data		= core_entry->mm_start_data;
+	current->mm->end_data		= core_entry->mm_end_data;
+	current->mm->start_stack	= core_entry->mm_start_stack;
+	current->mm->start_brk		= core_entry->mm_start_brk;
+	current->mm->brk		= core_entry->mm_brk;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+	ret = arch_setup_additional_pages_at(bprm, (void *)vdso, 0);
+	if (ret) {
+		pr_err("elf-ckpt: Can't setup additional pages at %lx with %d\n",
+		       vdso, ret);
+		goto out_unmap;
+	}
+#endif
+
+	/*
+	 * Restore pages.  The section is a sequence of records, each an
+	 * 8 byte virtual address followed by PAGE_SIZE bytes of data;
+	 * a zero address terminates the sequence.
+	 */
+	off = elf_phdr_pages->p_offset;
+	while (1) {
+		struct vm_area_struct *vma;
+		struct page *page;
+		void *page_data;
+		__u64 va;
+
+		ret = kernel_read(bprm->file, off, (char *)&va, sizeof(va));
+		if (ret != sizeof(va)) {
+			pr_err("elf-ckpt: Can't read page virtual address: "
+			       "ret = %d off = %lx\n", ret, (unsigned long)off);
+			ret = -EIO;
+			goto out_unmap;
+		}
+
+		/* End of pages reached */
+		if (!va)
+			break;
+
+		vma = find_vma(current->mm, (unsigned long)va);
+		if (!vma) {
+			pr_err("elf-ckpt: No VMA for page: %16lx\n", (unsigned long)va);
+			ret = -ESRCH;
+			goto out_unmap;
+		}
+
+		ret = get_user_pages(current, current->mm, (unsigned long)va,
+				     1, 1, 1, &page, NULL);
+		if (ret != 1) {
+			pr_err("elf-ckpt: Can't get user page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		page_data = kmap(page);
+		ret = kernel_read(bprm->file, off + sizeof(va), page_data, PAGE_SIZE);
+		kunmap(page);
+		put_page(page);
+
+		if (ret != PAGE_SIZE) {
+			pr_err("elf-ckpt: Can't read data on page: %16lx\n", (unsigned long)va);
+			ret = -EFAULT;
+			goto out_unmap;
+		}
+
+		off += sizeof(va) + PAGE_SIZE;
+	}
+
+	/*
+	 * Architecture specific setup for registers and friends.
+	 * It is done last: if anything failed before, there is not
+	 * much point in setting up arch-specific state at all.
+	 */
+	ret = load_elf_ckpt_arch(current, regs, core_entry);
+	if (ret)
+		goto out_unmap;
+
+	/* We're done */
+	ret = 0;
+out:
+	if (core_entry)
+		vfree(core_entry);
+
+	if (fa)
+		flex_array_free(fa);
+	return ret;
+
+out_unmap:
+	/*
+	 * Best-effort rollback before the task is killed.
+	 * NOTE(review): this walks the first nr_vma_mapped entries, which
+	 * are not necessarily the ones that were mapped when non-regular
+	 * entries were skipped in between; harmless since the task gets
+	 * SIGKILL right below, but worth confirming.
+	 */
+	for (i = 0; i < nr_vma_mapped; i++) {
+		vma_entry_ptr = flex_array_get(fa, i);
+		down_write(&current->mm->mmap_sem);
+		do_munmap(current->mm, vma_entry_ptr->start,
+			  vma_entry_ptr->end - vma_entry_ptr->start);
+		up_write(&current->mm->mmap_sem);
+	}
+
+	send_sig(SIGKILL, current, 0);
+	goto out;
+}
Index: linux-2.6.git/fs/exec.c
===================================================================
--- linux-2.6.git.orig/fs/exec.c
+++ linux-2.6.git/fs/exec.c
@@ -1071,18 +1071,10 @@ void set_task_comm(struct task_struct *t
perf_event_comm(tsk);
}
-int flush_old_exec(struct linux_binprm * bprm)
+int flush_exec_keep_thread(struct linux_binprm * bprm)
{
int retval;
- /*
- * Make sure we have a private signal table and that
- * we are unassociated from the previous thread group.
- */
- retval = de_thread(current);
- if (retval)
- goto out;
-
set_mm_exe_file(bprm->mm, bprm->file);
/*
@@ -1101,10 +1093,25 @@ int flush_old_exec(struct linux_binprm *
current->personality &= ~bprm->per_clear;
return 0;
-
out:
return retval;
}
+EXPORT_SYMBOL(flush_exec_keep_thread);
+
+int flush_old_exec(struct linux_binprm * bprm)
+{
+ int retval;
+
+ /*
+ * Make sure we have a private signal table and that
+ * we are unassociated from the previous thread group.
+ */
+ retval = de_thread(current);
+ if (retval)
+ return retval;
+
+ return flush_exec_keep_thread(bprm);
+}
EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
Index: linux-2.6.git/include/linux/binfmts.h
===================================================================
--- linux-2.6.git.orig/include/linux/binfmts.h
+++ linux-2.6.git/include/linux/binfmts.h
@@ -110,6 +110,7 @@ extern int prepare_binprm(struct linux_b
extern int __must_check remove_arg_zero(struct linux_binprm *);
extern int search_binary_handler(struct linux_binprm *, struct pt_regs *);
extern int flush_old_exec(struct linux_binprm * bprm);
+extern int flush_exec_keep_thread(struct linux_binprm * bprm);
extern void setup_new_exec(struct linux_binprm * bprm);
extern void would_dump(struct linux_binprm *, struct file *);
Index: linux-2.6.git/include/linux/elf_ckpt.h
===================================================================
--- /dev/null
+++ linux-2.6.git/include/linux/elf_ckpt.h
@@ -0,0 +1,103 @@
+#ifndef _LINUX_ELF_CHECKPOINT_H
+#define _LINUX_ELF_CHECKPOINT_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/elf-em.h>
+
+#include <asm/elf.h>
+#include <asm/elf_ckpt.h>
+
+/*
+ * Elf extension includes new Elf file type
+ * and program header types as well.
+ */
+#define ET_CKPT 5
+
+#define PT_CKPT_OFFSET 0x01010101
+
+#define PT_CKPT_VMA (PT_LOOS + PT_CKPT_OFFSET + 1)
+#define PT_CKPT_CORE (PT_LOOS + PT_CKPT_OFFSET + 2)
+#define PT_CKPT_PAGES (PT_LOOS + PT_CKPT_OFFSET + 3)
+
+#define CKPT_PAGE_SIZE 4096
+#define CKPT_TASK_COMM_LEN 16
+
+#define CKPT_HEADER_VERSION 1
+#define CKPT_HEADER_ARCH_X86_64 1
+
+#define VMA_AREA_REGULAR (1 << 0)
+#define VMA_AREA_STACK (1 << 1)
+#define VMA_AREA_VSYSCALL (1 << 2)
+#define VMA_AREA_VDSO (1 << 3)
+#define VMA_FORCE_READ (1 << 4)
+#define VMA_AREA_HEAP (1 << 5)
+#define VMA_FILE_PRIVATE (1 << 6)
+#define VMA_FILE_SHARED (1 << 7)
+#define VMA_ANON_SHARED (1 << 8)
+#define VMA_ANON_PRIVATE (1 << 9)
+#define VMA_FORCE_WRITE (1 << 10)
+
+struct vma_entry {
+ __u64 start;
+ __u64 end;
+ __u64 pgoff;
+ __u32 prot;
+ __u32 flags;
+ __u32 status; /* from VMA_x above */
+ __u32 pid; /* pid VMA belongs to */
+ __s64 fd;
+ __u64 ino;
+ __u32 dev_maj;
+ __u32 dev_min;
+} __packed;
+
+struct page_entry {
+ __u64 va; /* page virtual address */
+ __u8 data[CKPT_PAGE_SIZE]; /* page contents */
+} __packed;
+
+struct image_header {
+ __u16 version;
+ __u16 arch;
+ __u32 flags;
+} __packed;
+
+#define CKPT_ARCH_SIZE (2 * 4096)
+#define CKPT_CORE_SIZE (4 * 4096)
+
+struct core_entry {
+ union {
+ struct {
+ struct image_header header;
+ __u8 arch[CKPT_ARCH_SIZE]; /* should be enough for all archs */
+ __u32 task_personality;
+ __u8 task_comm[CKPT_TASK_COMM_LEN];
+ __u32 task_flags;
+ __u64 mm_start_code;
+ __u64 mm_end_code;
+ __u64 mm_start_data;
+ __u64 mm_end_data;
+ __u64 mm_start_stack;
+ __u64 mm_start_brk;
+ __u64 mm_brk;
+ };
+ __u8 __core_pad[CKPT_CORE_SIZE];
+ };
+} __packed;
+
+#ifdef CONFIG_BINFMT_ELF_CKPT
+extern int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
+ struct elfhdr *elf_ex, struct elf_phdr *elf_phdr);
+#else
+static inline int load_elf_ckpt(struct linux_binprm *bprm, struct pt_regs *regs,
+ struct elfhdr *elf_ex, struct elf_phdr *elf_phdr)
+{
+ return -ENOEXEC;
+}
+#endif
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_ELF_CHECKPOINT_H */
clone: Introduce the CLONE_CHILD_USEPID functionality
From: Pavel Emelyanov <xemul@openvz.org>
When restoring a task (or a set of tasks) we need to recreate them with
exactly the same pid as they had before. Thus we need the ability to create
a task with specified pid.
The proposal is to reuse the already free CLONE_STOPPED clone flag.
About the security implication - this can create some problems with pids
wraparound and similar, so this approach can be restricted with the "don't
allow for CLONE_CHILD_USEPID when the current pid namespace has ever done
real pid allocation". This will work perfectly for checkpoint-restore and
will not give anyone chances for screwing pids up on a living system.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/pid.h | 2 -
include/linux/sched.h | 1
kernel/fork.c | 10 ++++++-
kernel/pid.c | 70 ++++++++++++++++++++++++++++++++++++--------------
4 files changed, 62 insertions(+), 21 deletions(-)
Index: linux-2.6.git/include/linux/pid.h
===================================================================
--- linux-2.6.git.orig/include/linux/pid.h
+++ linux-2.6.git/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
-extern struct pid *alloc_pid(struct pid_namespace *ns);
+extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
extern void free_pid(struct pid *pid);
/*
Index: linux-2.6.git/include/linux/sched.h
===================================================================
--- linux-2.6.git.orig/include/linux/sched.h
+++ linux-2.6.git/include/linux/sched.h
@@ -23,6 +23,7 @@
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
and is now available for re-use. */
+#define CLONE_CHILD_USEPID 0x02000000 /* use the given pid */
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
Index: linux-2.6.git/kernel/fork.c
===================================================================
--- linux-2.6.git.orig/kernel/fork.c
+++ linux-2.6.git/kernel/fork.c
@@ -1253,8 +1253,16 @@ static struct task_struct *copy_process(
goto bad_fork_cleanup_io;
if (pid != &init_struct_pid) {
+ int want_pid = 0;
+
+ if (clone_flags & CLONE_CHILD_USEPID) {
+ retval = get_user(want_pid, child_tidptr);
+ if (retval)
+ goto bad_fork_cleanup_io;
+ }
+
retval = -ENOMEM;
- pid = alloc_pid(p->nsproxy->pid_ns);
+ pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
if (!pid)
goto bad_fork_cleanup_io;
}
Index: linux-2.6.git/kernel/pid.c
===================================================================
--- linux-2.6.git.orig/kernel/pid.c
+++ linux-2.6.git/kernel/pid.c
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_name
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
}
-static int alloc_pidmap(struct pid_namespace *pid_ns)
+static int alloc_pidmap_page(struct pidmap *map)
+{
+ if (unlikely(!map->page)) {
+ void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ /*
+ * Free the page if someone raced with us
+ * installing it:
+ */
+ spin_lock_irq(&pidmap_lock);
+ if (!map->page) {
+ map->page = page;
+ page = NULL;
+ }
+ spin_unlock_irq(&pidmap_lock);
+ kfree(page);
+ if (unlikely(!map->page))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+/*
+ * Reserve exactly @pid in the pidmap of @pid_ns.
+ *
+ * Returns @pid on success, -EINVAL if @pid lies outside (0, pid_max),
+ * -ENOMEM if the bitmap page could not be allocated, or -EBUSY if the
+ * pid is already in use.
+ */
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+	int offset;
+	struct pidmap *map;
+
+	/*
+	 * The requested pid is read from userspace (CLONE_CHILD_USEPID),
+	 * so bound it before it is used as an index into ->pidmap[].
+	 */
+	if (pid <= 0 || pid >= pid_max)
+		return -EINVAL;
+
+	offset = pid & BITS_PER_PAGE_MASK;
+	map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+	if (alloc_pidmap_page(map) < 0)
+		return -ENOMEM;
+
+	if (!test_and_set_bit(offset, map->page)) {
+		atomic_dec(&map->nr_free);
+		return pid;
+	}
+
+	return -EBUSY;
+}
+
+static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
{
int i, offset, max_scan, pid, last = pid_ns->last_pid;
struct pidmap *map;
+ if (desired_pid)
+ return set_pidmap(pid_ns, desired_pid);
+
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_names
*/
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
for (i = 0; i <= max_scan; ++i) {
- if (unlikely(!map->page)) {
- void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
- /*
- * Free the page if someone raced with us
- * installing it:
- */
- spin_lock_irq(&pidmap_lock);
- if (!map->page) {
- map->page = page;
- page = NULL;
- }
- spin_unlock_irq(&pidmap_lock);
- kfree(page);
- if (unlikely(!map->page))
- break;
- }
+ if (alloc_pidmap_page(map) < 0)
+ break;
+
if (likely(atomic_read(&map->nr_free))) {
do {
if (!test_and_set_bit(offset, map->page)) {
@@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
call_rcu(&pid->rcu, delayed_put_pid);
}
-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
{
struct pid *pid;
enum pid_type type;
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespa
tmp = ns;
for (i = ns->level; i >= 0; i--) {
- nr = alloc_pidmap(tmp);
+ nr = alloc_pidmap(tmp, this_ns_pid);
if (nr < 0)
goto out_free;
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
tmp = tmp->parent;
+ this_ns_pid = 0;
}
get_pid_ns(ns);
vfs: Add ->statfs callback for pipefs
From: Pavel Emelyanov <xemul@parallels.com>
This is done to make it possible to distinguish pipes
from fifos when opening one via /proc/<pid>/fd/ link.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/pipe.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-2.6.git/fs/pipe.c
===================================================================
--- linux-2.6.git.orig/fs/pipe.c
+++ linux-2.6.git/fs/pipe.c
@@ -1254,6 +1254,7 @@ out:
static const struct super_operations pipefs_ops = {
.destroy_inode = free_inode_nonrcu,
+ .statfs = simple_statfs,
};
/*
fs: Add do_close helper
To be able to close file descriptors right from inside
kernel space do_close() helper is added.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/open.c | 32 ++++++++++++++++++++------------
include/linux/fs.h | 1 +
2 files changed, 21 insertions(+), 12 deletions(-)
Index: linux-2.6.git/fs/open.c
===================================================================
--- linux-2.6.git.orig/fs/open.c
+++ linux-2.6.git/fs/open.c
@@ -1056,17 +1056,11 @@ int filp_close(struct file *filp, fl_own
EXPORT_SYMBOL(filp_close);
-/*
- * Careful here! We test whether the file pointer is NULL before
- * releasing the fd. This ensures that one clone task can't release
- * an fd while another clone is opening it.
- */
-SYSCALL_DEFINE1(close, unsigned int, fd)
+int do_close(unsigned int fd)
{
struct file * filp;
struct files_struct *files = current->files;
struct fdtable *fdt;
- int retval;
spin_lock(&files->file_lock);
fdt = files_fdtable(files);
@@ -1079,7 +1073,25 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
FD_CLR(fd, fdt->close_on_exec);
__put_unused_fd(files, fd);
spin_unlock(&files->file_lock);
- retval = filp_close(filp, files);
+
+ return filp_close(filp, files);
+
+out_unlock:
+ spin_unlock(&files->file_lock);
+ return -EBADF;
+}
+EXPORT_SYMBOL_GPL(do_close);
+
+/*
+ * Careful here! We test whether the file pointer is NULL before
+ * releasing the fd. This ensures that one clone task can't release
+ * an fd while another clone is opening it.
+ */
+SYSCALL_DEFINE1(close, unsigned int, fd)
+{
+ int retval;
+
+ retval = do_close(fd);
/* can't restart close syscall because file table entry was cleared */
if (unlikely(retval == -ERESTARTSYS ||
@@ -1089,10 +1101,6 @@ SYSCALL_DEFINE1(close, unsigned int, fd)
retval = -EINTR;
return retval;
-
-out_unlock:
- spin_unlock(&files->file_lock);
- return -EBADF;
}
EXPORT_SYMBOL(sys_close);
Index: linux-2.6.git/include/linux/fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/fs.h
+++ linux-2.6.git/include/linux/fs.h
@@ -2025,6 +2025,7 @@ extern struct file *file_open_root(struc
extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
+extern int do_close(unsigned int fd);
extern char * getname(const char __user *);
/* fs/ioctl.c */
fs, proc: Add /proc/$pid/tls entry
To be able to restart checkpointed tasks we need
to know TLS status at dumping time. Export this
information by /proc/$pid/tls entry.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/proc/base.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -3150,6 +3150,23 @@ static int proc_pid_personality(struct s
return err;
}
+#ifdef CONFIG_X86
+static int proc_pid_tls(struct seq_file *m, struct pid_namespace *ns,
+ struct pid *pid, struct task_struct *task)
+{
+ int err = lock_trace(task);
+ if (!err) {
+ int i;
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ seq_printf(m, "%x %x\n",
+ task->thread.tls_array[i].a,
+ task->thread.tls_array[i].b);
+ unlock_trace(task);
+ }
+ return err;
+}
+#endif
+
/*
* Thread groups
*/
@@ -3169,6 +3186,9 @@ static const struct pid_entry tgid_base_
INF("auxv", S_IRUSR, proc_pid_auxv),
ONE("status", S_IRUGO, proc_pid_status),
ONE("personality", S_IRUGO, proc_pid_personality),
+#ifdef CONFIG_X86
+ ONE("tls", S_IRUGO, proc_pid_tls),
+#endif
INF("limits", S_IRUGO, proc_pid_limits),
#ifdef CONFIG_SCHED_DEBUG
REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations),
From: Vasiliy Kulikov <segooon@gmail.com>
In the patch "proc: fix races against execve() of /proc/PID/fd**"
proc_pid_fd_link_getattr() leaked task_struct if ptrace check fails.
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Reported-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/base.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff -puN fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix fs/proc/base.c
--- a/fs/proc/base.c~proc-fix-races-against-execve-of-proc-pid-fd-fix
+++ a/fs/proc/base.c
@@ -1681,9 +1681,9 @@ static int proc_pid_fd_link_getattr(stru
generic_fillattr(inode, stat);
unlock_trace(task);
- put_task_struct(task);
rc = 0;
out_task:
+ put_task_struct(task);
return rc;
}
_
From: Vasiliy Kulikov <segoon@openwall.com>
fd* files are restricted to the task's owner, and other users may not get
direct access to them. But one may open any of these files and run any
setuid program, keeping opened file descriptors. As there are permission
checks on open(), but not on readdir() and read(), operations on the kept
file descriptors will not be checked. It makes it possible to violate
procfs permission model.
Reading fdinfo/* may disclose current fds' position and flags, reading
directory contents of fdinfo/ and fd/ may disclose the number of opened
files by the target task. This information is not sensitive per se, but it
can reveal some private information (like length of a password stored in a
file) under certain conditions.
Used existing (un)lock_trace functions to check for ptrace_may_access(),
but instead of using EPERM return code from it use EACCES to be consistent
with existing proc_pid_follow_link()/proc_pid_readlink() return code. If
they differ, attacker can guess what fds exist by analyzing stat() return
code. Patched handlers: stat() for fd/*, stat() and read() for fdinfo/*,
readdir() and lookup() for fd/ and fdinfo/.
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/base.c | 146 ++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 103 insertions(+), 43 deletions(-)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -1665,12 +1665,46 @@ out:
return error;
}
+static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct inode *inode = dentry->d_inode;
+ struct task_struct *task = get_proc_task(inode);
+ int rc;
+
+ if (task == NULL)
+ return -ESRCH;
+
+ rc = -EACCES;
+ if (lock_trace(task))
+ goto out_task;
+
+ generic_fillattr(inode, stat);
+ unlock_trace(task);
+ put_task_struct(task);
+ rc = 0;
+out_task:
+ return rc;
+}
+
static const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
.follow_link = proc_pid_follow_link,
.setattr = proc_setattr,
};
+static const struct inode_operations proc_fdinfo_link_inode_operations = {
+ .setattr = proc_setattr,
+ .getattr = proc_pid_fd_link_getattr,
+};
+
+static const struct inode_operations proc_fd_link_inode_operations = {
+ .readlink = proc_pid_readlink,
+ .follow_link = proc_pid_follow_link,
+ .setattr = proc_setattr,
+ .getattr = proc_pid_fd_link_getattr,
+};
+
/* building an inode */
@@ -1902,49 +1936,61 @@ out:
static int proc_fd_info(struct inode *inode, struct path *path, char *info)
{
- struct task_struct *task = get_proc_task(inode);
- struct files_struct *files = NULL;
+ struct task_struct *task;
+ struct files_struct *files;
struct file *file;
int fd = proc_fd(inode);
+ int rc;
- if (task) {
- files = get_files_struct(task);
- put_task_struct(task);
- }
- if (files) {
- /*
- * We are not taking a ref to the file structure, so we must
- * hold ->file_lock.
- */
- spin_lock(&files->file_lock);
- file = fcheck_files(files, fd);
- if (file) {
- unsigned int f_flags;
- struct fdtable *fdt;
-
- fdt = files_fdtable(files);
- f_flags = file->f_flags & ~O_CLOEXEC;
- if (FD_ISSET(fd, fdt->close_on_exec))
- f_flags |= O_CLOEXEC;
-
- if (path) {
- *path = file->f_path;
- path_get(&file->f_path);
- }
- if (info)
- snprintf(info, PROC_FDINFO_MAX,
- "pos:\t%lli\n"
- "flags:\t0%o\n",
- (long long) file->f_pos,
- f_flags);
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- return 0;
+ task = get_proc_task(inode);
+ if (!task)
+ return -ENOENT;
+
+ rc = -EACCES;
+ if (lock_trace(task))
+ goto out_task;
+
+ rc = -ENOENT;
+ files = get_files_struct(task);
+ if (files == NULL)
+ goto out_unlock;
+
+ /*
+ * We are not taking a ref to the file structure, so we must
+ * hold ->file_lock.
+ */
+ spin_lock(&files->file_lock);
+ file = fcheck_files(files, fd);
+ if (file) {
+ unsigned int f_flags;
+ struct fdtable *fdt;
+
+ fdt = files_fdtable(files);
+ f_flags = file->f_flags & ~O_CLOEXEC;
+ if (FD_ISSET(fd, fdt->close_on_exec))
+ f_flags |= O_CLOEXEC;
+
+ if (path) {
+ *path = file->f_path;
+ path_get(&file->f_path);
}
- spin_unlock(&files->file_lock);
- put_files_struct(files);
- }
- return -ENOENT;
+ if (info)
+ snprintf(info, PROC_FDINFO_MAX,
+ "pos:\t%lli\n"
+ "flags:\t0%o\n",
+ (long long) file->f_pos,
+ f_flags);
+ rc = 0;
+ } else
+ rc = -ENOENT;
+ spin_unlock(&files->file_lock);
+ put_files_struct(files);
+
+out_unlock:
+ unlock_trace(task);
+out_task:
+ put_task_struct(task);
+ return rc;
}
static int proc_fd_link(struct inode *inode, struct path *path)
@@ -2039,7 +2085,7 @@ static struct dentry *proc_fd_instantiat
spin_unlock(&files->file_lock);
put_files_struct(files);
- inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_op = &proc_fd_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2071,7 +2117,12 @@ static struct dentry *proc_lookupfd_comm
if (fd == ~0U)
goto out;
+ result = ERR_PTR(-EACCES);
+ if (lock_trace(task))
+ goto out;
+
result = instantiate(dir, dentry, task, &fd);
+ unlock_trace(task);
out:
put_task_struct(task);
out_no_task:
@@ -2091,23 +2142,28 @@ static int proc_readfd_common(struct fil
retval = -ENOENT;
if (!p)
goto out_no_task;
+
+ retval = -EACCES;
+ if (lock_trace(p))
+ goto out;
+
retval = 0;
fd = filp->f_pos;
switch (fd) {
case 0:
if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
- goto out;
+ goto out_unlock;
filp->f_pos++;
case 1:
ino = parent_ino(dentry);
if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out;
+ goto out_unlock;
filp->f_pos++;
default:
files = get_files_struct(p);
if (!files)
- goto out;
+ goto out_unlock;
rcu_read_lock();
for (fd = filp->f_pos-2;
fd < files_fdtable(files)->max_fds;
@@ -2131,6 +2187,9 @@ static int proc_readfd_common(struct fil
rcu_read_unlock();
put_files_struct(files);
}
+
+out_unlock:
+ unlock_trace(p);
out:
put_task_struct(p);
out_no_task:
@@ -2208,6 +2267,7 @@ static struct dentry *proc_fdinfo_instan
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
+ inode->i_op = &proc_fdinfo_link_inode_operations;
d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
From: Vasiliy Kulikov <segoon@openwall.com>
The patch "proc: fix races against execve() of /proc/PID/fd**" is still a
partial fix for a setxid problem. link(2) is a yet another way to
identify whether a specific fd is opened by a privileged process. By
calling link(2) against /proc/PID/fd/* an attacker may identify whether
the fd number is valid for PID by analysing link(2) return code.
Both getattr() and link() can be used by the attacker iff the dentry is
present in the dcache. In this case ->lookup() is not called and the only
way to check ptrace permissions is either operation handler or
->revalidate(). The easiest solution to prevent any unauthorized access
to /proc/PID/fd*/ files is to force the dentry drop on each unauthorized
access attempt.
If an attacker keeps opened fd of /proc/PID/fd/ and dcache contains a
specific dentry for some /proc/PID/fd/XXX, any future attempt to use the
dentry by the attacker would lead to the dentry drop as a result of a
failed ptrace check in ->revalidate(). Then the attacker cannot spawn a
dentry for the specific fd number because of ptrace check in ->lookup().
The dentry drop can be still observed by an attacker by analysing
information from /proc/slabinfo, which is addressed in the successive
patch.
Signed-off-by: Vasiliy Kulikov <segoon@openwall.com>
Cc: Cyrill Gorcunov <gorcunov@gmail.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux-foundation.org>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Matt Mackall <mpm@selenic.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/base.c | 42 ++++++------------------------------------
1 file changed, 6 insertions(+), 36 deletions(-)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -1665,46 +1665,12 @@ out:
return error;
}
-static int proc_pid_fd_link_getattr(struct vfsmount *mnt, struct dentry *dentry,
- struct kstat *stat)
-{
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
- int rc;
-
- if (task == NULL)
- return -ESRCH;
-
- rc = -EACCES;
- if (lock_trace(task))
- goto out_task;
-
- generic_fillattr(inode, stat);
- unlock_trace(task);
- rc = 0;
-out_task:
- put_task_struct(task);
- return rc;
-}
-
static const struct inode_operations proc_pid_link_inode_operations = {
.readlink = proc_pid_readlink,
.follow_link = proc_pid_follow_link,
.setattr = proc_setattr,
};
-static const struct inode_operations proc_fdinfo_link_inode_operations = {
- .setattr = proc_setattr,
- .getattr = proc_pid_fd_link_getattr,
-};
-
-static const struct inode_operations proc_fd_link_inode_operations = {
- .readlink = proc_pid_readlink,
- .follow_link = proc_pid_follow_link,
- .setattr = proc_setattr,
- .getattr = proc_pid_fd_link_getattr,
-};
-
/* building an inode */
@@ -2013,6 +1979,11 @@ static int tid_fd_revalidate(struct dent
task = get_proc_task(inode);
fd = proc_fd(inode);
+	/*
+	 * get_proc_task() may return NULL (the code below already tests
+	 * for that), so only run the ptrace check on a valid task. On
+	 * denial drop the reference and clear @task so that the
+	 * "if (task)" block below is skipped.
+	 */
+	if (task && !ptrace_may_access(task, PTRACE_MODE_READ)) {
+		put_task_struct(task);
+		task = NULL;
+	}
+
if (task) {
files = get_files_struct(task);
if (files) {
@@ -2085,7 +2056,7 @@ static struct dentry *proc_fd_instantiat
spin_unlock(&files->file_lock);
put_files_struct(files);
- inode->i_op = &proc_fd_link_inode_operations;
+ inode->i_op = &proc_pid_link_inode_operations;
inode->i_size = 64;
ei->op.proc_get_link = proc_fd_link;
d_set_d_op(dentry, &tid_fd_dentry_operations);
@@ -2267,7 +2238,6 @@ static struct dentry *proc_fdinfo_instan
ei->fd = fd;
inode->i_mode = S_IFREG | S_IRUSR;
inode->i_fop = &proc_fdinfo_file_operations;
- inode->i_op = &proc_fdinfo_link_inode_operations;
d_set_d_op(dentry, &tid_fd_dentry_operations);
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
From: Pavel Emelyanov <xemul@openvz.org>
On reading sysctl dirs we should return -EISDIR instead of -EINVAL.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/proc_sysctl.c | 1 +
1 file changed, 1 insertion(+)
Index: linux-2.6.git/fs/proc/proc_sysctl.c
===================================================================
--- linux-2.6.git.orig/fs/proc/proc_sysctl.c
+++ linux-2.6.git/fs/proc/proc_sysctl.c
@@ -370,6 +370,7 @@ static const struct file_operations proc
};
static const struct file_operations proc_sys_dir_file_operations = {
+ .read = generic_read_dir,
.readdir = proc_sys_readdir,
.llseek = generic_file_llseek,
};
The kernel patch series. See the "series" file for the order in which
the patches must be applied. Not all patches address C/R directly,
but some of them are needed due to dependencies.
The following patches are known to be in -mm tree already
procfs-report-eisdir-when-reading-sysctl-dirs-in-proc.patch
proc-fix-races-against-execve-of-proc-pid-fd.patch
proc-fix-races-against-execve-of-proc-pid-fd-fix.patch
proc-force-dcache-drop-on-unauthorized-access.patch
cr-statfs-callback-for-pipefs
Has been tested on Linux 3.1-rc3.
procfs-report-eisdir-when-reading-sysctl-dirs-in-proc.patch
proc-fix-races-against-execve-of-proc-pid-fd.patch
proc-fix-races-against-execve-of-proc-pid-fd-fix.patch
proc-force-dcache-drop-on-unauthorized-access.patch
cr-statfs-callback-for-pipefs
fs-proc-switch-to-dentry
cr-proc-map-files-21
cr-clone-with-pid-support
cr-proc-add-children
fs-add-do-close
fs-proc-add-tls
fs-proc-add-mm-task-stat
binfmt-elf-for-cr-5
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment