Commit 8b80bba7 authored by Cyrill Gorcunov

kernel: Update patch series

As usual, based on Linux 3.2-rc1
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
parent 71a5efef
From fc4504ee8f471ac1ac8162ec68e98f2c09d53411 Mon Sep 17 00:00:00 2001 From fc4504ee8f471ac1ac8162ec68e98f2c09d53411 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org> From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 14:57:10 +0400 Date: Tue, 8 Nov 2011 14:57:10 +0400
Subject: [PATCH 1/4] fs, proc: Make proc_get_link to use dentry instead of Subject: [PATCH 1/7] fs, proc: Make proc_get_link to use dentry instead of
inode inode
This patch prepares the ground for the next "map_files" This patch prepares the ground for the next "map_files"
...@@ -108,5 +108,5 @@ index 643b96c..c3d11ff 100644 ...@@ -108,5 +108,5 @@ index 643b96c..c3d11ff 100644
int (*proc_show)(struct seq_file *m, int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid, struct pid_namespace *ns, struct pid *pid,
-- --
1.7.6.4 1.7.7.3
From d23bde31590a7679aa2be7960848b0fedd0ce032 Mon Sep 17 00:00:00 2001 From d23bde31590a7679aa2be7960848b0fedd0ce032 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com> From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:58:01 +0400 Date: Tue, 8 Nov 2011 14:58:01 +0400
Subject: [PATCH 2/4] fs, proc: Introduce the /proc/<pid>/map_files/ directory Subject: [PATCH 2/7] fs, proc: Introduce the /proc/<pid>/map_files/ directory
v14 v14
This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks
...@@ -522,5 +522,5 @@ index 3dc3a8c..14159d3 100644 ...@@ -522,5 +522,5 @@ index 3dc3a8c..14159d3 100644
pgprot_t vm_get_page_prot(unsigned long vm_flags); pgprot_t vm_get_page_prot(unsigned long vm_flags);
#else #else
-- --
1.7.6.4 1.7.7.3
From 3142489577d30077c5389dce0832f7859a438401 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 9 Nov 2011 14:18:34 -0800
Subject: [PATCH 3/7] procfs-introduce-the-proc-pid-map_files-directory-checkpatch-fixes
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
WARNING: line over 80 characters
#286: FILE: fs/proc/base.c:2433:
+static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
WARNING: line over 80 characters
#351: FILE: fs/proc/base.c:2498:
+ fa = flex_array_alloc(sizeof(info), nr_files, GFP_KERNEL);
WARNING: line over 80 characters
#352: FILE: fs/proc/base.c:2499:
+ if (!fa || flex_array_prealloc(fa, 0, nr_files, GFP_KERNEL)) {
WARNING: line over 80 characters
#360: FILE: fs/proc/base.c:2507:
+ for (i = 0, vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
WARNING: line over 80 characters
#368: FILE: fs/proc/base.c:2515:
+ info.len = snprintf(info.name, sizeof(info.name),
WARNING: line over 80 characters
#424: FILE: fs/proc/base.c:3179:
+ DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
WARNING: line over 80 characters
#437: FILE: include/linux/mm.h:1497:
+find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end)
total: 0 errors, 7 warnings, 387 lines checked
./patches/procfs-introduce-the-proc-pid-map_files-directory.patch has style problems, please review.
If any of these errors are false positives, please report
them to the maintainer, see CHECKPATCH in MAINTAINERS.
Please run checkpatch prior to sending patches
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
fs/proc/base.c | 18 +++++++++++-------
include/linux/mm.h | 4 ++--
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b7a9cd..4532044 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2430,7 +2430,8 @@ static const struct inode_operations proc_map_files_inode_operations = {
.setattr = proc_setattr,
};
-static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int
+proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -2495,8 +2496,10 @@ static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t fil
}
if (nr_files) {
- fa = flex_array_alloc(sizeof(info), nr_files, GFP_KERNEL);
- if (!fa || flex_array_prealloc(fa, 0, nr_files, GFP_KERNEL)) {
+ fa = flex_array_alloc(sizeof(info), nr_files,
+ GFP_KERNEL);
+ if (!fa || flex_array_prealloc(fa, 0, nr_files,
+ GFP_KERNEL)) {
ret = -ENOMEM;
if (fa)
flex_array_free(fa);
@@ -2504,7 +2507,8 @@ static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t fil
mmput(mm);
goto out_unlock;
}
- for (i = 0, vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+ for (i = 0, vma = mm->mmap, pos = 2; vma;
+ vma = vma->vm_next) {
if (!vma->vm_file)
continue;
if (++pos <= filp->f_pos)
@@ -2512,9 +2516,9 @@ static int proc_map_files_readdir(struct file *filp, void *dirent, filldir_t fil
get_file(vma->vm_file);
info.file = vma->vm_file;
- info.len = snprintf(info.name, sizeof(info.name),
- "%lx-%lx", vma->vm_start,
- vma->vm_end);
+ info.len = snprintf(info.name,
+ sizeof(info.name), "%lx-%lx",
+ vma->vm_start, vma->vm_end);
if (flex_array_put(fa, i++, &info, GFP_KERNEL))
BUG();
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 14159d3..5e27665 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1492,8 +1492,8 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
}
/* Look up the first VMA which exactly match the interval vm_start ... vm_end */
-static inline struct vm_area_struct *
-find_exact_vma(struct mm_struct *mm, unsigned long vm_start, unsigned long vm_end)
+static inline struct vm_area_struct *find_exact_vma(struct mm_struct *mm,
+ unsigned long vm_start, unsigned long vm_end)
{
struct vm_area_struct *vma = find_vma(mm, vm_start);
--
1.7.7.3
From e46fc1fa01faea36ad4c5608436f5900e66c9529 Mon Sep 17 00:00:00 2001 From 467ebfc7760890deefa3b0d738620b40c1d58991 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org> From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 15:00:56 +0400 Date: Tue, 8 Nov 2011 15:00:56 +0400
Subject: [PATCH 4/4] fs, proc: Add start_data, end_data, start_brk members to Subject: [PATCH 4/7] fs, proc: Add start_data, end_data, start_brk members to
/proc/$pid/stat /proc/$pid/stat
It helps to dump and restore these mm_struct members at checkpoint/restore time. It helps to dump and restore these mm_struct members at checkpoint/restore time.
...@@ -12,10 +12,10 @@ Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> ...@@ -12,10 +12,10 @@ Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
1 files changed, 5 insertions(+), 2 deletions(-) 1 files changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c diff --git a/fs/proc/array.c b/fs/proc/array.c
index 8f33329..8248682 100644 index 3a1dafd..d851166 100644
--- a/fs/proc/array.c --- a/fs/proc/array.c
+++ b/fs/proc/array.c +++ b/fs/proc/array.c
@@ -478,7 +478,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, @@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \ seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
...@@ -24,7 +24,7 @@ index 8f33329..8248682 100644 ...@@ -24,7 +24,7 @@ index 8f33329..8248682 100644
pid_nr_ns(pid, ns), pid_nr_ns(pid, ns),
tcomm, tcomm,
state, state,
@@ -525,7 +525,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, @@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
task->policy, task->policy,
(unsigned long long)delayacct_blkio_ticks(task), (unsigned long long)delayacct_blkio_ticks(task),
cputime_to_clock_t(gtime), cputime_to_clock_t(gtime),
...@@ -37,5 +37,5 @@ index 8f33329..8248682 100644 ...@@ -37,5 +37,5 @@ index 8f33329..8248682 100644
mmput(mm); mmput(mm);
return 0; return 0;
-- --
1.7.6.4 1.7.7.3
From 9e489dbc4f796b76adb4440ccf4888d934ede61d Mon Sep 17 00:00:00 2001 From 4d12bad4d48564003c7fe9f82990123271c2bfb5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com> From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:59:40 +0400 Date: Tue, 8 Nov 2011 14:59:40 +0400
Subject: [PATCH 3/4] fs, proc: Introduce the Children: line in Subject: [PATCH 5/7] fs, proc: Introduce the Children: line in
/proc/<pid>/status /proc/<pid>/status
Although we can get the pids of some task's issue, this is just Although we can get the pids of some task's issue, this is just
...@@ -15,7 +15,7 @@ Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> ...@@ -15,7 +15,7 @@ Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
1 files changed, 14 insertions(+), 0 deletions(-) 1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd..8f33329 100644 index d851166..8248682 100644
--- a/fs/proc/array.c --- a/fs/proc/array.c
+++ b/fs/proc/array.c +++ b/fs/proc/array.c
@@ -158,6 +158,18 @@ static inline const char *get_task_state(struct task_struct *tsk) @@ -158,6 +158,18 @@ static inline const char *get_task_state(struct task_struct *tsk)
...@@ -47,5 +47,5 @@ index 3a1dafd..8f33329 100644 ...@@ -47,5 +47,5 @@ index 3a1dafd..8f33329 100644
if (p->files) if (p->files)
fdt = files_fdtable(p->files); fdt = files_fdtable(p->files);
-- --
1.7.6.4 1.7.7.3
From 110e1f59ca03d4f0aca9c42eb466870920e3d0c5 Mon Sep 17 00:00:00 2001 From 7348faeab3ba943ea5c2d955b0dcb53477a94629 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org> From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 8 Nov 2011 17:07:58 +0400 Date: Tue, 8 Nov 2011 17:07:58 +0400
Subject: [PATCH 5/6] clone: Introduce the CLONE_CHILD_USEPID functionality Subject: [PATCH 6/7] clone: Introduce the CLONE_CHILD_USEPID functionality
When restoring a task (or a set of tasks) we need to recreate them with When restoring a task (or a set of tasks) we need to recreate them with
exactly the same pid as they had before. Thus we need the ability to create exactly the same pid as they had before. Thus we need the ability to create
...@@ -183,5 +183,5 @@ index fa5f722..b69f6a3 100644 ...@@ -183,5 +183,5 @@ index fa5f722..b69f6a3 100644
get_pid_ns(ns); get_pid_ns(ns);
-- --
1.7.6.4 1.7.7.3
From 8b85d835bca59de5b5c318f6492be0c73d2da313 Mon Sep 17 00:00:00 2001 From ce70ea1a87d978c96f96d61d59c1779df51a4c57 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com> From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 8 Nov 2011 17:45:52 +0400 Date: Mon, 21 Nov 2011 16:27:49 +0400
Subject: [PATCH 6/6] prctl: Add PR_CKPT_CTL helper Subject: [PATCH 7/7] prctl: Add PR_ codes to restore vDSO and tune up
mm_struct entries
For the sake of checkpoint restore we need a number To be able to use the vDSO facility at process restore time we need it
of things to be tuned up at restore time, such as to be mapped at a predefined address (at the address it had at checkpoint
vDSO address, task status, code start/end fields. time). For this purpose PR_SETUP_VDSO_AT is introduced.
This is done via the prctl interface. At the same time, a few members in mm_struct are set up by a binfmt
handler code, such as mm_struct -> start_code, end_code,
start_data, end_data, start_stack, start_brk, brk. So at
restore time we need them to have exactly the same values
as they had at checkpoint time. This is handled by PR_SET_MM
prctl opcode.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org> Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
--- ---
arch/x86/vdso/vma.c | 33 +++++++++++++++++++++++++++++++++ arch/x86/vdso/vma.c | 39 +++++++++++++++++++++++++++
include/linux/prctl.h | 12 ++++++++++++ include/linux/prctl.h | 18 ++++++++++++
kernel/sys.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ kernel/sys.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 89 insertions(+), 0 deletions(-) 3 files changed, 127 insertions(+), 0 deletions(-)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 153407c..e0b974f 100644 index 153407c..e0f03da 100644
--- a/arch/x86/vdso/vma.c --- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c
@@ -137,6 +137,39 @@ up_fail: @@ -137,6 +137,45 @@ up_fail:
return ret; return ret;
} }
...@@ -32,6 +38,12 @@ index 153407c..e0b974f 100644 ...@@ -32,6 +38,12 @@ index 153407c..e0b974f 100644
+ if (!vdso_enabled) + if (!vdso_enabled)
+ return 0; + return 0;
+ +
+ if ((unsigned long)addr > TASK_SIZE - vdso_size)
+ return -ENOMEM;
+
+ if ((unsigned long)addr & ~PAGE_MASK)
+ return -EINVAL;
+
+ down_write(&mm->mmap_sem); + down_write(&mm->mmap_sem);
+ +
+ /* + /*
...@@ -61,28 +73,34 @@ index 153407c..e0b974f 100644 ...@@ -61,28 +73,34 @@ index 153407c..e0b974f 100644
{ {
vdso_enabled = simple_strtoul(s, NULL, 0); vdso_enabled = simple_strtoul(s, NULL, 0);
diff --git a/include/linux/prctl.h b/include/linux/prctl.h diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..2b460ac 100644 index a3baeb2..dddacb0 100644
--- a/include/linux/prctl.h --- a/include/linux/prctl.h
+++ b/include/linux/prctl.h +++ b/include/linux/prctl.h
@@ -102,4 +102,16 @@ @@ -102,4 +102,22 @@
#define PR_MCE_KILL_GET 34 #define PR_MCE_KILL_GET 34
+/* Checkpoint/restore specifics */ +/*
+#define PR_CKPT_CTL 35 + * Tune up process memory map specifics.
+# define PR_CKPT_CTL_SETUP_VDSO_AT 1 + */
+# define PR_CKPT_CTL_SET_TASK_FLAGS 2 +#define PR_SET_MM 35
+# define PR_CKPT_CTL_SET_MM_START_CODE 3 +# define PR_SET_MM_START_CODE 1
+# define PR_CKPT_CTL_SET_MM_END_CODE 4 +# define PR_SET_MM_END_CODE 2
+# define PR_CKPT_CTL_SET_MM_START_DATA 5 +# define PR_SET_MM_START_DATA 3
+# define PR_CKPT_CTL_SET_MM_END_DATA 6 +# define PR_SET_MM_END_DATA 4
+# define PR_CKPT_CTL_SET_MM_START_STACK 7 +# define PR_SET_MM_START_STACK 5
+# define PR_CKPT_CTL_SET_MM_START_BRK 8 +# define PR_SET_MM_START_BRK 6
+# define PR_CKPT_CTL_SET_MM_BRK 9 +# define PR_SET_MM_BRK 7
+
+/*
+ * Unmap current vDSO and setup new one
+ * at predefined address.
+ */
+#define PR_SETUP_VDSO_AT 36
+ +
#endif /* _LINUX_PRCTL_H */ #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c diff --git a/kernel/sys.c b/kernel/sys.c
index 481611f..2bdb30c 100644 index 481611f..76a8173 100644
--- a/kernel/sys.c --- a/kernel/sys.c
+++ b/kernel/sys.c +++ b/kernel/sys.c
@@ -123,6 +123,12 @@ EXPORT_SYMBOL(cad_pid); @@ -123,6 +123,12 @@ EXPORT_SYMBOL(cad_pid);
...@@ -98,51 +116,77 @@ index 481611f..2bdb30c 100644 ...@@ -98,51 +116,77 @@ index 481611f..2bdb30c 100644
/* /*
* Returns true if current's euid is same as p's uid or euid, * Returns true if current's euid is same as p's uid or euid,
* or has CAP_SYS_NICE to p's user_ns. * or has CAP_SYS_NICE to p's user_ns.
@@ -1841,6 +1847,44 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, @@ -1841,6 +1847,70 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else else
error = PR_MCE_KILL_DEFAULT; error = PR_MCE_KILL_DEFAULT;
break; break;
+ case PR_CKPT_CTL: + case PR_SETUP_VDSO_AT:
+ if (arg4 | arg5) + if (arg3 | arg4 | arg5)
+ return -EINVAL; + return -EINVAL;
+ switch (arg2) { +
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES +#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+ case PR_CKPT_CTL_SETUP_VDSO_AT: + error = arch_setup_additional_pages_at((void *)arg2);
+ error = arch_setup_additional_pages_at((void *)arg3); +#else
+ break; + error = -ENOSYS;
+#endif +#endif
+ case PR_CKPT_CTL_SET_TASK_FLAGS: + break;
+ current->flags = arg3; + case PR_SET_MM: {
+ break; + struct mm_struct *mm;
+ case PR_CKPT_CTL_SET_MM_START_CODE: + struct vm_area_struct *vma;
+
+ if (arg4 | arg5)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ error = -ENOENT;
+ mm = get_task_mm(current);
+ if (!mm)
+ return error;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, arg3);
+ if (!vma)
+ goto out;
+
+ switch (arg2) {
+ case PR_SET_MM_START_CODE:
+ current->mm->start_code = arg3; + current->mm->start_code = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_END_CODE: + case PR_SET_MM_END_CODE:
+ current->mm->end_code = arg3; + current->mm->end_code = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_START_DATA: + case PR_SET_MM_START_DATA:
+ current->mm->start_data = arg3; + current->mm->start_data = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_END_DATA: + case PR_SET_MM_END_DATA:
+ current->mm->end_data = arg3; + current->mm->end_data = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_START_STACK: + case PR_SET_MM_START_STACK:
+ current->mm->start_stack = arg3; + current->mm->start_stack = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_START_BRK: + case PR_SET_MM_START_BRK:
+ current->mm->start_brk = arg3; + current->mm->start_brk = arg3;
+ break; + break;
+ case PR_CKPT_CTL_SET_MM_BRK: + case PR_SET_MM_BRK:
+ current->mm->brk = arg3; + current->mm->brk = arg3;
+ break; + break;
+ default: + default:
+ error = -EINVAL; + error = -EINVAL;
+ break; + break;
+ } + }
+out:
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ break; + break;
+ }
default: default:
error = -EINVAL; error = -EINVAL;
break; break;
-- --
1.7.6.4 1.7.7.3
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment