Commit 5e2b5f09 authored by Cyrill Gorcunov

kernel: Update kernel patches series

Tossing patches around in a git repo is pure madness,
so switch back to the quilt tool.
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
parent b8638a70
From 7348faeab3ba943ea5c2d955b0dcb53477a94629 Mon Sep 17 00:00:00 2001
clone: Introduce the CLONE_CHILD_USEPID functionality
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 8 Nov 2011 17:07:58 +0400
Subject: [PATCH 6/7] clone: Introduce the CLONE_CHILD_USEPID functionality
When restoring a task (or a set of tasks) we need to recreate them with
exactly the same pid as they had before. Thus we need the ability to create
......@@ -17,16 +16,16 @@ will not give anyone chances for screwing pids up on a living system.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/pid.h | 2 +-
include/linux/sched.h | 1 +
include/linux/pid.h | 2 -
include/linux/sched.h | 1
kernel/fork.c | 10 ++++++-
kernel/pid.c | 70 +++++++++++++++++++++++++++++++++++-------------
kernel/pid.c | 70 ++++++++++++++++++++++++++++++++++++--------------
4 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44..6bfe317 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
Index: linux-2.6.git/include/linux/pid.h
===================================================================
--- linux-2.6.git.orig/include/linux/pid.h
+++ linux-2.6.git/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
......@@ -36,10 +35,10 @@ index b152d44..6bfe317 100644
extern void free_pid(struct pid *pid);
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f..389068d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
Index: linux-2.6.git/include/linux/sched.h
===================================================================
--- linux-2.6.git.orig/include/linux/sched.h
+++ linux-2.6.git/include/linux/sched.h
@@ -23,6 +23,7 @@
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
......@@ -48,11 +47,11 @@ index 68daf4f..389068d 100644
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d172..0c67c63 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1250,8 +1250,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
Index: linux-2.6.git/kernel/fork.c
===================================================================
--- linux-2.6.git.orig/kernel/fork.c
+++ linux-2.6.git/kernel/fork.c
@@ -1250,8 +1250,16 @@ static struct task_struct *copy_process(
goto bad_fork_cleanup_io;
if (pid != &init_struct_pid) {
......@@ -70,11 +69,11 @@ index ba0d172..0c67c63 100644
if (!pid)
goto bad_fork_cleanup_io;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index fa5f722..b69f6a3 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
Index: linux-2.6.git/kernel/pid.c
===================================================================
--- linux-2.6.git.orig/kernel/pid.c
+++ linux-2.6.git/kernel/pid.c
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_name
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
}
......@@ -131,7 +130,7 @@ index fa5f722..b69f6a3 100644
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_names
*/
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
for (i = 0; i <= max_scan; ++i) {
......@@ -166,7 +165,7 @@ index fa5f722..b69f6a3 100644
{
struct pid *pid;
enum pid_type type;
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespace *ns)
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespa
tmp = ns;
for (i = ns->level; i >= 0; i--) {
......@@ -182,6 +181,3 @@ index fa5f722..b69f6a3 100644
}
get_pid_ns(ns);
--
1.7.7.3
From 467ebfc7760890deefa3b0d738620b40c1d58991 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 15:00:56 +0400
Subject: [PATCH 4/7] fs, proc: Add start_data, end_data, start_brk members to
/proc/$pid/stat
fs, proc: Add start_data, end_data, start_brk members to /proc/$pid/stat
It helps to dump and restore these mm_struct members at checkpoint/restore time.
It helps to dump and restore these mm_struct members.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
fs/proc/array.c | 7 +++++--
1 files changed, 5 insertions(+), 2 deletions(-)
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3a1dafd..d851166 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
Index: linux-2.6.git/fs/proc/array.c
===================================================================
--- linux-2.6.git.orig/fs/proc/array.c
+++ linux-2.6.git/fs/proc/array.c
@@ -464,7 +464,7 @@ static int do_task_stat(struct seq_file
seq_printf(m, "%d (%s) %c %d %d %d %d %d %u %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \
......@@ -24,7 +20,7 @@ index 3a1dafd..d851166 100644
pid_nr_ns(pid, ns),
tcomm,
state,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
@@ -511,7 +511,10 @@ static int do_task_stat(struct seq_file
task->policy,
(unsigned long long)delayacct_blkio_ticks(task),
cputime_to_clock_t(gtime),
......@@ -36,6 +32,3 @@ index 3a1dafd..d851166 100644
if (mm)
mmput(mm);
return 0;
--
1.7.7.3
From 4d12bad4d48564003c7fe9f82990123271c2bfb5 Mon Sep 17 00:00:00 2001
fs, proc: Introduce the Children: line in /proc/<pid>/status
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:59:40 +0400
Subject: [PATCH 5/7] fs, proc: Introduce the Children: line in
/proc/<pid>/status
Although we can get the pids of a task's children by other means, it is just
more convenient to have them this way.
There is no easy way to make a reverse parent->children chain
from the task status, in turn children->parent provided with "PPid"
field.
So instead of walking over all pids in the system to figure out what
children the task has -- we add an explicit "Children" member to
/proc/<pid>/status, since the kernel already knows this kind of information
but it was not yet exported.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
......@@ -14,8 +18,6 @@ Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
fs/proc/array.c | 14 ++++++++++++++
1 files changed, 14 insertions(+), 0 deletions(-)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index d851166..8248682 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -158,6 +158,18 @@ static inline const char *get_task_state(struct task_struct *tsk)
......
From d23bde31590a7679aa2be7960848b0fedd0ce032 Mon Sep 17 00:00:00 2001
fs, proc: Introduce the /proc/<pid>/map_files/ directory v14
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 8 Nov 2011 14:58:01 +0400
Subject: [PATCH 2/7] fs, proc: Introduce the /proc/<pid>/map_files/ directory
v14
This one behaves similarly to the /proc/<pid>/fd/ one - it contains symlinks
one for each mapping with file, the name of a symlink is "vma->vm_start-vma->vm_end",
......@@ -117,14 +115,14 @@ CC: Al Viro <viro@ZenIV.linux.org.uk>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: Pavel Machek <pavel@ucw.cz>
---
fs/proc/base.c | 345 ++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 12 ++
2 files changed, 357 insertions(+), 0 deletions(-)
fs/proc/base.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm.h | 12 +
2 files changed, 357 insertions(+)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 93c81aa..9b7a9cd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -83,6 +83,7 @@
#include <linux/pid_namespace.h>
#include <linux/fs_struct.h>
......@@ -142,7 +140,7 @@ index 93c81aa..9b7a9cd 100644
/*
* Count the number of hardlinks for the pid_entry table, excluding the .
* and .. links.
@@ -2217,6 +2220,347 @@ static const struct file_operations proc_fd_operations = {
@@ -2217,6 +2220,347 @@ static const struct file_operations proc
};
/*
......@@ -490,7 +488,7 @@ index 93c81aa..9b7a9cd 100644
* /proc/pid/fd needs a special permission handler so that a process can still
* access /proc/self/fd after it has executed a setuid().
*/
@@ -2832,6 +3176,7 @@ static const struct inode_operations proc_task_inode_operations;
@@ -2832,6 +3176,7 @@ static const struct inode_operations pro
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
......@@ -498,11 +496,11 @@ index 93c81aa..9b7a9cd 100644
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3dc3a8c..14159d3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
Index: linux-2.6.git/include/linux/mm.h
===================================================================
--- linux-2.6.git.orig/include/linux/mm.h
+++ linux-2.6.git/include/linux/mm.h
@@ -1491,6 +1491,18 @@ static inline unsigned long vma_pages(st
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
......@@ -521,6 +519,3 @@ index 3dc3a8c..14159d3 100644
#ifdef CONFIG_MMU
pgprot_t vm_get_page_prot(unsigned long vm_flags);
#else
--
1.7.7.3
From fc4504ee8f471ac1ac8162ec68e98f2c09d53411 Mon Sep 17 00:00:00 2001
fs, proc: Make proc_get_link to use dentry instead of inode
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Tue, 8 Nov 2011 14:57:10 +0400
Subject: [PATCH 1/7] fs, proc: Make proc_get_link to use dentry instead of
inode
This patch prepares the ground for the next "map_files"
patch which needs a name of a link file to analyse.
......@@ -20,11 +18,11 @@ CC: Andrew Morton <akpm@linux-foundation.org>
include/linux/proc_fs.h | 2 +-
2 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2db1bd3..93c81aa 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -165,9 +165,9 @@ static int get_task_root(struct task_struct *task, struct path *root)
Index: linux-2.6.git/fs/proc/base.c
===================================================================
--- linux-2.6.git.orig/fs/proc/base.c
+++ linux-2.6.git/fs/proc/base.c
@@ -165,9 +165,9 @@ static int get_task_root(struct task_str
return result;
}
......@@ -36,7 +34,7 @@ index 2db1bd3..93c81aa 100644
int result = -ENOENT;
if (task) {
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *inode, struct path *path)
@@ -182,9 +182,9 @@ static int proc_cwd_link(struct inode *i
return result;
}
......@@ -48,7 +46,7 @@ index 2db1bd3..93c81aa 100644
int result = -ENOENT;
if (task) {
@@ -1567,13 +1567,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
@@ -1567,13 +1567,13 @@ static const struct file_operations proc
.release = single_release,
};
......@@ -64,7 +62,7 @@ index 2db1bd3..93c81aa 100644
if (!task)
return -ENOENT;
mm = get_task_mm(task);
@@ -1603,7 +1603,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
@@ -1603,7 +1603,7 @@ static void *proc_pid_follow_link(struct
if (!proc_fd_access_allowed(inode))
goto out;
......@@ -73,7 +71,7 @@ index 2db1bd3..93c81aa 100644
out:
return ERR_PTR(error);
}
@@ -1642,7 +1642,7 @@ static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int b
@@ -1642,7 +1642,7 @@ static int proc_pid_readlink(struct dent
if (!proc_fd_access_allowed(inode))
goto out;
......@@ -94,11 +92,11 @@ index 2db1bd3..93c81aa 100644
}
static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 643b96c..c3d11ff 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations utsns_operations;
Index: linux-2.6.git/include/linux/proc_fs.h
===================================================================
--- linux-2.6.git.orig/include/linux/proc_fs.h
+++ linux-2.6.git/include/linux/proc_fs.h
@@ -253,7 +253,7 @@ extern const struct proc_ns_operations u
extern const struct proc_ns_operations ipcns_operations;
union proc_op {
......@@ -107,6 +105,3 @@ index 643b96c..c3d11ff 100644
int (*proc_read)(struct task_struct *task, char *page);
int (*proc_show)(struct seq_file *m,
struct pid_namespace *ns, struct pid *pid,
--
1.7.7.3
From 181ab3a738d454c0aa6f983e25ca076638d43179 Mon Sep 17 00:00:00 2001
mincore: Add named constant for reported present bit
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 25 Nov 2011 17:58:28 +0400
Subject: [PATCH 1/2] mincore: Add named constant for reported present bit
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
include/linux/mman.h | 2 ++
mm/huge_memory.c | 2 +-
mm/mincore.c | 10 +++++-----
3 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 8b74e9b..e4fda1e 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
Index: linux-2.6.git/include/linux/mman.h
===================================================================
--- linux-2.6.git.orig/include/linux/mman.h
+++ linux-2.6.git/include/linux/mman.h
@@ -10,6 +10,8 @@
#define OVERCOMMIT_ALWAYS 1
#define OVERCOMMIT_NEVER 2
......@@ -24,11 +23,11 @@ index 8b74e9b..e4fda1e 100644
#ifdef __KERNEL__
#include <linux/mm.h>
#include <linux/percpu_counter.h>
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 4298aba..a0acb3e 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1045,7 +1045,7 @@ int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
Index: linux-2.6.git/mm/huge_memory.c
===================================================================
--- linux-2.6.git.orig/mm/huge_memory.c
+++ linux-2.6.git/mm/huge_memory.c
@@ -1045,7 +1045,7 @@ int mincore_huge_pmd(struct vm_area_stru
* All logical pages in the range are present
* if backed by a huge page.
*/
......@@ -37,11 +36,11 @@ index 4298aba..a0acb3e 100644
}
} else
spin_unlock(&vma->vm_mm->page_table_lock);
diff --git a/mm/mincore.c b/mm/mincore.c
index 636a8687..b719cdd 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -38,7 +38,7 @@ static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
Index: linux-2.6.git/mm/mincore.c
===================================================================
--- linux-2.6.git.orig/mm/mincore.c
+++ linux-2.6.git/mm/mincore.c
@@ -38,7 +38,7 @@ static void mincore_hugetlb_page_range(s
addr & huge_page_mask(h));
present = ptep && !huge_pte_none(huge_ptep_get(ptep));
while (1) {
......@@ -50,7 +49,7 @@ index 636a8687..b719cdd 100644
vec++;
addr += PAGE_SIZE;
if (addr == end)
@@ -83,7 +83,7 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
@@ -83,7 +83,7 @@ static unsigned char mincore_page(struct
page_cache_release(page);
}
......@@ -59,7 +58,7 @@ index 636a8687..b719cdd 100644
}
static void mincore_unmapped_range(struct vm_area_struct *vma,
@@ -122,7 +122,7 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -122,7 +122,7 @@ static void mincore_pte_range(struct vm_
if (pte_none(pte))
mincore_unmapped_range(vma, addr, next, vec);
else if (pte_present(pte))
......@@ -68,7 +67,7 @@ index 636a8687..b719cdd 100644
else if (pte_file(pte)) {
pgoff = pte_to_pgoff(pte);
*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
@@ -131,14 +131,14 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -131,14 +131,14 @@ static void mincore_pte_range(struct vm_
if (is_migration_entry(entry)) {
/* migration entries are always uptodate */
......@@ -85,6 +84,3 @@ index 636a8687..b719cdd 100644
#endif
}
}
--
1.7.7.3
From 1d2879854a34a16bcfadeb7c71c05290d59eeb36 Mon Sep 17 00:00:00 2001
mincore: Report whether page is anon or not
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 25 Nov 2011 18:01:12 +0400
Subject: [PATCH 2/2] mincore: Report whether page is anon or not
This is required not to dump pages from private file mappings, that are
not mapped or not yet cow-ed.
not mapped or not yet COW-ed.
The thing is that mincore reports bit 1 for pages that are in memory, regardless
of whether they are mapped or not. But in case we have mapped a file of 2 pages and
read a single page mincore will report 1 for both - the 1st one being mapped and
the 2nd one being in page cache due to readahead.
With this fix both pages will be !PageAnon and we can skip them (see further patches
for crtools).
With this fix both pages will be !PageAnon and we can skip them from dumping.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
include/linux/mman.h | 1 +
mm/mincore.c | 15 +++++++++++++--
2 files changed, 14 insertions(+), 2 deletions(-)
diff --git a/include/linux/mman.h b/include/linux/mman.h
index e4fda1e..9d1de16 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
Index: linux-2.6.git/include/linux/mman.h
===================================================================
--- linux-2.6.git.orig/include/linux/mman.h
+++ linux-2.6.git/include/linux/mman.h
@@ -11,6 +11,7 @@
#define OVERCOMMIT_NEVER 2
......@@ -33,11 +31,11 @@ index e4fda1e..9d1de16 100644
#ifdef __KERNEL__
#include <linux/mm.h>
diff --git a/mm/mincore.c b/mm/mincore.c
index b719cdd..f825327 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -38,7 +38,7 @@ static void mincore_hugetlb_page_range(struct vm_area_struct *vma,
Index: linux-2.6.git/mm/mincore.c
===================================================================
--- linux-2.6.git.orig/mm/mincore.c
+++ linux-2.6.git/mm/mincore.c
@@ -38,7 +38,7 @@ static void mincore_hugetlb_page_range(s
addr & huge_page_mask(h));
present = ptep && !huge_pte_none(huge_ptep_get(ptep));
while (1) {
......@@ -46,7 +44,7 @@ index b719cdd..f825327 100644
vec++;
addr += PAGE_SIZE;
if (addr == end)
@@ -86,6 +86,17 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
@@ -86,6 +86,17 @@ static unsigned char mincore_page(struct
return present ? MINCORE_RESIDENT : 0;
}
......@@ -55,7 +53,7 @@ index b719cdd..f825327 100644
+ struct page *pg;
+
+ pg = vm_normal_page(vma, addr, pte);
+ if (pg == NULL)
+ if (!pg)
+ return 0;
+ else
+ return PageAnon(pg) ? MINCORE_ANON : 0;
......@@ -64,7 +62,7 @@ index b719cdd..f825327 100644
static void mincore_unmapped_range(struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
unsigned char *vec)
@@ -122,7 +133,7 @@ static void mincore_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
@@ -122,7 +133,7 @@ static void mincore_pte_range(struct vm_
if (pte_none(pte))
mincore_unmapped_range(vma, addr, next, vec);
else if (pte_present(pte))
......@@ -73,6 +71,3 @@ index b719cdd..f825327 100644
else if (pte_file(pte)) {
pgoff = pte_to_pgoff(pte);
*vec = mincore_page(vma->vm_file->f_mapping, pgoff);
--
1.7.7.3
From 4aca2c63c7df7b19864939d543b478b100d568f6 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 22 Nov 2011 21:08:43 +0400
Subject: [PATCH] prctl: Add PR_ codes to restore vDSO and tune up mm_struct
entries
prctl: Add PR_SET_MM codes to tune up mm_struct entries
To be able to use vDSO facility at process restore time we need it
being mapped at predefined address (at the address it had at checkpoint
time). For this sake PR_SETUP_VDSO_AT is introduced.
A few members of mm_struct such as start_code, end_code,
start_data, end_data, start_stack, start_brk, brk provided
by the kernel via /proc/$pid/stat and we use it at checkpoint
time.
At the same time, a few members in mm_struct are set up by a binfmt
handler code, such as mm_struct -> start_code, end_code,
start_data, end_data, start_stack, start_brk, brk. So at
restore time we need them to have exactly the same values
as they had at checkpoint time. This is handled by PR_SET_MM
prctl opcode.
At restore time we need a mechanism to restore those values
back and for this sake PR_SET_MM prctl code is introduced.
Note that at the moment this interface is allowed for CAP_SYS_ADMIN
only.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
arch/x86/vdso/vma.c | 39 +++++++++++++++++++++++++++
include/linux/prctl.h | 18 ++++++++++++
kernel/sys.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 128 insertions(+), 0 deletions(-)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 153407c..e0f03da 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -137,6 +137,45 @@ up_fail:
return ret;
}
+int arch_setup_additional_pages_at(void *addr)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (!vdso_enabled)
+ return 0;
+
+ if ((unsigned long)addr > TASK_SIZE - vdso_size)
+ return -ENOMEM;
+
+ if ((unsigned long)addr & ~PAGE_MASK)
+ return -EINVAL;
+
+ down_write(&mm->mmap_sem);
+
+ /*
+ * Unmap previous entry.
+ */
+ if (mm->context.vdso) {
+ ret = do_munmap(mm, (unsigned long)mm->context.vdso, vdso_size);
+ if (ret)
+ goto err;
+ }
+
+ mm->context.vdso = addr;
+ ret = install_special_mapping(mm, (unsigned long)addr, vdso_size,
+ VM_READ | VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+ VM_ALWAYSDUMP,
+ vdso_pages);
+ if (ret)
+ mm->context.vdso = NULL;
+
+err:
+ up_write(&mm->mmap_sem);
+ return ret;
+}
+
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..dddacb0 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,22 @@
Actually I'm not sure if CAP_SYS_ADMIN restriction is
really needed here. Opinions?
include/linux/prctl.h | 12 +++++++++++
kernel/sys.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 64 insertions(+)
Index: linux-2.6.git/include/linux/prctl.h
===================================================================
--- linux-2.6.git.orig/include/linux/prctl.h
+++ linux-2.6.git/include/linux/prctl.h
@@ -102,4 +102,16 @@
#define PR_MCE_KILL_GET 34
......@@ -91,48 +40,16 @@ index a3baeb2..dddacb0 100644
+# define PR_SET_MM_START_STACK 5
+# define PR_SET_MM_START_BRK 6
+# define PR_SET_MM_BRK 7
+
+/*
+ * Unmap current vDSO and setup new one
+ * at predefined address.
+ */
+#define PR_SETUP_VDSO_AT 36
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 481611f..96ee568 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -123,6 +123,12 @@ EXPORT_SYMBOL(cad_pid);
void (*pm_power_off_prepare)(void);
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+extern int arch_setup_additional_pages_at(void *addr);
+#else
+static int arch_setup_additional_pages_at(void *addr) { return 0; }
+#endif
+
/*
* Returns true if current's euid is same as p's uid or euid,
* or has CAP_SYS_NICE to p's user_ns.
@@ -1841,6 +1847,71 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
Index: linux-2.6.git/kernel/sys.c
===================================================================
--- linux-2.6.git.orig/kernel/sys.c
+++ linux-2.6.git/kernel/sys.c
@@ -1841,6 +1841,58 @@ SYSCALL_DEFINE5(prctl, int, option, unsi
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_SETUP_VDSO_AT:
+ if (arg3 | arg4 | arg5)
+ return -EINVAL;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+ error = arch_setup_additional_pages_at((void *)arg2);
+#else
+ error = -ENOSYS;
+#endif
+ break;
+ case PR_SET_MM: {
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
......@@ -188,6 +105,3 @@ index 481611f..96ee568 100644
default:
error = -EINVAL;
break;
--
1.7.7.3
From 3142489577d30077c5389dce0832f7859a438401 Mon Sep 17 00:00:00 2001
procfs-introduce-the-proc-pid-map_files-directory-checkpatch-fixes
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 9 Nov 2011 14:18:34 -0800
Subject: [PATCH 3/7] procfs-introduce-the-proc-pid-map_files-directory-checkpatch-fixes
Cc: "Kirill A. Shutemov" <kirill@shutemov.name>
......@@ -49,8 +48,6 @@ Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
include/linux/mm.h | 4 ++--
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 9b7a9cd..4532044 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2430,7 +2430,8 @@ static const struct inode_operations proc_map_files_inode_operations = {
......@@ -99,8 +96,6 @@ index 9b7a9cd..4532044 100644
if (flex_array_put(fa, i++, &info, GFP_KERNEL))
BUG();
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 14159d3..5e27665 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1492,8 +1492,8 @@ static inline unsigned long vma_pages(struct vm_area_struct *vma)
......
0001-fs-proc-Make-proc_get_link-to-use-dentry-instead-of-.patch
0002-fs-proc-Introduce-the-proc-pid-map_files-directory-v.patch
0003-procfs-introduce-the-proc-pid-map_files-directory-ch.patch
0004-fs-proc-Add-start_data-end_data-start_brk-members-to.patch
0005-fs-proc-Introduce-the-Children-line-in-proc-pid-stat.patch
0006-clone-Introduce-the-CLONE_CHILD_USEPID-functionality.patch
0007-prctl-Add-PR_-codes-to-restore-vDSO-and-tune-up-mm_s.patch
0008-mincore-Add-named-constant-for-reported-present-bit.patch
0009-mincore-Report-whether-page-is-anon-or-not.patch
fs-proc-Make-proc_get_link-to-use-dentry
fs-proc-Introduce-the-proc-pid-map_files-directory
procfs-introduce-the-proc-pid-map_files-directory-checkpatch
clone-Introduce-the-CLONE_CHILD_USEPID-functionality
fs-proc-Add-start_data-end_data-start_brk-members
fs-proc-Introduce-the-Children-line-in-proc-pid-stat
prctl-tune-up-mm_struct-members
mincore-Add-named-constant-for-reported-present-bit
mincore-Report-whether-page-is-anon-or-not
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment