Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
C
criu
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
zhul
criu
Commits
a4d37dcf
Commit
a4d37dcf
authored
Nov 08, 2011
by
Cyrill Gorcunov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kernel: Add patches needed
Signed-off-by:
Cyrill Gorcunov
<
gorcunov@gmail.com
>
parent
76df6d9e
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
335 additions
and
0 deletions
+335
-0
0005-clone-Introduce-the-CLONE_CHILD_USEPID-functionality.patch
...lone-Introduce-the-CLONE_CHILD_USEPID-functionality.patch
+187
-0
0006-prctl-Add-PR_CKPT_CTL-helper.patch
kernel/0006-prctl-Add-PR_CKPT_CTL-helper.patch
+148
-0
No files found.
kernel/0005-clone-Introduce-the-CLONE_CHILD_USEPID-functionality.patch
0 → 100644
View file @
a4d37dcf
From 110e1f59ca03d4f0aca9c42eb466870920e3d0c5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@openvz.org>
Date: Tue, 8 Nov 2011 17:07:58 +0400
Subject: [PATCH 5/6] clone: Introduce the CLONE_CHILD_USEPID functionality
When restoring a task (or a set of tasks) we need to recreate them with
exactly the same pid as they had before. Thus we need the ability to create
a task with a specified pid.
The proposal is to reuse the bit of the now-unused CLONE_STOPPED clone flag.
About the security implications - letting the caller choose the pid can
create problems with pid wraparound and similar, so this approach can be
restricted with a rule like "don't allow CLONE_CHILD_USEPID once the current
pid namespace has ever done a real pid allocation". This will still work
perfectly for checkpoint-restore and will not give anyone a chance to mess
pids up on a live system.
Signed-off-by: Pavel Emelyanov <xemul@openvz.org>
---
include/linux/pid.h | 2 +-
include/linux/sched.h | 1 +
kernel/fork.c | 10 ++++++-
kernel/pid.c | 70 +++++++++++++++++++++++++++++++++++-------------
4 files changed, 62 insertions(+), 21 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index b152d44..6bfe317 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -119,7 +119,7 @@ extern struct pid *find_get_pid(int nr);
extern struct pid *find_ge_pid(int nr, struct pid_namespace *);
int next_pidmap(struct pid_namespace *pid_ns, unsigned int last);
-extern struct pid *alloc_pid(struct pid_namespace *ns);
+extern struct pid *alloc_pid(struct pid_namespace *ns, int pid);
extern void free_pid(struct pid *pid);
/*
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 68daf4f..389068d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -23,6 +23,7 @@
#define CLONE_CHILD_SETTID 0x01000000 /* set the TID in the child */
/* 0x02000000 was previously the unused CLONE_STOPPED (Start in stopped state)
and is now available for re-use. */
+#define CLONE_CHILD_USEPID 0x02000000 /* use the given pid */
#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
diff --git a/kernel/fork.c b/kernel/fork.c
index ba0d172..0c67c63 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1250,8 +1250,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
goto bad_fork_cleanup_io;
if (pid != &init_struct_pid) {
+ int want_pid = 0;
+
+ if (clone_flags & CLONE_CHILD_USEPID) {
+ retval = get_user(want_pid, child_tidptr);
+ if (retval)
+ goto bad_fork_cleanup_io;
+ }
+
retval = -ENOMEM;
- pid = alloc_pid(p->nsproxy->pid_ns);
+ pid = alloc_pid(p->nsproxy->pid_ns, want_pid);
if (!pid)
goto bad_fork_cleanup_io;
}
diff --git a/kernel/pid.c b/kernel/pid.c
index fa5f722..b69f6a3 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -159,11 +159,55 @@ static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
} while ((prev != last_write) && (pid_before(base, last_write, pid)));
}
-static int alloc_pidmap(struct pid_namespace *pid_ns)
+static int alloc_pidmap_page(struct pidmap *map)
+{
+ if (unlikely(!map->page)) {
+ void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ /*
+ * Free the page if someone raced with us
+ * installing it:
+ */
+ spin_lock_irq(&pidmap_lock);
+ if (!map->page) {
+ map->page = page;
+ page = NULL;
+ }
+ spin_unlock_irq(&pidmap_lock);
+ kfree(page);
+ if (unlikely(!map->page))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static int set_pidmap(struct pid_namespace *pid_ns, int pid)
+{
+ int offset;
+ struct pidmap *map;
+
+ offset = pid & BITS_PER_PAGE_MASK;
+ map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
+
+ if (alloc_pidmap_page(map) < 0)
+ return -ENOMEM;
+
+ if (!test_and_set_bit(offset, map->page)) {
+ atomic_dec(&map->nr_free);
+ return pid;
+ }
+
+ return -EBUSY;
+}
+
+static int alloc_pidmap(struct pid_namespace *pid_ns, int desired_pid)
{
int i, offset, max_scan, pid, last = pid_ns->last_pid;
struct pidmap *map;
+ if (desired_pid)
+ return set_pidmap(pid_ns, desired_pid);
+
pid = last + 1;
if (pid >= pid_max)
pid = RESERVED_PIDS;
@@ -176,22 +220,9 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
*/
max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
for (i = 0; i <= max_scan; ++i) {
- if (unlikely(!map->page)) {
- void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
- /*
- * Free the page if someone raced with us
- * installing it:
- */
- spin_lock_irq(&pidmap_lock);
- if (!map->page) {
- map->page = page;
- page = NULL;
- }
- spin_unlock_irq(&pidmap_lock);
- kfree(page);
- if (unlikely(!map->page))
- break;
- }
+ if (alloc_pidmap_page(map) < 0)
+ break;
+
if (likely(atomic_read(&map->nr_free))) {
do {
if (!test_and_set_bit(offset, map->page)) {
@@ -277,7 +308,7 @@ void free_pid(struct pid *pid)
call_rcu(&pid->rcu, delayed_put_pid);
}
-struct pid *alloc_pid(struct pid_namespace *ns)
+struct pid *alloc_pid(struct pid_namespace *ns, int this_ns_pid)
{
struct pid *pid;
enum pid_type type;
@@ -291,13 +322,14 @@ struct pid *alloc_pid(struct pid_namespace *ns)
tmp = ns;
for (i = ns->level; i >= 0; i--) {
- nr = alloc_pidmap(tmp);
+ nr = alloc_pidmap(tmp, this_ns_pid);
if (nr < 0)
goto out_free;
pid->numbers[i].nr = nr;
pid->numbers[i].ns = tmp;
tmp = tmp->parent;
+ this_ns_pid = 0;
}
get_pid_ns(ns);
--
1.7.6.4
kernel/0006-prctl-Add-PR_CKPT_CTL-helper.patch
0 → 100644
View file @
a4d37dcf
From 8b85d835bca59de5b5c318f6492be0c73d2da313 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Tue, 8 Nov 2011 17:45:52 +0400
Subject: [PATCH 6/6] prctl: Add PR_CKPT_CTL helper
For the sake of checkpoint/restore we need a number
of things to be tuned up at restore time, such as the
vDSO address, task status, and code start/end fields.
This is done via the prctl interface.
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
---
arch/x86/vdso/vma.c | 33 +++++++++++++++++++++++++++++++++
include/linux/prctl.h | 12 ++++++++++++
kernel/sys.c | 44 ++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 153407c..e0b974f 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -137,6 +137,39 @@ up_fail:
return ret;
}
+int arch_setup_additional_pages_at(void *addr)
+{
+ struct mm_struct *mm = current->mm;
+ int ret;
+
+ if (!vdso_enabled)
+ return 0;
+
+ down_write(&mm->mmap_sem);
+
+ /*
+ * Unmap previous entry.
+ */
+ if (mm->context.vdso) {
+ ret = do_munmap(mm, (unsigned long)mm->context.vdso, vdso_size);
+ if (ret)
+ goto err;
+ }
+
+ mm->context.vdso = addr;
+ ret = install_special_mapping(mm, (unsigned long)addr, vdso_size,
+ VM_READ | VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+ VM_ALWAYSDUMP,
+ vdso_pages);
+ if (ret)
+ mm->context.vdso = NULL;
+
+err:
+ up_write(&mm->mmap_sem);
+ return ret;
+}
+
static __init int vdso_setup(char *s)
{
vdso_enabled = simple_strtoul(s, NULL, 0);
diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index a3baeb2..2b460ac 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -102,4 +102,16 @@
#define PR_MCE_KILL_GET 34
+/* Checkpoint/restore specifics */
+#define PR_CKPT_CTL 35
+# define PR_CKPT_CTL_SETUP_VDSO_AT 1
+# define PR_CKPT_CTL_SET_TASK_FLAGS 2
+# define PR_CKPT_CTL_SET_MM_START_CODE 3
+# define PR_CKPT_CTL_SET_MM_END_CODE 4
+# define PR_CKPT_CTL_SET_MM_START_DATA 5
+# define PR_CKPT_CTL_SET_MM_END_DATA 6
+# define PR_CKPT_CTL_SET_MM_START_STACK 7
+# define PR_CKPT_CTL_SET_MM_START_BRK 8
+# define PR_CKPT_CTL_SET_MM_BRK 9
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 481611f..2bdb30c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -123,6 +123,12 @@ EXPORT_SYMBOL(cad_pid);
void (*pm_power_off_prepare)(void);
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+extern int arch_setup_additional_pages_at(void *addr);
+#else
+static int arch_setup_additional_pages_at(void *addr) { return 0; }
+#endif
+
/*
* Returns true if current's euid is same as p's uid or euid,
* or has CAP_SYS_NICE to p's user_ns.
@@ -1841,6 +1847,44 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
else
error = PR_MCE_KILL_DEFAULT;
break;
+ case PR_CKPT_CTL:
+ if (arg4 | arg5)
+ return -EINVAL;
+ switch (arg2) {
+#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
+ case PR_CKPT_CTL_SETUP_VDSO_AT:
+ error = arch_setup_additional_pages_at((void *)arg3);
+ break;
+#endif
+ case PR_CKPT_CTL_SET_TASK_FLAGS:
+ current->flags = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_START_CODE:
+ current->mm->start_code = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_END_CODE:
+ current->mm->end_code = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_START_DATA:
+ current->mm->start_data = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_END_DATA:
+ current->mm->end_data = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_START_STACK:
+ current->mm->start_stack = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_START_BRK:
+ current->mm->start_brk = arg3;
+ break;
+ case PR_CKPT_CTL_SET_MM_BRK:
+ current->mm->brk = arg3;
+ break;
+ default:
+ error = -EINVAL;
+ break;
+ }
+ break;
default:
error = -EINVAL;
break;
--
1.7.6.4
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment