Commit 70cb4326 authored by Saied Kazemi's avatar Saied Kazemi Committed by Pavel Emelyanov

rework criu check logic

The "criu check" command to check if the kernel is properly configured
to run criu is broken.

The "criu check --ms" command used to be the way to tell criu to check
only for features that have been merged upstream.  But recent kernels
have a set of features whose presence doesn't necessarily mean that
dump or restore will fail but rather *may* fail depending on whether
the process tree uses those features.

This patch deprecates --ms and introduces --extra, --experimental,
and --all.  See "criu --help" or "man criu" for more info.

Typical use cases are:

	$ sudo criu check
	<zero or more warnings and errors...>
	Looks good.
	$ echo $?
	0

	$ sudo criu check --extra
	<zero or more warnings and errors...>
	Looks good.
	$ echo $?
	1

	$ sudo criu check --extra
	<one or more warnings...>
	Looks good but some kernel features are missing
	which, depending on your process tree, may cause
	dump or restore failure.
	$ echo $?
	1

	$ sudo criu check --feature list
	mnt_id aio_remap timerfd tun userns fdinfo_lock seccomp_suspend \
		seccomp_filters loginuid cgroupns

	$ sudo criu check --feature mnt_id
	Warn  (cr-check.c:283): fdinfo doesn't contain the mnt_id field
	$ echo $?
	1

	$ sudo criu check --feature tun
	tun is supported
	$ echo $?
	0
Signed-off-by: 's avatarSaied Kazemi <saied@google.com>
Signed-off-by: 's avatarPavel Emelyanov <xemul@virtuozzo.com>
parent 69adfe7b
......@@ -311,15 +311,51 @@ relaxing capability with *--cpu-cap*=*none* parameter.
*check*
~~~~~~~
Tests whether the kernel support is up to date.
Checks whether the kernel supports the features that *criu* needs to
successfully dump and restore a process tree.
*--ms*::
Do not check not yet merged features.
There are three categories of kernel support as described below. *criu
check* always checks Category 1 features unless *--feature* is specified
which only checks the specified feature.
- *Category 1*. Absolutely required. These are features like
'/proc/<pid>/map_files', 'NETLINK_SOCK_DIAG' socket
monitoring, '/proc/sys/kernel/ns_last_pid', etc.
- *Category 2*. Required only for specific cases. These are features
like aio remap, '/dev/net/tun', etc. that are
required if the process being dumped or restored
is using them.
- *Category 3*. Experimental. These are features like task-diag that
are used for experimental purposes (mostly
during development).
If there are no errors or warnings, *criu* prints "Looks good." and its
exit code will be 0.
A missing Category 1 feature causes *criu* to print "Does not look good."
and its exit code will be non-zero.
Missing Category 2 and 3 features cause *criu* to print "Looks good but
some kernel features are missing which, depending on your process tree,
may cause dump or restore failure." and its exit code will be non-zero.
Without an argument, *criu check* checks Category 1 features. This
behavior can change with the following options:
*--extra*::
Check kernel support for Category 2 features.
*--experimental*::
Check kernel support for Category 3 features.
*--all*::
Check kernel support for Category 1, 2, and 3 features.
*--feature* '<name>'::
Check a particular feature. Instead of checking everything one may specify
which exactly feature is to be tested. The '<name>' may be: *mnt_id*,
*aio_remap*, *timerfd*, *tun*, *userns*.
Check a specific feature. If '<name>' is 'list', a list of valid
kernel feature names that can be checked will be printed.
*page-server*
~~~~~~~~~~~~~
......
......@@ -41,6 +41,8 @@
#include "pstree.h"
#include "cr_options.h"
static char *feature_name(int (*func)());
static int check_tty(void)
{
int master = -1, slave = -1;
......@@ -169,7 +171,7 @@ static int check_kcmp(void)
return 0;
}
static int check_prctl(void)
static int check_prctl_cat1(void)
{
unsigned long user_auxv = 0;
unsigned int *tid_addr;
......@@ -183,23 +185,18 @@ static int check_prctl(void)
}
/*
* Either new or old interface must be supported in the kernel.
* It's OK if the new interface is not supported because it's
* a Category 2 feature, but the old interface has to be supported.
*/
ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
if (ret < 0) {
if (!opts.check_ms_kernel) {
pr_msg("prctl: PR_SET_MM_MAP is not supported, which "
"is required for restoring user namespaces: %m\n");
return -1;
} else
pr_warn("Skipping unssuported PR_SET_MM_MAP: %m\n");
pr_msg("Info prctl: PR_SET_MM_MAP_SIZE is not supported\n");
ret = prctl(PR_SET_MM, PR_SET_MM_BRK, (unsigned long)sbrk(0), 0, 0);
if (ret < 0) {
if (errno == EPERM)
pr_msg("prctl: One needs CAP_SYS_RESOURCE capability to perform testing\n");
else
pr_msg("prctl: PR_SET_MM is not supported: %m\n");
pr_msg("prctl: PR_SET_MM_BRK is not supported: %m\n");
return -1;
}
......@@ -219,6 +216,19 @@ static int check_prctl(void)
return 0;
}
static int check_prctl_cat2(void)
{
unsigned int size = 0;
int ret;
ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
if (ret) {
pr_warn("prctl: PR_SET_MM_MAP_SIZE is not supported\n");
return -1;
}
return 0;
}
static int check_fcntl(void)
{
u32 v[2];
......@@ -766,11 +776,8 @@ static int check_aio_remap(void)
ctx = (aio_context_t)naddr;
r = syscall(SYS_io_getevents, ctx, 0, 1, NULL, NULL);
if (r < 0) {
if (!opts.check_ms_kernel) {
pr_err("AIO remap doesn't work properly: %m\n");
return -1;
} else
pr_warn("Skipping unsupported AIO remap: %m\n");
pr_err("AIO remap doesn't work properly: %m\n");
return -1;
}
return 0;
......@@ -782,12 +789,8 @@ static int check_fdinfo_lock(void)
return -1;
if (!kdat.has_fdinfo_lock) {
if (!opts.check_ms_kernel) {
pr_err("fdinfo doesn't contain the lock field\n");
return -1;
} else {
pr_warn("fdinfo doesn't contain the lock field\n");
}
pr_err("fdinfo doesn't contain the lock field\n");
return -1;
}
return 0;
......@@ -823,10 +826,6 @@ static int check_clone_parent_vs_pid()
static int check_cgroupns(void)
{
int ret;
if (opts.check_ms_kernel) {
pr_warn("Skipping cgroup namespaces check\n");
return 0;
}
ret = access("/proc/self/ns/cgroup", F_OK);
if (ret < 0) {
......@@ -839,6 +838,29 @@ static int check_cgroupns(void)
static int (*chk_feature)(void);
/*
* There are three categories of kernel features:
*
* 1. Absolutely required (/proc/pid/map_files, ptrace PEEKSIGINFO, etc.).
* 2. Required only for specific cases (aio remap, tun, etc.).
* Checked when --extra or --all is specified.
* 3. Experimental (task-diag).
* Checked when --experimental or --all is specified.
*
* We fail if any feature in category 1 is missing but tolerate failures
* in the other categories. Currently, there is nothing in category 3.
*/
#define CHECK_GOOD "Looks good."
#define CHECK_BAD "Does not look good."
#define CHECK_MAYBE "Looks good but some kernel features are missing\n" \
"which, depending on your process tree, may cause\n" \
"dump or restore failure."
#define CHECK_CAT1(fn) do { \
if ((ret = fn) != 0) { \
print_on_level(DEFAULT_LOGLEVEL, "%s\n", CHECK_BAD); \
return ret; \
} \
} while (0)
int cr_check(void)
{
struct ns_id ns = { .type = NS_CRIU, .ns_pid = PROC_SELF, .nd = &mnt_ns_desc };
......@@ -863,44 +885,71 @@ int cr_check(void)
return -1;
if (chk_feature) {
ret = chk_feature();
goto out;
if (chk_feature())
return -1;
print_on_level(DEFAULT_LOGLEVEL, "%s is supported\n",
feature_name(chk_feature));
return 0;
}
ret |= check_map_files();
ret |= check_sock_diag();
ret |= check_ns_last_pid();
ret |= check_sock_peek_off();
ret |= check_kcmp();
ret |= check_prctl();
ret |= check_fcntl();
ret |= check_proc_stat();
ret |= check_tcp();
ret |= check_fdinfo_ext();
ret |= check_unaligned_vmsplice();
ret |= check_tty();
ret |= check_so_gets();
ret |= check_ipc();
ret |= check_sigqueuinfo();
ret |= check_ptrace_peeksiginfo();
ret |= check_ptrace_suspend_seccomp();
ret |= check_ptrace_dump_seccomp_filters();
ret |= check_mem_dirty_track();
ret |= check_posix_timers();
ret |= check_tun_cr(0);
ret |= check_timerfd();
ret |= check_mnt_id();
ret |= check_aio_remap();
ret |= check_fdinfo_lock();
ret |= check_clone_parent_vs_pid();
ret |= check_cgroupns();
/*
* Category 1 - absolutely required.
* So that the user can see clearly what's missing, we exit with
* non-zero status on the first failure because it gets very
* confusing when there are many warnings and error messages.
*/
CHECK_CAT1(check_map_files());
CHECK_CAT1(check_sock_diag());
CHECK_CAT1(check_ns_last_pid());
CHECK_CAT1(check_sock_peek_off());
CHECK_CAT1(check_kcmp());
CHECK_CAT1(check_prctl_cat1());
CHECK_CAT1(check_fcntl());
CHECK_CAT1(check_proc_stat());
CHECK_CAT1(check_tcp());
CHECK_CAT1(check_fdinfo_ext());
CHECK_CAT1(check_unaligned_vmsplice());
CHECK_CAT1(check_tty());
CHECK_CAT1(check_so_gets());
CHECK_CAT1(check_ipc());
CHECK_CAT1(check_sigqueuinfo());
CHECK_CAT1(check_ptrace_peeksiginfo());
/*
* Category 2 - required for specific cases.
* Unlike Category 1 features, we don't exit with non-zero status
* on a failure because CRIU may still work.
*/
if (opts.check_extra_features) {
ret |= check_prctl_cat2();
ret |= check_ptrace_suspend_seccomp();
ret |= check_ptrace_dump_seccomp_filters();
ret |= check_mem_dirty_track();
ret |= check_posix_timers();
ret |= check_tun_cr(0);
ret |= check_timerfd();
ret |= check_mnt_id();
ret |= check_aio_remap();
ret |= check_fdinfo_lock();
ret |= check_clone_parent_vs_pid();
ret |= check_cgroupns();
}
out:
if (!ret)
print_on_level(DEFAULT_LOGLEVEL, "Looks good.\n");
/*
* Category 3 - experimental.
*/
if (opts.check_experimental_features) {
/* Empty for now */
;
}
print_on_level(DEFAULT_LOGLEVEL, "%s\n", ret ? CHECK_MAYBE : CHECK_GOOD);
return ret;
}
#undef CHECK_GOOD
#undef CHECK_BAD
#undef CHECK_MAYBE
#undef CHECK_CAT1
static int check_tun(void)
{
......@@ -926,7 +975,7 @@ static int check_userns(void)
ret = prctl(PR_SET_MM, PR_SET_MM_MAP_SIZE, (unsigned long)&size, 0, 0);
if (ret < 0) {
pr_perror("No new prctl API");
pr_perror("prctl: PR_SET_MM_MAP_SIZE is not supported");
return -1;
}
......@@ -946,32 +995,53 @@ static int check_loginuid(void)
return 0;
}
struct feature_list {
char *name;
int (*func)();
};
static struct feature_list feature_list[] = {
{ "mnt_id", check_mnt_id },
{ "aio_remap", check_aio_remap },
{ "timerfd", check_timerfd },
{ "tun", check_tun },
{ "userns", check_userns },
{ "fdinfo_lock", check_fdinfo_lock },
{ "seccomp_suspend", check_ptrace_suspend_seccomp },
{ "seccomp_filters", check_ptrace_dump_seccomp_filters },
{ "loginuid", check_loginuid },
{ "cgroupns", check_cgroupns },
{ NULL, NULL },
};
int check_add_feature(char *feat)
{
if (!strcmp(feat, "mnt_id"))
chk_feature = check_mnt_id;
else if (!strcmp(feat, "aio_remap"))
chk_feature = check_aio_remap;
else if (!strcmp(feat, "timerfd"))
chk_feature = check_timerfd;
else if (!strcmp(feat, "tun"))
chk_feature = check_tun;
else if (!strcmp(feat, "userns"))
chk_feature = check_userns;
else if (!strcmp(feat, "fdinfo_lock"))
chk_feature = check_fdinfo_lock;
else if (!strcmp(feat, "seccomp_suspend"))
chk_feature = check_ptrace_suspend_seccomp;
else if (!strcmp(feat, "seccomp_filters"))
chk_feature = check_ptrace_dump_seccomp_filters;
else if (!strcmp(feat, "loginuid"))
chk_feature = check_loginuid;
else if (!strcmp(feat, "cgroupns"))
chk_feature = check_cgroupns;
else {
pr_err("Unknown feature %s\n", feat);
return -1;
struct feature_list *fl;
if (!strcmp(feat, "list")) {
for (fl = feature_list; fl->name; fl++)
pr_msg("%s ", fl->name);
pr_msg("\n");
return 1;
}
return 0;
for (fl = feature_list; fl->name; fl++) {
if (!strcmp(feat, fl->name)) {
chk_feature = fl->func;
return 0;
}
}
pr_err("Unknown feature %s\n", feat);
return -1;
}
static char *feature_name(int (*func)())
{
struct feature_list *fl;
for (fl = feature_list; fl->func; fl++) {
if (fl->func == func)
return fl->name;
}
return NULL;
}
......@@ -567,9 +567,6 @@ static int check(int sk)
setproctitle("check --rpc");
/* Check only minimal kernel support */
opts.check_ms_kernel = true;
if (!cr_check())
resp.success = true;
......
......@@ -275,6 +275,9 @@ int main(int argc, char *argv[], char *envp[])
{ "timeout", required_argument, 0, 1072 },
{ "external", required_argument, 0, 1073 },
{ "empty-ns", required_argument, 0, 1074 },
{ "extra", no_argument, 0, 1077 },
{ "experimental", no_argument, 0, 1078 },
{ "all", no_argument, 0, 1079 },
{ },
};
......@@ -455,8 +458,8 @@ int main(int argc, char *argv[], char *envp[])
opts.force_irmap = true;
break;
case 1054:
opts.check_ms_kernel = true;
break;
pr_err("--ms is deprecated; see \"Check options\" of criu --help\n");
return 1;
case 'L':
opts.libdir = optarg;
break;
......@@ -490,8 +493,11 @@ int main(int argc, char *argv[], char *envp[])
return 1;
break;
case 1063:
if (check_add_feature(optarg) < 0)
ret = check_add_feature(optarg);
if (ret < 0) /* invalid kernel feature name */
return 1;
if (ret > 0) /* list kernel features and exit */
return 0;
break;
case 1064:
if (!add_skip_mount(optarg))
......@@ -554,6 +560,16 @@ int main(int argc, char *argv[], char *envp[])
return 1;
}
break;
case 1077:
opts.check_extra_features = true;
break;
case 1078:
opts.check_experimental_features = true;
break;
case 1079:
opts.check_extra_features = true;
opts.check_experimental_features = true;
break;
case 'V':
pr_msg("Version: %s\n", CRIU_VERSION);
if (strcmp(CRIU_GITID, "0"))
......@@ -715,7 +731,7 @@ usage:
"Usage:\n"
" criu dump|pre-dump -t PID [<options>]\n"
" criu restore [<options>]\n"
" criu check [--ms]\n"
" criu check [--feature FEAT]\n"
" criu exec -p PID <syscall-string>\n"
" criu page-server\n"
" criu service [<options>]\n"
......@@ -808,8 +824,19 @@ usage:
" socket[inode]\n"
" file[mnt_id:inode]\n"
" --empty-ns {net}\n"
" Create a namespace, but don't restore its properies.\n"
" An user will retore them from action scripts.\n"
" Create a namespace, but don't restore its properies.\n"
" An user will retore them from action scripts.\n"
"Check options:\n"
" without any arguments, \"criu check\" checks availability of absolutely required\n"
" kernel features; if any of these features is missing dump and restore will fail\n"
" --extra also check availability of extra kernel features\n"
" --experimental also check availability of experimental kernel features\n"
" --all also check availability of extra and experimental kernel features\n"
" --feature FEAT only check availability of one of the following kernel features\n"
" "
);
check_add_feature("list");
pr_msg(
"\n"
"* Logging:\n"
" -o|--log-file FILE log file name\n"
......@@ -837,7 +864,6 @@ usage:
"Other options:\n"
" -h|--help show this text\n"
" -V|--version show version\n"
" --ms don't check not yet merged kernel features\n"
);
return 0;
......
......@@ -56,7 +56,8 @@ struct cr_options {
int final_state;
char *show_dump_file;
char *show_fmt;
bool check_ms_kernel;
bool check_extra_features;
bool check_experimental_features;
bool show_pages_content;
union {
bool restore_detach;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment