Commit a41cc56c authored by Dmitry Safonov, committed by Andrei Vagin

x86/kerndat: Add a check for ptrace() bug on Skylake

We need to know if ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, iov)
returns xsave without FP state part.
Signed-off-by: Dmitry Safonov <dima@arista.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent 3b71b95a
......@@ -3,10 +3,12 @@
#include <string.h>
#include <unistd.h>
#include <elf.h>
#include <sched.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/auxv.h>
#include <sys/wait.h>
#include <sys/ptrace.h>
#include "types.h"
#include "log.h"
......@@ -30,6 +32,7 @@
#include "images/core.pb-c.h"
#include "images/creds.pb-c.h"
/* XXX: Move all kerndat features to per-arch kerndat .c */
int kdat_can_map_vdso(void)
{
pid_t child;
......@@ -188,6 +191,78 @@ int kdat_compatible_cr(void)
}
#endif
/*
 * Entry point of the helper task cloned by
 * kdat_x86_has_ptrace_fpu_xsave_bug().
 *
 * The task asks to be traced by its parent and then stops itself with
 * SIGSTOP. It is not expected to run past that point: reaching the end
 * of the function is reported as an error and the task exits with 1.
 *
 * @arg: unused.
 */
static int kdat_x86_has_ptrace_fpu_xsave_bug_child(void *arg)
{
	/* Without TRACEME the parent can't read our registers: bail out early. */
	if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
		pr_perror("%d: ptrace(PTRACE_TRACEME) failed", getpid());
		_exit(1);
	}

	if (kill(getpid(), SIGSTOP))
		pr_perror("%d: failed to kill myself", getpid());

	pr_err("Continue after SIGSTOP.. Urr what?\n");
	_exit(1);
}
/*
 * Pre v4.14 kernels have a bug on Skylake CPUs:
 * copyout_from_xsaves() creates fpu state for
 * ptrace(PTRACE_GETREGSET, pid, NT_X86_XSTATE, &iov)
 * without MXCSR and MXCSR_FLAGS if there is SSE/YMM state, but no FP state.
 * That is, xfeatures had either/both XFEATURE_MASK_{SSE,YMM} set, but not
 * XFEATURE_MASK_FP.
 * But we *really* need to C/R MXCSR & MXCSR_FLAGS if SSE/YMM is active,
 * as MXCSR stores part of the state.
 *
 * Returns 1 if the fetched xsave area has a zero MXCSR (bug present),
 * 0 if MXCSR is non-zero or the CPU lacks OSXSAVE, and -1 if the probe
 * itself failed.
 */
int kdat_x86_has_ptrace_fpu_xsave_bug(void)
{
	user_fpregs_struct_t xsave = { };
	struct iovec iov;
	/*
	 * The child starts at the top of this buffer; keep it 16-byte
	 * aligned as the x86-64 ABI requires for a stack.
	 */
	char stack[PAGE_SIZE] __attribute__((aligned(16)));
	int flags = CLONE_VM | CLONE_FILES | CLONE_UNTRACED | SIGCHLD;
	int ret = -1;
	pid_t child;
	int stat;

	/* OSXSAVE can't be changed during boot. */
	if (!compel_cpu_has_feature(X86_FEATURE_OSXSAVE))
		return 0;

	child = clone(kdat_x86_has_ptrace_fpu_xsave_bug_child,
		      stack + ARRAY_SIZE(stack), flags, 0);
	if (child < 0) {
		pr_perror("%s(): failed to clone()", __func__);
		return -1;
	}

	if (waitpid(child, &stat, WUNTRACED) != child) {
		/*
		 * waitpid() may end with ECHILD if SIGCHLD == SIG_IGN,
		 * and the child has stopped already.
		 */
		pr_perror("Failed to wait for %s() test", __func__);
		goto out_kill;
	}

	if (!WIFSTOPPED(stat)) {
		pr_err("Born child is unstoppable! (might be dead)\n");
		/*
		 * A terminated child has just been reaped by waitpid():
		 * its pid may be reused, so it must not be signalled.
		 */
		if (WIFEXITED(stat) || WIFSIGNALED(stat))
			return -1;
		goto out_kill;
	}

	iov.iov_base = &xsave;
	iov.iov_len = sizeof(xsave);

	if (ptrace(PTRACE_GETREGSET, child, (unsigned)NT_X86_XSTATE, &iov) < 0) {
		pr_perror("Can't obtain FPU registers for %d", child);
		goto out_kill;
	}

	/*
	 * MXCSR should be never 0x0: e.g., it should contain either:
	 * R+/R-/RZ/RN to determine rounding model.
	 */
	ret = !xsave.i387.mxcsr;

out_kill:
	if (kill(child, SIGKILL))
		pr_perror("Failed to kill my own child");
	if (waitpid(child, &stat, 0) < 0)
		pr_perror("Failed wait for a dead child");

	return ret;
}
int save_task_regs(void *x, user_regs_struct_t *regs, user_fpregs_struct_t *fpregs)
{
CoreEntry *core = x;
......
......@@ -80,8 +80,10 @@ static inline int set_compat_robust_list(uint32_t head_ptr, uint32_t len)
# define ARCH_MAP_VDSO_64 0x2003
#endif
/* XXX: Introduce per-arch kerndat header */
extern int kdat_compatible_cr(void);
extern int kdat_can_map_vdso(void);
extern int kdat_x86_has_ptrace_fpu_xsave_bug(void);
static inline void
__setup_sas_compat(struct ucontext_ia32* uc, ThreadSasEntry *sas)
......
......@@ -68,6 +68,7 @@ struct kerndat_s {
bool has_link_nsid;
unsigned int sysctl_nr_open;
unsigned long files_stat_max_files;
bool x86_has_ptrace_fpu_xsave_bug;
};
extern struct kerndat_s kdat;
......
......@@ -737,6 +737,22 @@ err:
return ret;
}
/*
 * Weak default for the ptrace xsave probe: a non-weak definition of the
 * same symbol, when one is linked in, takes precedence over this one.
 *
 * Always returns 0.
 */
int __attribute__((weak)) kdat_x86_has_ptrace_fpu_xsave_bug(void)
{
	/* Nothing to probe here. */
	return 0;
}
/*
 * Run kdat_x86_has_ptrace_fpu_xsave_bug() and store its verdict in
 * kdat.x86_has_ptrace_fpu_xsave_bug.
 *
 * Returns 0 once the flag has been stored; a negative probe result is
 * passed back unchanged and kdat is left untouched.
 */
static int kerndat_x86_has_ptrace_fpu_xsave_bug(void)
{
	int probe = kdat_x86_has_ptrace_fpu_xsave_bug();

	if (probe >= 0) {
		/* Any non-zero result means the bug was detected. */
		kdat.x86_has_ptrace_fpu_xsave_bug = (probe != 0);
		return 0;
	}

	return probe;
}
#define KERNDAT_CACHE_FILE KDAT_RUNDIR"/criu.kdat"
#define KERNDAT_CACHE_FILE_TMP KDAT_RUNDIR"/.criu.kdat"
......@@ -974,6 +990,8 @@ int kerndat_init(void)
ret = kerndat_socket_netns();
if (!ret)
ret = kerndat_nsid();
if (!ret)
ret = kerndat_x86_has_ptrace_fpu_xsave_bug();
kerndat_lsm();
kerndat_mmap_min_addr();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment