Commit 3c4dfb6e authored by Cyrill Gorcunov's avatar Cyrill Gorcunov Committed by Andrei Vagin

x86: cpu -- Detect and save fpu status in info

- extend compel_cpuinfo_t to keep all fpu information
   needed for xsaves mode

 - fetch xsaves data in compel_cpuid

All this will allow us to extend criu to support
avx-512 instructions.
Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Reviewed-by: Dmitry Safonov <0x7f454c46@gmail.com>
Signed-off-by: Andrei Vagin <avagin@virtuozzo.com>
parent f0fb0bc9
......@@ -6,6 +6,7 @@
#include "common/compiler.h"
#include "log.h"
#include "common/bug.h"
#undef LOG_PREFIX
#define LOG_PREFIX "cpu: "
......@@ -13,6 +14,40 @@
/* Cached cpuinfo for the host CPU, filled once and then reused. */
static compel_cpuinfo_t rt_info;
/* Set to true once rt_info has been populated. */
static bool rt_info_done = false;
/*
 * Although we spell it out in here, the Processor Trace
 * xfeature is completely unused. We use other mechanisms
 * to save/restore PT state in Linux.
 *
 * NOTE: the table is indexed by xfeature number and is only used
 * for debug printing in compel_fpuid(); it must stay in sync with
 * xsave_cpuid_features[] (enforced there via BUILD_BUG_ON).
 */
static const char * const xfeature_names[] = {
	"x87 floating point registers"	,
	"SSE registers"			,
	"AVX registers"			,
	"MPX bounds registers"		,
	"MPX CSR"			,
	"AVX-512 opmask"		,
	"AVX-512 Hi256"			,
	"AVX-512 ZMM_Hi256"		,
	"Processor Trace"		,
	"Protection Keys User registers",
	"Hardware Duty Cycling"		,
};
/*
 * CPUID feature bit that must also be present for the xfeature with
 * the same index to be considered usable; compel_fpuid() clears any
 * xfeature whose CPUID counterpart is absent.
 */
static short xsave_cpuid_features[] = {
	X86_FEATURE_FPU,	/* XFEATURE_FP */
	X86_FEATURE_XMM,	/* XFEATURE_SSE */
	X86_FEATURE_AVX,	/* XFEATURE_YMM */
	X86_FEATURE_MPX,	/* XFEATURE_BNDREGS */
	X86_FEATURE_MPX,	/* XFEATURE_BNDCSR */
	X86_FEATURE_AVX512F,	/* XFEATURE_OPMASK */
	X86_FEATURE_AVX512F,	/* XFEATURE_ZMM_Hi256 */
	X86_FEATURE_AVX512F,	/* XFEATURE_Hi16_ZMM */
	X86_FEATURE_INTEL_PT,	/* XFEATURE_PT */
	X86_FEATURE_PKU,	/* XFEATURE_PKRU */
	X86_FEATURE_HDC,	/* XFEATURE_HDC */
};
void compel_set_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
{
if (likely(feature < NCAPINTS_BITS))
......@@ -32,6 +67,172 @@ int compel_test_cpu_cap(compel_cpuinfo_t *c, unsigned int feature)
return 0;
}
/*
 * Detect the FPU/XSAVE capabilities of the CPU described by @c and
 * fill in the xsave-related fields: xfeatures_mask, the three frame
 * sizes and the per-feature offset/size tables for both the standard
 * and the compacted XSAVE formats.
 *
 * Returns 0 on success (including the legacy no-XSAVE case) and -1
 * when no FPU is present or the xstate enumeration is inconsistent.
 */
static int compel_fpuid(compel_cpuinfo_t *c)
{
	unsigned int last_good_offset;
	uint32_t eax, ebx, ecx, edx;
	size_t i;

	/* Both tables are indexed by xfeature number and must agree. */
	BUILD_BUG_ON(ARRAY_SIZE(xsave_cpuid_features) !=
		     ARRAY_SIZE(xfeature_names));

	if (!compel_test_cpu_cap(c, X86_FEATURE_FPU)) {
		pr_err("fpu: No FPU detected\n");
		return -1;
	}

	/* Without XSAVE only the legacy FXSAVE/FSAVE formats are usable. */
	if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVE)) {
		pr_info("fpu: x87 FPU will use %s\n",
			compel_test_cpu_cap(c, X86_FEATURE_FXSR) ?
			"FXSAVE" : "FSAVE");
		return 0;
	}

	/* CPUID.(EAX=0DH, ECX=0): EDX:EAX is the supported xfeatures mask. */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	c->xfeatures_mask = eax + ((uint64_t)edx << 32);

	if ((c->xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) {
		/*
		 * This indicates that something really unexpected happened
		 * with the enumeration.
		 */
		pr_err("fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx\n",
		       (unsigned long long)c->xfeatures_mask);
		return -1;
	}

	/*
	 * Clear XSAVE features that are disabled in the normal CPUID.
	 */
	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
		if (!compel_test_cpu_cap(c, xsave_cpuid_features[i]))
			c->xfeatures_mask &= ~(1 << i);
	}

	/* Keep only the features we support and drop supervisor states. */
	c->xfeatures_mask &= XCNTXT_MASK;
	c->xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR;

	/*
	 * xsaves is not enabled in userspace, so
	 * xsaves is mostly for debug purpose.
	 */
	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);
	c->xsave_size = ebx;		/* frame size for currently enabled features */
	c->xsave_size_max = ecx;	/* frame size if all supported features enabled */

	cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx);
	c->xsaves_size = ebx;		/* compacted (XSAVES) frame size */

	pr_debug("fpu: xfeatures_mask 0x%llx xsave_size %u xsave_size_max %u xsaves_size %u\n",
		 (unsigned long long)c->xfeatures_mask,
		 c->xsave_size, c->xsave_size_max, c->xsaves_size);

	if (c->xsave_size_max > sizeof(struct xsave_struct))
		pr_warn_once("fpu: max xsave frame exceed xsave_struct (%u %u)\n",
			     c->xsave_size_max, (unsigned)sizeof(struct xsave_struct));

	/* Pre-fill with 0xff: entries never written below read as -1 (invalid). */
	memset(c->xstate_offsets, 0xff, sizeof(c->xstate_offsets));
	memset(c->xstate_sizes, 0xff, sizeof(c->xstate_sizes));
	memset(c->xstate_comp_offsets, 0xff, sizeof(c->xstate_comp_offsets));
	memset(c->xstate_comp_sizes, 0xff, sizeof(c->xstate_comp_sizes));

	/* start at the beginning of the "extended state" */
	last_good_offset = offsetof(struct xsave_struct, extended_state_area);

	/*
	 * The FP xstates and SSE xstates are legacy states. They are always
	 * in the fixed offsets in the xsave area in either compacted form
	 * or standard form.
	 */
	c->xstate_offsets[0] = 0;
	c->xstate_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
	c->xstate_offsets[1] = c->xstate_sizes[0];
	c->xstate_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);

	/* Standard-format layout for the extended states, straight from CPUID. */
	for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
		if (!(c->xfeatures_mask & (1UL << i)))
			continue;

		/*
		 * If an xfeature is supervisor state, the offset
		 * in EBX is invalid. We leave it to -1.
		 *
		 * SDM says: If state component 'i' is a user state component,
		 * ECX[0] return 0; if state component i is a supervisor
		 * state component, ECX[0] returns 1.
		 */
		cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
		if (!(ecx & 1))
			c->xstate_offsets[i] = ebx;
		c->xstate_sizes[i] = eax;

		/*
		 * In our xstate size checks, we assume that the
		 * highest-numbered xstate feature has the
		 * highest offset in the buffer. Ensure it does.
		 */
		if (last_good_offset > c->xstate_offsets[i])
			pr_warn_once("fpu: misordered xstate %d %d\n",
				     last_good_offset, c->xstate_offsets[i]);
		last_good_offset = c->xstate_offsets[i];
	}

	BUILD_BUG_ON(sizeof(c->xstate_offsets) != sizeof(c->xstate_sizes));
	BUILD_BUG_ON(sizeof(c->xstate_comp_offsets) != sizeof(c->xstate_comp_sizes));

	/* Legacy FP/SSE sit at fixed offsets in the compacted form too. */
	c->xstate_comp_offsets[0] = 0;
	c->xstate_comp_sizes[0] = offsetof(struct i387_fxsave_struct, xmm_space);
	c->xstate_comp_offsets[1] = c->xstate_comp_sizes[0];
	c->xstate_comp_sizes[1] = FIELD_SIZEOF(struct i387_fxsave_struct, xmm_space);

	if (!compel_test_cpu_cap(c, X86_FEATURE_XSAVES)) {
		/* No XSAVES: the compacted layout equals the standard one. */
		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			if ((c->xfeatures_mask & (1UL << i))) {
				c->xstate_comp_offsets[i] = c->xstate_offsets[i];
				c->xstate_comp_sizes[i] = c->xstate_sizes[i];
			}
		}
	} else {
		/* XSAVES: extended states are packed back to back. */
		c->xstate_comp_offsets[FIRST_EXTENDED_XFEATURE] =
			FXSAVE_SIZE + XSAVE_HDR_SIZE;

		for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
			if ((c->xfeatures_mask & (1UL << i)))
				c->xstate_comp_sizes[i] = c->xstate_sizes[i];
			else
				c->xstate_comp_sizes[i] = 0;

			if (i > FIRST_EXTENDED_XFEATURE) {
				c->xstate_comp_offsets[i] = c->xstate_comp_offsets[i-1]
					+ c->xstate_comp_sizes[i-1];

				/*
				 * The value returned by ECX[1] indicates the alignment
				 * of state component 'i' when the compacted format
				 * of the extended region of an XSAVE area is used:
				 */
				cpuid_count(XSTATE_CPUID, i, &eax, &ebx, &ecx, &edx);
				if (ecx & 2)
					c->xstate_comp_offsets[i] = ALIGN(c->xstate_comp_offsets[i], 64);
			}
		}
	}

	/* Dump the final layout tables when debug logging is enabled. */
	if (!pr_quelled(COMPEL_LOG_DEBUG)) {
		for (i = 0; i < ARRAY_SIZE(c->xstate_offsets); i++) {
			if (!(c->xfeatures_mask & (1UL << i)))
				continue;
			pr_debug("fpu: %-32s xstate_offsets %6d / %-6d xstate_sizes %6d / %-6d\n",
				 xfeature_names[i], c->xstate_offsets[i], c->xstate_comp_offsets[i],
				 c->xstate_sizes[i], c->xstate_comp_sizes[i]);
		}
	}

	return 0;
}
int compel_cpuid(compel_cpuinfo_t *c)
{
uint32_t eax, ebx, ecx, edx;
......@@ -222,7 +423,10 @@ int compel_cpuid(compel_cpuinfo_t *c)
break;
}
return 0;
pr_debug("x86_family %u x86_vendor_id %s x86_model_id %s\n",
c->x86_family, c->x86_vendor_id, c->x86_model_id);
return compel_fpuid(c);
}
bool compel_cpu_has_feature(unsigned int feature)
......
......@@ -3,6 +3,8 @@
#include <stdint.h>
#include <compel/asm/fpu.h>
/*
* Adopted from linux kernel and enhanced from Intel/AMD manuals.
* Note these bits are not ABI for linux kernel but they _are_
......@@ -277,6 +279,7 @@ enum cpuid_leafs {
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
#define X86_FEATURE_HDC (14*32+13) /* HDC base registers present */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
......@@ -318,6 +321,7 @@ enum {
};
struct cpuinfo_x86 {
/* cpu context */
uint8_t x86_family;
uint8_t x86_vendor;
uint8_t x86_model;
......@@ -328,6 +332,17 @@ struct cpuinfo_x86 {
int cpuid_level;
char x86_vendor_id[16];
char x86_model_id[64];
/* fpu context */
uint64_t xfeatures_mask;
uint32_t xsave_size_max;
uint32_t xsave_size;
uint32_t xstate_offsets[XFEATURE_MAX];
uint32_t xstate_sizes[XFEATURE_MAX];
uint32_t xsaves_size;
uint32_t xstate_comp_offsets[XFEATURE_MAX];
uint32_t xstate_comp_sizes[XFEATURE_MAX];
};
typedef struct cpuinfo_x86 compel_cpuinfo_t;
......
......@@ -19,7 +19,66 @@
#define XSTATE_YMM		0x4

/* Size of the legacy fxsave area */
#define FXSAVE_SIZE		512
/* One page is enough for the whole xsave frame */
#define XSAVE_SIZE		4096
#define XSAVE_HDR_SIZE		64
#define XSAVE_HDR_OFFSET	FXSAVE_SIZE
#define XSAVE_YMM_SIZE		256
#define XSAVE_YMM_OFFSET	(XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET)
/*
 * List of XSAVE features Linux knows about:
 */
enum xfeature {
	XFEATURE_FP,
	XFEATURE_SSE,
	/*
	 * Values above here are "legacy states".
	 * Those below are "extended states".
	 */
	XFEATURE_YMM,
	XFEATURE_BNDREGS,
	XFEATURE_BNDCSR,
	XFEATURE_OPMASK,
	XFEATURE_ZMM_Hi256,
	XFEATURE_Hi16_ZMM,
	XFEATURE_PT,
	XFEATURE_PKRU,
	XFEATURE_HDC,

	XFEATURE_MAX,
};

/* CPUID leaf used to enumerate the XSAVE features */
#define XSTATE_CPUID		0x0000000d

#define XFEATURE_MASK_FP	(1 << XFEATURE_FP)
#define XFEATURE_MASK_SSE	(1 << XFEATURE_SSE)
#define XFEATURE_MASK_YMM	(1 << XFEATURE_YMM)
#define XFEATURE_MASK_BNDREGS	(1 << XFEATURE_BNDREGS)
#define XFEATURE_MASK_BNDCSR	(1 << XFEATURE_BNDCSR)
#define XFEATURE_MASK_OPMASK	(1 << XFEATURE_OPMASK)
#define XFEATURE_MASK_ZMM_Hi256	(1 << XFEATURE_ZMM_Hi256)
#define XFEATURE_MASK_Hi16_ZMM	(1 << XFEATURE_Hi16_ZMM)
#define XFEATURE_MASK_PT	(1 << XFEATURE_PT)
#define XFEATURE_MASK_PKRU	(1 << XFEATURE_PKRU)
#define XFEATURE_MASK_HDC	(1 << XFEATURE_HDC)

#define XFEATURE_MASK_FPSSE	(XFEATURE_MASK_FP | XFEATURE_MASK_SSE)
#define XFEATURE_MASK_AVX512	(XFEATURE_MASK_OPMASK | XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM)

#define FIRST_EXTENDED_XFEATURE	XFEATURE_YMM

/*
 * Supervisor features: must be built from the MASK macros, not the
 * raw enum values (the original `XFEATURE_HDC` here OR-ed the value
 * 10, i.e. the SSE and BNDREGS bits, into the supervisor mask).
 */
#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT | XFEATURE_MASK_HDC)

/* All currently supported features */
#define XCNTXT_MASK							\
	(XFEATURE_MASK_FP	| XFEATURE_MASK_SSE	|		\
	 XFEATURE_MASK_YMM	| XFEATURE_MASK_OPMASK	|		\
	 XFEATURE_MASK_ZMM_Hi256 | XFEATURE_MASK_Hi16_ZMM |		\
	 XFEATURE_MASK_PKRU	| XFEATURE_MASK_BNDREGS	|		\
	 XFEATURE_MASK_BNDCSR)
struct fpx_sw_bytes {
uint32_t magic1;
......@@ -66,27 +125,141 @@ struct i387_fxsave_struct {
/*
 * XSAVE area header (64 bytes, see XSAVE_HDR_SIZE): xstate_bv is the
 * bitmap of state components present in the buffer, xcomp_bv the
 * compaction bitmap; the rest is architecturally reserved.
 *
 * The diff-merged text carried both the old members (reserved1[2],
 * reserved2[5]) and the new ones; only the new layout keeps the
 * header at its architectural 64-byte size.
 */
struct xsave_hdr_struct {
	uint64_t			xstate_bv;
	uint64_t			xcomp_bv;
	uint64_t			reserved[6];
} __packed;
/*
* xstate_header.xcomp_bv[63] indicates that the extended_state_area
* is in compacted format.
*/
#define XCOMP_BV_COMPACTED_FORMAT ((uint64_t)1 << 63)
/*
* State component 2:
*
* There are 16x 256-bit AVX registers named YMM0-YMM15.
* The low 128 bits are aliased to the 16 SSE registers (XMM0-XMM15)
* and are stored in 'struct fxregs_state::xmm_space[]' in the
* "legacy" area.
*
* The high 128 bits are stored here.
*/
struct ymmh_struct {
	/* 16 registers x 16 bytes of upper half = 256 bytes */
	uint32_t                        ymmh_space[64];
} __packed;
/* Intel MPX support: */
/* One 128-bit MPX bounds register: lower and upper bound. */
struct mpx_bndreg {
	uint64_t			lower_bound;
	uint64_t			upper_bound;
} __packed;
/*
* State component 3 is used for the 4 128-bit bounds registers
*/
struct mpx_bndreg_state {
	/* bnd0 - bnd3 */
	struct mpx_bndreg		bndreg[4];
} __packed;
/*
* State component 4 is used for the 64-bit user-mode MPX
* configuration register BNDCFGU and the 64-bit MPX status
* register BNDSTATUS. We call the pair "BNDCSR".
*/
/* BNDCFGU (configuration) + BNDSTATUS (status) pair, "BNDCSR". */
struct mpx_bndcsr {
	uint64_t			bndcfgu;
	uint64_t			bndstatus;
} __packed;
/*
* The BNDCSR state is padded out to be 64-bytes in size.
*/
struct mpx_bndcsr_state {
	/* The union pads the 16-byte BNDCSR pair out to 64 bytes. */
	union {
		struct mpx_bndcsr		bndcsr;
		uint8_t				pad_to_64_bytes[64];
	};
} __packed;
/* AVX-512 Components: */
/*
* State component 5 is used for the 8 64-bit opmask registers
* k0-k7 (opmask state).
*/
/* Eight 64-bit opmask registers k0-k7. */
struct avx_512_opmask_state {
	uint64_t			opmask_reg[8];
} __packed;
/*
* State component 6 is used for the upper 256 bits of the
* registers ZMM0-ZMM15. These 16 256-bit values are denoted
* ZMM0_H-ZMM15_H (ZMM_Hi256 state).
*/
/* Upper 256 bits of ZMM0-ZMM15: 16 registers x 4 x 64 bits. */
struct avx_512_zmm_uppers_state {
	uint64_t			zmm_upper[16 * 4];
} __packed;
/*
* State component 7 is used for the 16 512-bit registers
* ZMM16-ZMM31 (Hi16_ZMM state).
*/
/* Full 512-bit registers ZMM16-ZMM31: 16 registers x 8 x 64 bits. */
struct avx_512_hi16_state {
	uint64_t			hi16_zmm[16 * 8];
} __packed;
/*
* State component 9: 32-bit PKRU register. The state is
* 8 bytes long but only 4 bytes is used currently.
*/
struct pkru_state {
	/* Only the low 4 bytes carry the PKRU register value. */
	uint32_t			pkru;
	uint32_t			pad;
} __packed;
/*
* This is our most modern FPU state format, as saved by the XSAVE
* and restored by the XRSTOR instructions.
*
* It consists of a legacy fxregs portion, an xstate header and
* subsequent areas as defined by the xstate header. Not all CPUs
* support all the extensions, so the size of the extended area
* can vary quite a bit between CPUs.
*
*
* One page should be enough for the whole xsave state.
*/
#define EXTENDED_STATE_AREA_SIZE (4096 - sizeof(struct i387_fxsave_struct) - sizeof(struct xsave_hdr_struct))
/*
* cpu requires it to be 64 byte aligned
*/
/*
 * Full XSAVE frame: legacy fxsave area, the xstate header, then the
 * extended state area whose contents depend on the enabled features.
 *
 * The diff-merged text carried a stray `struct ymmh_struct ymmh;`
 * member before the union (the removed old line); keeping it would
 * duplicate the member name and shift all extended-state offsets.
 */
struct xsave_struct {
	struct i387_fxsave_struct	i387;
	struct xsave_hdr_struct		xsave_hdr;
	union {
		/*
		 * This ymmh is unneeded, kept for
		 * backward compatibility.
		 */
		struct ymmh_struct	ymmh;
		uint8_t			extended_state_area[EXTENDED_STATE_AREA_SIZE];
	};
} __aligned(FP_MIN_ALIGN_BYTES) __packed;
/*
 * ia32 variant of the XSAVE frame; identical layout to xsave_struct
 * but with the weaker FXSAVE alignment requirement. The stray
 * pre-union `struct ymmh_struct ymmh;` member (a removed old diff
 * line) is dropped — it duplicated the member inside the union.
 */
struct xsave_struct_ia32 {
	struct i387_fxsave_struct	i387;
	struct xsave_hdr_struct		xsave_hdr;
	union {
		/*
		 * This ymmh is unneeded, kept for
		 * backward compatibility.
		 */
		struct ymmh_struct	ymmh;
		uint8_t			extended_state_area[EXTENDED_STATE_AREA_SIZE];
	};
} __aligned(FXSAVE_ALIGN_BYTES) __packed;
typedef struct {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment