This problem is still present in every grsecurity kernel >= 4.3. The good news is that today I installed Xen on my machine to debug it myself, and I came up with a successful workaround! Please note that the Xen version is 4.4.
There aren't any problems with grsecurity kernels <= 4.2, but since 4.3 the Xen guest crashes immediately after the bootloader. I can get the following backtrace from the Xen debugging tools:
- Code: Select all
# /usr/lib/xen-4.4/bin/xenctx -s System.map-4.3.3-grsec 12
rip: ffffffff8100b2b0 pmu_msr_read+0x10
flags: 00000282 i s nz
rsp: ffffffff81aeff30
rax: 8000000000000000 rcx: 0000000000000001 rdx: ffffffff81aeffcc
rbx: 00000000c0000080 rsi: ffffffff81aeffa0 rdi: 00000000c0000080
rbp: ffffffff81aeffa0 r8: 0000000000000001 r9: 00000000ffffffff
r10: ffffffff81cf9000 r11: 0000000000000000 r12: ffffffff81aeffcc
r13: ffffffff81aeffc4 r14: ffffffff81aeffc0 r15: 6f73b764afec1c9d
cs: e033 ss: e02b ds: 0000 es: 0000
fs: 0000 @ 0000000000000000
gs: 0000 @ 0000000000000000/0000000000000000
Code (instr addr ffffffff8100b2b0)
00 00 00 00 00 41 54 49 89 d4 55 48 89 f5 53 89 fb 48 83 ec 10 <65> 48 8b 04 25 28 00 00 00 48 89
Stack:
0000000000000001 0000000000000000 0000000000000000 ffffffff8100b2b0
000000010000e030 0000000000010082 ffffffff81aeff70 000000000000e02b
0000000000000000 0000000000000000 00000000c0000080 ffffffff81aeffcc
ffffffff81aeffc8 ffffffff810041c8 ffffffff81aeffc8 ffffffff81aeffcc
Call Trace:
[<ffffffff8100b2b0>] pmu_msr_read+0x10 <--
[<ffffffff8100b2b0>] pmu_msr_read+0x10
[<ffffffff810041c8>] xen_read_msr_safe+0x18
[<ffffffff81be93eb>] xen_start_kernel+0x1b9
Apparently, the kernel crashes just after
- Code: Select all
xen_start_kernel
, at
- Code: Select all
pmu_msr_read
. What is
- Code: Select all
pmu_msr_read
? I did a diff, and found
- Code: Select all
--- ../../4.2.7/linux-4.2.7/arch/x86/xen/enlighten.c 2016-09-11 00:44:12.010022936 +0800
+++ arch/x86/xen/enlighten.c 2015-12-15 13:41:43.000000000 +0800
@@ -1030,6 +1034,9 @@ static u64 xen_read_msr_safe(unsigned in
{
u64 val;
+ if (pmu_msr_read(msr, &val, err))
+ return val;
+
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
@@ -1074,9 +1081,11 @@ static int xen_write_msr_safe(unsigned i
/* Fast syscall setup is all done in hypercalls, so
these are all ignored. Stub them out here to stop
Xen console noise. */
+ break;
default:
- ret = native_write_msr_safe(msr, low, high);
+ if (!pmu_msr_write(msr, low, high, &ret))
+ ret = native_write_msr_safe(msr, low, high);
}
return ret;
Aha,
- Code: Select all
pmu_msr_write
was a new thing merged in Linux 4.3, which explains everything. I traced it further to these commits:
xen/PMU: Initialization code for Xen PMU 65d0cf0be79feebeb19e7626fd3ed41ae73f642d
xen/PMU: Describe vendor-specific PMU registers e27b72df01109c689062caeba1defa013b759e0e
xen/PMU: Intercept PMU-related MSR and APIC accesses 6b08cd6328c58a2ae190c5ee03a2ffcab5ef828e
xen/PMU: PMU emulation code bf6dfb154d935725c9a2005033ca33017b9df439
As we can see, PMU support was a new feature, and it causes problems on grsec kernels. Let's revert the last two commits:
- Code: Select all
wget https://github.com/torvalds/linux/commit/bf6dfb154d935725c9a2005033ca33017b9df439.patch
wget https://github.com/torvalds/linux/commit/6b08cd6328c58a2ae190c5ee03a2ffcab5ef828e.patch
patch -p1 -R < bf6dfb154d935725c9a2005033ca33017b9df439.patch
patch -p1 -R < 6b08cd6328c58a2ae190c5ee03a2ffcab5ef828e.patch
So pmu_msr_read/write are eliminated and the code is back to its original version; now Xen boots smoothly!
- Code: Select all
$ uname -r
4.7.3-hardened
By the way, simply reverting the two patches only works on 4.3.3. On grsec 4.7.3 there is a conflict that needs to be resolved manually, so here is a patch for 4.7.3 that works around the problem:
- Code: Select all
diff -uprN linux-4.7.3-hardened/arch/x86/xen/apic.c linux-4.7.3-hardened.good/arch/x86/xen/apic.c
--- linux-4.7.3-hardened/arch/x86/xen/apic.c 2016-07-24 19:23:50.000000000 +0000
+++ linux-4.7.3-hardened.good/arch/x86/xen/apic.c 2016-09-10 20:05:21.450647009 +0000
@@ -7,7 +7,6 @@
#include <xen/xen.h>
#include <xen/interface/physdev.h>
#include "xen-ops.h"
-#include "pmu.h"
#include "smp.h"
static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
@@ -73,10 +72,8 @@ static u32 xen_apic_read(u32 reg)
static void xen_apic_write(u32 reg, u32 val)
{
- if (reg == APIC_LVTPC) {
- (void)pmu_apic_update(reg);
+ if (reg == APIC_LVTPC)
return;
- }
/* Warn to see if there's any stray references */
WARN(1,"register: %x, value: %x\n", reg, val);
diff -uprN linux-4.7.3-hardened/arch/x86/xen/enlighten.c linux-4.7.3-hardened.good/arch/x86/xen/enlighten.c
--- linux-4.7.3-hardened/arch/x86/xen/enlighten.c 2016-09-10 19:59:29.237313676 +0000
+++ linux-4.7.3-hardened.good/arch/x86/xen/enlighten.c 2016-09-10 20:06:49.683980342 +0000
@@ -1031,9 +1031,6 @@ static u64 xen_read_msr_safe(unsigned in
{
u64 val;
- if (pmu_msr_read(msr, &val, err))
- return val;
-
val = native_read_msr_safe(msr, err);
switch (msr) {
case MSR_IA32_APICBASE:
@@ -1081,13 +1078,17 @@ static int xen_write_msr_safe(unsigned i
break;
default:
- if (!pmu_msr_write(msr, low, high, &ret))
- ret = native_write_msr_safe(msr, low, high);
+ ret = native_write_msr_safe(msr, low, high);
}
return ret;
}
+unsigned long long xen_read_pmc(int counter)
+{
+ return 0;
+}
+
static u64 xen_read_msr(unsigned int msr)
{
/*
diff -uprN linux-4.7.3-hardened/arch/x86/xen/pmu.c linux-4.7.3-hardened.good/arch/x86/xen/pmu.c
--- linux-4.7.3-hardened/arch/x86/xen/pmu.c 2016-07-24 19:23:50.000000000 +0000
+++ linux-4.7.3-hardened.good/arch/x86/xen/pmu.c 2016-09-10 20:05:21.450647009 +0000
@@ -13,20 +13,11 @@
/* x86_pmu.handle_irq definition */
#include "../events/perf_event.h"
-#define XENPMU_IRQ_PROCESSING 1
-struct xenpmu {
- /* Shared page between hypervisor and domain */
- struct xen_pmu_data *xenpmu_data;
- uint8_t flags;
-};
-static DEFINE_PER_CPU(struct xenpmu, xenpmu_shared);
-#define get_xenpmu_data() (this_cpu_ptr(&xenpmu_shared)->xenpmu_data)
-#define get_xenpmu_flags() (this_cpu_ptr(&xenpmu_shared)->flags)
-
-/* Macro for computing address of a PMU MSR bank */
-#define field_offset(ctxt, field) ((void *)((uintptr_t)ctxt + \
- (uintptr_t)ctxt->field))
+/* Shared page between hypervisor and domain */
+static DEFINE_PER_CPU(struct xen_pmu_data *, xenpmu_shared);
+#define get_xenpmu_data() per_cpu(xenpmu_shared, smp_processor_id())
+
/* AMD PMU */
#define F15H_NUM_COUNTERS 6
@@ -60,8 +51,6 @@ static __read_mostly int amd_num_counter
/* Alias registers (0x4c1) for full-width writes to PMCs */
#define MSR_PMC_ALIAS_MASK (~(MSR_IA32_PERFCTR0 ^ MSR_IA32_PMC0))
-#define INTEL_PMC_TYPE_SHIFT 30
-
static __read_mostly int intel_num_arch_counters, intel_num_fixed_counters;
@@ -178,232 +167,6 @@ static int is_intel_pmu_msr(u32 msr_inde
}
}
-static bool xen_intel_pmu_emulate(unsigned int msr, u64 *val, int type,
- int index, bool is_read)
-{
- uint64_t *reg = NULL;
- struct xen_pmu_intel_ctxt *ctxt;
- uint64_t *fix_counters;
- struct xen_pmu_cntr_pair *arch_cntr_pair;
- struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
- uint8_t xenpmu_flags = get_xenpmu_flags();
-
-
- if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
- return false;
-
- ctxt = &xenpmu_data->pmu.c.intel;
-
- switch (msr) {
- case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
- reg = &ctxt->global_ovf_ctrl;
- break;
- case MSR_CORE_PERF_GLOBAL_STATUS:
- reg = &ctxt->global_status;
- break;
- case MSR_CORE_PERF_GLOBAL_CTRL:
- reg = &ctxt->global_ctrl;
- break;
- case MSR_CORE_PERF_FIXED_CTR_CTRL:
- reg = &ctxt->fixed_ctrl;
- break;
- default:
- switch (type) {
- case MSR_TYPE_COUNTER:
- fix_counters = field_offset(ctxt, fixed_counters);
- reg = &fix_counters[index];
- break;
- case MSR_TYPE_ARCH_COUNTER:
- arch_cntr_pair = field_offset(ctxt, arch_counters);
- reg = &arch_cntr_pair[index].counter;
- break;
- case MSR_TYPE_ARCH_CTRL:
- arch_cntr_pair = field_offset(ctxt, arch_counters);
- reg = &arch_cntr_pair[index].control;
- break;
- default:
- return false;
- }
- }
-
- if (reg) {
- if (is_read)
- *val = *reg;
- else {
- *reg = *val;
-
- if (msr == MSR_CORE_PERF_GLOBAL_OVF_CTRL)
- ctxt->global_status &= (~(*val));
- }
- return true;
- }
-
- return false;
-}
-
-static bool xen_amd_pmu_emulate(unsigned int msr, u64 *val, bool is_read)
-{
- uint64_t *reg = NULL;
- int i, off = 0;
- struct xen_pmu_amd_ctxt *ctxt;
- uint64_t *counter_regs, *ctrl_regs;
- struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
- uint8_t xenpmu_flags = get_xenpmu_flags();
-
- if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING))
- return false;
-
- if (k7_counters_mirrored &&
- ((msr >= MSR_K7_EVNTSEL0) && (msr <= MSR_K7_PERFCTR3)))
- msr = get_fam15h_addr(msr);
-
- ctxt = &xenpmu_data->pmu.c.amd;
- for (i = 0; i < amd_num_counters; i++) {
- if (msr == amd_ctrls_base + off) {
- ctrl_regs = field_offset(ctxt, ctrls);
- reg = &ctrl_regs[i];
- break;
- } else if (msr == amd_counters_base + off) {
- counter_regs = field_offset(ctxt, counters);
- reg = &counter_regs[i];
- break;
- }
- off += amd_msr_step;
- }
-
- if (reg) {
- if (is_read)
- *val = *reg;
- else
- *reg = *val;
-
- return true;
- }
- return false;
-}
-
-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err)
-{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (is_amd_pmu_msr(msr)) {
- if (!xen_amd_pmu_emulate(msr, val, 1))
- *val = native_read_msr_safe(msr, err);
- return true;
- }
- } else {
- int type, index;
-
- if (is_intel_pmu_msr(msr, &type, &index)) {
- if (!xen_intel_pmu_emulate(msr, val, type, index, 1))
- *val = native_read_msr_safe(msr, err);
- return true;
- }
- }
-
- return false;
-}
-
-bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err)
-{
- uint64_t val = ((uint64_t)high << 32) | low;
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- if (is_amd_pmu_msr(msr)) {
- if (!xen_amd_pmu_emulate(msr, &val, 0))
- *err = native_write_msr_safe(msr, low, high);
- return true;
- }
- } else {
- int type, index;
-
- if (is_intel_pmu_msr(msr, &type, &index)) {
- if (!xen_intel_pmu_emulate(msr, &val, type, index, 0))
- *err = native_write_msr_safe(msr, low, high);
- return true;
- }
- }
-
- return false;
-}
-
-static unsigned long long xen_amd_read_pmc(int counter)
-{
- struct xen_pmu_amd_ctxt *ctxt;
- uint64_t *counter_regs;
- struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
- uint8_t xenpmu_flags = get_xenpmu_flags();
-
- if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
- uint32_t msr;
- int err;
-
- msr = amd_counters_base + (counter * amd_msr_step);
- return native_read_msr_safe(msr, &err);
- }
-
- ctxt = &xenpmu_data->pmu.c.amd;
- counter_regs = field_offset(ctxt, counters);
- return counter_regs[counter];
-}
-
-static unsigned long long xen_intel_read_pmc(int counter)
-{
- struct xen_pmu_intel_ctxt *ctxt;
- uint64_t *fixed_counters;
- struct xen_pmu_cntr_pair *arch_cntr_pair;
- struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
- uint8_t xenpmu_flags = get_xenpmu_flags();
-
- if (!xenpmu_data || !(xenpmu_flags & XENPMU_IRQ_PROCESSING)) {
- uint32_t msr;
- int err;
-
- if (counter & (1 << INTEL_PMC_TYPE_SHIFT))
- msr = MSR_CORE_PERF_FIXED_CTR0 + (counter & 0xffff);
- else
- msr = MSR_IA32_PERFCTR0 + counter;
-
- return native_read_msr_safe(msr, &err);
- }
-
- ctxt = &xenpmu_data->pmu.c.intel;
- if (counter & (1 << INTEL_PMC_TYPE_SHIFT)) {
- fixed_counters = field_offset(ctxt, fixed_counters);
- return fixed_counters[counter & 0xffff];
- }
-
- arch_cntr_pair = field_offset(ctxt, arch_counters);
- return arch_cntr_pair[counter].counter;
-}
-
-unsigned long long xen_read_pmc(int counter)
-{
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
- return xen_amd_read_pmc(counter);
- else
- return xen_intel_read_pmc(counter);
-}
-
-int pmu_apic_update(uint32_t val)
-{
- int ret;
- struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
-
- if (!xenpmu_data) {
- pr_warn_once("%s: pmudata not initialized\n", __func__);
- return -EINVAL;
- }
-
- xenpmu_data->pmu.l.lapic_lvtpc = val;
-
- if (get_xenpmu_flags() & XENPMU_IRQ_PROCESSING)
- return 0;
-
- ret = HYPERVISOR_xenpmu_op(XENPMU_lvtpc_set, NULL);
-
- return ret;
-}
-
/* perf callbacks */
static int xen_is_in_guest(void)
{
@@ -476,37 +239,26 @@ static void xen_convert_regs(const struc
irqreturn_t xen_pmu_irq_handler(int irq, void *dev_id)
{
- int err, ret = IRQ_NONE;
+ int ret = IRQ_NONE;
struct pt_regs regs;
const struct xen_pmu_data *xenpmu_data = get_xenpmu_data();
- uint8_t xenpmu_flags = get_xenpmu_flags();
if (!xenpmu_data) {
pr_warn_once("%s: pmudata not initialized\n", __func__);
return ret;
}
- this_cpu_ptr(&xenpmu_shared)->flags =
- xenpmu_flags | XENPMU_IRQ_PROCESSING;
xen_convert_regs(&xenpmu_data->pmu.r.regs, &regs,
xenpmu_data->pmu.pmu_flags);
if (x86_pmu.handle_irq(&regs))
ret = IRQ_HANDLED;
- /* Write out cached context to HW */
- err = HYPERVISOR_xenpmu_op(XENPMU_flush, NULL);
- this_cpu_ptr(&xenpmu_shared)->flags = xenpmu_flags;
- if (err) {
- pr_warn_once("%s: failed hypercall, err: %d\n", __func__, err);
- return IRQ_NONE;
- }
-
return ret;
}
bool is_xen_pmu(int cpu)
{
- return (get_xenpmu_data() != NULL);
+ return (per_cpu(xenpmu_shared, cpu) != NULL);
}
void xen_pmu_init(int cpu)
@@ -536,8 +288,7 @@ void xen_pmu_init(int cpu)
if (err)
goto fail;
- per_cpu(xenpmu_shared, cpu).xenpmu_data = xenpmu_data;
- per_cpu(xenpmu_shared, cpu).flags = 0;
+ per_cpu(xenpmu_shared, cpu) = xenpmu_data;
if (cpu == 0) {
perf_register_guest_info_callbacks(&xen_guest_cbs);
@@ -565,6 +316,6 @@ void xen_pmu_finish(int cpu)
(void)HYPERVISOR_xenpmu_op(XENPMU_finish, &xp);
- free_pages((unsigned long)per_cpu(xenpmu_shared, cpu).xenpmu_data, 0);
- per_cpu(xenpmu_shared, cpu).xenpmu_data = NULL;
+ free_pages((unsigned long)per_cpu(xenpmu_shared, cpu), 0);
+ per_cpu(xenpmu_shared, cpu) = NULL;
}
diff -uprN linux-4.7.3-hardened/arch/x86/xen/pmu.h linux-4.7.3-hardened.good/arch/x86/xen/pmu.h
--- linux-4.7.3-hardened/arch/x86/xen/pmu.h 2016-07-24 19:23:50.000000000 +0000
+++ linux-4.7.3-hardened.good/arch/x86/xen/pmu.h 2016-09-10 20:05:21.453980342 +0000
@@ -7,9 +7,5 @@ irqreturn_t xen_pmu_irq_handler(int irq,
void xen_pmu_init(int cpu);
void xen_pmu_finish(int cpu);
bool is_xen_pmu(int cpu);
-bool pmu_msr_read(unsigned int msr, uint64_t *val, int *err);
-bool pmu_msr_write(unsigned int msr, uint32_t low, uint32_t high, int *err);
-int pmu_apic_update(uint32_t reg);
-unsigned long long xen_read_pmc(int counter);
#endif /* __XEN_PMU_H */
But I have no knowledge of Xen. What is the nature of the problem? PaX Team, please investigate it!