mirror of
https://github.com/rd-stuffs/msm-4.14.git
synced 2025-02-20 11:45:48 +08:00
Merge remote-tracking branch 'remotes/origin/tmp-dad5c14' into msm-4.14
* remotes/origin/tmp-dad5c14: Linux 4.14.9 linux/compiler.h: Split into compiler.h and compiler_types.h selftests/bpf: add tests for recent bugfixes bpf: fix integer overflows bpf: don't prune branches when a scalar is replaced with a pointer bpf: force strict alignment checks for stack pointers bpf: fix missing error return in check_stack_boundary() bpf: fix 32-bit ALU op verification bpf: fix incorrect tracking of register size truncation bpf: fix incorrect sign extension in check_alu_op() bpf/verifier: fix bounds calculation on BPF_RSH bpf, sparc: fix usage of wrong reg for load_skb_regs after call bpf, ppc64: do not reload skb pointers in non-skb context bpf, s390x: do not reload skb pointers in non-skb context bpf: fix corruption on concurrent perf_event_output calls bpf: fix branch pruning logic mm/sparsemem: Fix ARM64 boot crash when CONFIG_SPARSEMEM_EXTREME=y platform/x86: asus-wireless: send an EV_SYN/SYN_REPORT between state changes thermal/drivers/hisi: Fix multiple alarm interrupts firing thermal/drivers/hisi: Simplify the temperature/step computation thermal/drivers/hisi: Fix kernel panic on alarm interrupt thermal/drivers/hisi: Fix missing interrupt enablement IB/opa_vnic: Properly return the total MACs in UC MAC list IB/opa_vnic: Properly clear Mac Table Digest drm/vc4: Avoid using vrefresh==0 mode in DSI htotal math. cpuidle: fix broadcast control when broadcast can not be entered rtc: set the alarm to the next expiring timer tcp: fix under-evaluated ssthresh in TCP Vegas clk: sunxi-ng: sun6i: Rename HDMI DDC clock to avoid name collision staging: greybus: light: Release memory obtained by kasprintf RDMA/hns: Avoid NULL pointer exception net: ipv6: send NS for DAD when link operationally up ibmvnic: Set state UP fm10k: ensure we process SM mbx when processing VF mbx ARM: exynos_defconfig: Enable UAS support for Odroid HC1 board vfio/pci: Virtualize Maximum Payload Size i40e: fix client notify of VF reset scsi: lpfc: Fix warning messages when NVME_TARGET_FC not defined scsi: lpfc: PLOGI failures during NPIV testing scsi: lpfc: Fix secure firmware updates fm10k: fix mis-ordered parameters in declaration for .ndo_set_vf_bw ASoC: codecs: msm8916-wcd-analog: fix module autoload sctp: silence warns on sctp_stream_init allocations powerpc/watchdog: Do not trigger SMP crash from touch_nmi_watchdog powerpc/xmon: Avoid tripping SMP hardlockup watchdog ASoC: img-parallel-out: Add pm_runtime_get/put to set_fmt callback ASoC: codecs: msm8916-wcd-analog: fix micbias level tracing: Exclude 'generic fields' from histograms PCI/AER: Report non-fatal errors only to the affected endpoint i40e/i40evf: spread CPU affinity hints across online CPUs only Bluetooth: hci_bcm: Fix setting of irq trigger type Bluetooth: hci_uart_set_flow_control: Fix NULL deref when using serdev leds: pca955x: Don't invert requested value in pca955x_gpio_set_value() ipv6: grab rt->rt6i_ref before allocating pcpu rt ip_gre: check packet length and mtu correctly in erspan tx md: always set THREAD_WAKEUP and wake up wqueue if thread existed block,bfq: Disable writeback throttling IB/rxe: check for allocation failure on elem ixgbe: fix use of uninitialized padding iio: st_sensors: add register mask for status register i40e: use the safe hash table iterator when deleting mac filters igb: check memory allocation failure PM / OPP: Move error message to debug level PCI: Create SR-IOV virtfn/physfn links before attaching driver scsi: mpt3sas: Fix IO error occurs on pulling out a drive from RAID1 volume created on two SATA drive scsi: cxgb4i: fix Tx skb leak PCI: Avoid bus reset if bridge itself is broken net: phy: at803x: Change error to EINVAL for invalid MAC kvm, mm: account kvm related kmem slabs to kmemcg rtc: pl031: make interrupt optional crypto: lrw - Fix an error handling path in 'create()' crypto: crypto4xx - increase context and scatter ring buffer elements clk: sunxi-ng: sun5i: Fix bit offset of audio PLL post-divider clk: sunxi-ng: nm: Check if requested rate is supported by fractional clock drm: Add retries for lspcon mode detection backlight: pwm_bl: Fix overflow condition optee: fix invalid of_node_put() in optee_driver_init() x86/cpufeatures: Make CPU bugs sticky x86/paravirt: Provide a way to check for hypervisors x86/paravirt: Dont patch flush_tlb_single x86/entry/64: Make cpu_entry_area.tss read-only x86/entry: Clean up the SYSENTER_stack code x86/entry/64: Remove the SYSENTER stack canary x86/entry/64: Move the IST stacks into struct cpu_entry_area x86/entry/64: Create a per-CPU SYSCALL entry trampoline x86/entry/64: Return to userspace from the trampoline stack x86/entry/64: Use a per-CPU trampoline stack for IDT entries x86/espfix/64: Stop assuming that pt_regs is on the entry stack x86/entry/64: Separate cpu_current_top_of_stack from TSS.sp0 x86/entry: Remap the TSS into the CPU entry area x86/entry: Move SYSENTER_stack to the beginning of struct tss_struct x86/dumpstack: Handle stack overflow on all stacks x86/entry: Fix assumptions that the HW TSS is at the beginning of cpu_tss x86/kasan/64: Teach KASAN about the cpu_entry_area x86/mm/fixmap: Generalize the GDT fixmap mechanism, introduce struct cpu_entry_area x86/entry/gdt: Put per-CPU GDT remaps in ascending order x86/dumpstack: Add get_stack_info() support for the SYSENTER stack x86/entry/64: Allocate and enable the SYSENTER stack x86/irq/64: Print the offending IP in the stack overflow warning x86/irq: Remove an old outdated comment about context tracking races x86/unwinder: Handle stack overflows more gracefully x86/unwinder/orc: Dont bail on stack overflow x86/entry/64/paravirt: Use paravirt-safe macro to access eflags x86/mm/kasan: Don't use vmemmap_populate() to initialize shadow locking/barriers: Convert users of lockless_dereference() to READ_ONCE() locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE() bpf: fix build issues on um due to mising bpf_perf_event.h perf/x86: Enable free running PEBS for REGS_USER/INTR x86: Make X86_BUG_FXSAVE_LEAK detectable in CPUID on AMD x86/cpufeature: Add User-Mode Instruction Prevention definitions drivers/misc/intel/pti: Rename the header file to free up the namespace x86/virt: Add enum for hypervisors to replace x86_hyper x86/virt, x86/platform: Merge 'struct x86_hyper' into 'struct x86_platform' and 'struct x86_init' ACPI / APEI: Replace ioremap_page_range() with fixmap selftests/x86/ldt_gdt: Run most existing LDT test cases against the GDT as well selftests/x86/ldt_gdt: Add infrastructure to test set_thread_area() x86/cpufeatures: Fix various details in the feature definitions x86/cpufeatures: Re-tabulate the X86_FEATURE definitions x86/mm: Define _PAGE_TABLE using _KERNPG_TABLE bitops: Revert cbe96375025e ("bitops: Add clear/set_bit32() to linux/bitops.h") x86/cpuid: Replace set/clear_bit32() x86/entry/64: Shorten TEST instructions x86/traps: Use a new on_thread_stack() helper to clean up an assertion x86/entry/64: Remove thread_struct::sp0 x86/entry/32: Fix cpu_current_top_of_stack initialization at boot x86/entry/64: Remove all remaining direct thread_struct::sp0 reads x86/entry/64: Stop initializing TSS.sp0 at boot x86/xen/64, x86/entry/64: Clean up SP code in cpu_initialize_context() x86/entry: Add task_top_of_stack() to find the top of a task's stack x86/entry/64: Pass SP0 directly to load_sp0() x86/entry/32: Pull the MSR_IA32_SYSENTER_CS update code out of native_load_sp0() x86/entry/64: De-Xen-ify our NMI code xen, x86/entry/64: Add xen NMI trap entry x86/entry/64: Remove the RESTORE_..._REGS infrastructure x86/entry/64: Use POP instead of MOV to restore regs on NMI return x86/entry/64: Merge the fast and slow SYSRET paths x86/entry/64: Use pop instead of movq in syscall_return_via_sysret x86/entry/64: Shrink paranoid_exit_restore and make labels local x86/entry/64: Simplify reg restore code in the standard IRET paths x86/entry/64: Move SWAPGS into the common IRET-to-usermode path x86/entry/64: Split the IRET-to-user and IRET-to-kernel paths x86/entry/64: Remove the restore_c_regs_and_iret label ptrace,x86: Make user_64bit_mode() available to 32-bit builds x86/boot: Relocate definition of the initial state of CR0 x86/mm: Relocate page fault error codes to traps.h x86/cpufeatures: Enable new SSE/AVX/AVX512 CPU features x86/mm/64: Rename the register_page_bootmem_memmap() 'size' parameter to 'nr_pages' x86/build: Beautify build log of syscall headers x86/asm: Don't use the confusing '.ifeq' directive ACPI / APEI: remove the unused dead-code for SEA/NMI notification type x86/xen: Drop 5-level paging support code from the XEN_PV code x86/xen: Provide pre-built page tables only for CONFIG_XEN_PV=y and CONFIG_XEN_PVH=y x86/kasan: Use the same shadow offset for 4- and 5-level paging mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y x86/cpuid: Prevent out of bound access in do_clear_cpu_cap() objtool: Print top level commands on incorrect usage x86/platform/UV: Convert timers to use timer_setup() x86/fpu: Remove the explicit clearing of XSAVE dependent features x86/fpu: Make XSAVE check the base CPUID features before enabling x86/fpu: Parse clearcpuid= as early XSAVE argument x86/cpuid: Add generic table for CPUID dependencies bitops: Add clear/set_bit32() to linux/bitops.h x86/unwind: Make CONFIG_UNWINDER_ORC=y the default in kconfig for 64-bit x86/unwind: Rename unwinder config options to 'CONFIG_UNWINDER_*' x86/fpu/debug: Remove unused 'x86_fpu_state' and 'x86_fpu_deactivate_state' tracepoints x86/unwinder: Make CONFIG_UNWINDER_ORC=y the default in the 64-bit defconfig ACPI / APEI: adjust a local variable type in ghes_ioremap_pfn_irq() x86/head: Add unwind hint annotations x86/xen: Add unwind hint annotations x86/xen: Fix xen head ELF annotations x86/boot: Annotate verify_cpu() as a callable function x86/head: Fix head ELF function annotations x86/head: Remove unused 'bad_address' code x86/head: Remove confusing comment objtool: Don't report end of section error after an empty unwind hint x86/asm: Remove unnecessary \n\t in front of CC_SET() from asm templates Change-Id: I8d4690608f817c55a142827e64ff04f7d66a70d1 Signed-off-by: Runmin Wang <runminw@codeaurora.org>
This commit is contained in:
commit
a35d45d785
@ -4,7 +4,7 @@ ORC unwinder
|
||||
Overview
|
||||
--------
|
||||
|
||||
The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
|
||||
The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
|
||||
similar in concept to a DWARF unwinder. The difference is that the
|
||||
format of the ORC data is much simpler than DWARF, which in turn allows
|
||||
the ORC unwinder to be much simpler and faster.
|
||||
|
@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
|
||||
ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
|
||||
ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
|
||||
... unused hole ...
|
||||
ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
|
||||
ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
|
||||
... unused hole ...
|
||||
ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
|
||||
... unused hole ...
|
||||
|
6
Makefile
6
Makefile
@ -1,7 +1,7 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
VERSION = 4
|
||||
PATCHLEVEL = 14
|
||||
SUBLEVEL = 8
|
||||
SUBLEVEL = 9
|
||||
EXTRAVERSION =
|
||||
NAME = Petit Gorille
|
||||
|
||||
@ -949,8 +949,8 @@ ifdef CONFIG_STACK_VALIDATION
|
||||
ifeq ($(has_libelf),1)
|
||||
objtool_target := tools/objtool FORCE
|
||||
else
|
||||
ifdef CONFIG_ORC_UNWINDER
|
||||
$(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
|
||||
ifdef CONFIG_UNWINDER_ORC
|
||||
$(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
|
||||
else
|
||||
$(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
|
||||
endif
|
||||
|
@ -244,7 +244,7 @@ CONFIG_USB_STORAGE_ONETOUCH=m
|
||||
CONFIG_USB_STORAGE_KARMA=m
|
||||
CONFIG_USB_STORAGE_CYPRESS_ATACB=m
|
||||
CONFIG_USB_STORAGE_ENE_UB6250=m
|
||||
CONFIG_USB_UAS=m
|
||||
CONFIG_USB_UAS=y
|
||||
CONFIG_USB_DWC3=y
|
||||
CONFIG_USB_DWC2=y
|
||||
CONFIG_USB_HSIC_USB3503=y
|
||||
|
@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs);
|
||||
/*
|
||||
* kprobe-based event tracer support
|
||||
*/
|
||||
#include <linux/stddef.h>
|
||||
#include <linux/types.h>
|
||||
#include <linux/compiler.h>
|
||||
#define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0))
|
||||
|
||||
extern int regs_query_register_offset(const char *name);
|
||||
|
@ -51,6 +51,13 @@ enum fixed_addresses {
|
||||
|
||||
FIX_EARLYCON_MEM_BASE,
|
||||
FIX_TEXT_POKE0,
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_GHES
|
||||
/* Used for GHES mapping from assorted contexts */
|
||||
FIX_APEI_GHES_IRQ,
|
||||
FIX_APEI_GHES_NMI,
|
||||
#endif /* CONFIG_ACPI_APEI_GHES */
|
||||
|
||||
__end_of_permanent_fixed_addresses,
|
||||
|
||||
/*
|
||||
|
@ -276,9 +276,12 @@ void arch_touch_nmi_watchdog(void)
|
||||
{
|
||||
unsigned long ticks = tb_ticks_per_usec * wd_timer_period_ms * 1000;
|
||||
int cpu = smp_processor_id();
|
||||
u64 tb = get_tb();
|
||||
|
||||
if (get_tb() - per_cpu(wd_timer_tb, cpu) >= ticks)
|
||||
watchdog_timer_interrupt(cpu);
|
||||
if (tb - per_cpu(wd_timer_tb, cpu) >= ticks) {
|
||||
per_cpu(wd_timer_tb, cpu) = tb;
|
||||
wd_smp_clear_cpu_pending(cpu, tb);
|
||||
}
|
||||
}
|
||||
EXPORT_SYMBOL(arch_touch_nmi_watchdog);
|
||||
|
||||
|
@ -762,7 +762,8 @@ emit_clear:
|
||||
func = (u8 *) __bpf_call_base + imm;
|
||||
|
||||
/* Save skb pointer if we need to re-cache skb data */
|
||||
if (bpf_helper_changes_pkt_data(func))
|
||||
if ((ctx->seen & SEEN_SKB) &&
|
||||
bpf_helper_changes_pkt_data(func))
|
||||
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
|
||||
|
||||
bpf_jit_emit_func_call(image, ctx, (u64)func);
|
||||
@ -771,7 +772,8 @@ emit_clear:
|
||||
PPC_MR(b2p[BPF_REG_0], 3);
|
||||
|
||||
/* refresh skb cache */
|
||||
if (bpf_helper_changes_pkt_data(func)) {
|
||||
if ((ctx->seen & SEEN_SKB) &&
|
||||
bpf_helper_changes_pkt_data(func)) {
|
||||
/* reload skb pointer to r3 */
|
||||
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
|
||||
bpf_jit_emit_skb_loads(image, ctx);
|
||||
|
@ -530,14 +530,19 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
|
||||
|
||||
waiting:
|
||||
secondary = 1;
|
||||
spin_begin();
|
||||
while (secondary && !xmon_gate) {
|
||||
if (in_xmon == 0) {
|
||||
if (fromipi)
|
||||
if (fromipi) {
|
||||
spin_end();
|
||||
goto leave;
|
||||
}
|
||||
secondary = test_and_set_bit(0, &in_xmon);
|
||||
}
|
||||
barrier();
|
||||
spin_cpu_relax();
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
spin_end();
|
||||
|
||||
if (!secondary && !xmon_gate) {
|
||||
/* we are the first cpu to come in */
|
||||
@ -568,21 +573,25 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
|
||||
mb();
|
||||
xmon_gate = 1;
|
||||
barrier();
|
||||
touch_nmi_watchdog();
|
||||
}
|
||||
|
||||
cmdloop:
|
||||
while (in_xmon) {
|
||||
if (secondary) {
|
||||
spin_begin();
|
||||
if (cpu == xmon_owner) {
|
||||
if (!test_and_set_bit(0, &xmon_taken)) {
|
||||
secondary = 0;
|
||||
spin_end();
|
||||
continue;
|
||||
}
|
||||
/* missed it */
|
||||
while (cpu == xmon_owner)
|
||||
barrier();
|
||||
spin_cpu_relax();
|
||||
}
|
||||
barrier();
|
||||
spin_cpu_relax();
|
||||
touch_nmi_watchdog();
|
||||
} else {
|
||||
cmd = cmds(regs);
|
||||
if (cmd != 0) {
|
||||
|
@ -55,8 +55,7 @@ struct bpf_jit {
|
||||
#define SEEN_LITERAL 8 /* code uses literals */
|
||||
#define SEEN_FUNC 16 /* calls C functions */
|
||||
#define SEEN_TAIL_CALL 32 /* code uses tail calls */
|
||||
#define SEEN_SKB_CHANGE 64 /* code changes skb data */
|
||||
#define SEEN_REG_AX 128 /* code uses constant blinding */
|
||||
#define SEEN_REG_AX 64 /* code uses constant blinding */
|
||||
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM | SEEN_SKB)
|
||||
|
||||
/*
|
||||
@ -448,12 +447,12 @@ static void bpf_jit_prologue(struct bpf_jit *jit)
|
||||
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
|
||||
REG_15, 152);
|
||||
}
|
||||
if (jit->seen & SEEN_SKB)
|
||||
if (jit->seen & SEEN_SKB) {
|
||||
emit_load_skb_data_hlen(jit);
|
||||
if (jit->seen & SEEN_SKB_CHANGE)
|
||||
/* stg %b1,ST_OFF_SKBP(%r0,%r15) */
|
||||
EMIT6_DISP_LH(0xe3000000, 0x0024, BPF_REG_1, REG_0, REG_15,
|
||||
STK_OFF_SKBP);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -983,8 +982,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
|
||||
EMIT2(0x0d00, REG_14, REG_W1);
|
||||
/* lgr %b0,%r2: load return value into %b0 */
|
||||
EMIT4(0xb9040000, BPF_REG_0, REG_2);
|
||||
if (bpf_helper_changes_pkt_data((void *)func)) {
|
||||
jit->seen |= SEEN_SKB_CHANGE;
|
||||
if ((jit->seen & SEEN_SKB) &&
|
||||
bpf_helper_changes_pkt_data((void *)func)) {
|
||||
/* lg %b1,ST_OFF_SKBP(%r15) */
|
||||
EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
|
||||
REG_15, STK_OFF_SKBP);
|
||||
|
@ -7,6 +7,7 @@
|
||||
#if defined(__sparc__) && defined(__arch64__)
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/threads.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
|
@ -1245,14 +1245,16 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
|
||||
u8 *func = ((u8 *)__bpf_call_base) + imm;
|
||||
|
||||
ctx->saw_call = true;
|
||||
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
|
||||
emit_reg_move(bpf2sparc[BPF_REG_1], L7, ctx);
|
||||
|
||||
emit_call((u32 *)func, ctx);
|
||||
emit_nop(ctx);
|
||||
|
||||
emit_reg_move(O0, bpf2sparc[BPF_REG_0], ctx);
|
||||
|
||||
if (bpf_helper_changes_pkt_data(func) && ctx->saw_ld_abs_ind)
|
||||
load_skb_regs(ctx, bpf2sparc[BPF_REG_6]);
|
||||
if (ctx->saw_ld_abs_ind && bpf_helper_changes_pkt_data(func))
|
||||
load_skb_regs(ctx, L7);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
generic-y += barrier.h
|
||||
generic-y += bpf_perf_event.h
|
||||
generic-y += bug.h
|
||||
generic-y += clkdev.h
|
||||
generic-y += current.h
|
||||
|
@ -41,7 +41,7 @@
|
||||
typedef int (*initcall_t)(void);
|
||||
typedef void (*exitcall_t)(void);
|
||||
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/compiler_types.h>
|
||||
|
||||
/* These are for everybody (although not all archs will actually
|
||||
discard it in modules) */
|
||||
|
@ -108,7 +108,7 @@ config X86
|
||||
select HAVE_ARCH_AUDITSYSCALL
|
||||
select HAVE_ARCH_HUGE_VMAP if X86_64 || X86_PAE
|
||||
select HAVE_ARCH_JUMP_LABEL
|
||||
select HAVE_ARCH_KASAN if X86_64 && SPARSEMEM_VMEMMAP
|
||||
select HAVE_ARCH_KASAN if X86_64
|
||||
select HAVE_ARCH_KGDB
|
||||
select HAVE_ARCH_KMEMCHECK
|
||||
select HAVE_ARCH_MMAP_RND_BITS if MMU
|
||||
@ -171,7 +171,7 @@ config X86
|
||||
select HAVE_PERF_USER_STACK_DUMP
|
||||
select HAVE_RCU_TABLE_FREE
|
||||
select HAVE_REGS_AND_STACK_ACCESS_API
|
||||
select HAVE_RELIABLE_STACKTRACE if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
|
||||
select HAVE_RELIABLE_STACKTRACE if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
|
||||
select HAVE_STACK_VALIDATION if X86_64
|
||||
select HAVE_SYSCALL_TRACEPOINTS
|
||||
select HAVE_UNSTABLE_SCHED_CLOCK
|
||||
@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
|
||||
config KASAN_SHADOW_OFFSET
|
||||
hex
|
||||
depends on KASAN
|
||||
default 0xdff8000000000000 if X86_5LEVEL
|
||||
default 0xdffffc0000000000
|
||||
|
||||
config HAVE_INTEL_TXT
|
||||
|
@ -359,28 +359,14 @@ config PUNIT_ATOM_DEBUG
|
||||
|
||||
choice
|
||||
prompt "Choose kernel unwinder"
|
||||
default FRAME_POINTER_UNWINDER
|
||||
default UNWINDER_ORC if X86_64
|
||||
default UNWINDER_FRAME_POINTER if X86_32
|
||||
---help---
|
||||
This determines which method will be used for unwinding kernel stack
|
||||
traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
|
||||
livepatch, lockdep, and more.
|
||||
|
||||
config FRAME_POINTER_UNWINDER
|
||||
bool "Frame pointer unwinder"
|
||||
select FRAME_POINTER
|
||||
---help---
|
||||
This option enables the frame pointer unwinder for unwinding kernel
|
||||
stack traces.
|
||||
|
||||
The unwinder itself is fast and it uses less RAM than the ORC
|
||||
unwinder, but the kernel text size will grow by ~3% and the kernel's
|
||||
overall performance will degrade by roughly 5-10%.
|
||||
|
||||
This option is recommended if you want to use the livepatch
|
||||
consistency model, as this is currently the only way to get a
|
||||
reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
|
||||
|
||||
config ORC_UNWINDER
|
||||
config UNWINDER_ORC
|
||||
bool "ORC unwinder"
|
||||
depends on X86_64
|
||||
select STACK_VALIDATION
|
||||
@ -396,7 +382,22 @@ config ORC_UNWINDER
|
||||
Enabling this option will increase the kernel's runtime memory usage
|
||||
by roughly 2-4MB, depending on your kernel config.
|
||||
|
||||
config GUESS_UNWINDER
|
||||
config UNWINDER_FRAME_POINTER
|
||||
bool "Frame pointer unwinder"
|
||||
select FRAME_POINTER
|
||||
---help---
|
||||
This option enables the frame pointer unwinder for unwinding kernel
|
||||
stack traces.
|
||||
|
||||
The unwinder itself is fast and it uses less RAM than the ORC
|
||||
unwinder, but the kernel text size will grow by ~3% and the kernel's
|
||||
overall performance will degrade by roughly 5-10%.
|
||||
|
||||
This option is recommended if you want to use the livepatch
|
||||
consistency model, as this is currently the only way to get a
|
||||
reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
|
||||
|
||||
config UNWINDER_GUESS
|
||||
bool "Guess unwinder"
|
||||
depends on EXPERT
|
||||
---help---
|
||||
@ -411,7 +412,7 @@ config GUESS_UNWINDER
|
||||
endchoice
|
||||
|
||||
config FRAME_POINTER
|
||||
depends on !ORC_UNWINDER && !GUESS_UNWINDER
|
||||
depends on !UNWINDER_ORC && !UNWINDER_GUESS
|
||||
bool
|
||||
|
||||
endmenu
|
||||
|
@ -1,5 +1,5 @@
|
||||
CONFIG_NOHIGHMEM=y
|
||||
# CONFIG_HIGHMEM4G is not set
|
||||
# CONFIG_HIGHMEM64G is not set
|
||||
CONFIG_GUESS_UNWINDER=y
|
||||
# CONFIG_FRAME_POINTER_UNWINDER is not set
|
||||
CONFIG_UNWINDER_GUESS=y
|
||||
# CONFIG_UNWINDER_FRAME_POINTER is not set
|
||||
|
@ -299,6 +299,7 @@ CONFIG_DEBUG_STACKOVERFLOW=y
|
||||
# CONFIG_DEBUG_RODATA_TEST is not set
|
||||
CONFIG_DEBUG_BOOT_PARAMS=y
|
||||
CONFIG_OPTIMIZE_INLINING=y
|
||||
CONFIG_UNWINDER_ORC=y
|
||||
CONFIG_SECURITY=y
|
||||
CONFIG_SECURITY_NETWORK=y
|
||||
CONFIG_SECURITY_SELINUX=y
|
||||
|
@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
|
||||
UNWIND_HINT_REGS offset=\offset
|
||||
.endm
|
||||
|
||||
.macro RESTORE_EXTRA_REGS offset=0
|
||||
movq 0*8+\offset(%rsp), %r15
|
||||
movq 1*8+\offset(%rsp), %r14
|
||||
movq 2*8+\offset(%rsp), %r13
|
||||
movq 3*8+\offset(%rsp), %r12
|
||||
movq 4*8+\offset(%rsp), %rbp
|
||||
movq 5*8+\offset(%rsp), %rbx
|
||||
UNWIND_HINT_REGS offset=\offset extra=0
|
||||
.macro POP_EXTRA_REGS
|
||||
popq %r15
|
||||
popq %r14
|
||||
popq %r13
|
||||
popq %r12
|
||||
popq %rbp
|
||||
popq %rbx
|
||||
.endm
|
||||
|
||||
.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
|
||||
.if \rstor_r11
|
||||
movq 6*8(%rsp), %r11
|
||||
.endif
|
||||
.if \rstor_r8910
|
||||
movq 7*8(%rsp), %r10
|
||||
movq 8*8(%rsp), %r9
|
||||
movq 9*8(%rsp), %r8
|
||||
.endif
|
||||
.if \rstor_rax
|
||||
movq 10*8(%rsp), %rax
|
||||
.endif
|
||||
.if \rstor_rcx
|
||||
movq 11*8(%rsp), %rcx
|
||||
.endif
|
||||
.if \rstor_rdx
|
||||
movq 12*8(%rsp), %rdx
|
||||
.endif
|
||||
movq 13*8(%rsp), %rsi
|
||||
movq 14*8(%rsp), %rdi
|
||||
UNWIND_HINT_IRET_REGS offset=16*8
|
||||
.endm
|
||||
.macro RESTORE_C_REGS
|
||||
RESTORE_C_REGS_HELPER 1,1,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RAX
|
||||
RESTORE_C_REGS_HELPER 0,1,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RCX
|
||||
RESTORE_C_REGS_HELPER 1,0,1,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_R11
|
||||
RESTORE_C_REGS_HELPER 1,1,0,1,1
|
||||
.endm
|
||||
.macro RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
RESTORE_C_REGS_HELPER 1,0,0,1,1
|
||||
.endm
|
||||
|
||||
.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
|
||||
subq $-(15*8+\addskip), %rsp
|
||||
.macro POP_C_REGS
|
||||
popq %r11
|
||||
popq %r10
|
||||
popq %r9
|
||||
popq %r8
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
popq %rdi
|
||||
.endm
|
||||
|
||||
.macro icebp
|
||||
|
@ -941,7 +941,8 @@ ENTRY(debug)
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
|
||||
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
||||
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
jb .Ldebug_from_sysenter_stack
|
||||
@ -984,7 +985,8 @@ ENTRY(nmi)
|
||||
movl %esp, %eax # pt_regs pointer
|
||||
|
||||
/* Are we currently on the SYSENTER stack? */
|
||||
PER_CPU(cpu_tss + CPU_TSS_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx)
|
||||
movl PER_CPU_VAR(cpu_entry_area), %ecx
|
||||
addl $CPU_ENTRY_AREA_SYSENTER_stack + SIZEOF_SYSENTER_stack, %ecx
|
||||
subl %eax, %ecx /* ecx = (end of SYSENTER_stack) - esp */
|
||||
cmpl $SIZEOF_SYSENTER_stack, %ecx
|
||||
jb .Lnmi_from_sysenter_stack
|
||||
|
@ -136,6 +136,64 @@ END(native_usergs_sysret64)
|
||||
* with them due to bugs in both AMD and Intel CPUs.
|
||||
*/
|
||||
|
||||
.pushsection .entry_trampoline, "ax"
|
||||
|
||||
/*
|
||||
* The code in here gets remapped into cpu_entry_area's trampoline. This means
|
||||
* that the assembler and linker have the wrong idea as to where this code
|
||||
* lives (and, in fact, it's mapped more than once, so it's not even at a
|
||||
* fixed address). So we can't reference any symbols outside the entry
|
||||
* trampoline and expect it to work.
|
||||
*
|
||||
* Instead, we carefully abuse %rip-relative addressing.
|
||||
* _entry_trampoline(%rip) refers to the start of the remapped) entry
|
||||
* trampoline. We can thus find cpu_entry_area with this macro:
|
||||
*/
|
||||
|
||||
#define CPU_ENTRY_AREA \
|
||||
_entry_trampoline - CPU_ENTRY_AREA_entry_trampoline(%rip)
|
||||
|
||||
/* The top word of the SYSENTER stack is hot and is usable as scratch space. */
|
||||
#define RSP_SCRATCH CPU_ENTRY_AREA_SYSENTER_stack + \
|
||||
SIZEOF_SYSENTER_stack - 8 + CPU_ENTRY_AREA
|
||||
|
||||
ENTRY(entry_SYSCALL_64_trampoline)
|
||||
UNWIND_HINT_EMPTY
|
||||
swapgs
|
||||
|
||||
/* Stash the user RSP. */
|
||||
movq %rsp, RSP_SCRATCH
|
||||
|
||||
/* Load the top of the task stack into RSP */
|
||||
movq CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
|
||||
|
||||
/* Start building the simulated IRET frame. */
|
||||
pushq $__USER_DS /* pt_regs->ss */
|
||||
pushq RSP_SCRATCH /* pt_regs->sp */
|
||||
pushq %r11 /* pt_regs->flags */
|
||||
pushq $__USER_CS /* pt_regs->cs */
|
||||
pushq %rcx /* pt_regs->ip */
|
||||
|
||||
/*
|
||||
* x86 lacks a near absolute jump, and we can't jump to the real
|
||||
* entry text with a relative jump. We could push the target
|
||||
* address and then use retq, but this destroys the pipeline on
|
||||
* many CPUs (wasting over 20 cycles on Sandy Bridge). Instead,
|
||||
* spill RDI and restore it in a second-stage trampoline.
|
||||
*/
|
||||
pushq %rdi
|
||||
movq $entry_SYSCALL_64_stage2, %rdi
|
||||
jmp *%rdi
|
||||
END(entry_SYSCALL_64_trampoline)
|
||||
|
||||
.popsection
|
||||
|
||||
ENTRY(entry_SYSCALL_64_stage2)
|
||||
UNWIND_HINT_EMPTY
|
||||
popq %rdi
|
||||
jmp entry_SYSCALL_64_after_hwframe
|
||||
END(entry_SYSCALL_64_stage2)
|
||||
|
||||
ENTRY(entry_SYSCALL_64)
|
||||
UNWIND_HINT_EMPTY
|
||||
/*
|
||||
@ -221,10 +279,9 @@ entry_SYSCALL_64_fastpath:
|
||||
TRACE_IRQS_ON /* user mode is traced as IRQs on */
|
||||
movq RIP(%rsp), %rcx
|
||||
movq EFLAGS(%rsp), %r11
|
||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
movq RSP(%rsp), %rsp
|
||||
addq $6*8, %rsp /* skip extra regs -- they were preserved */
|
||||
UNWIND_HINT_EMPTY
|
||||
USERGS_SYSRET64
|
||||
jmp .Lpop_c_regs_except_rcx_r11_and_sysret
|
||||
|
||||
1:
|
||||
/*
|
||||
@ -246,17 +303,18 @@ entry_SYSCALL64_slow_path:
|
||||
call do_syscall_64 /* returns with IRQs disabled */
|
||||
|
||||
return_from_SYSCALL_64:
|
||||
RESTORE_EXTRA_REGS
|
||||
TRACE_IRQS_IRETQ /* we're about to change IF */
|
||||
|
||||
/*
|
||||
* Try to use SYSRET instead of IRET if we're returning to
|
||||
* a completely clean 64-bit userspace context.
|
||||
* a completely clean 64-bit userspace context. If we're not,
|
||||
* go to the slow exit path.
|
||||
*/
|
||||
movq RCX(%rsp), %rcx
|
||||
movq RIP(%rsp), %r11
|
||||
cmpq %rcx, %r11 /* RCX == RIP */
|
||||
jne opportunistic_sysret_failed
|
||||
|
||||
cmpq %rcx, %r11 /* SYSRET requires RCX == RIP */
|
||||
jne swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
/*
|
||||
* On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
|
||||
@ -274,14 +332,14 @@ return_from_SYSCALL_64:
|
||||
|
||||
/* If this changed %rcx, it was not canonical */
|
||||
cmpq %rcx, %r11
|
||||
jne opportunistic_sysret_failed
|
||||
jne swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
cmpq $__USER_CS, CS(%rsp) /* CS must match SYSRET */
|
||||
jne opportunistic_sysret_failed
|
||||
jne swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
movq R11(%rsp), %r11
|
||||
cmpq %r11, EFLAGS(%rsp) /* R11 == RFLAGS */
|
||||
jne opportunistic_sysret_failed
|
||||
jne swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
/*
|
||||
* SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
|
||||
@ -302,12 +360,12 @@ return_from_SYSCALL_64:
|
||||
* would never get past 'stuck_here'.
|
||||
*/
|
||||
testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
|
||||
jnz opportunistic_sysret_failed
|
||||
jnz swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
/* nothing to check for RSP */
|
||||
|
||||
cmpq $__USER_DS, SS(%rsp) /* SS must match SYSRET */
|
||||
jne opportunistic_sysret_failed
|
||||
jne swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
/*
|
||||
* We win! This label is here just for ease of understanding
|
||||
@ -315,14 +373,36 @@ return_from_SYSCALL_64:
|
||||
*/
|
||||
syscall_return_via_sysret:
|
||||
/* rcx and r11 are already restored (see code above) */
|
||||
RESTORE_C_REGS_EXCEPT_RCX_R11
|
||||
movq RSP(%rsp), %rsp
|
||||
UNWIND_HINT_EMPTY
|
||||
USERGS_SYSRET64
|
||||
POP_EXTRA_REGS
|
||||
.Lpop_c_regs_except_rcx_r11_and_sysret:
|
||||
popq %rsi /* skip r11 */
|
||||
popq %r10
|
||||
popq %r9
|
||||
popq %r8
|
||||
popq %rax
|
||||
popq %rsi /* skip rcx */
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
|
||||
opportunistic_sysret_failed:
|
||||
SWAPGS
|
||||
jmp restore_c_regs_and_iret
|
||||
/*
|
||||
* Now all regs are restored except RSP and RDI.
|
||||
* Save old stack pointer and switch to trampoline stack.
|
||||
*/
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
|
||||
|
||||
pushq RSP-RDI(%rdi) /* RSP */
|
||||
pushq (%rdi) /* RDI */
|
||||
|
||||
/*
|
||||
* We are on the trampoline stack. All regs except RDI are live.
|
||||
* We can do future final exit work right here.
|
||||
*/
|
||||
|
||||
popq %rdi
|
||||
popq %rsp
|
||||
USERGS_SYSRET64
|
||||
END(entry_SYSCALL_64)
|
||||
|
||||
ENTRY(stub_ptregs_64)
|
||||
@ -423,8 +503,7 @@ ENTRY(ret_from_fork)
|
||||
movq %rsp, %rdi
|
||||
call syscall_return_slowpath /* returns with IRQs disabled */
|
||||
TRACE_IRQS_ON /* user mode is traced as IRQS on */
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
1:
|
||||
/* kernel thread */
|
||||
@ -457,12 +536,13 @@ END(irq_entries_start)
|
||||
|
||||
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
pushfq
|
||||
testl $X86_EFLAGS_IF, (%rsp)
|
||||
pushq %rax
|
||||
SAVE_FLAGS(CLBR_RAX)
|
||||
testl $X86_EFLAGS_IF, %eax
|
||||
jz .Lokay_\@
|
||||
ud2
|
||||
.Lokay_\@:
|
||||
addq $8, %rsp
|
||||
popq %rax
|
||||
#endif
|
||||
.endm
|
||||
|
||||
@ -554,6 +634,13 @@ END(irq_entries_start)
|
||||
/* 0(%rsp): ~(interrupt number) */
|
||||
.macro interrupt func
|
||||
cld
|
||||
|
||||
testb $3, CS-ORIG_RAX(%rsp)
|
||||
jz 1f
|
||||
SWAPGS
|
||||
call switch_to_thread_stack
|
||||
1:
|
||||
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
SAVE_C_REGS
|
||||
SAVE_EXTRA_REGS
|
||||
@ -563,12 +650,8 @@ END(irq_entries_start)
|
||||
jz 1f
|
||||
|
||||
/*
|
||||
* IRQ from user mode. Switch to kernel gsbase and inform context
|
||||
* tracking that we're in kernel mode.
|
||||
*/
|
||||
SWAPGS
|
||||
|
||||
/*
|
||||
* IRQ from user mode.
|
||||
*
|
||||
* We need to tell lockdep that IRQs are off. We can't do this until
|
||||
* we fix gsbase, and we should do it before enter_from_user_mode
|
||||
* (which can take locks). Since TRACE_IRQS_OFF idempotent,
|
||||
@ -612,8 +695,52 @@ GLOBAL(retint_user)
|
||||
mov %rsp,%rdi
|
||||
call prepare_exit_to_usermode
|
||||
TRACE_IRQS_IRETQ
|
||||
|
||||
GLOBAL(swapgs_restore_regs_and_return_to_usermode)
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
/* Assert that pt_regs indicates user mode. */
|
||||
testb $3, CS(%rsp)
|
||||
jnz 1f
|
||||
ud2
|
||||
1:
|
||||
#endif
|
||||
POP_EXTRA_REGS
|
||||
popq %r11
|
||||
popq %r10
|
||||
popq %r9
|
||||
popq %r8
|
||||
popq %rax
|
||||
popq %rcx
|
||||
popq %rdx
|
||||
popq %rsi
|
||||
|
||||
/*
|
||||
* The stack is now user RDI, orig_ax, RIP, CS, EFLAGS, RSP, SS.
|
||||
* Save old stack pointer and switch to trampoline stack.
|
||||
*/
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
|
||||
|
||||
/* Copy the IRET frame to the trampoline stack. */
|
||||
pushq 6*8(%rdi) /* SS */
|
||||
pushq 5*8(%rdi) /* RSP */
|
||||
pushq 4*8(%rdi) /* EFLAGS */
|
||||
pushq 3*8(%rdi) /* CS */
|
||||
pushq 2*8(%rdi) /* RIP */
|
||||
|
||||
/* Push user RDI on the trampoline stack. */
|
||||
pushq (%rdi)
|
||||
|
||||
/*
|
||||
* We are on the trampoline stack. All regs except RDI are live.
|
||||
* We can do future final exit work right here.
|
||||
*/
|
||||
|
||||
/* Restore RDI. */
|
||||
popq %rdi
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
INTERRUPT_RETURN
|
||||
|
||||
|
||||
/* Returning to kernel space */
|
||||
retint_kernel:
|
||||
@ -633,15 +760,17 @@ retint_kernel:
|
||||
*/
|
||||
TRACE_IRQS_IRETQ
|
||||
|
||||
/*
|
||||
* At this label, code paths which return to kernel and to user,
|
||||
* which come from interrupts/exception and from syscalls, merge.
|
||||
*/
|
||||
GLOBAL(restore_regs_and_iret)
|
||||
RESTORE_EXTRA_REGS
|
||||
restore_c_regs_and_iret:
|
||||
RESTORE_C_REGS
|
||||
REMOVE_PT_GPREGS_FROM_STACK 8
|
||||
GLOBAL(restore_regs_and_return_to_kernel)
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
/* Assert that pt_regs indicates kernel mode. */
|
||||
testb $3, CS(%rsp)
|
||||
jz 1f
|
||||
ud2
|
||||
1:
|
||||
#endif
|
||||
POP_EXTRA_REGS
|
||||
POP_C_REGS
|
||||
addq $8, %rsp /* skip regs->orig_ax */
|
||||
INTERRUPT_RETURN
|
||||
|
||||
ENTRY(native_iret)
|
||||
@ -805,7 +934,33 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||
/*
|
||||
* Exception entry points.
|
||||
*/
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
|
||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
|
||||
|
||||
/*
|
||||
* Switch to the thread stack. This is called with the IRET frame and
|
||||
* orig_ax on the stack. (That is, RDI..R12 are not on the stack and
|
||||
* space has not been allocated for them.)
|
||||
*/
|
||||
ENTRY(switch_to_thread_stack)
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
pushq %rdi
|
||||
movq %rsp, %rdi
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
|
||||
|
||||
pushq 7*8(%rdi) /* regs->ss */
|
||||
pushq 6*8(%rdi) /* regs->rsp */
|
||||
pushq 5*8(%rdi) /* regs->eflags */
|
||||
pushq 4*8(%rdi) /* regs->cs */
|
||||
pushq 3*8(%rdi) /* regs->ip */
|
||||
pushq 2*8(%rdi) /* regs->orig_ax */
|
||||
pushq 8(%rdi) /* return address */
|
||||
UNWIND_HINT_FUNC
|
||||
|
||||
movq (%rdi), %rdi
|
||||
ret
|
||||
END(switch_to_thread_stack)
|
||||
|
||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
|
||||
ENTRY(\sym)
|
||||
@ -818,17 +973,18 @@ ENTRY(\sym)
|
||||
|
||||
ASM_CLAC
|
||||
|
||||
.ifeq \has_error_code
|
||||
.if \has_error_code == 0
|
||||
pushq $-1 /* ORIG_RAX: no syscall to restart */
|
||||
.endif
|
||||
|
||||
ALLOC_PT_GPREGS_ON_STACK
|
||||
|
||||
.if \paranoid
|
||||
.if \paranoid == 1
|
||||
.if \paranoid < 2
|
||||
testb $3, CS(%rsp) /* If coming from userspace, switch stacks */
|
||||
jnz 1f
|
||||
jnz .Lfrom_usermode_switch_stack_\@
|
||||
.endif
|
||||
|
||||
.if \paranoid
|
||||
call paranoid_entry
|
||||
.else
|
||||
call error_entry
|
||||
@ -870,20 +1026,15 @@ ENTRY(\sym)
|
||||
jmp error_exit
|
||||
.endif
|
||||
|
||||
.if \paranoid == 1
|
||||
.if \paranoid < 2
|
||||
/*
|
||||
* Paranoid entry from userspace. Switch stacks and treat it
|
||||
* Entry from userspace. Switch stacks and treat it
|
||||
* as a normal entry. This means that paranoid handlers
|
||||
* run in real process context if user_mode(regs).
|
||||
*/
|
||||
1:
|
||||
.Lfrom_usermode_switch_stack_\@:
|
||||
call error_entry
|
||||
|
||||
|
||||
movq %rsp, %rdi /* pt_regs pointer */
|
||||
call sync_regs
|
||||
movq %rax, %rsp /* switch stack */
|
||||
|
||||
movq %rsp, %rdi /* pt_regs pointer */
|
||||
|
||||
.if \has_error_code
|
||||
@ -1059,6 +1210,7 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
|
||||
idtentry stack_segment do_stack_segment has_error_code=1
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
idtentry xennmi do_nmi has_error_code=0
|
||||
idtentry xendebug do_debug has_error_code=0
|
||||
idtentry xenint3 do_int3 has_error_code=0
|
||||
#endif
|
||||
@ -1112,17 +1264,14 @@ ENTRY(paranoid_exit)
|
||||
DISABLE_INTERRUPTS(CLBR_ANY)
|
||||
TRACE_IRQS_OFF_DEBUG
|
||||
testl %ebx, %ebx /* swapgs needed? */
|
||||
jnz paranoid_exit_no_swapgs
|
||||
jnz .Lparanoid_exit_no_swapgs
|
||||
TRACE_IRQS_IRETQ
|
||||
SWAPGS_UNSAFE_STACK
|
||||
jmp paranoid_exit_restore
|
||||
paranoid_exit_no_swapgs:
|
||||
jmp .Lparanoid_exit_restore
|
||||
.Lparanoid_exit_no_swapgs:
|
||||
TRACE_IRQS_IRETQ_DEBUG
|
||||
paranoid_exit_restore:
|
||||
RESTORE_EXTRA_REGS
|
||||
RESTORE_C_REGS
|
||||
REMOVE_PT_GPREGS_FROM_STACK 8
|
||||
INTERRUPT_RETURN
|
||||
.Lparanoid_exit_restore:
|
||||
jmp restore_regs_and_return_to_kernel
|
||||
END(paranoid_exit)
|
||||
|
||||
/*
|
||||
@ -1146,6 +1295,14 @@ ENTRY(error_entry)
|
||||
SWAPGS
|
||||
|
||||
.Lerror_entry_from_usermode_after_swapgs:
|
||||
/* Put us onto the real thread stack. */
|
||||
popq %r12 /* save return addr in %12 */
|
||||
movq %rsp, %rdi /* arg0 = pt_regs pointer */
|
||||
call sync_regs
|
||||
movq %rax, %rsp /* switch stack */
|
||||
ENCODE_FRAME_POINTER
|
||||
pushq %r12
|
||||
|
||||
/*
|
||||
* We need to tell lockdep that IRQs are off. We can't do this until
|
||||
* we fix gsbase, and we should do it before enter_from_user_mode
|
||||
@ -1223,10 +1380,13 @@ ENTRY(error_exit)
|
||||
jmp retint_user
|
||||
END(error_exit)
|
||||
|
||||
/* Runs on exception stack */
|
||||
/* XXX: broken on Xen PV */
|
||||
/*
|
||||
* Runs on exception stack. Xen PV does not go through this path at all,
|
||||
* so we can use real assembly here.
|
||||
*/
|
||||
ENTRY(nmi)
|
||||
UNWIND_HINT_IRET_REGS
|
||||
|
||||
/*
|
||||
* We allow breakpoints in NMIs. If a breakpoint occurs, then
|
||||
* the iretq it performs will take us out of NMI context.
|
||||
@ -1284,7 +1444,7 @@ ENTRY(nmi)
|
||||
* stacks lest we corrupt the "NMI executing" variable.
|
||||
*/
|
||||
|
||||
SWAPGS_UNSAFE_STACK
|
||||
swapgs
|
||||
cld
|
||||
movq %rsp, %rdx
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
@ -1328,8 +1488,7 @@ ENTRY(nmi)
|
||||
* Return back to user mode. We must *not* do the normal exit
|
||||
* work, because we don't want to enable interrupts.
|
||||
*/
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
|
||||
.Lnmi_from_kernel:
|
||||
/*
|
||||
@ -1450,7 +1609,7 @@ nested_nmi_out:
|
||||
popq %rdx
|
||||
|
||||
/* We are returning to kernel mode, so this cannot result in a fault. */
|
||||
INTERRUPT_RETURN
|
||||
iretq
|
||||
|
||||
first_nmi:
|
||||
/* Restore rdx. */
|
||||
@ -1481,7 +1640,7 @@ first_nmi:
|
||||
pushfq /* RFLAGS */
|
||||
pushq $__KERNEL_CS /* CS */
|
||||
pushq $1f /* RIP */
|
||||
INTERRUPT_RETURN /* continues at repeat_nmi below */
|
||||
iretq /* continues at repeat_nmi below */
|
||||
UNWIND_HINT_IRET_REGS
|
||||
1:
|
||||
#endif
|
||||
@ -1544,29 +1703,34 @@ end_repeat_nmi:
|
||||
nmi_swapgs:
|
||||
SWAPGS_UNSAFE_STACK
|
||||
nmi_restore:
|
||||
RESTORE_EXTRA_REGS
|
||||
RESTORE_C_REGS
|
||||
POP_EXTRA_REGS
|
||||
POP_C_REGS
|
||||
|
||||
/* Point RSP at the "iret" frame. */
|
||||
REMOVE_PT_GPREGS_FROM_STACK 6*8
|
||||
/*
|
||||
* Skip orig_ax and the "outermost" frame to point RSP at the "iret"
|
||||
* at the "iret" frame.
|
||||
*/
|
||||
addq $6*8, %rsp
|
||||
|
||||
/*
|
||||
* Clear "NMI executing". Set DF first so that we can easily
|
||||
* distinguish the remaining code between here and IRET from
|
||||
* the SYSCALL entry and exit paths. On a native kernel, we
|
||||
* could just inspect RIP, but, on paravirt kernels,
|
||||
* INTERRUPT_RETURN can translate into a jump into a
|
||||
* hypercall page.
|
||||
* the SYSCALL entry and exit paths.
|
||||
*
|
||||
* We arguably should just inspect RIP instead, but I (Andy) wrote
|
||||
* this code when I had the misapprehension that Xen PV supported
|
||||
* NMIs, and Xen PV would break that approach.
|
||||
*/
|
||||
std
|
||||
movq $0, 5*8(%rsp) /* clear "NMI executing" */
|
||||
|
||||
/*
|
||||
* INTERRUPT_RETURN reads the "iret" frame and exits the NMI
|
||||
* stack in a single instruction. We are returning to kernel
|
||||
* mode, so this cannot result in a fault.
|
||||
* iretq reads the "iret" frame and exits the NMI stack in a
|
||||
* single instruction. We are returning to kernel mode, so this
|
||||
* cannot result in a fault. Similarly, we don't need to worry
|
||||
* about espfix64 on the way back to kernel mode.
|
||||
*/
|
||||
INTERRUPT_RETURN
|
||||
iretq
|
||||
END(nmi)
|
||||
|
||||
ENTRY(ignore_sysret)
|
||||
|
@ -48,7 +48,7 @@
|
||||
*/
|
||||
ENTRY(entry_SYSENTER_compat)
|
||||
/* Interrupts are off on entry. */
|
||||
SWAPGS_UNSAFE_STACK
|
||||
SWAPGS
|
||||
movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp
|
||||
|
||||
/*
|
||||
@ -306,8 +306,11 @@ ENTRY(entry_INT80_compat)
|
||||
*/
|
||||
movl %eax, %eax
|
||||
|
||||
/* Construct struct pt_regs on stack (iret frame is already on stack) */
|
||||
pushq %rax /* pt_regs->orig_ax */
|
||||
|
||||
/* switch to thread stack expects orig_ax to be pushed */
|
||||
call switch_to_thread_stack
|
||||
|
||||
pushq %rdi /* pt_regs->di */
|
||||
pushq %rsi /* pt_regs->si */
|
||||
pushq %rdx /* pt_regs->dx */
|
||||
@ -337,8 +340,7 @@ ENTRY(entry_INT80_compat)
|
||||
|
||||
/* Go back to user mode. */
|
||||
TRACE_IRQS_ON
|
||||
SWAPGS
|
||||
jmp restore_regs_and_iret
|
||||
jmp swapgs_restore_regs_and_return_to_usermode
|
||||
END(entry_INT80_compat)
|
||||
|
||||
ENTRY(stub32_clone)
|
||||
|
@ -1,6 +1,6 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
out := $(obj)/../../include/generated/asm
|
||||
uapi := $(obj)/../../include/generated/uapi/asm
|
||||
out := arch/$(SRCARCH)/include/generated/asm
|
||||
uapi := arch/$(SRCARCH)/include/generated/uapi/asm
|
||||
|
||||
# Create output directory if not already present
|
||||
_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
|
||||
|
@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment)
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
/* IRQs are off, so this synchronizes with smp_store_release */
|
||||
ldt = lockless_dereference(current->active_mm->context.ldt);
|
||||
ldt = READ_ONCE(current->active_mm->context.ldt);
|
||||
if (!ldt || idx >= ldt->nr_entries)
|
||||
return 0;
|
||||
|
||||
|
@ -2958,6 +2958,10 @@ static unsigned long intel_pmu_free_running_flags(struct perf_event *event)
|
||||
|
||||
if (event->attr.use_clockid)
|
||||
flags &= ~PERF_SAMPLE_TIME;
|
||||
if (!event->attr.exclude_kernel)
|
||||
flags &= ~PERF_SAMPLE_REGS_USER;
|
||||
if (event->attr.sample_regs_user & ~PEBS_REGS)
|
||||
flags &= ~(PERF_SAMPLE_REGS_USER | PERF_SAMPLE_REGS_INTR);
|
||||
return flags;
|
||||
}
|
||||
|
||||
|
@ -85,13 +85,15 @@ struct amd_nb {
|
||||
* Flags PEBS can handle without an PMI.
|
||||
*
|
||||
* TID can only be handled by flushing at context switch.
|
||||
* REGS_USER can be handled for events limited to ring 3.
|
||||
*
|
||||
*/
|
||||
#define PEBS_FREERUNNING_FLAGS \
|
||||
(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
|
||||
PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
|
||||
PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
|
||||
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR)
|
||||
PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
|
||||
PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
|
||||
|
||||
/*
|
||||
* A debug store configuration.
|
||||
@ -110,6 +112,26 @@ struct debug_store {
|
||||
u64 pebs_event_reset[MAX_PEBS_EVENTS];
|
||||
};
|
||||
|
||||
#define PEBS_REGS \
|
||||
(PERF_REG_X86_AX | \
|
||||
PERF_REG_X86_BX | \
|
||||
PERF_REG_X86_CX | \
|
||||
PERF_REG_X86_DX | \
|
||||
PERF_REG_X86_DI | \
|
||||
PERF_REG_X86_SI | \
|
||||
PERF_REG_X86_SP | \
|
||||
PERF_REG_X86_BP | \
|
||||
PERF_REG_X86_IP | \
|
||||
PERF_REG_X86_FLAGS | \
|
||||
PERF_REG_X86_R8 | \
|
||||
PERF_REG_X86_R9 | \
|
||||
PERF_REG_X86_R10 | \
|
||||
PERF_REG_X86_R11 | \
|
||||
PERF_REG_X86_R12 | \
|
||||
PERF_REG_X86_R13 | \
|
||||
PERF_REG_X86_R14 | \
|
||||
PERF_REG_X86_R15)
|
||||
|
||||
/*
|
||||
* Per register state.
|
||||
*/
|
||||
|
@ -113,7 +113,7 @@ void hyperv_init(void)
|
||||
u64 guest_id;
|
||||
union hv_x64_msr_hypercall_contents hypercall_msr;
|
||||
|
||||
if (x86_hyper != &x86_hyper_ms_hyperv)
|
||||
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
|
||||
return;
|
||||
|
||||
/* Allocate percpu VP index */
|
||||
|
@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
|
||||
bool ok;
|
||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||
do {
|
||||
asm volatile(RDRAND_LONG "\n\t"
|
||||
asm volatile(RDRAND_LONG
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
if (ok)
|
||||
@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||
bool ok;
|
||||
unsigned int retry = RDRAND_RETRY_LOOPS;
|
||||
do {
|
||||
asm volatile(RDRAND_INT "\n\t"
|
||||
asm volatile(RDRAND_INT
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
if (ok)
|
||||
@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
|
||||
static inline bool rdseed_long(unsigned long *v)
|
||||
{
|
||||
bool ok;
|
||||
asm volatile(RDSEED_LONG "\n\t"
|
||||
asm volatile(RDSEED_LONG
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
return ok;
|
||||
@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
|
||||
static inline bool rdseed_int(unsigned int *v)
|
||||
{
|
||||
bool ok;
|
||||
asm volatile(RDSEED_INT "\n\t"
|
||||
asm volatile(RDSEED_INT
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (ok), "=a" (*v));
|
||||
return ok;
|
||||
|
@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
|
||||
static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
|
||||
{
|
||||
bool negative;
|
||||
asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
|
||||
asm volatile(LOCK_PREFIX "andb %2,%1"
|
||||
CC_SET(s)
|
||||
: CC_OUT(s) (negative), ADDR
|
||||
: "ir" ((char) ~(1 << nr)) : "memory");
|
||||
@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
asm("bts %2,%1\n\t"
|
||||
asm("bts %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr));
|
||||
@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
asm volatile("btr %2,%1\n\t"
|
||||
asm volatile("btr %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr));
|
||||
@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
asm volatile("btc %2,%1\n\t"
|
||||
asm volatile("btc %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit), ADDR
|
||||
: "Ir" (nr) : "memory");
|
||||
@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
asm volatile("bt %2,%1\n\t"
|
||||
asm volatile("bt %2,%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit)
|
||||
: "m" (*(unsigned long *)addr), "Ir" (nr));
|
||||
|
@ -7,6 +7,7 @@
|
||||
*/
|
||||
#include <linux/types.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task_stack.h>
|
||||
#include <asm/processor.h>
|
||||
#include <asm/user32.h>
|
||||
#include <asm/unistd.h>
|
||||
|
@ -126,16 +126,17 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
|
||||
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
|
||||
|
||||
#define set_cpu_cap(c, bit) set_bit(bit, (unsigned long *)((c)->x86_capability))
|
||||
#define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability))
|
||||
#define setup_clear_cpu_cap(bit) do { \
|
||||
clear_cpu_cap(&boot_cpu_data, bit); \
|
||||
set_bit(bit, (unsigned long *)cpu_caps_cleared); \
|
||||
} while (0)
|
||||
|
||||
extern void setup_clear_cpu_cap(unsigned int bit);
|
||||
extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
|
||||
|
||||
#define setup_force_cpu_cap(bit) do { \
|
||||
set_cpu_cap(&boot_cpu_data, bit); \
|
||||
set_bit(bit, (unsigned long *)cpu_caps_set); \
|
||||
} while (0)
|
||||
|
||||
#define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit)
|
||||
|
||||
#if defined(CC_HAVE_ASM_GOTO) && defined(CONFIG_X86_FAST_FEATURE_TESTS)
|
||||
/*
|
||||
* Static testing of CPU features. Used the same as boot_cpu_has().
|
||||
|
@ -13,173 +13,176 @@
|
||||
/*
|
||||
* Defines x86 CPU feature bits
|
||||
*/
|
||||
#define NCAPINTS 18 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
#define NCAPINTS 18 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
* Note: If the comment begins with a quoted string, that string is used
|
||||
* in /proc/cpuinfo instead of the macro name. If the string is "",
|
||||
* this feature bit is not displayed in /proc/cpuinfo at all.
|
||||
*
|
||||
* When adding new features here that depend on other features,
|
||||
* please update the table in kernel/cpu/cpuid-deps.c as well.
|
||||
*/
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
|
||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
|
||||
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
|
||||
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
|
||||
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
|
||||
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
|
||||
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
|
||||
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
|
||||
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
|
||||
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
|
||||
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
|
||||
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
|
||||
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
|
||||
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions */
|
||||
/* (plus FCMOVcc, FCOMI with FPU) */
|
||||
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
|
||||
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
|
||||
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
|
||||
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
|
||||
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
|
||||
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
|
||||
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
|
||||
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
|
||||
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
|
||||
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
|
||||
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
|
||||
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
|
||||
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
|
||||
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
|
||||
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
|
||||
#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
|
||||
#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
|
||||
#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
|
||||
#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
|
||||
#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
|
||||
#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
|
||||
#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
|
||||
#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
|
||||
#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
|
||||
#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
|
||||
#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
|
||||
#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
|
||||
#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
|
||||
#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
|
||||
#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
|
||||
#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
|
||||
#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
|
||||
#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
|
||||
#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
|
||||
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
|
||||
#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
|
||||
#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
|
||||
#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
|
||||
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
|
||||
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
|
||||
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
|
||||
#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
|
||||
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
|
||||
#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
|
||||
#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
|
||||
/* Don't duplicate feature flags which are redundant with Intel! */
|
||||
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
|
||||
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable. */
|
||||
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
|
||||
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
|
||||
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
|
||||
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
|
||||
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
|
||||
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64) */
|
||||
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow! extensions */
|
||||
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow! */
|
||||
#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
|
||||
#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
|
||||
#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
|
||||
#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
|
||||
#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
|
||||
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
|
||||
#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
|
||||
#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
|
||||
#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
|
||||
#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
|
||||
|
||||
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
|
||||
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
|
||||
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
|
||||
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
|
||||
#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
|
||||
#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
|
||||
#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
|
||||
|
||||
/* Other features, Linux-defined mapping, word 3 */
|
||||
/* This range is used for feature bits which conflict or are synthesized */
|
||||
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
|
||||
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
|
||||
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
|
||||
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||
/* cpu types for specific tunings: */
|
||||
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
|
||||
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
|
||||
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
|
||||
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
|
||||
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
|
||||
#define X86_FEATURE_UP ( 3*32+ 9) /* smp kernel running on up */
|
||||
#define X86_FEATURE_ART ( 3*32+10) /* Platform has always running timer (ART) */
|
||||
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
|
||||
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
|
||||
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in ia32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in ia32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* rep microcode works well */
|
||||
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" Mfence synchronizes RDTSC */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" Lfence synchronizes RDTSC */
|
||||
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
|
||||
#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
|
||||
#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
|
||||
#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
|
||||
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
|
||||
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
|
||||
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" Monitor/Mwait support */
|
||||
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
|
||||
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
|
||||
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer mode */
|
||||
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
|
||||
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
|
||||
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
|
||||
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
|
||||
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
|
||||
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
|
||||
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B */
|
||||
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
|
||||
#define X86_FEATURE_PDCM ( 4*32+15) /* Performance Capabilities */
|
||||
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
|
||||
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
|
||||
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
|
||||
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
|
||||
#define X86_FEATURE_X2APIC ( 4*32+21) /* x2APIC */
|
||||
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
|
||||
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
|
||||
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* Tsc deadline timer */
|
||||
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
|
||||
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
|
||||
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE enabled in the OS */
|
||||
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
|
||||
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit fp conversions */
|
||||
#define X86_FEATURE_RDRAND ( 4*32+30) /* The RDRAND instruction */
|
||||
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
|
||||
/* CPU types for specific tunings: */
|
||||
#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
|
||||
#define X86_FEATURE_K7 ( 3*32+ 5) /* "" Athlon */
|
||||
#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
|
||||
#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
|
||||
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
|
||||
#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
|
||||
#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
|
||||
#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
|
||||
#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
|
||||
#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
|
||||
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
|
||||
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
|
||||
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
|
||||
#define X86_FEATURE_MFENCE_RDTSC ( 3*32+17) /* "" MFENCE synchronizes RDTSC */
|
||||
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
|
||||
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
|
||||
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
|
||||
#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
|
||||
#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
|
||||
#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
|
||||
#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
|
||||
#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
|
||||
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
|
||||
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
|
||||
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
|
||||
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
|
||||
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
|
||||
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
|
||||
#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
|
||||
#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
|
||||
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
|
||||
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
|
||||
#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
|
||||
#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
|
||||
#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
|
||||
#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
|
||||
#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
|
||||
#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
|
||||
#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
|
||||
#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
|
||||
#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
|
||||
#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
|
||||
#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
|
||||
#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
|
||||
#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
|
||||
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
|
||||
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
|
||||
#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
|
||||
#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
|
||||
#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
|
||||
#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
|
||||
#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
|
||||
#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
|
||||
#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
|
||||
#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
|
||||
#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
|
||||
#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
|
||||
#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
|
||||
|
||||
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
|
||||
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
|
||||
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
|
||||
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
|
||||
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
|
||||
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
|
||||
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
|
||||
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
|
||||
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
|
||||
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
|
||||
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
|
||||
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
|
||||
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
|
||||
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
|
||||
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
|
||||
#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
|
||||
#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
|
||||
#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
|
||||
#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
|
||||
#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
|
||||
#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
|
||||
|
||||
/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
|
||||
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
|
||||
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
|
||||
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure virtual machine */
|
||||
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
|
||||
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
|
||||
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
|
||||
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
|
||||
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
|
||||
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
|
||||
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
|
||||
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
|
||||
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
|
||||
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
|
||||
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
|
||||
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
|
||||
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
|
||||
#define X86_FEATURE_TCE ( 6*32+17) /* translation cache extension */
|
||||
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
|
||||
#define X86_FEATURE_TBM ( 6*32+21) /* trailing bit manipulations */
|
||||
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* topology extensions CPUID leafs */
|
||||
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* core performance counter extensions */
|
||||
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
|
||||
#define X86_FEATURE_BPEXT (6*32+26) /* data breakpoint extension */
|
||||
#define X86_FEATURE_PTSC ( 6*32+27) /* performance time-stamp counter */
|
||||
#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
|
||||
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
|
||||
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
|
||||
#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
|
||||
#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
|
||||
#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
|
||||
#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
|
||||
#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
|
||||
#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
|
||||
#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
|
||||
#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
|
||||
#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
|
||||
#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
|
||||
#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
|
||||
#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
|
||||
#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
|
||||
#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
|
||||
#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
|
||||
#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
|
||||
#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
|
||||
#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
|
||||
#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
|
||||
#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
|
||||
#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
|
||||
#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
|
||||
#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
|
||||
#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
|
||||
#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
|
||||
#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
|
||||
|
||||
/*
|
||||
* Auxiliary flags: Linux defined - For features scattered in various
|
||||
@ -187,146 +190,155 @@
|
||||
*
|
||||
* Reuse free bits when adding new feature flags!
|
||||
*/
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
|
||||
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
|
||||
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
|
||||
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
|
||||
#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
|
||||
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
|
||||
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
|
||||
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
|
||||
#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
|
||||
#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
|
||||
|
||||
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
|
||||
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
|
||||
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
|
||||
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
|
||||
#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */
|
||||
|
||||
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
||||
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
|
||||
#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */
|
||||
#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */
|
||||
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
|
||||
#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */
|
||||
#define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */
|
||||
#define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
|
||||
|
||||
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
|
||||
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
|
||||
|
||||
/* Virtualization flags: Linux defined, word 8 */
|
||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
|
||||
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
|
||||
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
|
||||
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
|
||||
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
|
||||
#define X86_FEATURE_VNMI ( 8*32+ 1) /* Intel Virtual NMI */
|
||||
#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 2) /* Intel FlexPriority */
|
||||
#define X86_FEATURE_EPT ( 8*32+ 3) /* Intel Extended Page Table */
|
||||
#define X86_FEATURE_VPID ( 8*32+ 4) /* Intel Virtual Processor ID */
|
||||
|
||||
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer vmmcall to vmcall */
|
||||
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
|
||||
#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
|
||||
#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
|
||||
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
|
||||
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
|
||||
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3b */
|
||||
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
|
||||
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
|
||||
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
|
||||
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
|
||||
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
|
||||
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
|
||||
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
|
||||
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
|
||||
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
|
||||
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
|
||||
#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
|
||||
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
|
||||
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
|
||||
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
|
||||
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
|
||||
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
|
||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
|
||||
#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
|
||||
#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
|
||||
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
|
||||
#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
|
||||
#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
|
||||
#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
|
||||
#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
|
||||
#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
|
||||
#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
|
||||
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
|
||||
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
|
||||
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
|
||||
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
|
||||
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
|
||||
#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
|
||||
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
|
||||
#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
|
||||
#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
|
||||
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
|
||||
#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
|
||||
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
|
||||
#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
|
||||
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
|
||||
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
|
||||
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
|
||||
#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
|
||||
#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
|
||||
#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
|
||||
|
||||
/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
|
||||
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT */
|
||||
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC */
|
||||
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
|
||||
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
|
||||
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
|
||||
#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
|
||||
#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
|
||||
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
|
||||
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
|
||||
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
|
||||
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
|
||||
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
|
||||
#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
|
||||
#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
|
||||
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring */
|
||||
#define X86_FEATURE_CQM_MBM_TOTAL (12*32+ 1) /* LLC Total MBM monitoring */
|
||||
#define X86_FEATURE_CQM_MBM_LOCAL (12*32+ 2) /* LLC Local MBM monitoring */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
|
||||
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
|
||||
#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
|
||||
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
|
||||
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
|
||||
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
|
||||
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
|
||||
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
|
||||
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
||||
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
|
||||
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
|
||||
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
|
||||
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
|
||||
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
|
||||
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
|
||||
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
|
||||
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
||||
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
||||
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
|
||||
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
|
||||
#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
|
||||
#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
|
||||
#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
|
||||
#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
|
||||
#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
|
||||
#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
|
||||
#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
|
||||
#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
|
||||
#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
|
||||
|
||||
/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
|
||||
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
||||
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
|
||||
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
|
||||
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
|
||||
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
|
||||
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
|
||||
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
|
||||
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
|
||||
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
|
||||
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
|
||||
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
|
||||
#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
|
||||
#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
|
||||
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
|
||||
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
|
||||
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
|
||||
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
|
||||
#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
|
||||
#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
|
||||
#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
|
||||
#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
|
||||
#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
|
||||
#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
|
||||
#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
|
||||
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
|
||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
|
||||
#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
|
||||
#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
|
||||
#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
|
||||
#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
|
||||
#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
|
||||
#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
|
||||
#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
|
||||
#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
|
||||
#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
|
||||
#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
|
||||
#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
|
||||
#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
|
||||
#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
|
||||
|
||||
/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
|
||||
#define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */
|
||||
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
|
||||
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
|
||||
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
|
||||
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
|
||||
#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
|
||||
#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
*/
|
||||
#define X86_BUG(x) (NCAPINTS*32 + (x))
|
||||
#define X86_BUG(x) (NCAPINTS*32 + (x))
|
||||
|
||||
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
|
||||
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
|
||||
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
|
||||
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
|
||||
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
|
||||
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
|
||||
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
|
||||
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
|
||||
#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
|
||||
#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
|
||||
#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
|
||||
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
|
||||
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
|
||||
#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
|
||||
#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
|
||||
#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
|
||||
#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
|
||||
* to avoid confusion.
|
||||
*/
|
||||
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
|
||||
#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
|
||||
#endif
|
||||
#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
|
||||
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
|
||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||
#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
|
||||
#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
|
||||
#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
|
||||
#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
|
||||
|
||||
#endif /* _ASM_X86_CPUFEATURES_H */
|
||||
|
@ -60,17 +60,10 @@ static inline struct desc_struct *get_current_gdt_rw(void)
|
||||
return this_cpu_ptr(&gdt_page)->gdt;
|
||||
}
|
||||
|
||||
/* Get the fixmap index for a specific processor */
|
||||
static inline unsigned int get_cpu_gdt_ro_index(int cpu)
|
||||
{
|
||||
return FIX_GDT_REMAP_BEGIN + cpu;
|
||||
}
|
||||
|
||||
/* Provide the fixmap address of the remapped GDT */
|
||||
static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
|
||||
{
|
||||
unsigned int idx = get_cpu_gdt_ro_index(cpu);
|
||||
return (struct desc_struct *)__fix_to_virt(idx);
|
||||
return (struct desc_struct *)&get_cpu_entry_area(cpu)->gdt;
|
||||
}
|
||||
|
||||
/* Provide the current read-only GDT */
|
||||
@ -185,7 +178,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr,
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
|
||||
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, struct x86_hw_tss *addr)
|
||||
{
|
||||
struct desc_struct *d = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
|
@ -44,6 +44,45 @@ extern unsigned long __FIXADDR_TOP;
|
||||
PAGE_SIZE)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* cpu_entry_area is a percpu region in the fixmap that contains things
|
||||
* needed by the CPU and early entry/exit code. Real types aren't used
|
||||
* for all fields here to avoid circular header dependencies.
|
||||
*
|
||||
* Every field is a virtual alias of some other allocated backing store.
|
||||
* There is no direct allocation of a struct cpu_entry_area.
|
||||
*/
|
||||
struct cpu_entry_area {
|
||||
char gdt[PAGE_SIZE];
|
||||
|
||||
/*
|
||||
* The GDT is just below SYSENTER_stack and thus serves (on x86_64) as
|
||||
* a a read-only guard page.
|
||||
*/
|
||||
struct SYSENTER_stack_page SYSENTER_stack_page;
|
||||
|
||||
/*
|
||||
* On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
|
||||
* we need task switches to work, and task switches write to the TSS.
|
||||
*/
|
||||
struct tss_struct tss;
|
||||
|
||||
char entry_trampoline[PAGE_SIZE];
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Exception stacks used for IST entries.
|
||||
*
|
||||
* In the future, this should have a separate slot for each stack
|
||||
* with guard pages between them.
|
||||
*/
|
||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
|
||||
|
||||
extern void setup_cpu_entry_areas(void);
|
||||
|
||||
/*
|
||||
* Here we define all the compile-time 'special' virtual
|
||||
@ -101,8 +140,14 @@ enum fixed_addresses {
|
||||
FIX_LNW_VRTC,
|
||||
#endif
|
||||
/* Fixmap entries to remap the GDTs, one per processor. */
|
||||
FIX_GDT_REMAP_BEGIN,
|
||||
FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
|
||||
FIX_CPU_ENTRY_AREA_TOP,
|
||||
FIX_CPU_ENTRY_AREA_BOTTOM = FIX_CPU_ENTRY_AREA_TOP + (CPU_ENTRY_AREA_PAGES * NR_CPUS) - 1,
|
||||
|
||||
#ifdef CONFIG_ACPI_APEI_GHES
|
||||
/* Used for GHES mapping from assorted contexts */
|
||||
FIX_APEI_GHES_IRQ,
|
||||
FIX_APEI_GHES_NMI,
|
||||
#endif
|
||||
|
||||
__end_of_permanent_fixed_addresses,
|
||||
|
||||
@ -185,5 +230,30 @@ void __init *early_memremap_decrypted_wp(resource_size_t phys_addr,
|
||||
void __early_set_fixmap(enum fixed_addresses idx,
|
||||
phys_addr_t phys, pgprot_t flags);
|
||||
|
||||
static inline unsigned int __get_cpu_entry_area_page_index(int cpu, int page)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
|
||||
|
||||
return FIX_CPU_ENTRY_AREA_BOTTOM - cpu*CPU_ENTRY_AREA_PAGES - page;
|
||||
}
|
||||
|
||||
#define __get_cpu_entry_area_offset_index(cpu, offset) ({ \
|
||||
BUILD_BUG_ON(offset % PAGE_SIZE != 0); \
|
||||
__get_cpu_entry_area_page_index(cpu, offset / PAGE_SIZE); \
|
||||
})
|
||||
|
||||
#define get_cpu_entry_area_index(cpu, field) \
|
||||
__get_cpu_entry_area_offset_index((cpu), offsetof(struct cpu_entry_area, field))
|
||||
|
||||
static inline struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||
{
|
||||
return (struct cpu_entry_area *)__fix_to_virt(__get_cpu_entry_area_page_index(cpu, 0));
|
||||
}
|
||||
|
||||
static inline struct SYSENTER_stack *cpu_SYSENTER_stack(int cpu)
|
||||
{
|
||||
return &get_cpu_entry_area(cpu)->SYSENTER_stack_page.stack;
|
||||
}
|
||||
|
||||
#endif /* !__ASSEMBLY__ */
|
||||
#endif /* _ASM_X86_FIXMAP_H */
|
||||
|
@ -20,14 +20,22 @@
|
||||
#ifndef _ASM_X86_HYPERVISOR_H
|
||||
#define _ASM_X86_HYPERVISOR_H
|
||||
|
||||
/* x86 hypervisor types */
|
||||
enum x86_hypervisor_type {
|
||||
X86_HYPER_NATIVE = 0,
|
||||
X86_HYPER_VMWARE,
|
||||
X86_HYPER_MS_HYPERV,
|
||||
X86_HYPER_XEN_PV,
|
||||
X86_HYPER_XEN_HVM,
|
||||
X86_HYPER_KVM,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_HYPERVISOR_GUEST
|
||||
|
||||
#include <asm/kvm_para.h>
|
||||
#include <asm/x86_init.h>
|
||||
#include <asm/xen/hypervisor.h>
|
||||
|
||||
/*
|
||||
* x86 hypervisor information
|
||||
*/
|
||||
struct hypervisor_x86 {
|
||||
/* Hypervisor name */
|
||||
const char *name;
|
||||
@ -35,40 +43,27 @@ struct hypervisor_x86 {
|
||||
/* Detection routine */
|
||||
uint32_t (*detect)(void);
|
||||
|
||||
/* Platform setup (run once per boot) */
|
||||
void (*init_platform)(void);
|
||||
/* Hypervisor type */
|
||||
enum x86_hypervisor_type type;
|
||||
|
||||
/* X2APIC detection (run once per boot) */
|
||||
bool (*x2apic_available)(void);
|
||||
/* init time callbacks */
|
||||
struct x86_hyper_init init;
|
||||
|
||||
/* pin current vcpu to specified physical cpu (run rarely) */
|
||||
void (*pin_vcpu)(int);
|
||||
|
||||
/* called during init_mem_mapping() to setup early mappings. */
|
||||
void (*init_mem_mapping)(void);
|
||||
/* runtime callbacks */
|
||||
struct x86_hyper_runtime runtime;
|
||||
};
|
||||
|
||||
extern const struct hypervisor_x86 *x86_hyper;
|
||||
|
||||
/* Recognized hypervisors */
|
||||
extern const struct hypervisor_x86 x86_hyper_vmware;
|
||||
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_pv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
|
||||
extern const struct hypervisor_x86 x86_hyper_kvm;
|
||||
|
||||
extern enum x86_hypervisor_type x86_hyper_type;
|
||||
extern void init_hypervisor_platform(void);
|
||||
extern bool hypervisor_x2apic_available(void);
|
||||
extern void hypervisor_pin_vcpu(int cpu);
|
||||
|
||||
static inline void hypervisor_init_mem_mapping(void)
|
||||
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
|
||||
{
|
||||
if (x86_hyper && x86_hyper->init_mem_mapping)
|
||||
x86_hyper->init_mem_mapping();
|
||||
return x86_hyper_type == type;
|
||||
}
|
||||
#else
|
||||
static inline void init_hypervisor_platform(void) { }
|
||||
static inline bool hypervisor_x2apic_available(void) { return false; }
|
||||
static inline void hypervisor_init_mem_mapping(void) { }
|
||||
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
|
||||
{
|
||||
return type == X86_HYPER_NATIVE;
|
||||
}
|
||||
#endif /* CONFIG_HYPERVISOR_GUEST */
|
||||
#endif /* _ASM_X86_HYPERVISOR_H */
|
||||
|
@ -142,6 +142,9 @@ static inline notrace unsigned long arch_local_irq_save(void)
|
||||
swapgs; \
|
||||
sysretl
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
#define SAVE_FLAGS(x) pushfq; popq %rax
|
||||
#endif
|
||||
#else
|
||||
#define INTERRUPT_RETURN iret
|
||||
#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
|
||||
|
@ -26,6 +26,7 @@ extern void die(const char *, struct pt_regs *,long);
|
||||
extern int __must_check __die(const char *, struct pt_regs *, long);
|
||||
extern void show_stack_regs(struct pt_regs *regs);
|
||||
extern void __show_regs(struct pt_regs *regs, int all);
|
||||
extern void show_iret_regs(struct pt_regs *regs);
|
||||
extern unsigned long oops_begin(void);
|
||||
extern void oops_end(unsigned long, struct pt_regs *, int signr);
|
||||
|
||||
|
@ -73,8 +73,8 @@ static inline void load_mm_ldt(struct mm_struct *mm)
|
||||
#ifdef CONFIG_MODIFY_LDT_SYSCALL
|
||||
struct ldt_struct *ldt;
|
||||
|
||||
/* lockless_dereference synchronizes with smp_store_release */
|
||||
ldt = lockless_dereference(mm->context.ldt);
|
||||
/* READ_ONCE synchronizes with smp_store_release */
|
||||
ldt = READ_ONCE(mm->context.ldt);
|
||||
|
||||
/*
|
||||
* Any change to mm->context.ldt is followed by an IPI to all
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <asm/orc_types.h>
|
||||
|
||||
struct mod_arch_specific {
|
||||
#ifdef CONFIG_ORC_UNWINDER
|
||||
#ifdef CONFIG_UNWINDER_ORC
|
||||
unsigned int num_orcs;
|
||||
int *orc_unwind_ip;
|
||||
struct orc_entry *orc_unwind;
|
||||
|
@ -16,10 +16,9 @@
|
||||
#include <linux/cpumask.h>
|
||||
#include <asm/frame.h>
|
||||
|
||||
static inline void load_sp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
static inline void load_sp0(unsigned long sp0)
|
||||
{
|
||||
PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
|
||||
PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
|
||||
}
|
||||
|
||||
/* The paravirtualized CPUID instruction. */
|
||||
@ -928,6 +927,15 @@ extern void default_banner(void);
|
||||
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
|
||||
CLBR_NONE, \
|
||||
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
|
||||
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
#define SAVE_FLAGS(clobbers) \
|
||||
PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_save_fl), clobbers, \
|
||||
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
|
||||
call PARA_INDIRECT(pv_irq_ops+PV_IRQ_save_fl); \
|
||||
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
|
||||
#endif
|
||||
|
||||
#endif /* CONFIG_X86_32 */
|
||||
|
||||
#endif /* __ASSEMBLY__ */
|
||||
|
@ -134,7 +134,7 @@ struct pv_cpu_ops {
|
||||
void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
|
||||
void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
|
||||
|
||||
void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
|
||||
void (*load_sp0)(unsigned long sp0);
|
||||
|
||||
void (*set_iopl_mask)(unsigned mask);
|
||||
|
||||
|
@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
|
||||
{
|
||||
bool oldbit;
|
||||
|
||||
asm volatile("bt "__percpu_arg(2)",%1\n\t"
|
||||
asm volatile("bt "__percpu_arg(2)",%1"
|
||||
CC_SET(c)
|
||||
: CC_OUT(c) (oldbit)
|
||||
: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
|
||||
|
@ -200,10 +200,9 @@ enum page_cache_mode {
|
||||
|
||||
#define _PAGE_ENC (_AT(pteval_t, sme_me_mask))
|
||||
|
||||
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
|
||||
_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
|
||||
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
|
||||
_PAGE_DIRTY | _PAGE_ENC)
|
||||
#define _PAGE_TABLE (_KERNPG_TABLE | _PAGE_USER)
|
||||
|
||||
#define __PAGE_KERNEL_ENC (__PAGE_KERNEL | _PAGE_ENC)
|
||||
#define __PAGE_KERNEL_ENC_WP (__PAGE_KERNEL_WP | _PAGE_ENC)
|
||||
|
@ -162,9 +162,9 @@ enum cpuid_regs_idx {
|
||||
extern struct cpuinfo_x86 boot_cpu_data;
|
||||
extern struct cpuinfo_x86 new_cpu_data;
|
||||
|
||||
extern struct tss_struct doublefault_tss;
|
||||
extern __u32 cpu_caps_cleared[NCAPINTS];
|
||||
extern __u32 cpu_caps_set[NCAPINTS];
|
||||
extern struct x86_hw_tss doublefault_tss;
|
||||
extern __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
|
||||
extern __u32 cpu_caps_set[NCAPINTS + NBUGINTS];
|
||||
|
||||
#ifdef CONFIG_SMP
|
||||
DECLARE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
|
||||
@ -252,6 +252,11 @@ static inline void load_cr3(pgd_t *pgdir)
|
||||
write_cr3(__sme_pa(pgdir));
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that while the legacy 'TSS' name comes from 'Task State Segment',
|
||||
* on modern x86 CPUs the TSS also holds information important to 64-bit mode,
|
||||
* unrelated to the task-switch mechanism:
|
||||
*/
|
||||
#ifdef CONFIG_X86_32
|
||||
/* This is the TSS defined by the hardware. */
|
||||
struct x86_hw_tss {
|
||||
@ -304,7 +309,13 @@ struct x86_hw_tss {
|
||||
struct x86_hw_tss {
|
||||
u32 reserved1;
|
||||
u64 sp0;
|
||||
|
||||
/*
|
||||
* We store cpu_current_top_of_stack in sp1 so it's always accessible.
|
||||
* Linux does not use ring 1, so sp1 is not otherwise needed.
|
||||
*/
|
||||
u64 sp1;
|
||||
|
||||
u64 sp2;
|
||||
u64 reserved2;
|
||||
u64 ist[7];
|
||||
@ -322,12 +333,22 @@ struct x86_hw_tss {
|
||||
#define IO_BITMAP_BITS 65536
|
||||
#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
|
||||
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
|
||||
#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
|
||||
#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
|
||||
#define INVALID_IO_BITMAP_OFFSET 0x8000
|
||||
|
||||
struct SYSENTER_stack {
|
||||
unsigned long words[64];
|
||||
};
|
||||
|
||||
struct SYSENTER_stack_page {
|
||||
struct SYSENTER_stack stack;
|
||||
} __aligned(PAGE_SIZE);
|
||||
|
||||
struct tss_struct {
|
||||
/*
|
||||
* The hardware state:
|
||||
* The fixed hardware portion. This must not cross a page boundary
|
||||
* at risk of violating the SDM's advice and potentially triggering
|
||||
* errata.
|
||||
*/
|
||||
struct x86_hw_tss x86_tss;
|
||||
|
||||
@ -338,18 +359,9 @@ struct tss_struct {
|
||||
* be within the limit.
|
||||
*/
|
||||
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
|
||||
} __aligned(PAGE_SIZE);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* Space for the temporary SYSENTER stack.
|
||||
*/
|
||||
unsigned long SYSENTER_stack_canary;
|
||||
unsigned long SYSENTER_stack[64];
|
||||
#endif
|
||||
|
||||
} ____cacheline_aligned;
|
||||
|
||||
DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
|
||||
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
|
||||
|
||||
/*
|
||||
* sizeof(unsigned long) coming from an extra "long" at the end
|
||||
@ -363,6 +375,9 @@ DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||
#else
|
||||
/* The RO copy can't be accessed with this_cpu_xyz(), so use the RW copy. */
|
||||
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
||||
#endif
|
||||
|
||||
/*
|
||||
@ -431,7 +446,9 @@ typedef struct {
|
||||
struct thread_struct {
|
||||
/* Cached TLS descriptors: */
|
||||
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long sp0;
|
||||
#endif
|
||||
unsigned long sp;
|
||||
#ifdef CONFIG_X86_32
|
||||
unsigned long sysenter_cs;
|
||||
@ -518,16 +535,9 @@ static inline void native_set_iopl_mask(unsigned mask)
|
||||
}
|
||||
|
||||
static inline void
|
||||
native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
|
||||
native_load_sp0(unsigned long sp0)
|
||||
{
|
||||
tss->x86_tss.sp0 = thread->sp0;
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
|
||||
if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
|
||||
tss->x86_tss.ss1 = thread->sysenter_cs;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||
}
|
||||
#endif
|
||||
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
|
||||
}
|
||||
|
||||
static inline void native_swapgs(void)
|
||||
@ -539,12 +549,18 @@ static inline void native_swapgs(void)
|
||||
|
||||
static inline unsigned long current_top_of_stack(void)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
|
||||
#else
|
||||
/* sp0 on x86_32 is special in and around vm86 mode. */
|
||||
/*
|
||||
* We can't read directly from tss.sp0: sp0 on x86_32 is special in
|
||||
* and around vm86 mode and sp0 on x86_64 is special because of the
|
||||
* entry trampoline.
|
||||
*/
|
||||
return this_cpu_read_stable(cpu_current_top_of_stack);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool on_thread_stack(void)
|
||||
{
|
||||
return (unsigned long)(current_top_of_stack() -
|
||||
current_stack_pointer) < THREAD_SIZE;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
@ -552,10 +568,9 @@ static inline unsigned long current_top_of_stack(void)
|
||||
#else
|
||||
#define __cpuid native_cpuid
|
||||
|
||||
static inline void load_sp0(struct tss_struct *tss,
|
||||
struct thread_struct *thread)
|
||||
static inline void load_sp0(unsigned long sp0)
|
||||
{
|
||||
native_load_sp0(tss, thread);
|
||||
native_load_sp0(sp0);
|
||||
}
|
||||
|
||||
#define set_iopl_mask native_set_iopl_mask
|
||||
@ -804,6 +819,15 @@ static inline void spin_lock_prefetch(const void *x)
|
||||
#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
|
||||
TOP_OF_KERNEL_STACK_PADDING)
|
||||
|
||||
#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
|
||||
|
||||
#define task_pt_regs(task) \
|
||||
({ \
|
||||
unsigned long __ptr = (unsigned long)task_stack_page(task); \
|
||||
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
|
||||
((struct pt_regs *)__ptr) - 1; \
|
||||
})
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
/*
|
||||
* User space process size: 3GB (default).
|
||||
@ -823,23 +847,6 @@ static inline void spin_lock_prefetch(const void *x)
|
||||
.addr_limit = KERNEL_DS, \
|
||||
}
|
||||
|
||||
/*
|
||||
* TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
|
||||
* This is necessary to guarantee that the entire "struct pt_regs"
|
||||
* is accessible even if the CPU haven't stored the SS/ESP registers
|
||||
* on the stack (interrupt gate does not save these registers
|
||||
* when switching to the same priv ring).
|
||||
* Therefore beware: accessing the ss/esp fields of the
|
||||
* "struct pt_regs" is possible, but they may contain the
|
||||
* completely wrong values.
|
||||
*/
|
||||
#define task_pt_regs(task) \
|
||||
({ \
|
||||
unsigned long __ptr = (unsigned long)task_stack_page(task); \
|
||||
__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
|
||||
((struct pt_regs *)__ptr) - 1; \
|
||||
})
|
||||
|
||||
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
|
||||
|
||||
#else
|
||||
@ -873,11 +880,9 @@ static inline void spin_lock_prefetch(const void *x)
|
||||
#define STACK_TOP_MAX TASK_SIZE_MAX
|
||||
|
||||
#define INIT_THREAD { \
|
||||
.sp0 = TOP_OF_INIT_STACK, \
|
||||
.addr_limit = KERNEL_DS, \
|
||||
}
|
||||
|
||||
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
|
||||
extern unsigned long KSTK_ESP(struct task_struct *task);
|
||||
|
||||
#endif /* CONFIG_X86_64 */
|
||||
|
@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
#ifndef CONFIG_PARAVIRT
|
||||
/*
|
||||
* On non-paravirt systems, this is the only long mode CPL 3
|
||||
@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
|
||||
/* Headers are too twisted for this to go in paravirt.h. */
|
||||
return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
|
||||
#endif
|
||||
#else /* !CONFIG_X86_64 */
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
#define current_user_stack_pointer() current_pt_regs()->sp
|
||||
#define compat_user_stack_pointer() current_pt_regs()->sp
|
||||
#endif
|
||||
|
@ -29,7 +29,7 @@ cc_label: \
|
||||
#define __GEN_RMWcc(fullop, var, cc, clobbers, ...) \
|
||||
do { \
|
||||
bool c; \
|
||||
asm volatile (fullop ";" CC_SET(cc) \
|
||||
asm volatile (fullop CC_SET(cc) \
|
||||
: [counter] "+m" (var), CC_OUT(cc) (c) \
|
||||
: __VA_ARGS__ : clobbers); \
|
||||
return c; \
|
||||
|
@ -16,6 +16,7 @@ enum stack_type {
|
||||
STACK_TYPE_TASK,
|
||||
STACK_TYPE_IRQ,
|
||||
STACK_TYPE_SOFTIRQ,
|
||||
STACK_TYPE_SYSENTER,
|
||||
STACK_TYPE_EXCEPTION,
|
||||
STACK_TYPE_EXCEPTION_LAST = STACK_TYPE_EXCEPTION + N_EXCEPTION_STACKS-1,
|
||||
};
|
||||
@ -28,6 +29,8 @@ struct stack_info {
|
||||
bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info);
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info);
|
||||
|
||||
int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
struct stack_info *info, unsigned long *visit_mask);
|
||||
|
||||
|
@ -2,6 +2,8 @@
|
||||
#ifndef _ASM_X86_SWITCH_TO_H
|
||||
#define _ASM_X86_SWITCH_TO_H
|
||||
|
||||
#include <linux/sched/task_stack.h>
|
||||
|
||||
struct task_struct; /* one of the stranger aspects of C forward declarations */
|
||||
|
||||
struct task_struct *__switch_to_asm(struct task_struct *prev,
|
||||
@ -73,4 +75,28 @@ do { \
|
||||
((last) = __switch_to_asm((prev), (next))); \
|
||||
} while (0)
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
static inline void refresh_sysenter_cs(struct thread_struct *thread)
|
||||
{
|
||||
/* Only happens when SEP is enabled, no need to test "SEP"arately: */
|
||||
if (unlikely(this_cpu_read(cpu_tss_rw.x86_tss.ss1) == thread->sysenter_cs))
|
||||
return;
|
||||
|
||||
this_cpu_write(cpu_tss_rw.x86_tss.ss1, thread->sysenter_cs);
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* This is used when switching tasks or entering/exiting vm86 mode. */
|
||||
static inline void update_sp0(struct task_struct *task)
|
||||
{
|
||||
/* On x86_64, sp0 always points to the entry trampoline stack, which is constant: */
|
||||
#ifdef CONFIG_X86_32
|
||||
load_sp0(task->thread.sp0);
|
||||
#else
|
||||
if (static_cpu_has(X86_FEATURE_XENPV))
|
||||
load_sp0(task_top_of_stack(task));
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif /* _ASM_X86_SWITCH_TO_H */
|
||||
|
@ -207,7 +207,7 @@ static inline int arch_within_stack_frames(const void * const stack,
|
||||
#else /* !__ASSEMBLY__ */
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
# define cpu_current_top_of_stack (cpu_tss + TSS_sp0)
|
||||
# define cpu_current_top_of_stack (cpu_tss_rw + TSS_sp1)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
|
||||
)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_state,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
|
||||
TP_ARGS(fpu)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
|
||||
TP_PROTO(struct fpu *fpu),
|
||||
TP_ARGS(fpu)
|
||||
|
@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
|
||||
|
||||
#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
|
||||
asmlinkage void xen_divide_error(void);
|
||||
asmlinkage void xen_xennmi(void);
|
||||
asmlinkage void xen_xendebug(void);
|
||||
asmlinkage void xen_xenint3(void);
|
||||
asmlinkage void xen_nmi(void);
|
||||
asmlinkage void xen_overflow(void);
|
||||
asmlinkage void xen_bounds(void);
|
||||
asmlinkage void xen_invalid_op(void);
|
||||
@ -75,7 +75,6 @@ dotraplinkage void do_segment_not_present(struct pt_regs *, long);
|
||||
dotraplinkage void do_stack_segment(struct pt_regs *, long);
|
||||
#ifdef CONFIG_X86_64
|
||||
dotraplinkage void do_double_fault(struct pt_regs *, long);
|
||||
asmlinkage struct pt_regs *sync_regs(struct pt_regs *);
|
||||
#endif
|
||||
dotraplinkage void do_general_protection(struct pt_regs *, long);
|
||||
dotraplinkage void do_page_fault(struct pt_regs *, unsigned long);
|
||||
@ -145,4 +144,22 @@ enum {
|
||||
X86_TRAP_IRET = 32, /* 32, IRET Exception */
|
||||
};
|
||||
|
||||
/*
|
||||
* Page fault error code bits:
|
||||
*
|
||||
* bit 0 == 0: no page found 1: protection fault
|
||||
* bit 1 == 0: read access 1: write access
|
||||
* bit 2 == 0: kernel-mode access 1: user-mode access
|
||||
* bit 3 == 1: use of reserved bit detected
|
||||
* bit 4 == 1: fault was an instruction fetch
|
||||
* bit 5 == 1: protection keys block access
|
||||
*/
|
||||
enum x86_pf_error_code {
|
||||
X86_PF_PROT = 1 << 0,
|
||||
X86_PF_WRITE = 1 << 1,
|
||||
X86_PF_USER = 1 << 2,
|
||||
X86_PF_RSVD = 1 << 3,
|
||||
X86_PF_INSTR = 1 << 4,
|
||||
X86_PF_PK = 1 << 5,
|
||||
};
|
||||
#endif /* _ASM_X86_TRAPS_H */
|
||||
|
@ -7,17 +7,20 @@
|
||||
#include <asm/ptrace.h>
|
||||
#include <asm/stacktrace.h>
|
||||
|
||||
#define IRET_FRAME_OFFSET (offsetof(struct pt_regs, ip))
|
||||
#define IRET_FRAME_SIZE (sizeof(struct pt_regs) - IRET_FRAME_OFFSET)
|
||||
|
||||
struct unwind_state {
|
||||
struct stack_info stack_info;
|
||||
unsigned long stack_mask;
|
||||
struct task_struct *task;
|
||||
int graph_idx;
|
||||
bool error;
|
||||
#if defined(CONFIG_ORC_UNWINDER)
|
||||
#if defined(CONFIG_UNWINDER_ORC)
|
||||
bool signal, full_regs;
|
||||
unsigned long sp, bp, ip;
|
||||
struct pt_regs *regs;
|
||||
#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
|
||||
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
|
||||
bool got_irq;
|
||||
unsigned long *bp, *orig_sp, ip;
|
||||
struct pt_regs *regs;
|
||||
@ -51,7 +54,11 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
__unwind_start(state, task, regs, first_frame);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
|
||||
#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
|
||||
/*
|
||||
* WARNING: The entire pt_regs may not be safe to dereference. In some cases,
|
||||
* only the iret frame registers are accessible. Use with caution!
|
||||
*/
|
||||
static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
||||
{
|
||||
if (unwind_done(state))
|
||||
@ -66,7 +73,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_ORC_UNWINDER
|
||||
#ifdef CONFIG_UNWINDER_ORC
|
||||
void unwind_init(void);
|
||||
void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
|
||||
void *orc, size_t orc_size);
|
||||
|
@ -114,6 +114,18 @@ struct x86_init_pci {
|
||||
void (*fixup_irqs)(void);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_hyper_init - x86 hypervisor init functions
|
||||
* @init_platform: platform setup
|
||||
* @x2apic_available: X2APIC detection
|
||||
* @init_mem_mapping: setup early mappings during init_mem_mapping()
|
||||
*/
|
||||
struct x86_hyper_init {
|
||||
void (*init_platform)(void);
|
||||
bool (*x2apic_available)(void);
|
||||
void (*init_mem_mapping)(void);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_init_ops - functions for platform specific setup
|
||||
*
|
||||
@ -127,6 +139,7 @@ struct x86_init_ops {
|
||||
struct x86_init_timers timers;
|
||||
struct x86_init_iommu iommu;
|
||||
struct x86_init_pci pci;
|
||||
struct x86_hyper_init hyper;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -199,6 +212,15 @@ struct x86_legacy_features {
|
||||
struct x86_legacy_devices devices;
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_hyper_runtime - x86 hypervisor specific runtime callbacks
|
||||
*
|
||||
* @pin_vcpu: pin current vcpu to specified physical cpu (run rarely)
|
||||
*/
|
||||
struct x86_hyper_runtime {
|
||||
void (*pin_vcpu)(int cpu);
|
||||
};
|
||||
|
||||
/**
|
||||
* struct x86_platform_ops - platform specific runtime functions
|
||||
* @calibrate_cpu: calibrate CPU
|
||||
@ -218,6 +240,7 @@ struct x86_legacy_features {
|
||||
* possible in x86_early_init_platform_quirks() by
|
||||
* only using the current x86_hardware_subarch
|
||||
* semantics.
|
||||
* @hyper: x86 hypervisor specific runtime callbacks
|
||||
*/
|
||||
struct x86_platform_ops {
|
||||
unsigned long (*calibrate_cpu)(void);
|
||||
@ -233,6 +256,7 @@ struct x86_platform_ops {
|
||||
void (*apic_post_init)(void);
|
||||
struct x86_legacy_features legacy;
|
||||
void (*set_legacy_features)(void);
|
||||
struct x86_hyper_runtime hyper;
|
||||
};
|
||||
|
||||
struct pci_dev;
|
||||
|
@ -152,5 +152,8 @@
|
||||
#define CX86_ARR_BASE 0xc4
|
||||
#define CX86_RCR_BASE 0xdc
|
||||
|
||||
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
|
||||
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
|
||||
X86_CR0_PG)
|
||||
|
||||
#endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
|
||||
|
@ -25,9 +25,9 @@ endif
|
||||
KASAN_SANITIZE_head$(BITS).o := n
|
||||
KASAN_SANITIZE_dumpstack.o := n
|
||||
KASAN_SANITIZE_dumpstack_$(BITS).o := n
|
||||
KASAN_SANITIZE_stacktrace.o := n
|
||||
KASAN_SANITIZE_stacktrace.o := n
|
||||
KASAN_SANITIZE_paravirt.o := n
|
||||
|
||||
OBJECT_FILES_NON_STANDARD_head_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
|
||||
OBJECT_FILES_NON_STANDARD_test_nx.o := y
|
||||
@ -128,9 +128,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
|
||||
obj-$(CONFIG_TRACING) += tracepoint.o
|
||||
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
|
||||
|
||||
obj-$(CONFIG_ORC_UNWINDER) += unwind_orc.o
|
||||
obj-$(CONFIG_FRAME_POINTER_UNWINDER) += unwind_frame.o
|
||||
obj-$(CONFIG_GUESS_UNWINDER) += unwind_guess.o
|
||||
obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
|
||||
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
|
||||
obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o
|
||||
|
||||
###
|
||||
# 64 bit specific files
|
||||
|
@ -1645,7 +1645,7 @@ static __init void try_to_enable_x2apic(int remap_mode)
|
||||
* under KVM
|
||||
*/
|
||||
if (max_physical_apicid > 255 ||
|
||||
!hypervisor_x2apic_available()) {
|
||||
!x86_init.hyper.x2apic_available()) {
|
||||
pr_info("x2apic: IRQ remapping doesn't support X2APIC mode\n");
|
||||
x2apic_disable();
|
||||
return;
|
||||
|
@ -920,9 +920,8 @@ static __init void uv_rtc_init(void)
|
||||
/*
|
||||
* percpu heartbeat timer
|
||||
*/
|
||||
static void uv_heartbeat(unsigned long ignored)
|
||||
static void uv_heartbeat(struct timer_list *timer)
|
||||
{
|
||||
struct timer_list *timer = &uv_scir_info->timer;
|
||||
unsigned char bits = uv_scir_info->state;
|
||||
|
||||
/* Flip heartbeat bit: */
|
||||
@ -947,7 +946,7 @@ static int uv_heartbeat_enable(unsigned int cpu)
|
||||
struct timer_list *timer = &uv_cpu_scir_info(cpu)->timer;
|
||||
|
||||
uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY);
|
||||
setup_pinned_timer(timer, uv_heartbeat, cpu);
|
||||
timer_setup(timer, uv_heartbeat, TIMER_PINNED);
|
||||
timer->expires = jiffies + SCIR_CPU_HB_INTERVAL;
|
||||
add_timer_on(timer, cpu);
|
||||
uv_cpu_scir_info(cpu)->enabled = 1;
|
||||
|
@ -93,4 +93,10 @@ void common(void) {
|
||||
|
||||
BLANK();
|
||||
DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
|
||||
|
||||
/* Layout info for cpu_entry_area */
|
||||
OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
|
||||
OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
|
||||
OFFSET(CPU_ENTRY_AREA_SYSENTER_stack, cpu_entry_area, SYSENTER_stack_page);
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(struct SYSENTER_stack));
|
||||
}
|
||||
|
@ -47,13 +47,8 @@ void foo(void)
|
||||
BLANK();
|
||||
|
||||
/* Offset from the sysenter stack to tss.sp0 */
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
|
||||
offsetofend(struct tss_struct, SYSENTER_stack));
|
||||
|
||||
/* Offset from cpu_tss to SYSENTER_stack */
|
||||
OFFSET(CPU_TSS_SYSENTER_stack, tss_struct, SYSENTER_stack);
|
||||
/* Size of SYSENTER_stack */
|
||||
DEFINE(SIZEOF_SYSENTER_stack, sizeof(((struct tss_struct *)0)->SYSENTER_stack));
|
||||
DEFINE(TSS_sysenter_sp0, offsetof(struct cpu_entry_area, tss.x86_tss.sp0) -
|
||||
offsetofend(struct cpu_entry_area, SYSENTER_stack_page.stack));
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
BLANK();
|
||||
|
@ -23,6 +23,9 @@ int main(void)
|
||||
#ifdef CONFIG_PARAVIRT
|
||||
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
|
||||
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
|
||||
#ifdef CONFIG_DEBUG_ENTRY
|
||||
OFFSET(PV_IRQ_save_fl, pv_irq_ops, save_fl);
|
||||
#endif
|
||||
BLANK();
|
||||
#endif
|
||||
|
||||
@ -63,6 +66,7 @@ int main(void)
|
||||
|
||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
|
||||
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
|
||||
BLANK();
|
||||
|
||||
#ifdef CONFIG_CC_STACKPROTECTOR
|
||||
|
@ -23,6 +23,7 @@ obj-y += rdrand.o
|
||||
obj-y += match.o
|
||||
obj-y += bugs.o
|
||||
obj-$(CONFIG_CPU_FREQ) += aperfmperf.o
|
||||
obj-y += cpuid-deps.o
|
||||
|
||||
obj-$(CONFIG_PROC_FS) += proc.o
|
||||
obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
|
||||
|
@ -804,8 +804,11 @@ static void init_amd(struct cpuinfo_x86 *c)
|
||||
case 0x17: init_amd_zn(c); break;
|
||||
}
|
||||
|
||||
/* Enable workaround for FXSAVE leak */
|
||||
if (c->x86 >= 6)
|
||||
/*
|
||||
* Enable workaround for FXSAVE leak on CPUs
|
||||
* without a XSaveErPtr feature
|
||||
*/
|
||||
if ((c->x86 >= 6) && (!cpu_has(c, X86_FEATURE_XSAVEERPTR)))
|
||||
set_cpu_bug(c, X86_BUG_FXSAVE_LEAK);
|
||||
|
||||
cpu_detect_cache_sizes(c);
|
||||
|
@ -452,8 +452,8 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
|
||||
return NULL; /* Not found */
|
||||
}
|
||||
|
||||
__u32 cpu_caps_cleared[NCAPINTS];
|
||||
__u32 cpu_caps_set[NCAPINTS];
|
||||
__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
|
||||
__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
|
||||
|
||||
void load_percpu_segment(int cpu)
|
||||
{
|
||||
@ -466,27 +466,116 @@ void load_percpu_segment(int cpu)
|
||||
load_stack_canary_segment();
|
||||
}
|
||||
|
||||
/* Setup the fixmap mapping only once per-processor */
|
||||
static inline void setup_fixmap_gdt(int cpu)
|
||||
#ifdef CONFIG_X86_32
|
||||
/* The 32-bit entry code needs to find cpu_entry_area. */
|
||||
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Special IST stacks which the CPU switches to when it calls
|
||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
||||
* limit), all of them are 4K, except the debug stack which
|
||||
* is 8K.
|
||||
*/
|
||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
#endif
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct SYSENTER_stack_page,
|
||||
SYSENTER_stack_storage);
|
||||
|
||||
static void __init
|
||||
set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
|
||||
{
|
||||
for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
|
||||
__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
|
||||
}
|
||||
|
||||
/* Setup the fixmap mappings only once per-processor */
|
||||
static void __init setup_cpu_entry_area(int cpu)
|
||||
{
|
||||
#ifdef CONFIG_X86_64
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT. */
|
||||
pgprot_t prot = PAGE_KERNEL_RO;
|
||||
extern char _entry_trampoline[];
|
||||
|
||||
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||
pgprot_t tss_prot = PAGE_KERNEL_RO;
|
||||
#else
|
||||
/*
|
||||
* On native 32-bit systems, the GDT cannot be read-only because
|
||||
* our double fault handler uses a task gate, and entering through
|
||||
* a task gate needs to change an available TSS to busy. If the GDT
|
||||
* is read-only, that will triple fault.
|
||||
* a task gate needs to change an available TSS to busy. If the
|
||||
* GDT is read-only, that will triple fault. The TSS cannot be
|
||||
* read-only because the CPU writes to it on task switches.
|
||||
*
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor requires
|
||||
* it.
|
||||
* On Xen PV, the GDT must be read-only because the hypervisor
|
||||
* requires it.
|
||||
*/
|
||||
pgprot_t prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
|
||||
PAGE_KERNEL_RO : PAGE_KERNEL;
|
||||
pgprot_t tss_prot = PAGE_KERNEL;
|
||||
#endif
|
||||
|
||||
__set_fixmap(get_cpu_gdt_ro_index(cpu), get_cpu_gdt_paddr(cpu), prot);
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, SYSENTER_stack_page),
|
||||
per_cpu_ptr(&SYSENTER_stack_storage, cpu), 1,
|
||||
PAGE_KERNEL);
|
||||
|
||||
/*
|
||||
* The Intel SDM says (Volume 3, 7.2.1):
|
||||
*
|
||||
* Avoid placing a page boundary in the part of the TSS that the
|
||||
* processor reads during a task switch (the first 104 bytes). The
|
||||
* processor may not correctly perform address translations if a
|
||||
* boundary occurs in this area. During a task switch, the processor
|
||||
* reads and writes into the first 104 bytes of each TSS (using
|
||||
* contiguous physical addresses beginning with the physical address
|
||||
* of the first byte of the TSS). So, after TSS access begins, if
|
||||
* part of the 104 bytes is not physically contiguous, the processor
|
||||
* will access incorrect information without generating a page-fault
|
||||
* exception.
|
||||
*
|
||||
* There are also a lot of errata involving the TSS spanning a page
|
||||
* boundary. Assert that we're not doing that.
|
||||
*/
|
||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
|
||||
&per_cpu(cpu_tss_rw, cpu),
|
||||
sizeof(struct tss_struct) / PAGE_SIZE,
|
||||
tss_prot);
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
||||
set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
|
||||
&per_cpu(exception_stacks, cpu),
|
||||
sizeof(exception_stacks) / PAGE_SIZE,
|
||||
PAGE_KERNEL);
|
||||
|
||||
__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
|
||||
__pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
|
||||
#endif
|
||||
}
|
||||
|
||||
void __init setup_cpu_entry_areas(void)
|
||||
{
|
||||
unsigned int cpu;
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
setup_cpu_entry_area(cpu);
|
||||
}
|
||||
|
||||
/* Load the original GDT from the per-cpu structure */
|
||||
@ -723,7 +812,7 @@ static void apply_forced_caps(struct cpuinfo_x86 *c)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < NCAPINTS; i++) {
|
||||
for (i = 0; i < NCAPINTS + NBUGINTS; i++) {
|
||||
c->x86_capability[i] &= ~cpu_caps_cleared[i];
|
||||
c->x86_capability[i] |= cpu_caps_set[i];
|
||||
}
|
||||
@ -1225,7 +1314,7 @@ void enable_sep_cpu(void)
|
||||
return;
|
||||
|
||||
cpu = get_cpu();
|
||||
tss = &per_cpu(cpu_tss, cpu);
|
||||
tss = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
/*
|
||||
* We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
|
||||
@ -1234,11 +1323,7 @@ void enable_sep_cpu(void)
|
||||
|
||||
tss->x86_tss.ss1 = __KERNEL_CS;
|
||||
wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP,
|
||||
(unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
|
||||
0);
|
||||
|
||||
wrmsr(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1), 0);
|
||||
wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)entry_SYSENTER_32, 0);
|
||||
|
||||
put_cpu();
|
||||
@ -1301,18 +1386,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
|
||||
pr_cont(")\n");
|
||||
}
|
||||
|
||||
static __init int setup_disablecpuid(char *arg)
|
||||
/*
|
||||
* clearcpuid= was already parsed in fpu__init_parse_early_param.
|
||||
* But we need to keep a dummy __setup around otherwise it would
|
||||
* show up as an environment variable for init.
|
||||
*/
|
||||
static __init int setup_clearcpuid(char *arg)
|
||||
{
|
||||
int bit;
|
||||
|
||||
if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
|
||||
setup_clear_cpu_cap(bit);
|
||||
else
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
__setup("clearcpuid=", setup_disablecpuid);
|
||||
__setup("clearcpuid=", setup_clearcpuid);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
||||
@ -1334,25 +1417,19 @@ DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||
EXPORT_PER_CPU_SYMBOL(__preempt_count);
|
||||
|
||||
/*
|
||||
* Special IST stacks which the CPU switches to when it calls
|
||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
||||
* limit), all of them are 4K, except the debug stack which
|
||||
* is 8K.
|
||||
*/
|
||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
||||
};
|
||||
|
||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
||||
|
||||
/* May not be marked __init: used by software suspend */
|
||||
void syscall_init(void)
|
||||
{
|
||||
extern char _entry_trampoline[];
|
||||
extern char entry_SYSCALL_64_trampoline[];
|
||||
|
||||
int cpu = smp_processor_id();
|
||||
unsigned long SYSCALL64_entry_trampoline =
|
||||
(unsigned long)get_cpu_entry_area(cpu)->entry_trampoline +
|
||||
(entry_SYSCALL_64_trampoline - _entry_trampoline);
|
||||
|
||||
wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
|
||||
wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
|
||||
wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
|
||||
|
||||
#ifdef CONFIG_IA32_EMULATION
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
|
||||
@ -1363,7 +1440,7 @@ void syscall_init(void)
|
||||
* AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
|
||||
*/
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_ESP, (unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat);
|
||||
#else
|
||||
wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret);
|
||||
@ -1507,7 +1584,7 @@ void cpu_init(void)
|
||||
if (cpu)
|
||||
load_ucode_ap();
|
||||
|
||||
t = &per_cpu(cpu_tss, cpu);
|
||||
t = &per_cpu(cpu_tss_rw, cpu);
|
||||
oist = &per_cpu(orig_ist, cpu);
|
||||
|
||||
#ifdef CONFIG_NUMA
|
||||
@ -1546,7 +1623,7 @@ void cpu_init(void)
|
||||
* set up and load the per-CPU TSS
|
||||
*/
|
||||
if (!oist->ist[0]) {
|
||||
char *estacks = per_cpu(exception_stacks, cpu);
|
||||
char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
|
||||
|
||||
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
||||
estacks += exception_stack_sizes[v];
|
||||
@ -1557,7 +1634,7 @@ void cpu_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
|
||||
/*
|
||||
* <= is required because the CPU will access up to
|
||||
@ -1572,9 +1649,14 @@ void cpu_init(void)
|
||||
initialize_tlbstate_and_flush();
|
||||
enter_lazy_tlb(&init_mm, me);
|
||||
|
||||
load_sp0(t, ¤t->thread);
|
||||
set_tss_desc(cpu, t);
|
||||
/*
|
||||
* Initialize the TSS. sp0 points to the entry trampoline stack
|
||||
* regardless of what task is running.
|
||||
*/
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
load_TR_desc();
|
||||
load_sp0((unsigned long)(cpu_SYSENTER_stack(cpu) + 1));
|
||||
|
||||
load_mm_ldt(&init_mm);
|
||||
|
||||
clear_all_debug_regs();
|
||||
@ -1585,7 +1667,6 @@ void cpu_init(void)
|
||||
if (is_uv_system())
|
||||
uv_cpu_init();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
|
||||
@ -1595,8 +1676,7 @@ void cpu_init(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct task_struct *curr = current;
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
struct thread_struct *thread = &curr->thread;
|
||||
struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
wait_for_master_cpu(cpu);
|
||||
|
||||
@ -1627,12 +1707,16 @@ void cpu_init(void)
|
||||
initialize_tlbstate_and_flush();
|
||||
enter_lazy_tlb(&init_mm, curr);
|
||||
|
||||
load_sp0(t, thread);
|
||||
set_tss_desc(cpu, t);
|
||||
/*
|
||||
* Initialize the TSS. Don't bother initializing sp0, as the initial
|
||||
* task never enters user mode.
|
||||
*/
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
load_TR_desc();
|
||||
|
||||
load_mm_ldt(&init_mm);
|
||||
|
||||
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||
|
||||
#ifdef CONFIG_DOUBLEFAULT
|
||||
/* Set up doublefault TSS pointer in the GDT */
|
||||
@ -1644,7 +1728,6 @@ void cpu_init(void)
|
||||
|
||||
fpu__init_cpu();
|
||||
|
||||
setup_fixmap_gdt(cpu);
|
||||
load_fixmap_gdt(cpu);
|
||||
}
|
||||
#endif
|
||||
|
121
arch/x86/kernel/cpu/cpuid-deps.c
Normal file
121
arch/x86/kernel/cpu/cpuid-deps.c
Normal file
@ -0,0 +1,121 @@
|
||||
/* Declare dependencies between CPUIDs */
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/module.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
struct cpuid_dep {
|
||||
unsigned int feature;
|
||||
unsigned int depends;
|
||||
};
|
||||
|
||||
/*
|
||||
* Table of CPUID features that depend on others.
|
||||
*
|
||||
* This only includes dependencies that can be usefully disabled, not
|
||||
* features part of the base set (like FPU).
|
||||
*
|
||||
* Note this all is not __init / __initdata because it can be
|
||||
* called from cpu hotplug. It shouldn't do anything in this case,
|
||||
* but it's difficult to tell that to the init reference checker.
|
||||
*/
|
||||
const static struct cpuid_dep cpuid_deps[] = {
|
||||
{ X86_FEATURE_XSAVEOPT, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XSAVEC, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XSAVES, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_AVX, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_PKU, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_MPX, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_XGETBV1, X86_FEATURE_XSAVE },
|
||||
{ X86_FEATURE_FXSR_OPT, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_XMM, X86_FEATURE_FXSR },
|
||||
{ X86_FEATURE_XMM2, X86_FEATURE_XMM },
|
||||
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_XMM4_1, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_XMM4_2, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_XMM3, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_PCLMULQDQ, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_SSSE3, X86_FEATURE_XMM2, },
|
||||
{ X86_FEATURE_F16C, X86_FEATURE_XMM2, },
|
||||
{ X86_FEATURE_AES, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 },
|
||||
{ X86_FEATURE_FMA, X86_FEATURE_AVX },
|
||||
{ X86_FEATURE_AVX2, X86_FEATURE_AVX, },
|
||||
{ X86_FEATURE_AVX512F, X86_FEATURE_AVX, },
|
||||
{ X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512PF, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512ER, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512CD, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512DQ, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512BW, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_GFNI, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_VAES, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL },
|
||||
{ X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512_4FMAPS, X86_FEATURE_AVX512F },
|
||||
{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F },
|
||||
{}
|
||||
};
|
||||
|
||||
static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
|
||||
{
|
||||
/*
|
||||
* Note: This could use the non atomic __*_bit() variants, but the
|
||||
* rest of the cpufeature code uses atomics as well, so keep it for
|
||||
* consistency. Cleanup all of it separately.
|
||||
*/
|
||||
if (!c) {
|
||||
clear_cpu_cap(&boot_cpu_data, feature);
|
||||
set_bit(feature, (unsigned long *)cpu_caps_cleared);
|
||||
} else {
|
||||
clear_bit(feature, (unsigned long *)c->x86_capability);
|
||||
}
|
||||
}
|
||||
|
||||
/* Take the capabilities and the BUG bits into account */
|
||||
#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
|
||||
|
||||
static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
|
||||
{
|
||||
DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
|
||||
const struct cpuid_dep *d;
|
||||
bool changed;
|
||||
|
||||
if (WARN_ON(feature >= MAX_FEATURE_BITS))
|
||||
return;
|
||||
|
||||
clear_feature(c, feature);
|
||||
|
||||
/* Collect all features to disable, handling dependencies */
|
||||
memset(disable, 0, sizeof(disable));
|
||||
__set_bit(feature, disable);
|
||||
|
||||
/* Loop until we get a stable state. */
|
||||
do {
|
||||
changed = false;
|
||||
for (d = cpuid_deps; d->feature; d++) {
|
||||
if (!test_bit(d->depends, disable))
|
||||
continue;
|
||||
if (__test_and_set_bit(d->feature, disable))
|
||||
continue;
|
||||
|
||||
changed = true;
|
||||
clear_feature(c, d->feature);
|
||||
}
|
||||
} while (changed);
|
||||
}
|
||||
|
||||
void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
|
||||
{
|
||||
do_clear_cpu_cap(c, feature);
|
||||
}
|
||||
|
||||
void setup_clear_cpu_cap(unsigned int feature)
|
||||
{
|
||||
do_clear_cpu_cap(NULL, feature);
|
||||
}
|
@ -26,6 +26,12 @@
|
||||
#include <asm/processor.h>
|
||||
#include <asm/hypervisor.h>
|
||||
|
||||
extern const struct hypervisor_x86 x86_hyper_vmware;
|
||||
extern const struct hypervisor_x86 x86_hyper_ms_hyperv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_pv;
|
||||
extern const struct hypervisor_x86 x86_hyper_xen_hvm;
|
||||
extern const struct hypervisor_x86 x86_hyper_kvm;
|
||||
|
||||
static const __initconst struct hypervisor_x86 * const hypervisors[] =
|
||||
{
|
||||
#ifdef CONFIG_XEN_PV
|
||||
@ -41,54 +47,52 @@ static const __initconst struct hypervisor_x86 * const hypervisors[] =
|
||||
#endif
|
||||
};
|
||||
|
||||
const struct hypervisor_x86 *x86_hyper;
|
||||
EXPORT_SYMBOL(x86_hyper);
|
||||
enum x86_hypervisor_type x86_hyper_type;
|
||||
EXPORT_SYMBOL(x86_hyper_type);
|
||||
|
||||
static inline void __init
|
||||
static inline const struct hypervisor_x86 * __init
|
||||
detect_hypervisor_vendor(void)
|
||||
{
|
||||
const struct hypervisor_x86 *h, * const *p;
|
||||
const struct hypervisor_x86 *h = NULL, * const *p;
|
||||
uint32_t pri, max_pri = 0;
|
||||
|
||||
for (p = hypervisors; p < hypervisors + ARRAY_SIZE(hypervisors); p++) {
|
||||
h = *p;
|
||||
pri = h->detect();
|
||||
if (pri != 0 && pri > max_pri) {
|
||||
pri = (*p)->detect();
|
||||
if (pri > max_pri) {
|
||||
max_pri = pri;
|
||||
x86_hyper = h;
|
||||
h = *p;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_pri)
|
||||
pr_info("Hypervisor detected: %s\n", x86_hyper->name);
|
||||
if (h)
|
||||
pr_info("Hypervisor detected: %s\n", h->name);
|
||||
|
||||
return h;
|
||||
}
|
||||
|
||||
static void __init copy_array(const void *src, void *target, unsigned int size)
|
||||
{
|
||||
unsigned int i, n = size / sizeof(void *);
|
||||
const void * const *from = (const void * const *)src;
|
||||
const void **to = (const void **)target;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (from[i])
|
||||
to[i] = from[i];
|
||||
}
|
||||
|
||||
void __init init_hypervisor_platform(void)
|
||||
{
|
||||
const struct hypervisor_x86 *h;
|
||||
|
||||
detect_hypervisor_vendor();
|
||||
h = detect_hypervisor_vendor();
|
||||
|
||||
if (!x86_hyper)
|
||||
if (!h)
|
||||
return;
|
||||
|
||||
if (x86_hyper->init_platform)
|
||||
x86_hyper->init_platform();
|
||||
}
|
||||
copy_array(&h->init, &x86_init.hyper, sizeof(h->init));
|
||||
copy_array(&h->runtime, &x86_platform.hyper, sizeof(h->runtime));
|
||||
|
||||
bool __init hypervisor_x2apic_available(void)
|
||||
{
|
||||
return x86_hyper &&
|
||||
x86_hyper->x2apic_available &&
|
||||
x86_hyper->x2apic_available();
|
||||
}
|
||||
|
||||
void hypervisor_pin_vcpu(int cpu)
|
||||
{
|
||||
if (!x86_hyper)
|
||||
return;
|
||||
|
||||
if (x86_hyper->pin_vcpu)
|
||||
x86_hyper->pin_vcpu(cpu);
|
||||
else
|
||||
WARN_ONCE(1, "vcpu pinning requested but not supported!\n");
|
||||
x86_hyper_type = h->type;
|
||||
x86_init.hyper.init_platform();
|
||||
}
|
||||
|
@ -254,9 +254,9 @@ static void __init ms_hyperv_init_platform(void)
|
||||
#endif
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
||||
const __initconst struct hypervisor_x86 x86_hyper_ms_hyperv = {
|
||||
.name = "Microsoft Hyper-V",
|
||||
.detect = ms_hyperv_platform,
|
||||
.init_platform = ms_hyperv_init_platform,
|
||||
.type = X86_HYPER_MS_HYPERV,
|
||||
.init.init_platform = ms_hyperv_init_platform,
|
||||
};
|
||||
EXPORT_SYMBOL(x86_hyper_ms_hyperv);
|
||||
|
@ -205,10 +205,10 @@ static bool __init vmware_legacy_x2apic_available(void)
|
||||
(eax & (1 << VMWARE_PORT_CMD_LEGACY_X2APIC)) != 0;
|
||||
}
|
||||
|
||||
const __refconst struct hypervisor_x86 x86_hyper_vmware = {
|
||||
const __initconst struct hypervisor_x86 x86_hyper_vmware = {
|
||||
.name = "VMware",
|
||||
.detect = vmware_platform,
|
||||
.init_platform = vmware_platform_setup,
|
||||
.x2apic_available = vmware_legacy_x2apic_available,
|
||||
.type = X86_HYPER_VMWARE,
|
||||
.init.init_platform = vmware_platform_setup,
|
||||
.init.x2apic_available = vmware_legacy_x2apic_available,
|
||||
};
|
||||
EXPORT_SYMBOL(x86_hyper_vmware);
|
||||
|
@ -50,25 +50,23 @@ static void doublefault_fn(void)
|
||||
cpu_relax();
|
||||
}
|
||||
|
||||
struct tss_struct doublefault_tss __cacheline_aligned = {
|
||||
.x86_tss = {
|
||||
.sp0 = STACK_START,
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
||||
struct x86_hw_tss doublefault_tss __cacheline_aligned = {
|
||||
.sp0 = STACK_START,
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ldt = 0,
|
||||
.io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
|
||||
|
||||
.ip = (unsigned long) doublefault_fn,
|
||||
/* 0x2 bit is always set */
|
||||
.flags = X86_EFLAGS_SF | 0x2,
|
||||
.sp = STACK_START,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
.ss = __KERNEL_DS,
|
||||
.ds = __USER_DS,
|
||||
.fs = __KERNEL_PERCPU,
|
||||
.ip = (unsigned long) doublefault_fn,
|
||||
/* 0x2 bit is always set */
|
||||
.flags = X86_EFLAGS_SF | 0x2,
|
||||
.sp = STACK_START,
|
||||
.es = __USER_DS,
|
||||
.cs = __KERNEL_CS,
|
||||
.ss = __KERNEL_DS,
|
||||
.ds = __USER_DS,
|
||||
.fs = __KERNEL_PERCPU,
|
||||
|
||||
.__cr3 = __pa_nodebug(swapper_pg_dir),
|
||||
}
|
||||
.__cr3 = __pa_nodebug(swapper_pg_dir),
|
||||
};
|
||||
|
||||
/* dummy for do_double_fault() call */
|
||||
|
@ -43,6 +43,24 @@ bool in_task_stack(unsigned long *stack, struct task_struct *task,
|
||||
return true;
|
||||
}
|
||||
|
||||
bool in_sysenter_stack(unsigned long *stack, struct stack_info *info)
|
||||
{
|
||||
struct SYSENTER_stack *ss = cpu_SYSENTER_stack(smp_processor_id());
|
||||
|
||||
void *begin = ss;
|
||||
void *end = ss + 1;
|
||||
|
||||
if ((void *)stack < begin || (void *)stack >= end)
|
||||
return false;
|
||||
|
||||
info->type = STACK_TYPE_SYSENTER;
|
||||
info->begin = begin;
|
||||
info->end = end;
|
||||
info->next_sp = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void printk_stack_address(unsigned long address, int reliable,
|
||||
char *log_lvl)
|
||||
{
|
||||
@ -50,6 +68,28 @@ static void printk_stack_address(unsigned long address, int reliable,
|
||||
printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address);
|
||||
}
|
||||
|
||||
void show_iret_regs(struct pt_regs *regs)
|
||||
{
|
||||
printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip);
|
||||
printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss,
|
||||
regs->sp, regs->flags);
|
||||
}
|
||||
|
||||
static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
|
||||
{
|
||||
if (on_stack(info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
|
||||
IRET_FRAME_SIZE)) {
|
||||
/*
|
||||
* When an interrupt or exception occurs in entry code, the
|
||||
* full pt_regs might not have been saved yet. In that case
|
||||
* just print the iret frame.
|
||||
*/
|
||||
show_iret_regs(regs);
|
||||
}
|
||||
}
|
||||
|
||||
void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
unsigned long *stack, char *log_lvl)
|
||||
{
|
||||
@ -71,31 +111,35 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
* - task stack
|
||||
* - interrupt stack
|
||||
* - HW exception stacks (double fault, nmi, debug, mce)
|
||||
* - SYSENTER stack
|
||||
*
|
||||
* x86-32 can have up to three stacks:
|
||||
* x86-32 can have up to four stacks:
|
||||
* - task stack
|
||||
* - softirq stack
|
||||
* - hardirq stack
|
||||
* - SYSENTER stack
|
||||
*/
|
||||
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
|
||||
const char *stack_name;
|
||||
|
||||
/*
|
||||
* If we overflowed the task stack into a guard page, jump back
|
||||
* to the bottom of the usable stack.
|
||||
*/
|
||||
if (task_stack_page(task) - (void *)stack < PAGE_SIZE)
|
||||
stack = task_stack_page(task);
|
||||
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask))
|
||||
break;
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
|
||||
/*
|
||||
* We weren't on a valid stack. It's possible that
|
||||
* we overflowed a valid stack into a guard page.
|
||||
* See if the next page up is valid so that we can
|
||||
* generate some kind of backtrace if this happens.
|
||||
*/
|
||||
stack = (unsigned long *)PAGE_ALIGN((unsigned long)stack);
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask))
|
||||
break;
|
||||
}
|
||||
|
||||
stack_name = stack_type_name(stack_info.type);
|
||||
if (stack_name)
|
||||
printk("%s <%s>\n", log_lvl, stack_name);
|
||||
|
||||
if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
if (regs)
|
||||
show_regs_safe(&stack_info, regs);
|
||||
|
||||
/*
|
||||
* Scan the stack, printing any text addresses we find. At the
|
||||
@ -119,7 +163,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
|
||||
/*
|
||||
* Don't print regs->ip again if it was already printed
|
||||
* by __show_regs() below.
|
||||
* by show_regs_safe() below.
|
||||
*/
|
||||
if (regs && stack == ®s->ip)
|
||||
goto next;
|
||||
@ -155,8 +199,8 @@ next:
|
||||
|
||||
/* if the frame has entry regs, print them */
|
||||
regs = unwind_get_entry_regs(&state);
|
||||
if (regs && on_stack(&stack_info, regs, sizeof(*regs)))
|
||||
__show_regs(regs, 0);
|
||||
if (regs)
|
||||
show_regs_safe(&stack_info, regs);
|
||||
}
|
||||
|
||||
if (stack_name)
|
||||
|
@ -26,6 +26,9 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_SOFTIRQ)
|
||||
return "SOFTIRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -93,6 +96,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (task != current)
|
||||
goto unknown;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_hardirq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
|
@ -37,6 +37,9 @@ const char *stack_type_name(enum stack_type type)
|
||||
if (type == STACK_TYPE_IRQ)
|
||||
return "IRQ";
|
||||
|
||||
if (type == STACK_TYPE_SYSENTER)
|
||||
return "SYSENTER";
|
||||
|
||||
if (type >= STACK_TYPE_EXCEPTION && type <= STACK_TYPE_EXCEPTION_LAST)
|
||||
return exception_stack_names[type - STACK_TYPE_EXCEPTION];
|
||||
|
||||
@ -115,6 +118,9 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
|
||||
if (in_irq_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
if (in_sysenter_stack(stack, info))
|
||||
goto recursion_check;
|
||||
|
||||
goto unknown;
|
||||
|
||||
recursion_check:
|
||||
|
@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
|
||||
*/
|
||||
static void __init fpu__init_parse_early_param(void)
|
||||
{
|
||||
char arg[32];
|
||||
char *argptr = arg;
|
||||
int bit;
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "no387"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_FPU);
|
||||
|
||||
@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
|
||||
|
||||
if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||
|
||||
if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
|
||||
sizeof(arg)) &&
|
||||
get_option(&argptr, &bit) &&
|
||||
bit >= 0 &&
|
||||
bit < NCAPINTS * 32)
|
||||
setup_clear_cpu_cap(bit);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include <asm/fpu/xstate.h>
|
||||
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/cpufeature.h>
|
||||
|
||||
/*
|
||||
* Although we spell it out in here, the Processor Trace
|
||||
@ -36,6 +37,19 @@ static const char *xfeature_names[] =
|
||||
"unknown xstate feature" ,
|
||||
};
|
||||
|
||||
static short xsave_cpuid_features[] __initdata = {
|
||||
X86_FEATURE_FPU,
|
||||
X86_FEATURE_XMM,
|
||||
X86_FEATURE_AVX,
|
||||
X86_FEATURE_MPX,
|
||||
X86_FEATURE_MPX,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_AVX512F,
|
||||
X86_FEATURE_INTEL_PT,
|
||||
X86_FEATURE_PKU,
|
||||
};
|
||||
|
||||
/*
|
||||
* Mask of xstate features supported by the CPU and the kernel:
|
||||
*/
|
||||
@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
|
||||
void fpu__xstate_clear_all_cpu_caps(void)
|
||||
{
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XSAVES);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX2);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512F);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
|
||||
setup_clear_cpu_cap(X86_FEATURE_MPX);
|
||||
setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
|
||||
setup_clear_cpu_cap(X86_FEATURE_PKU);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
|
||||
setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
|
||||
unsigned int eax, ebx, ecx, edx;
|
||||
static int on_boot_cpu __initdata = 1;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
WARN_ON_FPU(!on_boot_cpu);
|
||||
on_boot_cpu = 0;
|
||||
@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
|
||||
goto out_disable;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear XSAVE features that are disabled in the normal CPUID.
|
||||
*/
|
||||
for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
|
||||
if (!boot_cpu_has(xsave_cpuid_features[i]))
|
||||
xfeatures_mask &= ~BIT(i);
|
||||
}
|
||||
|
||||
xfeatures_mask &= fpu__get_supported_xfeatures_mask();
|
||||
|
||||
/* Enable xstate instructions to be able to continue with initialization: */
|
||||
|
@ -212,9 +212,6 @@ ENTRY(startup_32_smp)
|
||||
#endif
|
||||
|
||||
.Ldefault_entry:
|
||||
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
|
||||
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
|
||||
X86_CR0_PG)
|
||||
movl $(CR0_STATE & ~X86_CR0_PG),%eax
|
||||
movl %eax,%cr0
|
||||
|
||||
@ -402,7 +399,7 @@ ENTRY(early_idt_handler_array)
|
||||
# 24(%rsp) error code
|
||||
i = 0
|
||||
.rept NUM_EXCEPTION_VECTORS
|
||||
.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
|
||||
.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
|
||||
pushl $0 # Dummy error code, to make stack frame uniform
|
||||
.endif
|
||||
pushl $i # 20(%esp) Vector number
|
||||
|
@ -38,11 +38,12 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#define p4d_index(x) (((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
|
||||
#define pud_index(x) (((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
||||
|
||||
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
|
||||
PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
|
||||
PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
|
||||
#endif
|
||||
L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
|
||||
.text
|
||||
@ -50,6 +51,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
|
||||
.code64
|
||||
.globl startup_64
|
||||
startup_64:
|
||||
UNWIND_HINT_EMPTY
|
||||
/*
|
||||
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
|
||||
* and someone has loaded an identity mapped page table
|
||||
@ -89,6 +91,7 @@ startup_64:
|
||||
addq $(early_top_pgt - __START_KERNEL_map), %rax
|
||||
jmp 1f
|
||||
ENTRY(secondary_startup_64)
|
||||
UNWIND_HINT_EMPTY
|
||||
/*
|
||||
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
|
||||
* and someone has loaded a mapped page table.
|
||||
@ -133,6 +136,7 @@ ENTRY(secondary_startup_64)
|
||||
movq $1f, %rax
|
||||
jmp *%rax
|
||||
1:
|
||||
UNWIND_HINT_EMPTY
|
||||
|
||||
/* Check if nx is implemented */
|
||||
movl $0x80000001, %eax
|
||||
@ -150,9 +154,6 @@ ENTRY(secondary_startup_64)
|
||||
1: wrmsr /* Make changes effective */
|
||||
|
||||
/* Setup cr0 */
|
||||
#define CR0_STATE (X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
|
||||
X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
|
||||
X86_CR0_PG)
|
||||
movl $CR0_STATE, %eax
|
||||
/* Make changes effective */
|
||||
movq %rax, %cr0
|
||||
@ -235,7 +236,7 @@ ENTRY(secondary_startup_64)
|
||||
pushq %rax # target address in negative space
|
||||
lretq
|
||||
.Lafter_lret:
|
||||
ENDPROC(secondary_startup_64)
|
||||
END(secondary_startup_64)
|
||||
|
||||
#include "verify_cpu.S"
|
||||
|
||||
@ -247,6 +248,7 @@ ENDPROC(secondary_startup_64)
|
||||
*/
|
||||
ENTRY(start_cpu0)
|
||||
movq initial_stack(%rip), %rsp
|
||||
UNWIND_HINT_EMPTY
|
||||
jmp .Ljump_to_C_code
|
||||
ENDPROC(start_cpu0)
|
||||
#endif
|
||||
@ -266,26 +268,24 @@ ENDPROC(start_cpu0)
|
||||
.quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
|
||||
__FINITDATA
|
||||
|
||||
bad_address:
|
||||
jmp bad_address
|
||||
|
||||
__INIT
|
||||
ENTRY(early_idt_handler_array)
|
||||
# 104(%rsp) %rflags
|
||||
# 96(%rsp) %cs
|
||||
# 88(%rsp) %rip
|
||||
# 80(%rsp) error code
|
||||
i = 0
|
||||
.rept NUM_EXCEPTION_VECTORS
|
||||
.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
|
||||
pushq $0 # Dummy error code, to make stack frame uniform
|
||||
.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
|
||||
UNWIND_HINT_IRET_REGS
|
||||
pushq $0 # Dummy error code, to make stack frame uniform
|
||||
.else
|
||||
UNWIND_HINT_IRET_REGS offset=8
|
||||
.endif
|
||||
pushq $i # 72(%rsp) Vector number
|
||||
jmp early_idt_handler_common
|
||||
UNWIND_HINT_IRET_REGS
|
||||
i = i + 1
|
||||
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
|
||||
.endr
|
||||
ENDPROC(early_idt_handler_array)
|
||||
UNWIND_HINT_IRET_REGS offset=16
|
||||
END(early_idt_handler_array)
|
||||
|
||||
early_idt_handler_common:
|
||||
/*
|
||||
@ -313,6 +313,7 @@ early_idt_handler_common:
|
||||
pushq %r13 /* pt_regs->r13 */
|
||||
pushq %r14 /* pt_regs->r14 */
|
||||
pushq %r15 /* pt_regs->r15 */
|
||||
UNWIND_HINT_REGS
|
||||
|
||||
cmpq $14,%rsi /* Page fault? */
|
||||
jnz 10f
|
||||
@ -327,8 +328,8 @@ early_idt_handler_common:
|
||||
|
||||
20:
|
||||
decl early_recursion_flag(%rip)
|
||||
jmp restore_regs_and_iret
|
||||
ENDPROC(early_idt_handler_common)
|
||||
jmp restore_regs_and_return_to_kernel
|
||||
END(early_idt_handler_common)
|
||||
|
||||
__INITDATA
|
||||
|
||||
@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)
|
||||
|
||||
.data
|
||||
|
||||
#ifndef CONFIG_XEN
|
||||
NEXT_PAGE(init_top_pgt)
|
||||
.fill 512,8,0
|
||||
#else
|
||||
#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
|
||||
NEXT_PAGE(init_top_pgt)
|
||||
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
|
||||
.org init_top_pgt + PGD_PAGE_OFFSET*8, 0
|
||||
@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
|
||||
* Don't set NX because code runs from these pages.
|
||||
*/
|
||||
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
|
||||
#else
|
||||
NEXT_PAGE(init_top_pgt)
|
||||
.fill 512,8,0
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_5LEVEL
|
||||
@ -435,7 +436,7 @@ ENTRY(phys_base)
|
||||
EXPORT_SYMBOL(phys_base)
|
||||
|
||||
#include "../../x86/xen/xen-head.S"
|
||||
|
||||
|
||||
__PAGE_ALIGNED_BSS
|
||||
NEXT_PAGE(empty_zero_page)
|
||||
.skip PAGE_SIZE
|
||||
|
@ -67,7 +67,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
||||
* because the ->io_bitmap_max value must match the bitmap
|
||||
* contents:
|
||||
*/
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
tss = &per_cpu(cpu_tss_rw, get_cpu());
|
||||
|
||||
if (turn_on)
|
||||
bitmap_clear(t->io_bitmap_ptr, from, num);
|
||||
|
@ -219,18 +219,6 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
|
||||
/* high bit used in ret_from_ code */
|
||||
unsigned vector = ~regs->orig_ax;
|
||||
|
||||
/*
|
||||
* NB: Unlike exception entries, IRQ entries do not reliably
|
||||
* handle context tracking in the low-level entry code. This is
|
||||
* because syscall entries execute briefly with IRQs on before
|
||||
* updating context tracking state, so we can take an IRQ from
|
||||
* kernel mode with CONTEXT_USER. The low-level entry code only
|
||||
* updates the context if we came from user mode, so we won't
|
||||
* switch to CONTEXT_KERNEL. We'll fix that once the syscall
|
||||
* code is cleaned up enough that we can cleanly defer enabling
|
||||
* IRQs.
|
||||
*/
|
||||
|
||||
entering_irq();
|
||||
|
||||
/* entering_irq() tells RCU that we're not quiescent. Check it. */
|
||||
|
@ -57,10 +57,10 @@ static inline void stack_overflow_check(struct pt_regs *regs)
|
||||
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
|
||||
return;
|
||||
|
||||
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx)\n",
|
||||
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
|
||||
current->comm, curbase, regs->sp,
|
||||
irq_stack_top, irq_stack_bottom,
|
||||
estack_top, estack_bottom);
|
||||
estack_top, estack_bottom, (void *)regs->ip);
|
||||
|
||||
if (sysctl_panic_on_stackoverflow)
|
||||
panic("low stack detected by irq handler - check messages\n");
|
||||
|
@ -544,12 +544,12 @@ static uint32_t __init kvm_detect(void)
|
||||
return kvm_cpuid_base();
|
||||
}
|
||||
|
||||
const struct hypervisor_x86 x86_hyper_kvm __refconst = {
|
||||
const __initconst struct hypervisor_x86 x86_hyper_kvm = {
|
||||
.name = "KVM",
|
||||
.detect = kvm_detect,
|
||||
.x2apic_available = kvm_para_available,
|
||||
.type = X86_HYPER_KVM,
|
||||
.init.x2apic_available = kvm_para_available,
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(x86_hyper_kvm);
|
||||
|
||||
static __init int activate_jump_labels(void)
|
||||
{
|
||||
|
@ -103,7 +103,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt)
|
||||
static void install_ldt(struct mm_struct *current_mm,
|
||||
struct ldt_struct *ldt)
|
||||
{
|
||||
/* Synchronizes with lockless_dereference in load_mm_ldt. */
|
||||
/* Synchronizes with READ_ONCE in load_mm_ldt. */
|
||||
smp_store_release(¤t_mm->context.ldt, ldt);
|
||||
|
||||
/* Activate the LDT for all CPUs using current_mm. */
|
||||
|
@ -10,7 +10,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq; popq %rax");
|
||||
DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
|
||||
DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
|
||||
DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
|
||||
DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
|
||||
DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
|
||||
|
||||
DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
|
||||
@ -60,7 +59,6 @@ unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
|
||||
PATCH_SITE(pv_mmu_ops, read_cr2);
|
||||
PATCH_SITE(pv_mmu_ops, read_cr3);
|
||||
PATCH_SITE(pv_mmu_ops, write_cr3);
|
||||
PATCH_SITE(pv_mmu_ops, flush_tlb_single);
|
||||
PATCH_SITE(pv_cpu_ops, wbinvd);
|
||||
#if defined(CONFIG_PARAVIRT_SPINLOCKS)
|
||||
case PARAVIRT_PATCH(pv_lock_ops.queued_spin_unlock):
|
||||
|
@ -47,9 +47,25 @@
|
||||
* section. Since TSS's are completely CPU-local, we want them
|
||||
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
|
||||
*/
|
||||
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
||||
__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
|
||||
.x86_tss = {
|
||||
.sp0 = TOP_OF_INIT_STACK,
|
||||
/*
|
||||
* .sp0 is only used when entering ring 0 from a lower
|
||||
* privilege level. Since the init task never runs anything
|
||||
* but ring 0 code, there is no need for a valid value here.
|
||||
* Poison it.
|
||||
*/
|
||||
.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* .sp1 is cpu_current_top_of_stack. The init task never
|
||||
* runs user code, but cpu_current_top_of_stack should still
|
||||
* be well defined before the first context switch.
|
||||
*/
|
||||
.sp1 = TOP_OF_INIT_STACK,
|
||||
#endif
|
||||
|
||||
#ifdef CONFIG_X86_32
|
||||
.ss0 = __KERNEL_DS,
|
||||
.ss1 = __KERNEL_CS,
|
||||
@ -65,11 +81,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
|
||||
*/
|
||||
.io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
|
||||
#endif
|
||||
#ifdef CONFIG_X86_32
|
||||
.SYSENTER_stack_canary = STACK_END_MAGIC,
|
||||
#endif
|
||||
};
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_tss);
|
||||
EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
|
||||
|
||||
DEFINE_PER_CPU(bool, __tss_limit_invalid);
|
||||
EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid);
|
||||
@ -98,7 +111,7 @@ void exit_thread(struct task_struct *tsk)
|
||||
struct fpu *fpu = &t->fpu;
|
||||
|
||||
if (bp) {
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
|
||||
|
||||
t->io_bitmap_ptr = NULL;
|
||||
clear_thread_flag(TIF_IO_BITMAP);
|
||||
|
@ -234,7 +234,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
struct fpu *prev_fpu = &prev->fpu;
|
||||
struct fpu *next_fpu = &next->fpu;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
|
||||
|
||||
@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
|
||||
/*
|
||||
* Reload esp0 and cpu_current_top_of_stack. This changes
|
||||
* current_thread_info().
|
||||
* current_thread_info(). Refresh the SYSENTER configuration in
|
||||
* case prev or next is vm86.
|
||||
*/
|
||||
load_sp0(tss, next);
|
||||
update_sp0(next_p);
|
||||
refresh_sysenter_cs(next);
|
||||
this_cpu_write(cpu_current_top_of_stack,
|
||||
(unsigned long)task_stack_page(next_p) +
|
||||
THREAD_SIZE);
|
||||
|
@ -69,9 +69,8 @@ void __show_regs(struct pt_regs *regs, int all)
|
||||
unsigned int fsindex, gsindex;
|
||||
unsigned int ds, cs, es;
|
||||
|
||||
printk(KERN_DEFAULT "RIP: %04lx:%pS\n", regs->cs, (void *)regs->ip);
|
||||
printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx", regs->ss,
|
||||
regs->sp, regs->flags);
|
||||
show_iret_regs(regs);
|
||||
|
||||
if (regs->orig_ax != -1)
|
||||
pr_cont(" ORIG_RAX: %016lx\n", regs->orig_ax);
|
||||
else
|
||||
@ -88,6 +87,9 @@ void __show_regs(struct pt_regs *regs, int all)
|
||||
printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
|
||||
regs->r13, regs->r14, regs->r15);
|
||||
|
||||
if (!all)
|
||||
return;
|
||||
|
||||
asm("movl %%ds,%0" : "=r" (ds));
|
||||
asm("movl %%cs,%0" : "=r" (cs));
|
||||
asm("movl %%es,%0" : "=r" (es));
|
||||
@ -98,9 +100,6 @@ void __show_regs(struct pt_regs *regs, int all)
|
||||
rdmsrl(MSR_GS_BASE, gs);
|
||||
rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
|
||||
|
||||
if (!all)
|
||||
return;
|
||||
|
||||
cr0 = read_cr0();
|
||||
cr2 = read_cr2();
|
||||
cr3 = __read_cr3();
|
||||
@ -274,7 +273,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
|
||||
struct inactive_task_frame *frame;
|
||||
struct task_struct *me = current;
|
||||
|
||||
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
|
||||
childregs = task_pt_regs(p);
|
||||
fork_frame = container_of(childregs, struct fork_frame, regs);
|
||||
frame = &fork_frame->frame;
|
||||
@ -401,7 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
struct fpu *prev_fpu = &prev->fpu;
|
||||
struct fpu *next_fpu = &next->fpu;
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
|
||||
struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu);
|
||||
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
|
||||
this_cpu_read(irq_count) != -1);
|
||||
@ -463,9 +461,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
||||
* Switch the PDA and FPU contexts.
|
||||
*/
|
||||
this_cpu_write(current_task, next_p);
|
||||
this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
|
||||
|
||||
/* Reload esp0 and ss1. This changes current_thread_info(). */
|
||||
load_sp0(tss, next);
|
||||
/* Reload sp0. */
|
||||
update_sp0(next_p);
|
||||
|
||||
/*
|
||||
* Now maybe reload the debug registers and handle I/O bitmaps
|
||||
|
@ -962,8 +962,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||
#ifdef CONFIG_X86_32
|
||||
/* Stack for startup_32 can be just as for start_secondary onwards */
|
||||
irq_ctx_init(cpu);
|
||||
per_cpu(cpu_current_top_of_stack, cpu) =
|
||||
(unsigned long)task_stack_page(idle) + THREAD_SIZE;
|
||||
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
|
||||
#else
|
||||
initial_gs = per_cpu_offset(cpu);
|
||||
#endif
|
||||
|
@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
|
||||
* will catch asm bugs and any attempt to use ist_preempt_enable
|
||||
* from double_fault.
|
||||
*/
|
||||
BUG_ON((unsigned long)(current_top_of_stack() -
|
||||
current_stack_pointer) >= THREAD_SIZE);
|
||||
BUG_ON(!on_thread_stack());
|
||||
|
||||
preempt_enable_no_resched();
|
||||
}
|
||||
@ -349,9 +348,15 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
|
||||
/*
|
||||
* If IRET takes a non-IST fault on the espfix64 stack, then we
|
||||
* end up promoting it to a doublefault. In that case, modify
|
||||
* the stack to make it look like we just entered the #GP
|
||||
* handler from user space, similar to bad_iret.
|
||||
* end up promoting it to a doublefault. In that case, take
|
||||
* advantage of the fact that we're not using the normal (TSS.sp0)
|
||||
* stack right now. We can write a fake #GP(0) frame at TSS.sp0
|
||||
* and then modify our own IRET frame so that, when we return,
|
||||
* we land directly at the #GP(0) vector with the stack already
|
||||
* set up according to its expectations.
|
||||
*
|
||||
* The net result is that our #GP handler will think that we
|
||||
* entered from usermode with the bad user context.
|
||||
*
|
||||
* No need for ist_enter here because we don't use RCU.
|
||||
*/
|
||||
@ -359,13 +364,26 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
regs->cs == __KERNEL_CS &&
|
||||
regs->ip == (unsigned long)native_irq_return_iret)
|
||||
{
|
||||
struct pt_regs *normal_regs = task_pt_regs(current);
|
||||
struct pt_regs *gpregs = (struct pt_regs *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
||||
|
||||
/* Fake a #GP(0) from userspace. */
|
||||
memmove(&normal_regs->ip, (void *)regs->sp, 5*8);
|
||||
normal_regs->orig_ax = 0; /* Missing (lost) #GP error code */
|
||||
/*
|
||||
* regs->sp points to the failing IRET frame on the
|
||||
* ESPFIX64 stack. Copy it to the entry stack. This fills
|
||||
* in gpregs->ss through gpregs->ip.
|
||||
*
|
||||
*/
|
||||
memmove(&gpregs->ip, (void *)regs->sp, 5*8);
|
||||
gpregs->orig_ax = 0; /* Missing (lost) #GP error code */
|
||||
|
||||
/*
|
||||
* Adjust our frame so that we return straight to the #GP
|
||||
* vector with the expected RSP value. This is safe because
|
||||
* we won't enable interupts or schedule before we invoke
|
||||
* general_protection, so nothing will clobber the stack
|
||||
* frame we just set up.
|
||||
*/
|
||||
regs->ip = (unsigned long)general_protection;
|
||||
regs->sp = (unsigned long)&normal_regs->orig_ax;
|
||||
regs->sp = (unsigned long)&gpregs->orig_ax;
|
||||
|
||||
return;
|
||||
}
|
||||
@ -390,7 +408,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
||||
*
|
||||
* Processors update CR2 whenever a page fault is detected. If a
|
||||
* second page fault occurs while an earlier page fault is being
|
||||
* deliv- ered, the faulting linear address of the second fault will
|
||||
* delivered, the faulting linear address of the second fault will
|
||||
* overwrite the contents of CR2 (replacing the previous
|
||||
* address). These updates to CR2 occur even if the page fault
|
||||
* results in a double fault or occurs during the delivery of a
|
||||
@ -601,14 +619,15 @@ NOKPROBE_SYMBOL(do_int3);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
/*
|
||||
* Help handler running on IST stack to switch off the IST stack if the
|
||||
* interrupted code was in user mode. The actual stack switch is done in
|
||||
* entry_64.S
|
||||
* Help handler running on a per-cpu (IST or entry trampoline) stack
|
||||
* to switch to the normal thread stack if the interrupted code was in
|
||||
* user mode. The actual stack switch is done in entry_64.S
|
||||
*/
|
||||
asmlinkage __visible notrace struct pt_regs *sync_regs(struct pt_regs *eregs)
|
||||
{
|
||||
struct pt_regs *regs = task_pt_regs(current);
|
||||
*regs = *eregs;
|
||||
struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
|
||||
if (regs != eregs)
|
||||
*regs = *eregs;
|
||||
return regs;
|
||||
}
|
||||
NOKPROBE_SYMBOL(sync_regs);
|
||||
@ -624,13 +643,13 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
|
||||
/*
|
||||
* This is called from entry_64.S early in handling a fault
|
||||
* caused by a bad iret to user mode. To handle the fault
|
||||
* correctly, we want move our stack frame to task_pt_regs
|
||||
* and we want to pretend that the exception came from the
|
||||
* iret target.
|
||||
* correctly, we want to move our stack frame to where it would
|
||||
* be had we entered directly on the entry stack (rather than
|
||||
* just below the IRET frame) and we want to pretend that the
|
||||
* exception came from the IRET target.
|
||||
*/
|
||||
struct bad_iret_stack *new_stack =
|
||||
container_of(task_pt_regs(current),
|
||||
struct bad_iret_stack, regs);
|
||||
(struct bad_iret_stack *)this_cpu_read(cpu_tss_rw.x86_tss.sp0) - 1;
|
||||
|
||||
/* Copy the IRET target to the new stack. */
|
||||
memmove(&new_stack->regs.ip, (void *)s->regs.sp, 5*8);
|
||||
@ -795,14 +814,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
|
||||
debug_stack_usage_dec();
|
||||
|
||||
exit:
|
||||
#if defined(CONFIG_X86_32)
|
||||
/*
|
||||
* This is the most likely code path that involves non-trivial use
|
||||
* of the SYSENTER stack. Check that we haven't overrun it.
|
||||
*/
|
||||
WARN(this_cpu_read(cpu_tss.SYSENTER_stack_canary) != STACK_END_MAGIC,
|
||||
"Overran or corrupted SYSENTER stack\n");
|
||||
#endif
|
||||
ist_exit(regs);
|
||||
}
|
||||
NOKPROBE_SYMBOL(do_debug);
|
||||
@ -929,6 +940,9 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
||||
|
||||
void __init trap_init(void)
|
||||
{
|
||||
/* Init cpu_entry_area before IST entries are set up */
|
||||
setup_cpu_entry_areas();
|
||||
|
||||
idt_setup_traps();
|
||||
|
||||
/*
|
||||
|
@ -253,22 +253,15 @@ unsigned long *unwind_get_return_address_ptr(struct unwind_state *state)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static bool stack_access_ok(struct unwind_state *state, unsigned long addr,
|
||||
static bool stack_access_ok(struct unwind_state *state, unsigned long _addr,
|
||||
size_t len)
|
||||
{
|
||||
struct stack_info *info = &state->stack_info;
|
||||
void *addr = (void *)_addr;
|
||||
|
||||
/*
|
||||
* If the address isn't on the current stack, switch to the next one.
|
||||
*
|
||||
* We may have to traverse multiple stacks to deal with the possibility
|
||||
* that info->next_sp could point to an empty stack and the address
|
||||
* could be on a subsequent stack.
|
||||
*/
|
||||
while (!on_stack(info, (void *)addr, len))
|
||||
if (get_stack_info(info->next_sp, state->task, info,
|
||||
&state->stack_mask))
|
||||
return false;
|
||||
if (!on_stack(info, addr, len) &&
|
||||
(get_stack_info(addr, state->task, info, &state->stack_mask)))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -283,42 +276,32 @@ static bool deref_stack_reg(struct unwind_state *state, unsigned long addr,
|
||||
return true;
|
||||
}
|
||||
|
||||
#define REGS_SIZE (sizeof(struct pt_regs))
|
||||
#define SP_OFFSET (offsetof(struct pt_regs, sp))
|
||||
#define IRET_REGS_SIZE (REGS_SIZE - offsetof(struct pt_regs, ip))
|
||||
#define IRET_SP_OFFSET (SP_OFFSET - offsetof(struct pt_regs, ip))
|
||||
|
||||
static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
|
||||
unsigned long *ip, unsigned long *sp, bool full)
|
||||
unsigned long *ip, unsigned long *sp)
|
||||
{
|
||||
size_t regs_size = full ? REGS_SIZE : IRET_REGS_SIZE;
|
||||
size_t sp_offset = full ? SP_OFFSET : IRET_SP_OFFSET;
|
||||
struct pt_regs *regs = (struct pt_regs *)(addr + regs_size - REGS_SIZE);
|
||||
struct pt_regs *regs = (struct pt_regs *)addr;
|
||||
|
||||
if (IS_ENABLED(CONFIG_X86_64)) {
|
||||
if (!stack_access_ok(state, addr, regs_size))
|
||||
return false;
|
||||
/* x86-32 support will be more complicated due to the ®s->sp hack */
|
||||
BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_32));
|
||||
|
||||
*ip = regs->ip;
|
||||
*sp = regs->sp;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!stack_access_ok(state, addr, sp_offset))
|
||||
if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
|
||||
return false;
|
||||
|
||||
*ip = regs->ip;
|
||||
*sp = regs->sp;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (user_mode(regs)) {
|
||||
if (!stack_access_ok(state, addr + sp_offset,
|
||||
REGS_SIZE - SP_OFFSET))
|
||||
return false;
|
||||
static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr,
|
||||
unsigned long *ip, unsigned long *sp)
|
||||
{
|
||||
struct pt_regs *regs = (void *)addr - IRET_FRAME_OFFSET;
|
||||
|
||||
*sp = regs->sp;
|
||||
} else
|
||||
*sp = (unsigned long)®s->sp;
|
||||
if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
|
||||
return false;
|
||||
|
||||
*ip = regs->ip;
|
||||
*sp = regs->sp;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -327,7 +310,6 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
unsigned long ip_p, sp, orig_ip, prev_sp = state->sp;
|
||||
enum stack_type prev_type = state->stack_info.type;
|
||||
struct orc_entry *orc;
|
||||
struct pt_regs *ptregs;
|
||||
bool indirect = false;
|
||||
|
||||
if (unwind_done(state))
|
||||
@ -435,7 +417,7 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
break;
|
||||
|
||||
case ORC_TYPE_REGS:
|
||||
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, true)) {
|
||||
if (!deref_stack_regs(state, sp, &state->ip, &state->sp)) {
|
||||
orc_warn("can't dereference registers at %p for ip %pB\n",
|
||||
(void *)sp, (void *)orig_ip);
|
||||
goto done;
|
||||
@ -447,20 +429,14 @@ bool unwind_next_frame(struct unwind_state *state)
|
||||
break;
|
||||
|
||||
case ORC_TYPE_REGS_IRET:
|
||||
if (!deref_stack_regs(state, sp, &state->ip, &state->sp, false)) {
|
||||
if (!deref_stack_iret_regs(state, sp, &state->ip, &state->sp)) {
|
||||
orc_warn("can't dereference iret registers at %p for ip %pB\n",
|
||||
(void *)sp, (void *)orig_ip);
|
||||
goto done;
|
||||
}
|
||||
|
||||
ptregs = container_of((void *)sp, struct pt_regs, ip);
|
||||
if ((unsigned long)ptregs >= prev_sp &&
|
||||
on_stack(&state->stack_info, ptregs, REGS_SIZE)) {
|
||||
state->regs = ptregs;
|
||||
state->full_regs = false;
|
||||
} else
|
||||
state->regs = NULL;
|
||||
|
||||
state->regs = (void *)sp - IRET_FRAME_OFFSET;
|
||||
state->full_regs = false;
|
||||
state->signal = true;
|
||||
break;
|
||||
|
||||
@ -553,8 +529,18 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
|
||||
}
|
||||
|
||||
if (get_stack_info((unsigned long *)state->sp, state->task,
|
||||
&state->stack_info, &state->stack_mask))
|
||||
return;
|
||||
&state->stack_info, &state->stack_mask)) {
|
||||
/*
|
||||
* We weren't on a valid stack. It's possible that
|
||||
* we overflowed a valid stack into a guard page.
|
||||
* See if the next page up is valid so that we can
|
||||
* generate some kind of backtrace if this happens.
|
||||
*/
|
||||
void *next_page = (void *)PAGE_ALIGN((unsigned long)state->sp);
|
||||
if (get_stack_info(next_page, state->task, &state->stack_info,
|
||||
&state->stack_mask))
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller can provide the address of the first frame directly
|
||||
|
@ -33,7 +33,7 @@
|
||||
#include <asm/cpufeatures.h>
|
||||
#include <asm/msr-index.h>
|
||||
|
||||
verify_cpu:
|
||||
ENTRY(verify_cpu)
|
||||
pushf # Save caller passed flags
|
||||
push $0 # Kill any dangerous flags
|
||||
popf
|
||||
@ -139,3 +139,4 @@ verify_cpu:
|
||||
popf # Restore caller passed flags
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
ENDPROC(verify_cpu)
|
||||
|
@ -55,6 +55,7 @@
|
||||
#include <asm/irq.h>
|
||||
#include <asm/traps.h>
|
||||
#include <asm/vm86.h>
|
||||
#include <asm/switch_to.h>
|
||||
|
||||
/*
|
||||
* Known problems:
|
||||
@ -94,7 +95,6 @@
|
||||
|
||||
void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
||||
{
|
||||
struct tss_struct *tss;
|
||||
struct task_struct *tsk = current;
|
||||
struct vm86plus_struct __user *user;
|
||||
struct vm86 *vm86 = current->thread.vm86;
|
||||
@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
|
||||
do_exit(SIGSEGV);
|
||||
}
|
||||
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
preempt_disable();
|
||||
tsk->thread.sp0 = vm86->saved_sp0;
|
||||
tsk->thread.sysenter_cs = __KERNEL_CS;
|
||||
load_sp0(tss, &tsk->thread);
|
||||
update_sp0(tsk);
|
||||
refresh_sysenter_cs(&tsk->thread);
|
||||
vm86->saved_sp0 = 0;
|
||||
put_cpu();
|
||||
preempt_enable();
|
||||
|
||||
memcpy(®s->pt, &vm86->regs32, sizeof(struct pt_regs));
|
||||
|
||||
@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
|
||||
|
||||
static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
||||
{
|
||||
struct tss_struct *tss;
|
||||
struct task_struct *tsk = current;
|
||||
struct vm86 *vm86 = tsk->thread.vm86;
|
||||
struct kernel_vm86_regs vm86regs;
|
||||
@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
|
||||
vm86->saved_sp0 = tsk->thread.sp0;
|
||||
lazy_save_gs(vm86->regs32.gs);
|
||||
|
||||
tss = &per_cpu(cpu_tss, get_cpu());
|
||||
/* make room for real-mode segments */
|
||||
preempt_disable();
|
||||
tsk->thread.sp0 += 16;
|
||||
|
||||
if (static_cpu_has(X86_FEATURE_SEP))
|
||||
if (static_cpu_has(X86_FEATURE_SEP)) {
|
||||
tsk->thread.sysenter_cs = 0;
|
||||
refresh_sysenter_cs(&tsk->thread);
|
||||
}
|
||||
|
||||
load_sp0(tss, &tsk->thread);
|
||||
put_cpu();
|
||||
update_sp0(tsk);
|
||||
preempt_enable();
|
||||
|
||||
if (vm86->flags & VM86_SCREEN_BITMAP)
|
||||
mark_screen_rdonly(tsk->mm);
|
||||
|
@ -107,6 +107,15 @@ SECTIONS
|
||||
SOFTIRQENTRY_TEXT
|
||||
*(.fixup)
|
||||
*(.gnu.warning)
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
_entry_trampoline = .;
|
||||
*(.entry_trampoline)
|
||||
. = ALIGN(PAGE_SIZE);
|
||||
ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big");
|
||||
#endif
|
||||
|
||||
/* End of text section */
|
||||
_etext = .;
|
||||
} :text = 0x9090
|
||||
|
@ -28,6 +28,8 @@ void x86_init_noop(void) { }
|
||||
void __init x86_init_uint_noop(unsigned int unused) { }
|
||||
int __init iommu_init_noop(void) { return 0; }
|
||||
void iommu_shutdown_noop(void) { }
|
||||
bool __init bool_x86_init_noop(void) { return false; }
|
||||
void x86_op_int_noop(int cpu) { }
|
||||
|
||||
/*
|
||||
* The platform setup functions are preset with the default functions
|
||||
@ -81,6 +83,12 @@ struct x86_init_ops x86_init __initdata = {
|
||||
.init_irq = x86_default_pci_init_irq,
|
||||
.fixup_irqs = x86_default_pci_fixup_irqs,
|
||||
},
|
||||
|
||||
.hyper = {
|
||||
.init_platform = x86_init_noop,
|
||||
.x2apic_available = bool_x86_init_noop,
|
||||
.init_mem_mapping = x86_init_noop,
|
||||
},
|
||||
};
|
||||
|
||||
struct x86_cpuinit_ops x86_cpuinit = {
|
||||
@ -101,6 +109,7 @@ struct x86_platform_ops x86_platform __ro_after_init = {
|
||||
.get_nmi_reason = default_get_nmi_reason,
|
||||
.save_sched_clock_state = tsc_save_sched_clock_state,
|
||||
.restore_sched_clock_state = tsc_restore_sched_clock_state,
|
||||
.hyper.pin_vcpu = x86_op_int_noop,
|
||||
};
|
||||
|
||||
EXPORT_SYMBOL_GPL(x86_platform);
|
||||
|
@ -5476,13 +5476,13 @@ int kvm_mmu_module_init(void)
|
||||
|
||||
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
|
||||
sizeof(struct pte_list_desc),
|
||||
0, 0, NULL);
|
||||
0, SLAB_ACCOUNT, NULL);
|
||||
if (!pte_list_desc_cache)
|
||||
goto nomem;
|
||||
|
||||
mmu_page_header_cache = kmem_cache_create("kvm_mmu_page_header",
|
||||
sizeof(struct kvm_mmu_page),
|
||||
0, 0, NULL);
|
||||
0, SLAB_ACCOUNT, NULL);
|
||||
if (!mmu_page_header_cache)
|
||||
goto nomem;
|
||||
|
||||
|
@ -2295,7 +2295,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
||||
* processors. See 22.2.4.
|
||||
*/
|
||||
vmcs_writel(HOST_TR_BASE,
|
||||
(unsigned long)this_cpu_ptr(&cpu_tss));
|
||||
(unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
|
||||
|
||||
/*
|
||||
|
@ -107,10 +107,10 @@ static void delay_mwaitx(unsigned long __loops)
|
||||
delay = min_t(u64, MWAITX_MAX_LOOPS, loops);
|
||||
|
||||
/*
|
||||
* Use cpu_tss as a cacheline-aligned, seldomly
|
||||
* Use cpu_tss_rw as a cacheline-aligned, seldomly
|
||||
* accessed per-cpu variable as the monitor target.
|
||||
*/
|
||||
__monitorx(raw_cpu_ptr(&cpu_tss), 0, 0);
|
||||
__monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0);
|
||||
|
||||
/*
|
||||
* AMD, like Intel, supports the EAX hint and EAX=0xf
|
||||
|
@ -29,26 +29,6 @@
|
||||
#define CREATE_TRACE_POINTS
|
||||
#include <asm/trace/exceptions.h>
|
||||
|
||||
/*
|
||||
* Page fault error code bits:
|
||||
*
|
||||
* bit 0 == 0: no page found 1: protection fault
|
||||
* bit 1 == 0: read access 1: write access
|
||||
* bit 2 == 0: kernel-mode access 1: user-mode access
|
||||
* bit 3 == 1: use of reserved bit detected
|
||||
* bit 4 == 1: fault was an instruction fetch
|
||||
* bit 5 == 1: protection keys block access
|
||||
*/
|
||||
enum x86_pf_error_code {
|
||||
|
||||
PF_PROT = 1 << 0,
|
||||
PF_WRITE = 1 << 1,
|
||||
PF_USER = 1 << 2,
|
||||
PF_RSVD = 1 << 3,
|
||||
PF_INSTR = 1 << 4,
|
||||
PF_PK = 1 << 5,
|
||||
};
|
||||
|
||||
/*
|
||||
* Returns 0 if mmiotrace is disabled, or if the fault is not
|
||||
* handled by mmiotrace:
|
||||
@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
||||
* If it was a exec (instruction fetch) fault on NX page, then
|
||||
* do not ignore the fault:
|
||||
*/
|
||||
if (error_code & PF_INSTR)
|
||||
if (error_code & X86_PF_INSTR)
|
||||
return 0;
|
||||
|
||||
instr = (void *)convert_ip_to_linear(current, regs);
|
||||
@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
||||
* siginfo so userspace can discover which protection key was set
|
||||
* on the PTE.
|
||||
*
|
||||
* If we get here, we know that the hardware signaled a PF_PK
|
||||
* If we get here, we know that the hardware signaled a X86_PF_PK
|
||||
* fault and that there was a VMA once we got in the fault
|
||||
* handler. It does *not* guarantee that the VMA we find here
|
||||
* was the one that we faulted on.
|
||||
@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
|
||||
/*
|
||||
* force_sig_info_fault() is called from a number of
|
||||
* contexts, some of which have a VMA and some of which
|
||||
* do not. The PF_PK handing happens after we have a
|
||||
* do not. The X86_PF_PK handing happens after we have a
|
||||
* valid VMA, so we should never reach this without a
|
||||
* valid VMA.
|
||||
*/
|
||||
@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
||||
if (!oops_may_print())
|
||||
return;
|
||||
|
||||
if (error_code & PF_INSTR) {
|
||||
if (error_code & X86_PF_INSTR) {
|
||||
unsigned int level;
|
||||
pgd_t *pgd;
|
||||
pte_t *pte;
|
||||
@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
||||
*/
|
||||
if (current->thread.sig_on_uaccess_err && signal) {
|
||||
tsk->thread.trap_nr = X86_TRAP_PF;
|
||||
tsk->thread.error_code = error_code | PF_USER;
|
||||
tsk->thread.error_code = error_code | X86_PF_USER;
|
||||
tsk->thread.cr2 = address;
|
||||
|
||||
/* XXX: hwpoison faults will set the wrong code. */
|
||||
@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
||||
struct task_struct *tsk = current;
|
||||
|
||||
/* User mode accesses just cause a SIGSEGV */
|
||||
if (error_code & PF_USER) {
|
||||
if (error_code & X86_PF_USER) {
|
||||
/*
|
||||
* It's possible to have interrupts off here:
|
||||
*/
|
||||
@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
||||
* Instruction fetch faults in the vsyscall page might need
|
||||
* emulation.
|
||||
*/
|
||||
if (unlikely((error_code & PF_INSTR) &&
|
||||
if (unlikely((error_code & X86_PF_INSTR) &&
|
||||
((address & ~0xfff) == VSYSCALL_ADDR))) {
|
||||
if (emulate_vsyscall(regs, address))
|
||||
return;
|
||||
@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
||||
* are always protection faults.
|
||||
*/
|
||||
if (address >= TASK_SIZE_MAX)
|
||||
error_code |= PF_PROT;
|
||||
error_code |= X86_PF_PROT;
|
||||
|
||||
if (likely(show_unhandled_signals))
|
||||
show_signal_msg(regs, error_code, address, tsk);
|
||||
@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
|
||||
|
||||
if (!boot_cpu_has(X86_FEATURE_OSPKE))
|
||||
return false;
|
||||
if (error_code & PF_PK)
|
||||
if (error_code & X86_PF_PK)
|
||||
return true;
|
||||
/* this checks permission keys on the VMA: */
|
||||
if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
|
||||
(error_code & PF_INSTR), foreign))
|
||||
if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
|
||||
(error_code & X86_PF_INSTR), foreign))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
|
||||
int code = BUS_ADRERR;
|
||||
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!(error_code & PF_USER)) {
|
||||
if (!(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
|
||||
return;
|
||||
}
|
||||
@ -1053,14 +1033,14 @@ static noinline void
|
||||
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
|
||||
unsigned long address, u32 *pkey, unsigned int fault)
|
||||
{
|
||||
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
|
||||
if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (fault & VM_FAULT_OOM) {
|
||||
/* Kernel mode? Handle exceptions or die: */
|
||||
if (!(error_code & PF_USER)) {
|
||||
if (!(error_code & X86_PF_USER)) {
|
||||
no_context(regs, error_code, address,
|
||||
SIGSEGV, SEGV_MAPERR);
|
||||
return;
|
||||
@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
|
||||
|
||||
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
|
||||
{
|
||||
if ((error_code & PF_WRITE) && !pte_write(*pte))
|
||||
if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
|
||||
return 0;
|
||||
|
||||
if ((error_code & PF_INSTR) && !pte_exec(*pte))
|
||||
if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
|
||||
return 0;
|
||||
/*
|
||||
* Note: We do not do lazy flushing on protection key
|
||||
* changes, so no spurious fault will ever set PF_PK.
|
||||
* changes, so no spurious fault will ever set X86_PF_PK.
|
||||
*/
|
||||
if ((error_code & PF_PK))
|
||||
if ((error_code & X86_PF_PK))
|
||||
return 1;
|
||||
|
||||
return 1;
|
||||
@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
|
||||
* change, so user accesses are not expected to cause spurious
|
||||
* faults.
|
||||
*/
|
||||
if (error_code != (PF_WRITE | PF_PROT)
|
||||
&& error_code != (PF_INSTR | PF_PROT))
|
||||
if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
|
||||
error_code != (X86_PF_INSTR | X86_PF_PROT))
|
||||
return 0;
|
||||
|
||||
pgd = init_mm.pgd + pgd_index(address);
|
||||
@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
|
||||
* always an unconditional error and can never result in
|
||||
* a follow-up action to resolve the fault, like a COW.
|
||||
*/
|
||||
if (error_code & PF_PK)
|
||||
if (error_code & X86_PF_PK)
|
||||
return 1;
|
||||
|
||||
/*
|
||||
* Make sure to check the VMA so that we do not perform
|
||||
* faults just to hit a PF_PK as soon as we fill in a
|
||||
* faults just to hit a X86_PF_PK as soon as we fill in a
|
||||
* page.
|
||||
*/
|
||||
if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
|
||||
(error_code & PF_INSTR), foreign))
|
||||
if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
|
||||
(error_code & X86_PF_INSTR), foreign))
|
||||
return 1;
|
||||
|
||||
if (error_code & PF_WRITE) {
|
||||
if (error_code & X86_PF_WRITE) {
|
||||
/* write, present and write, not present: */
|
||||
if (unlikely(!(vma->vm_flags & VM_WRITE)))
|
||||
return 1;
|
||||
@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
|
||||
}
|
||||
|
||||
/* read, present: */
|
||||
if (unlikely(error_code & PF_PROT))
|
||||
if (unlikely(error_code & X86_PF_PROT))
|
||||
return 1;
|
||||
|
||||
/* read, not present: */
|
||||
@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
|
||||
if (!static_cpu_has(X86_FEATURE_SMAP))
|
||||
return false;
|
||||
|
||||
if (error_code & PF_USER)
|
||||
if (error_code & X86_PF_USER)
|
||||
return false;
|
||||
|
||||
if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
|
||||
@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
* protection error (error_code & 9) == 0.
|
||||
*/
|
||||
if (unlikely(fault_in_kernel_space(address))) {
|
||||
if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
|
||||
if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
|
||||
if (vmalloc_fault(address) >= 0)
|
||||
return;
|
||||
|
||||
@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
if (unlikely(kprobes_fault(regs)))
|
||||
return;
|
||||
|
||||
if (unlikely(error_code & PF_RSVD))
|
||||
if (unlikely(error_code & X86_PF_RSVD))
|
||||
pgtable_bad(regs, error_code, address);
|
||||
|
||||
if (unlikely(smap_violation(error_code, regs))) {
|
||||
@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
*/
|
||||
if (user_mode(regs)) {
|
||||
local_irq_enable();
|
||||
error_code |= PF_USER;
|
||||
error_code |= X86_PF_USER;
|
||||
flags |= FAULT_FLAG_USER;
|
||||
} else {
|
||||
if (regs->flags & X86_EFLAGS_IF)
|
||||
@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
|
||||
perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
|
||||
|
||||
if (error_code & PF_WRITE)
|
||||
if (error_code & X86_PF_WRITE)
|
||||
flags |= FAULT_FLAG_WRITE;
|
||||
if (error_code & PF_INSTR)
|
||||
if (error_code & X86_PF_INSTR)
|
||||
flags |= FAULT_FLAG_INSTRUCTION;
|
||||
|
||||
/*
|
||||
@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
|
||||
* space check, thus avoiding the deadlock:
|
||||
*/
|
||||
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
|
||||
if ((error_code & PF_USER) == 0 &&
|
||||
if (!(error_code & X86_PF_USER) &&
|
||||
!search_exception_tables(regs->ip)) {
|
||||
bad_area_nosemaphore(regs, error_code, address, NULL);
|
||||
return;
|
||||
@ -1409,7 +1389,7 @@ retry:
|
||||
bad_area(regs, error_code, address);
|
||||
return;
|
||||
}
|
||||
if (error_code & PF_USER) {
|
||||
if (error_code & X86_PF_USER) {
|
||||
/*
|
||||
* Accessing the stack below %sp is always a bug.
|
||||
* The large cushion allows instructions like enter
|
||||
|
@ -671,7 +671,7 @@ void __init init_mem_mapping(void)
|
||||
load_cr3(swapper_pg_dir);
|
||||
__flush_tlb_all();
|
||||
|
||||
hypervisor_init_mem_mapping();
|
||||
x86_init.hyper.init_mem_mapping();
|
||||
|
||||
early_memtest(0, max_pfn_mapped << PAGE_SHIFT);
|
||||
}
|
||||
|
@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
|
||||
|
||||
#if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
|
||||
void register_page_bootmem_memmap(unsigned long section_nr,
|
||||
struct page *start_page, unsigned long size)
|
||||
struct page *start_page, unsigned long nr_pages)
|
||||
{
|
||||
unsigned long addr = (unsigned long)start_page;
|
||||
unsigned long end = (unsigned long)(start_page + size);
|
||||
unsigned long end = (unsigned long)(start_page + nr_pages);
|
||||
unsigned long next;
|
||||
pgd_t *pgd;
|
||||
p4d_t *p4d;
|
||||
pud_t *pud;
|
||||
pmd_t *pmd;
|
||||
unsigned int nr_pages;
|
||||
unsigned int nr_pmd_pages;
|
||||
struct page *page;
|
||||
|
||||
for (; addr < end; addr = next) {
|
||||
@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
|
||||
if (pmd_none(*pmd))
|
||||
continue;
|
||||
|
||||
nr_pages = 1 << (get_order(PMD_SIZE));
|
||||
nr_pmd_pages = 1 << get_order(PMD_SIZE);
|
||||
page = pmd_page(*pmd);
|
||||
while (nr_pages--)
|
||||
while (nr_pmd_pages--)
|
||||
get_page_bootmem(section_nr, page++,
|
||||
SECTION_INFO);
|
||||
}
|
||||
|
@ -4,19 +4,150 @@
|
||||
#include <linux/bootmem.h>
|
||||
#include <linux/kasan.h>
|
||||
#include <linux/kdebug.h>
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/sched/task.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
#include <asm/e820/types.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/tlbflush.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/pgtable.h>
|
||||
|
||||
extern struct range pfn_mapped[E820_MAX_ENTRIES];
|
||||
|
||||
static int __init map_range(struct range *range)
|
||||
static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
|
||||
|
||||
static __init void *early_alloc(size_t size, int nid)
|
||||
{
|
||||
return memblock_virt_alloc_try_nid_nopanic(size, size,
|
||||
__pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid);
|
||||
}
|
||||
|
||||
static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr,
|
||||
unsigned long end, int nid)
|
||||
{
|
||||
pte_t *pte;
|
||||
|
||||
if (pmd_none(*pmd)) {
|
||||
void *p;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_PSE) &&
|
||||
((end - addr) == PMD_SIZE) &&
|
||||
IS_ALIGNED(addr, PMD_SIZE)) {
|
||||
p = early_alloc(PMD_SIZE, nid);
|
||||
if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL))
|
||||
return;
|
||||
else if (p)
|
||||
memblock_free(__pa(p), PMD_SIZE);
|
||||
}
|
||||
|
||||
p = early_alloc(PAGE_SIZE, nid);
|
||||
pmd_populate_kernel(&init_mm, pmd, p);
|
||||
}
|
||||
|
||||
pte = pte_offset_kernel(pmd, addr);
|
||||
do {
|
||||
pte_t entry;
|
||||
void *p;
|
||||
|
||||
if (!pte_none(*pte))
|
||||
continue;
|
||||
|
||||
p = early_alloc(PAGE_SIZE, nid);
|
||||
entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL);
|
||||
set_pte_at(&init_mm, addr, pte, entry);
|
||||
} while (pte++, addr += PAGE_SIZE, addr != end);
|
||||
}
|
||||
|
||||
static void __init kasan_populate_pud(pud_t *pud, unsigned long addr,
|
||||
unsigned long end, int nid)
|
||||
{
|
||||
pmd_t *pmd;
|
||||
unsigned long next;
|
||||
|
||||
if (pud_none(*pud)) {
|
||||
void *p;
|
||||
|
||||
if (boot_cpu_has(X86_FEATURE_GBPAGES) &&
|
||||
((end - addr) == PUD_SIZE) &&
|
||||
IS_ALIGNED(addr, PUD_SIZE)) {
|
||||
p = early_alloc(PUD_SIZE, nid);
|
||||
if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL))
|
||||
return;
|
||||
else if (p)
|
||||
memblock_free(__pa(p), PUD_SIZE);
|
||||
}
|
||||
|
||||
p = early_alloc(PAGE_SIZE, nid);
|
||||
pud_populate(&init_mm, pud, p);
|
||||
}
|
||||
|
||||
pmd = pmd_offset(pud, addr);
|
||||
do {
|
||||
next = pmd_addr_end(addr, end);
|
||||
if (!pmd_large(*pmd))
|
||||
kasan_populate_pmd(pmd, addr, next, nid);
|
||||
} while (pmd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr,
|
||||
unsigned long end, int nid)
|
||||
{
|
||||
pud_t *pud;
|
||||
unsigned long next;
|
||||
|
||||
if (p4d_none(*p4d)) {
|
||||
void *p = early_alloc(PAGE_SIZE, nid);
|
||||
|
||||
p4d_populate(&init_mm, p4d, p);
|
||||
}
|
||||
|
||||
pud = pud_offset(p4d, addr);
|
||||
do {
|
||||
next = pud_addr_end(addr, end);
|
||||
if (!pud_large(*pud))
|
||||
kasan_populate_pud(pud, addr, next, nid);
|
||||
} while (pud++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr,
|
||||
unsigned long end, int nid)
|
||||
{
|
||||
void *p;
|
||||
p4d_t *p4d;
|
||||
unsigned long next;
|
||||
|
||||
if (pgd_none(*pgd)) {
|
||||
p = early_alloc(PAGE_SIZE, nid);
|
||||
pgd_populate(&init_mm, pgd, p);
|
||||
}
|
||||
|
||||
p4d = p4d_offset(pgd, addr);
|
||||
do {
|
||||
next = p4d_addr_end(addr, end);
|
||||
kasan_populate_p4d(p4d, addr, next, nid);
|
||||
} while (p4d++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void __init kasan_populate_shadow(unsigned long addr, unsigned long end,
|
||||
int nid)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
unsigned long next;
|
||||
|
||||
addr = addr & PAGE_MASK;
|
||||
end = round_up(end, PAGE_SIZE);
|
||||
pgd = pgd_offset_k(addr);
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
kasan_populate_pgd(pgd, addr, next, nid);
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
static void __init map_range(struct range *range)
|
||||
{
|
||||
unsigned long start;
|
||||
unsigned long end;
|
||||
@ -24,15 +155,17 @@ static int __init map_range(struct range *range)
|
||||
start = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->start));
|
||||
end = (unsigned long)kasan_mem_to_shadow(pfn_to_kaddr(range->end));
|
||||
|
||||
return vmemmap_populate(start, end, NUMA_NO_NODE);
|
||||
kasan_populate_shadow(start, end, early_pfn_to_nid(range->start));
|
||||
}
|
||||
|
||||
static void __init clear_pgds(unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
pgd_t *pgd;
|
||||
/* See comment in kasan_init() */
|
||||
unsigned long pgd_end = end & PGDIR_MASK;
|
||||
|
||||
for (; start < end; start += PGDIR_SIZE) {
|
||||
for (; start < pgd_end; start += PGDIR_SIZE) {
|
||||
pgd = pgd_offset_k(start);
|
||||
/*
|
||||
* With folded p4d, pgd_clear() is nop, use p4d_clear()
|
||||
@ -43,29 +176,61 @@ static void __init clear_pgds(unsigned long start,
|
||||
else
|
||||
pgd_clear(pgd);
|
||||
}
|
||||
|
||||
pgd = pgd_offset_k(start);
|
||||
for (; start < end; start += P4D_SIZE)
|
||||
p4d_clear(p4d_offset(pgd, start));
|
||||
}
|
||||
|
||||
static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
|
||||
{
|
||||
unsigned long p4d;
|
||||
|
||||
if (!IS_ENABLED(CONFIG_X86_5LEVEL))
|
||||
return (p4d_t *)pgd;
|
||||
|
||||
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
|
||||
p4d += __START_KERNEL_map - phys_base;
|
||||
return (p4d_t *)p4d + p4d_index(addr);
|
||||
}
|
||||
|
||||
static void __init kasan_early_p4d_populate(pgd_t *pgd,
|
||||
unsigned long addr,
|
||||
unsigned long end)
|
||||
{
|
||||
pgd_t pgd_entry;
|
||||
p4d_t *p4d, p4d_entry;
|
||||
unsigned long next;
|
||||
|
||||
if (pgd_none(*pgd)) {
|
||||
pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
|
||||
set_pgd(pgd, pgd_entry);
|
||||
}
|
||||
|
||||
p4d = early_p4d_offset(pgd, addr);
|
||||
do {
|
||||
next = p4d_addr_end(addr, end);
|
||||
|
||||
if (!p4d_none(*p4d))
|
||||
continue;
|
||||
|
||||
p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
|
||||
set_p4d(p4d, p4d_entry);
|
||||
} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
|
||||
}
|
||||
|
||||
static void __init kasan_map_early_shadow(pgd_t *pgd)
|
||||
{
|
||||
int i;
|
||||
unsigned long start = KASAN_SHADOW_START;
|
||||
/* See comment in kasan_init() */
|
||||
unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
|
||||
unsigned long end = KASAN_SHADOW_END;
|
||||
unsigned long next;
|
||||
|
||||
for (i = pgd_index(start); start < end; i++) {
|
||||
switch (CONFIG_PGTABLE_LEVELS) {
|
||||
case 4:
|
||||
pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
|
||||
_KERNPG_TABLE);
|
||||
break;
|
||||
case 5:
|
||||
pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
|
||||
_KERNPG_TABLE);
|
||||
break;
|
||||
default:
|
||||
BUILD_BUG();
|
||||
}
|
||||
start += PGDIR_SIZE;
|
||||
}
|
||||
pgd += pgd_index(addr);
|
||||
do {
|
||||
next = pgd_addr_end(addr, end);
|
||||
kasan_early_p4d_populate(pgd, addr, next);
|
||||
} while (pgd++, addr = next, addr != end);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_KASAN_INLINE
|
||||
@ -102,7 +267,7 @@ void __init kasan_early_init(void)
|
||||
for (i = 0; i < PTRS_PER_PUD; i++)
|
||||
kasan_zero_pud[i] = __pud(pud_val);
|
||||
|
||||
for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
|
||||
for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
|
||||
kasan_zero_p4d[i] = __p4d(p4d_val);
|
||||
|
||||
kasan_map_early_shadow(early_top_pgt);
|
||||
@ -112,37 +277,76 @@ void __init kasan_early_init(void)
|
||||
void __init kasan_init(void)
|
||||
{
|
||||
int i;
|
||||
void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
|
||||
|
||||
#ifdef CONFIG_KASAN_INLINE
|
||||
register_die_notifier(&kasan_die_notifier);
|
||||
#endif
|
||||
|
||||
memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
|
||||
|
||||
/*
|
||||
* We use the same shadow offset for 4- and 5-level paging to
|
||||
* facilitate boot-time switching between paging modes.
|
||||
* As result in 5-level paging mode KASAN_SHADOW_START and
|
||||
* KASAN_SHADOW_END are not aligned to PGD boundary.
|
||||
*
|
||||
* KASAN_SHADOW_START doesn't share PGD with anything else.
|
||||
* We claim whole PGD entry to make things easier.
|
||||
*
|
||||
* KASAN_SHADOW_END lands in the last PGD entry and it collides with
|
||||
* bunch of things like kernel code, modules, EFI mapping, etc.
|
||||
* We need to take extra steps to not overwrite them.
|
||||
*/
|
||||
if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
|
||||
void *ptr;
|
||||
|
||||
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
|
||||
memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
|
||||
set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
|
||||
__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
|
||||
}
|
||||
|
||||
load_cr3(early_top_pgt);
|
||||
__flush_tlb_all();
|
||||
|
||||
clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
|
||||
clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
|
||||
|
||||
kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
|
||||
kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET));
|
||||
|
||||
for (i = 0; i < E820_MAX_ENTRIES; i++) {
|
||||
if (pfn_mapped[i].end == 0)
|
||||
break;
|
||||
|
||||
if (map_range(&pfn_mapped[i]))
|
||||
panic("kasan: unable to allocate shadow!");
|
||||
map_range(&pfn_mapped[i]);
|
||||
}
|
||||
|
||||
kasan_populate_zero_shadow(
|
||||
kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
|
||||
kasan_mem_to_shadow((void *)__START_KERNEL_map));
|
||||
|
||||
vmemmap_populate((unsigned long)kasan_mem_to_shadow(_stext),
|
||||
(unsigned long)kasan_mem_to_shadow(_end),
|
||||
NUMA_NO_NODE);
|
||||
kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
|
||||
(unsigned long)kasan_mem_to_shadow(_end),
|
||||
early_pfn_to_nid(__pa(_stext)));
|
||||
|
||||
shadow_cpu_entry_begin = (void *)__fix_to_virt(FIX_CPU_ENTRY_AREA_BOTTOM);
|
||||
shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
|
||||
shadow_cpu_entry_begin = (void *)round_down((unsigned long)shadow_cpu_entry_begin,
|
||||
PAGE_SIZE);
|
||||
|
||||
shadow_cpu_entry_end = (void *)(__fix_to_virt(FIX_CPU_ENTRY_AREA_TOP) + PAGE_SIZE);
|
||||
shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
|
||||
shadow_cpu_entry_end = (void *)round_up((unsigned long)shadow_cpu_entry_end,
|
||||
PAGE_SIZE);
|
||||
|
||||
kasan_populate_zero_shadow(kasan_mem_to_shadow((void *)MODULES_END),
|
||||
(void *)KASAN_SHADOW_END);
|
||||
shadow_cpu_entry_begin);
|
||||
|
||||
kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
|
||||
(unsigned long)shadow_cpu_entry_end, 0);
|
||||
|
||||
kasan_populate_zero_shadow(shadow_cpu_entry_end, (void *)KASAN_SHADOW_END);
|
||||
|
||||
load_cr3(init_top_pgt);
|
||||
__flush_tlb_all();
|
||||
|
@ -160,17 +160,19 @@ static void do_fpu_end(void)
|
||||
static void fix_processor_context(void)
|
||||
{
|
||||
int cpu = smp_processor_id();
|
||||
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
|
||||
#ifdef CONFIG_X86_64
|
||||
struct desc_struct *desc = get_cpu_gdt_rw(cpu);
|
||||
tss_desc tss;
|
||||
#endif
|
||||
set_tss_desc(cpu, t); /*
|
||||
* This just modifies memory; should not be
|
||||
* necessary. But... This is necessary, because
|
||||
* 386 hardware has concept of busy TSS or some
|
||||
* similar stupidity.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We need to reload TR, which requires that we change the
|
||||
* GDT entry to indicate "available" first.
|
||||
*
|
||||
* XXX: This could probably all be replaced by a call to
|
||||
* force_reload_TR().
|
||||
*/
|
||||
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
|
||||
|
||||
#ifdef CONFIG_X86_64
|
||||
memcpy(&tss, &desc[GDT_ENTRY_TSS], sizeof(tss_desc));
|
||||
|
@ -226,12 +226,12 @@ static uint32_t __init xen_platform_hvm(void)
|
||||
return xen_cpuid_base();
|
||||
}
|
||||
|
||||
const struct hypervisor_x86 x86_hyper_xen_hvm = {
|
||||
const __initconst struct hypervisor_x86 x86_hyper_xen_hvm = {
|
||||
.name = "Xen HVM",
|
||||
.detect = xen_platform_hvm,
|
||||
.init_platform = xen_hvm_guest_init,
|
||||
.pin_vcpu = xen_pin_vcpu,
|
||||
.x2apic_available = xen_x2apic_para_available,
|
||||
.init_mem_mapping = xen_hvm_init_mem_mapping,
|
||||
.type = X86_HYPER_XEN_HVM,
|
||||
.init.init_platform = xen_hvm_guest_init,
|
||||
.init.x2apic_available = xen_x2apic_para_available,
|
||||
.init.init_mem_mapping = xen_hvm_init_mem_mapping,
|
||||
.runtime.pin_vcpu = xen_pin_vcpu,
|
||||
};
|
||||
EXPORT_SYMBOL(x86_hyper_xen_hvm);
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user