Mirror of https://github.com/rd-stuffs/msm-4.14.git, synced 2025-02-20 11:45:48 +08:00
Merge remote-tracking branch 'remotes/origin/tmp-9189141' into msm-4.14
* remotes/origin/tmp-9189141:
  Linux 4.14.13
  KVM: s390: prevent buffer overrun on memory hotplug during migration
  KVM: s390: fix cmma migration for multiple memory slots
  mtd: nand: pxa3xx: Fix READOOB implementation
  parisc: qemu idle sleep support
  parisc: Fix alignment of pa_tlb_lock in assembly on 32-bit SMP kernel
  apparmor: fix regression in mount mediation when feature set is pinned
  x86/microcode/AMD: Add support for fam17h microcode loading
  Input: elantech - add new icbody type 15
  powerpc/mm: Fix SEGV on mapped region to return SEGV_ACCERR
  ARC: uaccess: dont use "l" gcc inline asm constraint modifier
  iommu/arm-smmu-v3: Cope with duplicated Stream IDs
  iommu/arm-smmu-v3: Don't free page table ops twice
  kernel/signal.c: remove the no longer needed SIGNAL_UNKILLABLE check in complete_signal()
  kernel/signal.c: protect the SIGNAL_UNKILLABLE tasks from !sig_kernel_only() signals
  kernel/signal.c: protect the traced SIGNAL_UNKILLABLE tasks from SIGKILL
  x86 / CPU: Always show current CPU frequency in /proc/cpuinfo
  x86 / CPU: Avoid unnecessary IPIs in arch_freq_get_on_cpu()
  fscache: Fix the default for fscache_maybe_release_page()
  sunxi-rsb: Include OF based modalias in device uevent
  drm/i915: Apply Display WA #1183 on skl, kbl, and cfl
  drm/i915: Disable DC states around GMBUS on GLK
  crypto: chelsio - select CRYPTO_GF128MUL
  crypto: pcrypt - fix freeing pcrypt instances
  crypto: chacha20poly1305 - validate the digest size
  crypto: n2 - cure use after free
  efi/capsule-loader: Reinstate virtual capsule mapping
  btrfs: fix refcount_t usage when deleting btrfs_delayed_nodes
  userfaultfd: clear the vma->vm_userfaultfd_ctx if UFFD_EVENT_FORK fails
  mm/sparse.c: wrong allocation for mem_section
  mm/mprotect: add a cond_resched() inside change_pmd_range()
  kernel/acct.c: fix the acct->needcheck check in check_free_space()
  x86/pti: Rename BUG_CPU_INSECURE to BUG_CPU_MELTDOWN
  x86/alternatives: Add missing '\n' at end of ALTERNATIVE inline asm
  x86/tlb: Drop the _GPL from the cpu_tlbstate export
  x86/events/intel/ds: Use the proper cache flush method for mapping ds buffers
  x86/kaslr: Fix the vaddr_end mess
  x86/mm: Map cpu_entry_area at the same place on 4/5 level
  x86/mm: Set MODULES_END to 0xffffffffff000000
  ANDROID: netfilter: xt_qtaguid: Fix 4.14 compilation
  ANDROID: Squashfs: optimize reading uncompressed data
  ANDROID: Squashfs: implement .readpages()
  ANDROID: Squashfs: replace buffer_head with BIO
  ANDROID: Squashfs: refactor page_actor
  ANDROID: usb: f_fs: Prevent gadget unbind if it is already unbound
  Linux 4.14.12
  rtc: m41t80: remove unneeded checks from m41t80_sqw_set_rate
  rtc: m41t80: avoid i2c read in m41t80_sqw_is_prepared
  rtc: m41t80: avoid i2c read in m41t80_sqw_recalc_rate
  rtc: m41t80: fix m41t80_sqw_round_rate return value
  rtc: m41t80: m41t80_sqw_set_rate should return 0 on success
  Revert "xfrm: Fix stack-out-of-bounds read in xfrm_state_find."
  x86/process: Define cpu_tss_rw in same section as declaration
  x86/pti: Switch to kernel CR3 at early in entry_SYSCALL_compat()
  x86/dumpstack: Print registers for first stack frame
  x86/dumpstack: Fix partial register dumps
  x86/pti: Make sure the user/kernel PTEs match
  x86/cpu, x86/pti: Do not enable PTI on AMD processors
  capabilities: fix buffer overread on very short xattr
  exec: Weaken dumpability for secureexec
  Linux 4.14.11
  tty: fix tty_ldisc_receive_buf() documentation
  n_tty: fix EXTPROC vs ICANON interaction with TIOCINQ (aka FIONREAD)
  x86/ldt: Make LDT pgtable free conditional
  x86/ldt: Plug memory leak in error path
  x86/espfix/64: Fix espfix double-fault handling on 5-level systems
  x86-32: Fix kexec with stack canary (CONFIG_CC_STACKPROTECTOR)
  x86/mm: Remove preempt_disable/enable() from __native_flush_tlb()
  x86/smpboot: Remove stale TLB flush invocations
  nohz: Prevent a timer interrupt storm in tick_nohz_stop_sched_tick()
  staging: android: ion: Fix dma direction for dma_sync_sg_for_cpu/device
  drivers: base: cacheinfo: fix cache type for non-architected system cache
  phy: tegra: fix device-tree node lookups
  binder: fix proc->files use-after-free
  timers: Reinitialize per cpu bases on hotplug
  timers: Invoke timer_start_debug() where it makes sense
  timers: Use deferrable base independent of base::nohz_active
  usb: xhci: Add XHCI_TRUST_TX_LENGTH for Renesas uPD720201
  USB: Fix off by one in type-specific length check of BOS SSP capability
  usb: add RESET_RESUME for ELSA MicroLink 56K
  usb: Add device quirk for Logitech HD Pro Webcam C925e
  USB: serial: option: adding support for YUGA CLM920-NC5
  USB: serial: option: add support for Telit ME910 PID 0x1101
  USB: serial: qcserial: add Sierra Wireless EM7565
  USB: serial: ftdi_sio: add id for Airbus DS P8GR
  USB: chipidea: msm: fix ulpi-node lookup
  usbip: vhci: stop printing kernel pointer addresses in messages
  usbip: stub: stop printing kernel pointer addresses in messages
  usbip: prevent leaking socket pointer address in messages
  usbip: fix usbip bind writing random string after command in match_busid
  sparc64: repair calling incorrect hweight function from stubs
  skbuff: in skb_copy_ubufs unclone before releasing zerocopy
  skbuff: skb_copy_ubufs must release uarg even without user frags
  skbuff: orphan frags before zerocopy clone
  Revert "mlx5: move affinity hints assignments to generic code"
  ipv6: set all.accept_dad to 0 by default
  ipv4: fib: Fix metrics match when deleting a route
  phylink: ensure AN is enabled
  phylink: ensure the PHY interface mode is appropriately set
  bnxt_en: Fix sources of spurious netpoll warnings
  net: sched: fix static key imbalance in case of ingress/clsact_init error
  vxlan: restore dev->mtu setting based on lower device
  net/mlx5: FPGA, return -EINVAL if size is zero
  tcp: refresh tcp_mstamp from timers callbacks
  ipv6: Honor specified parameters in fibmatch lookup
  net: phy: marvell: Limit 88m1101 autoneg errata to 88E1145 as well.
  tcp: fix potential underestimation on rcv_rtt
  mlxsw: spectrum: Disable MAC learning for ovs port
  tipc: fix hanging poll() for stream sockets
  sctp: make sure stream nums can match optlen in sctp_setsockopt_reset_streams
  s390/qeth: fix error handling in checksum cmd callback
  net: dsa: bcm_sf2: Clear IDDQ_GLOBAL_PWR bit for PHY
  sfc: pass valid pointers from efx_enqueue_unwind
  openvswitch: Fix pop_vlan action for double tagged frames
  net/mlx5: Fix error flow in CREATE_QP command
  net/mlx5e: Prevent possible races in VXLAN control flow
  net/mlx5e: Add refcount to VXLAN structure
  net/mlx5e: Fix features check of IPv6 traffic
  net/mlx5e: Fix possible deadlock of VXLAN lock
  net/mlx5: Fix rate limit packet pacing naming and struct
  tcp: invalidate rate samples during SACK reneging
  sock: free skb in skb_complete_tx_timestamp on error
  net: phy: micrel: ksz9031: reconfigure autoneg after phy autoneg workaround
  net: Fix double free and memory corruption in get_net_ns_by_id()
  net: bridge: fix early call to br_stp_change_bridge_id and plug newlink leaks
  ipv4: Fix use-after-free when flushing FIB tables
  ip6_gre: fix device features for ioctl setup
  adding missing rcu_read_unlock in ipxip6_rcv
  sctp: Replace use of sockets_allocated with specified macro.
  net: mvmdio: disable/unprepare clocks in EPROBE_DEFER case
  net: ipv4: fix for a race condition in raw_sendmsg
  s390/qeth: update takeover IPs after configuration change
  s390/qeth: lock IP table while applying takeover changes
  s390/qeth: don't apply takeover changes to RXIP
  s390/qeth: apply takeover changes when mode is toggled
  tcp_bbr: reset long-term bandwidth sampling on loss recovery undo
  tcp_bbr: reset full pipe detection on loss recovery undo
  tg3: Fix rx hang on MTU change with 5717/5719
  tcp md5sig: Use skb's saddr when replying to an incoming segment
  tcp_bbr: record "full bw reached" decision in new full_bw_reached bit
  RDS: Check cmsg_len before dereferencing CMSG_DATA ptr
  ptr_ring: add barriers
  net: reevalulate autoflowlabel setting after sysctl setting
  net: qmi_wwan: add Sierra EM7565 1199:9091
  netlink: Add netns check on taps
  net: igmp: Use correct source address on IGMPv3 reports
  net: fec: unmap the xmit buffer that are not transferred by DMA
  ipv6: mcast: better catch silly mtu values
  ipv4: igmp: guard against silly MTU values
  kbuild: add '-fno-stack-check' to kernel build options
  block: don't let passthrough IO go into .make_request_fn()
  block: fix blk_rq_append_bio
  cpufreq: schedutil: Use idle_calls counter of the remote CPU
  ALSA: hda - Fix missing COEF init for ALC225/295/299
  ALSA: hda - fix headset mic detection issue on a Dell machine
  ALSA: hda - change the location for one mic on a Lenovo machine
  ALSA: hda - Add MIC_NO_PRESENCE fixup for 2 HP machines
  ALSA: hda: Drop useless WARN_ON()
  IB/core: Verify that QP is security enabled in create and destroy
  IB/uverbs: Fix command checking as part of ib_uverbs_ex_modify_qp()
  IB/mlx5: Serialize access to the VMA list
  IB/hfi: Only read capability registers if the capability exists
  gpio: fix "gpio-line-names" property retrieval
  ASoC: tlv320aic31xx: Fix GPIO1 register definition
  ASoC: twl4030: fix child-node lookup
  ASoC: fsl_ssi: AC'97 ops need regmap, clock and cleaning up on failure
  ASoC: da7218: fix fix child-node lookup
  ASoC: wm_adsp: Fix validation of firmware and coeff lengths
  ASoC: codecs: msm8916-wcd: Fix supported formats
  iw_cxgb4: Only validate the MSN for successful completions
  ring-buffer: Do no reuse reader page if still in use
  ring-buffer: Mask out the info bits when returning buffer page length
  x86/ldt: Make the LDT mapping RO
  x86/mm/dump_pagetables: Allow dumping current pagetables
  x86/mm/dump_pagetables: Check user space page table for WX pages
  x86/mm/dump_pagetables: Add page table directory to the debugfs VFS hierarchy
  x86/mm/pti: Add Kconfig
  x86/dumpstack: Indicate in Oops whether PTI is configured and enabled
  x86/mm: Clarify the whole ASID/kernel PCID/user PCID naming
  x86/mm: Use INVPCID for __native_flush_tlb_single()
  x86/mm: Optimize RESTORE_CR3
  x86/mm: Use/Fix PCID to optimize user/kernel switches
  x86/mm: Abstract switching CR3
  x86/mm: Allow flushing for future ASID switches
  x86/pti: Map the vsyscall page if needed
  x86/pti: Put the LDT in its own PGD if PTI is on
  x86/mm/64: Make a full PGD-entry size hole in the memory map
  x86/events/intel/ds: Map debug buffers in cpu_entry_area
  x86/cpu_entry_area: Add debugstore entries to cpu_entry_area
  x86/mm/pti: Map ESPFIX into user space
  x86/mm/pti: Share entry text PMD
  x86/entry: Align entry text section to PMD boundary
  x86/mm/pti: Share cpu_entry_area with user space page tables
  x86/mm/pti: Force entry through trampoline when PTI active
  x86/mm/pti: Add functions to clone kernel PMDs
  x86/mm/pti: Populate user PGD
  x86/mm/pti: Allocate a separate user PGD
  x86/mm/pti: Allow NX poison to be set in p4d/pgd
  x86/mm/pti: Add mapping helper functions
  x86/pti: Add the pti= cmdline option and documentation
  x86/mm/pti: Add infrastructure for page table isolation
  x86/mm/pti: Prepare the x86/entry assembly code for entry/exit CR3 switching
  x86/mm/pti: Disable global pages if PAGE_TABLE_ISOLATION=y
  x86/cpufeatures: Add X86_BUG_CPU_INSECURE
  tracing: Fix crash when it fails to alloc ring buffer
  tracing: Fix possible double free on failure of allocating trace buffer
  tracing: Remove extra zeroing out of the ring buffer page

Conflicts:
	drivers/staging/android/ion/ion.c
	kernel/time/timer.c

Change-Id: Ia5b16c96ab44e640e2f10ab535c4c672b670cbdc
Signed-off-by: Runmin Wang <runminw@codeaurora.org>
This commit is contained in: commit 5682ea9f33
@@ -2694,6 +2694,8 @@
 			steal time is computed, but won't influence scheduler
 			behaviour
 
+	nopti		[X86-64] Disable kernel page table isolation
+
 	nolapic		[X86-32,APIC] Do not enable or use the local APIC.
 
 	nolapic_timer	[X86-32,APIC] Do not use the local APIC timer.
@@ -3262,6 +3264,12 @@
 	pt.		[PARIDE]
 			See Documentation/blockdev/paride.txt.
 
+	pti=		[X86_64]
+			Control user/kernel address space isolation:
+			on - enable
+			off - disable
+			auto - default setting
+
 	pty.legacy_count=
 			[KNL] Number of legacy pty's. Overwrites compiled-in
 			default number.
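For context on the pti= option above: boot parameters like this are typically consumed very early via an early_param() hook. The sketch below is illustrative only; the function name, message text, and enum are assumptions, and the real kernel parses this flag even earlier through cmdline_find_option().

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/string.h>

/* Hypothetical state mirroring the documented pti= choices. */
enum pti_mode { PTI_AUTO, PTI_FORCE_ON, PTI_FORCE_OFF };
static enum pti_mode pti_mode __initdata = PTI_AUTO;

/* Parse "pti=on|off|auto" from the kernel command line. */
static int __init pti_setup(char *str)
{
	if (!str)
		return -EINVAL;
	if (!strcmp(str, "on"))
		pti_mode = PTI_FORCE_ON;
	else if (!strcmp(str, "off"))
		pti_mode = PTI_FORCE_OFF;
	else if (!strcmp(str, "auto"))
		pti_mode = PTI_AUTO;
	else
		pr_warn("pti: unknown option '%s', using auto\n", str);
	return 0;
}
early_param("pti", pti_setup);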
@@ -12,7 +12,9 @@ ffffea0000000000 - ffffeaffffffffff (=40 bits) virtual memory map (1TB)
 ... unused hole ...
 ffffec0000000000 - fffffbffffffffff (=44 bits) kasan shadow memory (16TB)
 ... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+                                    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
+fffffe8000000000 - fffffeffffffffff (=39 bits) LDT remap for PTI
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
@@ -29,20 +31,22 @@ Virtual memory map with 5 level page tables:
 hole caused by [56:63] sign extension
 ff00000000000000 - ff0fffffffffffff (=52 bits) guard hole, reserved for hypervisor
 ff10000000000000 - ff8fffffffffffff (=55 bits) direct mapping of all phys. memory
-ff90000000000000 - ff91ffffffffffff (=49 bits) hole
-ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
+ff90000000000000 - ff9fffffffffffff (=52 bits) LDT remap for PTI
+ffa0000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space (12800 TB)
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
 ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
-fffffe8000000000 - fffffeffffffffff (=39 bits) cpu_entry_area mapping
+                                    vaddr_end for KASLR
+fffffe0000000000 - fffffe7fffffffff (=39 bits) cpu_entry_area mapping
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
 ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space
 ... unused hole ...
 ffffffff80000000 - ffffffff9fffffff (=512 MB) kernel text mapping, from phys 0
-ffffffffa0000000 - [fixmap start]   (~1526 MB) module mapping space
+ffffffffa0000000 - fffffffffeffffff (1520 MB) module mapping space
+[fixmap start]   - ffffffffff5fffff kernel-internal fixmap range
 ffffffffff600000 - ffffffffff600fff (=4 kB) legacy vsyscall ABI
 ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole
@@ -66,9 +70,10 @@ memory window (this size is arbitrary, it can be raised later if needed).
 The mappings are not part of any other kernel PGD and are only available
 during EFI runtime calls.
 
+The module mapping space size changes based on the CONFIG requirements for the
+following fixmap section.
+
 Note that if CONFIG_RANDOMIZE_MEMORY is enabled, the direct mapping of all
 physical memory, vmalloc/ioremap space and virtual memory map are randomized.
 Their order is preserved but their base will be offset early at boot time.
+
+Be very careful vs. KASLR when changing anything here. The KASLR address
+range must not overlap with anything except the KASAN shadow area, which is
+correct as KASAN disables KASLR.
Makefile: 5 changed lines
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 14
-SUBLEVEL = 10
+SUBLEVEL = 13
 EXTRAVERSION =
 NAME = Petit Gorille
 
@@ -817,6 +817,9 @@ KBUILD_CFLAGS += $(call cc-disable-warning, pointer-sign)
 # disable invalid "can't wrap" optimizations for signed / pointers
 KBUILD_CFLAGS	+= $(call cc-option,-fno-strict-overflow)
 
+# Make sure -fstack-check isn't enabled (like gentoo apparently did)
+KBUILD_CFLAGS  += $(call cc-option,-fno-stack-check,)
+
 # conserve stack if available
 KBUILD_CFLAGS   += $(call cc-option,-fconserve-stack)
 
@@ -668,6 +668,7 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 		return 0;
 
 	__asm__ __volatile__(
+	"	mov	lp_count, %5		\n"
 	"	lp	3f			\n"
 	"1:	ldb.ab  %3, [%2, 1]		\n"
 	"	breq.d	%3, 0, 3f               \n"
@@ -684,8 +685,8 @@ __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 	"	.word   1b, 4b			\n"
 	"	.previous			\n"
 	: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
-	: "g"(-EFAULT), "l"(count)
-	: "memory");
+	: "g"(-EFAULT), "r"(count)
+	: "lp_count", "lp_start", "lp_end", "memory");
 
 	return res;
 }
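The ARC fix above illustrates a general GNU inline-asm rule: any machine register the asm body modifies behind the compiler's back must appear in the clobber list, and an input the body will overwrite cannot use a constraint that promises it stays put. A minimal x86-64 sketch of the same discipline (illustrative only, unrelated to the ARC code above):

#include <stddef.h>

/* Count bytes until NUL. %rcx is modified inside the asm body,
 * so it is declared as a clobber; the compiler then never caches
 * a value in it across this statement. */
static size_t asm_strlen(const char *s)
{
	size_t n;

	asm volatile(
		"	xor	%%rcx, %%rcx	\n"
		"1:	cmpb	$0, (%1,%%rcx)	\n"
		"	je	2f		\n"
		"	inc	%%rcx		\n"
		"	jmp	1b		\n"
		"2:	mov	%%rcx, %0	\n"
		: "=r"(n)           /* output */
		: "r"(s)            /* input: pointer is not modified */
		: "rcx", "memory"); /* rcx is written -> must be clobbered */
	return n;
}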
@@ -12,6 +12,7 @@
    for the semaphore.  */
 
 #define __PA_LDCW_ALIGNMENT	16
+#define __PA_LDCW_ALIGN_ORDER	4
 #define __ldcw_align(a) ({					\
 	unsigned long __ret = (unsigned long) &(a)->lock[0];	\
 	__ret = (__ret + __PA_LDCW_ALIGNMENT - 1)		\
@@ -29,6 +30,7 @@
    ldcd). */
 
 #define __PA_LDCW_ALIGNMENT	4
+#define __PA_LDCW_ALIGN_ORDER	2
 #define __ldcw_align(a) (&(a)->slock)
 #define __LDCW	"ldcw,co"
 
@@ -35,6 +35,7 @@
 #include <asm/pgtable.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
+#include <asm/ldcw.h>
 #include <asm/thread_info.h>
 
 #include <linux/linkage.h>
@@ -46,6 +47,14 @@
 #endif
 
 	.import		pa_tlb_lock,data
+	.macro  load_pa_tlb_lock reg
+#if __PA_LDCW_ALIGNMENT > 4
+	load32	PA(pa_tlb_lock) + __PA_LDCW_ALIGNMENT-1, \reg
+	depi	0,31,__PA_LDCW_ALIGN_ORDER, \reg
+#else
+	load32	PA(pa_tlb_lock), \reg
+#endif
+	.endm
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -457,7 +466,7 @@
 	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_tlb_lock),\tmp
+	load_pa_tlb_lock \tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
@@ -480,7 +489,7 @@
 	/* Release pa_tlb_lock lock. */
 	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_tlb_lock),\tmp
+	load_pa_tlb_lock \tmp
 	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
@@ -36,6 +36,7 @@
 #include <asm/assembly.h>
 #include <asm/pgtable.h>
 #include <asm/cache.h>
+#include <asm/ldcw.h>
 #include <linux/linkage.h>
 
 	.text
@@ -333,8 +334,12 @@ ENDPROC_CFI(flush_data_cache_local)
 
 	.macro	tlb_lock	la,flags,tmp
 #ifdef CONFIG_SMP
-	ldil		L%pa_tlb_lock,%r1
-	ldo		R%pa_tlb_lock(%r1),\la
+#if __PA_LDCW_ALIGNMENT > 4
+	load32		pa_tlb_lock + __PA_LDCW_ALIGNMENT-1, \la
+	depi		0,31,__PA_LDCW_ALIGN_ORDER, \la
+#else
+	load32		pa_tlb_lock, \la
+#endif
 	rsm		PSW_SM_I,\flags
 1:	LDCW		0(\la),\tmp
 	cmpib,<>,n	0,\tmp,3f
@@ -39,6 +39,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/cpu.h>
 #include <linux/module.h>
 #include <linux/personality.h>
 #include <linux/ptrace.h>
@@ -183,6 +184,44 @@ int dump_task_fpu (struct task_struct *tsk, elf_fpregset_t *r)
 	return 1;
 }
 
+/*
+ * Idle thread support
+ *
+ * Detect when running on QEMU with SeaBIOS PDC Firmware and let
+ * QEMU idle the host too.
+ */
+
+int running_on_qemu __read_mostly;
+
+void __cpuidle arch_cpu_idle_dead(void)
+{
+	/* nop on real hardware, qemu will offline CPU. */
+	asm volatile("or %%r31,%%r31,%%r31\n":::);
+}
+
+void __cpuidle arch_cpu_idle(void)
+{
+	local_irq_enable();
+
+	/* nop on real hardware, qemu will idle sleep. */
+	asm volatile("or %%r10,%%r10,%%r10\n":::);
+}
+
+static int __init parisc_idle_init(void)
+{
+	const char *marker;
+
+	/* check QEMU/SeaBIOS marker in PAGE0 */
+	marker = (char *) &PAGE0->pad0;
+	running_on_qemu = (memcmp(marker, "SeaBIOS", 8) == 0);
+
+	if (!running_on_qemu)
+		cpu_idle_poll_ctrl(1);
+
+	return 0;
+}
+arch_initcall(parisc_idle_init);
+
 /*
  * Copy architecture-specific thread state
  */
@@ -145,6 +145,11 @@ static noinline int bad_area(struct pt_regs *regs, unsigned long address)
 	return __bad_area(regs, address, SEGV_MAPERR);
 }
 
+static noinline int bad_access(struct pt_regs *regs, unsigned long address)
+{
+	return __bad_area(regs, address, SEGV_ACCERR);
+}
+
 static int do_sigbus(struct pt_regs *regs, unsigned long address,
 		     unsigned int fault)
 {
@@ -490,7 +495,7 @@ retry:
 
 good_area:
 	if (unlikely(access_error(is_write, is_exec, vma)))
-		return bad_area(regs, address);
+		return bad_access(regs, address);
 
 	/*
	 * If for any reason at all we couldn't handle the fault,
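The user-visible effect of the powerpc fix above is the si_code delivered with SIGSEGV: SEGV_MAPERR means nothing was mapped at the address, while SEGV_ACCERR means the region was mapped but the access violated its permissions. A small userspace sketch that observes the difference (illustrative; exact behavior of the PROT_NONE case is what the patch corrects on powerpc):

#include <signal.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

static void handler(int sig, siginfo_t *si, void *ctx)
{
	/* MAPERR: no mapping at all; ACCERR: mapped, access denied. */
	if (si->si_code == SEGV_ACCERR)
		write(2, "SEGV_ACCERR\n", 12);
	else
		write(2, "SEGV_MAPERR\n", 12);
	_exit(0);
}

int main(void)
{
	struct sigaction sa = { 0 };

	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigaction(SIGSEGV, &sa, NULL);

	/* Mapped but PROT_NONE: with the fix this faults with
	 * SEGV_ACCERR rather than SEGV_MAPERR. */
	char *p = mmap(NULL, 4096, PROT_NONE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;
	return p[0]; /* faults and enters the handler */
}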
@@ -794,11 +794,12 @@ static int kvm_s390_vm_start_migration(struct kvm *kvm)
 
 	if (kvm->arch.use_cmma) {
 		/*
-		 * Get the last slot. They should be sorted by base_gfn, so the
-		 * last slot is also the one at the end of the address space.
-		 * We have verified above that at least one slot is present.
+		 * Get the first slot. They are reverse sorted by base_gfn, so
+		 * the first slot is also the one at the end of the address
+		 * space. We have verified above that at least one slot is
+		 * present.
 		 */
-		ms = slots->memslots + slots->used_slots - 1;
+		ms = slots->memslots;
 		/* round up so we only use full longs */
 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
 		/* allocate enough bytes to store all the bits */
@@ -1009,7 +1009,7 @@ static inline int do_essa(struct kvm_vcpu *vcpu, const int orc)
 			cbrlo[entries] = gfn << PAGE_SHIFT;
 	}
 
-	if (orc) {
+	if (orc && gfn < ms->bitmap_size) {
 		/* increment only if we are really flipping the bit to 1 */
 		if (!test_and_set_bit(gfn, ms->pgste_bitmap))
 			atomic64_inc(&ms->dirty_pages);
@@ -44,8 +44,8 @@ EXPORT_SYMBOL(__arch_hweight32)
 	.previous
 
 ENTRY(__arch_hweight64)
-	sethi	%hi(__sw_hweight16), %g1
-	jmpl	%g1 + %lo(__sw_hweight16), %g0
+	sethi	%hi(__sw_hweight64), %g1
+	jmpl	%g1 + %lo(__sw_hweight64), %g0
 	 nop
 ENDPROC(__arch_hweight64)
 EXPORT_SYMBOL(__arch_hweight64)
@@ -23,6 +23,9 @@
  */
 #undef CONFIG_AMD_MEM_ENCRYPT
 
+/* No PAGE_TABLE_ISOLATION support needed either: */
+#undef CONFIG_PAGE_TABLE_ISOLATION
+
 #include "misc.h"
 
 /* These actually do the work of building the kernel identity maps. */
@@ -1,6 +1,11 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/jump_label.h>
 #include <asm/unwind_hints.h>
+#include <asm/cpufeatures.h>
+#include <asm/page_types.h>
+#include <asm/percpu.h>
+#include <asm/asm-offsets.h>
+#include <asm/processor-flags.h>
 
 /*
@@ -187,6 +192,146 @@ For 32-bit we have the following conventions - kernel is built with
 #endif
 .endm
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+/*
+ * PAGE_TABLE_ISOLATION PGDs are 8k.  Flip bit 12 to switch between the two
+ * halves:
+ */
+#define PTI_SWITCH_PGTABLES_MASK	(1<<PAGE_SHIFT)
+#define PTI_SWITCH_MASK		(PTI_SWITCH_PGTABLES_MASK|(1<<X86_CR3_PTI_SWITCH_BIT))
+
+.macro SET_NOFLUSH_BIT	reg:req
+	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
+.endm
+
+.macro ADJUST_KERNEL_CR3 reg:req
+	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
+	/* Clear PCID and "PAGE_TABLE_ISOLATION bit", point CR3 at kernel pagetables: */
+	andq    $(~PTI_SWITCH_MASK), \reg
+.endm
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+	ADJUST_KERNEL_CR3 \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+#define THIS_CPU_user_pcid_flush_mask   \
+	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask
+
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+	mov	%cr3, \scratch_reg
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * Test if the ASID needs a flush.
+	 */
+	movq	\scratch_reg, \scratch_reg2
+	andq	$(0x7FF), \scratch_reg		/* mask ASID */
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	/* Flush needed, clear the bit */
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	movq	\scratch_reg2, \scratch_reg
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	movq	\scratch_reg2, \scratch_reg
+	SET_NOFLUSH_BIT \scratch_reg
+
+.Lwrcr3_\@:
+	/* Flip the PGD and ASID to the user version */
+	orq     $(PTI_SWITCH_MASK), \scratch_reg
+	mov	\scratch_reg, %cr3
+.Lend_\@:
+.endm
+
+.macro SWITCH_TO_USER_CR3_STACK	scratch_reg:req
+	pushq	%rax
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
+	popq	%rax
+.endm
+
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
+	movq	%cr3, \scratch_reg
+	movq	\scratch_reg, \save_reg
+	/*
+	 * Is the "switch mask" all zero?  That means that both of
+	 * these are zero:
+	 *
+	 *	1. The user/kernel PCID bit, and
+	 *	2. The user/kernel "bit" that points CR3 to the
+	 *	   bottom half of the 8k PGD
+	 *
+	 * That indicates a kernel CR3 value, not a user CR3.
+	 */
+	testq	$(PTI_SWITCH_MASK), \scratch_reg
+	jz	.Ldone_\@
+
+	ADJUST_KERNEL_CR3 \scratch_reg
+	movq	\scratch_reg, %cr3
+
+.Ldone_\@:
+.endm
+
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+
+	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID
+
+	/*
+	 * KERNEL pages can always resume with NOFLUSH as we do
+	 * explicit flushes.
+	 */
+	bt	$X86_CR3_PTI_SWITCH_BIT, \save_reg
+	jnc	.Lnoflush_\@
+
+	/*
+	 * Check if there's a pending flush for the user ASID we're
+	 * about to set.
+	 */
+	movq	\save_reg, \scratch_reg
+	andq	$(0x7FF), \scratch_reg
+	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jnc	.Lnoflush_\@
+
+	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
+	jmp	.Lwrcr3_\@
+
+.Lnoflush_\@:
+	SET_NOFLUSH_BIT \save_reg
+
+.Lwrcr3_\@:
+	/*
+	 * The CR3 write could be avoided when not changing its value,
+	 * but would require a CR3 read *and* a scratch register.
+	 */
+	movq	\save_reg, %cr3
+.Lend_\@:
+.endm
+
+#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */
+
+.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+.endm
+.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+.endm
+.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+.endm
+.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
+.endm
+.macro RESTORE_CR3 scratch_reg:req save_reg:req
+.endm
+
+#endif
+
 #endif /* CONFIG_X86_64 */
 
 /*
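The PGD-half and PCID switching that these macros implement in assembly can be summarized in C. The sketch below is illustrative only: the constants mirror the ones defined above, and the exact bit positions (the PTI switch bit and the NOFLUSH bit) are assumptions taken from this patch series rather than a stable ABI.

#include <stdint.h>

#define PAGE_SHIFT		12
#define CR3_PTI_SWITCH_BIT	11   /* user/kernel PCID bit (assumption) */
#define CR3_PCID_NOFLUSH_BIT	63   /* skip TLB flush on this CR3 write */

#define PTI_SWITCH_MASK \
	((1ULL << PAGE_SHIFT) | (1ULL << CR3_PTI_SWITCH_BIT))

/* Point CR3 at the kernel half of the 8k PGD, kernel ASID. */
static inline uint64_t to_kernel_cr3(uint64_t cr3)
{
	return cr3 & ~PTI_SWITCH_MASK;
}

/* Point CR3 at the user half of the 8k PGD, user ASID, optionally
 * telling the CPU that no TLB flush is needed for this write. */
static inline uint64_t to_user_cr3(uint64_t cr3, int tlb_flush_needed)
{
	cr3 |= PTI_SWITCH_MASK;
	if (!tlb_flush_needed)
		cr3 |= 1ULL << CR3_PCID_NOFLUSH_BIT;
	return cr3;
}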
@@ -23,7 +23,6 @@
 #include <asm/segment.h>
 #include <asm/cache.h>
 #include <asm/errno.h>
-#include "calling.h"
 #include <asm/asm-offsets.h>
 #include <asm/msr.h>
 #include <asm/unistd.h>
@@ -40,6 +39,8 @@
 #include <asm/frame.h>
 #include <linux/err.h>
 
+#include "calling.h"
+
 .code64
 .section .entry.text, "ax"
 
@@ -164,6 +165,9 @@ ENTRY(entry_SYSCALL_64_trampoline)
 	/* Stash the user RSP. */
 	movq	%rsp, RSP_SCRATCH
 
+	/* Note: using %rsp as a scratch reg. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	/* Load the top of the task stack into RSP */
 	movq	CPU_ENTRY_AREA_tss + TSS_sp1 + CPU_ENTRY_AREA, %rsp
 
@@ -203,6 +207,10 @@ ENTRY(entry_SYSCALL_64)
 	 */
 
 	swapgs
+	/*
+	 * This path is not taken when PAGE_TABLE_ISOLATION is disabled so it
+	 * is not required to switch CR3.
+	 */
 	movq	%rsp, PER_CPU_VAR(rsp_scratch)
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
@@ -399,6 +407,7 @@ syscall_return_via_sysret:
 	 * We are on the trampoline stack.  All regs except RDI are live.
 	 * We can do future final exit work right here.
 	 */
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 
 	popq	%rdi
 	popq	%rsp
@@ -736,6 +745,8 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode)
 	 * We can do future final exit work right here.
 	 */
 
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+
 	/* Restore RDI. */
 	popq	%rdi
 	SWAPGS
@@ -818,7 +829,9 @@ native_irq_return_ldt:
 	 */
 
 	pushq	%rdi				/* Stash user RDI */
-	SWAPGS
+	SWAPGS					/* to kernel GS */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi	/* to kernel CR3 */
+
 	movq	PER_CPU_VAR(espfix_waddr), %rdi
 	movq	%rax, (0*8)(%rdi)		/* user RAX */
 	movq	(1*8)(%rsp), %rax		/* user RIP */
@@ -834,7 +847,6 @@ native_irq_return_ldt:
 	/* Now RAX == RSP. */
 
 	andl	$0xffff0000, %eax		/* RAX = (RSP & 0xffff0000) */
-	popq	%rdi				/* Restore user RDI */
 
 	/*
 	 * espfix_stack[31:16] == 0.  The page tables are set up such that
@@ -845,7 +857,11 @@ native_irq_return_ldt:
 	 * still points to an RO alias of the ESPFIX stack.
 	 */
 	orq	PER_CPU_VAR(espfix_stack), %rax
-	SWAPGS
+
+	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
+	SWAPGS					/* to user GS */
+	popq	%rdi				/* Restore user RDI */
+
 	movq	%rax, %rsp
 	UNWIND_HINT_IRET_REGS offset=8
 
@@ -945,6 +961,8 @@ ENTRY(switch_to_thread_stack)
 	UNWIND_HINT_FUNC
 
 	pushq	%rdi
+	/* Need to switch before accessing the thread stack. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 	movq	%rsp, %rdi
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT sp_offset=16 sp_reg=ORC_REG_DI
@@ -1244,7 +1262,11 @@ ENTRY(paranoid_entry)
 	js	1f				/* negative -> in kernel */
 	SWAPGS
 	xorl	%ebx, %ebx
-1:	ret
+
+1:
+	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
+
+	ret
 END(paranoid_entry)
 
 /*
@@ -1266,6 +1288,7 @@ ENTRY(paranoid_exit)
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
+	RESTORE_CR3	scratch_reg=%rbx save_reg=%r14
 	SWAPGS_UNSAFE_STACK
 	jmp	.Lparanoid_exit_restore
 .Lparanoid_exit_no_swapgs:
@@ -1293,6 +1316,8 @@ ENTRY(error_entry)
 	 * from user mode due to an IRET fault.
 	 */
 	SWAPGS
+	/* We have user CR3.  Change to kernel CR3. */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 .Lerror_entry_from_usermode_after_swapgs:
 	/* Put us onto the real thread stack. */
@@ -1339,6 +1364,7 @@ ENTRY(error_entry)
 	 * .Lgs_change's error handler with kernel gsbase.
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	jmp .Lerror_entry_done
 
 .Lbstep_iret:
@@ -1348,10 +1374,11 @@ ENTRY(error_entry)
 
 .Lerror_bad_iret:
 	/*
-	 * We came from an IRET to user mode, so we have user gsbase.
-	 * Switch to kernel gsbase:
+	 * We came from an IRET to user mode, so we have user
+	 * gsbase and CR3.  Switch to kernel gsbase and CR3:
 	 */
 	SWAPGS
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
@@ -1383,6 +1410,10 @@ END(error_exit)
 /*
  * Runs on exception stack.  Xen PV does not go through this path at all,
  * so we can use real assembly here.
+ *
+ * Registers:
+ *	%r14: Used to save/restore the CR3 of the interrupted context
+ *	      when PAGE_TABLE_ISOLATION is in use.  Do not clobber.
  */
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
@@ -1446,6 +1477,7 @@ ENTRY(nmi)
 
 	swapgs
 	cld
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
@@ -1698,6 +1730,8 @@ end_repeat_nmi:
 	movq	$-1, %rsi
 	call	do_nmi
 
+	RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
+
 	testl	%ebx, %ebx			/* swapgs needed? */
 	jnz	nmi_restore
 nmi_swapgs:
@@ -49,6 +49,10 @@
 ENTRY(entry_SYSENTER_compat)
 	/* Interrupts are off on entry. */
 	SWAPGS
+
+	/* We are about to clobber %rsp anyway, clobbering here is OK */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/*
@@ -186,8 +190,13 @@ ENTRY(entry_SYSCALL_compat)
 	/* Interrupts are off on entry. */
 	swapgs
 
-	/* Stash user ESP and switch to the kernel stack. */
+	/* Stash user ESP */
 	movl	%esp, %r8d
+
+	/* Use %rsp as scratch reg. User ESP is stashed in r8 */
+	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
+
+	/* Switch to the kernel stack */
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
@@ -256,10 +265,22 @@ sysret32_from_system_call:
 	 * when the system call started, which is already known to user
 	 * code.  We zero R8-R10 to avoid info leaks.
	 */
-	movq	RSP-ORIG_RAX(%rsp), %rsp
+
+	/*
+	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
+	 * on the process stack which is not mapped to userspace and
+	 * not readable after we SWITCH_TO_USER_CR3.  Delay the CR3
+	 * switch until after after the last reference to the process
+	 * stack.
+	 *
+	 * %r8/%r9 are zeroed before the sysret, thus safe to clobber.
+	 */
+	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=%r8 scratch_reg2=%r9
+
 	xorq	%r8, %r8
 	xorq	%r9, %r9
 	xorq	%r10, %r10
+	movq	RSP-ORIG_RAX(%rsp), %rsp
 	swapgs
 	sysretl
 END(entry_SYSCALL_compat)
@@ -344,14 +344,14 @@ int in_gate_area_no_mm(unsigned long addr)
  * vsyscalls but leave the page not present.  If so, we skip calling
  * this.
  */
-static void __init set_vsyscall_pgtable_user_bits(void)
+void __init set_vsyscall_pgtable_user_bits(pgd_t *root)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
 
-	pgd = pgd_offset_k(VSYSCALL_ADDR);
+	pgd = pgd_offset_pgd(root, VSYSCALL_ADDR);
 	set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
 	p4d = p4d_offset(pgd, VSYSCALL_ADDR);
 #if CONFIG_PGTABLE_LEVELS >= 5
@@ -373,7 +373,7 @@ void __init map_vsyscall(void)
 			     vsyscall_mode == NATIVE
 			     ? PAGE_KERNEL_VSYSCALL
 			     : PAGE_KERNEL_VVAR);
-		set_vsyscall_pgtable_user_bits();
+		set_vsyscall_pgtable_user_bits(swapper_pg_dir);
 	}
 
 	BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
@@ -3,16 +3,19 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 
+#include <asm/cpu_entry_area.h>
 #include <asm/perf_event.h>
+#include <asm/tlbflush.h>
 #include <asm/insn.h>
 
 #include "../perf_event.h"
 
+/* Waste a full page so it can be mapped into the cpu_entry_area */
+DEFINE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
 /* The size of a BTS record in bytes: */
 #define BTS_RECORD_SIZE		24
 
-#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*
@@ -279,17 +282,67 @@ void fini_debug_store_on_cpu(int cpu)
 
 static DEFINE_PER_CPU(void *, insn_buffer);
 
+static void ds_update_cea(void *cea, void *addr, size_t size, pgprot_t prot)
+{
+	unsigned long start = (unsigned long)cea;
+	phys_addr_t pa;
+	size_t msz = 0;
+
+	pa = virt_to_phys(addr);
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, pa += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, pa, prot);
+
+	/*
+	 * This is a cross-CPU update of the cpu_entry_area, we must shoot down
+	 * all TLB entries for it.
+	 */
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void ds_clear_cea(void *cea, size_t size)
+{
+	unsigned long start = (unsigned long)cea;
+	size_t msz = 0;
+
+	preempt_disable();
+	for (; msz < size; msz += PAGE_SIZE, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+
+	flush_tlb_kernel_range(start, start + size);
+	preempt_enable();
+}
+
+static void *dsalloc_pages(size_t size, gfp_t flags, int cpu)
+{
+	unsigned int order = get_order(size);
+	int node = cpu_to_node(cpu);
+	struct page *page;
+
+	page = __alloc_pages_node(node, flags | __GFP_ZERO, order);
+	return page ? page_address(page) : NULL;
+}
+
+static void dsfree_pages(const void *buffer, size_t size)
+{
+	if (buffer)
+		free_pages((unsigned long)buffer, get_order(size));
+}
+
 static int alloc_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max;
-	void *buffer, *ibuffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	size_t bsiz = x86_pmu.pebs_buffer_size;
+	int max, node = cpu_to_node(cpu);
+	void *buffer, *ibuffer, *cea;
 
 	if (!x86_pmu.pebs)
 		return 0;
 
-	buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+	buffer = dsalloc_pages(bsiz, GFP_KERNEL, cpu);
 	if (unlikely(!buffer))
 		return -ENOMEM;
 
@@ -300,25 +353,27 @@ static int alloc_pebs_buffer(int cpu)
 	if (x86_pmu.intel_cap.pebs_format < 2) {
 		ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
 		if (!ibuffer) {
-			kfree(buffer);
+			dsfree_pages(buffer, bsiz);
 			return -ENOMEM;
 		}
 		per_cpu(insn_buffer, cpu) = ibuffer;
 	}
 
-	max = x86_pmu.pebs_buffer_size / x86_pmu.pebs_record_size;
-
-	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_pebs_vaddr = buffer;
+	/* Update the cpu entry area mapping */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds->pebs_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, bsiz, PAGE_KERNEL);
 	ds->pebs_index = ds->pebs_buffer_base;
-	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-		max * x86_pmu.pebs_record_size;
-
+	max = x86_pmu.pebs_record_size * (bsiz / x86_pmu.pebs_record_size);
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base + max;
 	return 0;
 }
 
 static void release_pebs_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.pebs)
 		return;
@@ -326,73 +381,70 @@ static void release_pebs_buffer(int cpu)
 	kfree(per_cpu(insn_buffer, cpu));
 	per_cpu(insn_buffer, cpu) = NULL;
 
-	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.pebs_buffer;
+	ds_clear_cea(cea, x86_pmu.pebs_buffer_size);
 	ds->pebs_buffer_base = 0;
+	dsfree_pages(hwev->ds_pebs_vaddr, x86_pmu.pebs_buffer_size);
+	hwev->ds_pebs_vaddr = NULL;
 }
 
 static int alloc_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-	int node = cpu_to_node(cpu);
-	int max, thresh;
-	void *buffer;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *buffer, *cea;
+	int max;
 
 	if (!x86_pmu.bts)
 		return 0;
 
-	buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+	buffer = dsalloc_pages(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, cpu);
 	if (unlikely(!buffer)) {
 		WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
 		return -ENOMEM;
 	}
 
-	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-	thresh = max / 16;
-
-	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	hwev->ds_bts_vaddr = buffer;
+	/* Update the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds->bts_buffer_base = (unsigned long) cea;
+	ds_update_cea(cea, buffer, BTS_BUFFER_SIZE, PAGE_KERNEL);
 	ds->bts_index = ds->bts_buffer_base;
-	ds->bts_absolute_maximum = ds->bts_buffer_base +
-		max * BTS_RECORD_SIZE;
-	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-		thresh * BTS_RECORD_SIZE;
-
+	max = BTS_RECORD_SIZE * (BTS_BUFFER_SIZE / BTS_RECORD_SIZE);
+	ds->bts_absolute_maximum = ds->bts_buffer_base + max;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum - (max / 16);
 	return 0;
 }
 
 static void release_bts_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	struct cpu_hw_events *hwev = per_cpu_ptr(&cpu_hw_events, cpu);
+	struct debug_store *ds = hwev->ds;
+	void *cea;
 
 	if (!ds || !x86_pmu.bts)
 		return;
 
-	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	/* Clear the fixmap */
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers.bts_buffer;
+	ds_clear_cea(cea, BTS_BUFFER_SIZE);
 	ds->bts_buffer_base = 0;
+	dsfree_pages(hwev->ds_bts_vaddr, BTS_BUFFER_SIZE);
+	hwev->ds_bts_vaddr = NULL;
 }
 
 static int alloc_ds_buffer(int cpu)
 {
-	int node = cpu_to_node(cpu);
-	struct debug_store *ds;
-
-	ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
-	if (unlikely(!ds))
-		return -ENOMEM;
+	struct debug_store *ds = &get_cpu_entry_area(cpu)->cpu_debug_store;
 
+	memset(ds, 0, sizeof(*ds));
 	per_cpu(cpu_hw_events, cpu).ds = ds;
-
 	return 0;
 }
 
 static void release_ds_buffer(int cpu)
 {
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
 	per_cpu(cpu_hw_events, cpu).ds = NULL;
-	kfree(ds);
 }
 
 void release_ds_buffers(void)
@@ -14,6 +14,8 @@
 
 #include <linux/perf_event.h>
 
+#include <asm/intel_ds.h>
+
 /* To enable MSR tracing please use the generic trace points. */
 
 /*
@@ -77,8 +79,6 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS		8
 #define PEBS_COUNTER_MASK	((1ULL << MAX_PEBS_EVENTS) - 1)
 
 /*
@@ -95,23 +95,6 @@ struct amd_nb {
 	PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR | \
 	PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER)
 
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 #define PEBS_REGS \
 	(PERF_REG_X86_AX | \
 	 PERF_REG_X86_BX | \
@@ -216,6 +199,8 @@ struct cpu_hw_events {
 	 * Intel DebugStore bits
 	 */
 	struct debug_store	*ds;
+	void			*ds_pebs_vaddr;
+	void			*ds_bts_vaddr;
 	u64			pebs_enabled;
 	int			n_pebs;
 	int			n_large_pebs;
@@ -140,7 +140,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".popsection\n"						\
 	".pushsection .altinstr_replacement, \"ax\"\n"		\
 	ALTINSTR_REPLACEMENT(newinstr, feature, 1)		\
-	".popsection"
+	".popsection\n"
 
 #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
 	OLDINSTR_2(oldinstr, 1, 2)					\
@@ -151,7 +151,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	".pushsection .altinstr_replacement, \"ax\"\n"			\
 	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
 	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
-	".popsection"
+	".popsection\n"
 
 /*
  * Alternative instructions for different CPU types or capabilities.
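Why the trailing newline above matters: ALTERNATIVE() expands to one long asm string that callers concatenate with their own instructions, so each directive must terminate its own line or the assembler sees two statements glued together. A minimal illustration with a simplified macro (not the kernel's full definition):

/* Without the '\n', ".popsection" and the caller's next token
 * merge into one bogus assembler statement ("popsectionnop"). */
#define POPSECTION_BAD   ".popsection"
#define POPSECTION_GOOD  ".popsection\n"

void example(void)
{
	asm volatile(
		".pushsection .text\n"
		POPSECTION_GOOD   /* swap in POPSECTION_BAD here and the
				   * assembler rejects the merged line */
		"nop\n");
}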
@@ -5,6 +5,7 @@
 
 #include <linux/percpu-defs.h>
 #include <asm/processor.h>
+#include <asm/intel_ds.h>
 
 /*
  * cpu_entry_area is a percpu region that contains things needed by the CPU
@@ -40,6 +41,18 @@ struct cpu_entry_area {
 	 */
 	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
 #endif
+#ifdef CONFIG_CPU_SUP_INTEL
+	/*
+	 * Per CPU debug store for Intel performance monitoring. Wastes a
+	 * full page at the moment.
+	 */
+	struct debug_store cpu_debug_store;
+	/*
+	 * The actual PEBS/BTS buffers must be mapped to user space
+	 * Reserve enough fixmap PTEs.
+	 */
+	struct debug_store_buffers cpu_debug_buffers;
+#endif
 };
 
 #define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
@@ -197,11 +197,12 @@
 #define X86_FEATURE_CAT_L3		( 7*32+ 4) /* Cache Allocation Technology L3 */
 #define X86_FEATURE_CAT_L2		( 7*32+ 5) /* Cache Allocation Technology L2 */
 #define X86_FEATURE_CDP_L3		( 7*32+ 6) /* Code and Data Prioritization L3 */
+#define X86_FEATURE_INVPCID_SINGLE	( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE		( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK	( 7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_SME			( 7*32+10) /* AMD Secure Memory Encryption */
-
+#define X86_FEATURE_PTI			( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
@@ -340,5 +341,6 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN		X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
 
 #endif /* _ASM_X86_CPUFEATURES_H */
@@ -21,6 +21,8 @@ static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *in
 
 	desc->type		= (info->read_exec_only ^ 1) << 1;
 	desc->type	       |= info->contents << 2;
+	/* Set the ACCESS bit so it can be mapped RO */
+	desc->type	       |= 1;
 
 	desc->s			= 1;
 	desc->dpl		= 0x3;
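The point of pre-setting the ACCESSED bit: the CPU writes that bit into a descriptor on first use, which would fault if the LDT is mapped read-only. A standalone sketch of the type-field construction above (field layout per the x86 descriptor format; the helper itself is hypothetical):

#include <stdint.h>

/* Descriptor type field: bit 0 ACCESSED, bit 1 readable/writable,
 * bits 2-3 from the caller-supplied "contents". */
static inline uint8_t ldt_type(int read_exec_only, int contents)
{
	uint8_t type = 0;

	type |= (read_exec_only ^ 1) << 1;
	type |= contents << 2;
	type |= 1;	/* pre-set ACCESSED so the CPU never needs to
			 * write it into a read-only LDT mapping */
	return type;
}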
@@ -44,6 +44,12 @@
 # define DISABLE_LA57	(1<<(X86_FEATURE_LA57 & 31))
 #endif
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define DISABLE_PTI		0
+#else
+# define DISABLE_PTI		(1 << (X86_FEATURE_PTI & 31))
+#endif
+
 /*
  * Make sure to add features to the correct mask
  */
@@ -54,7 +60,7 @@
 #define DISABLED_MASK4	(DISABLE_PCID)
 #define DISABLED_MASK5	0
 #define DISABLED_MASK6	0
-#define DISABLED_MASK7	0
+#define DISABLED_MASK7	(DISABLE_PTI)
 #define DISABLED_MASK8	0
 #define DISABLED_MASK9	(DISABLE_MPX)
 #define DISABLED_MASK10	0
arch/x86/include/asm/intel_ds.h: new file, 36 lines
@@ -0,0 +1,36 @@
+#ifndef _ASM_INTEL_DS_H
+#define _ASM_INTEL_DS_H
+
+#include <linux/percpu-defs.h>
+
+#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		8
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+} __aligned(PAGE_SIZE);
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct debug_store, cpu_debug_store);
+
+struct debug_store_buffers {
+	char	bts_buffer[BTS_BUFFER_SIZE];
+	char	pebs_buffer[PEBS_BUFFER_SIZE];
+};
+
+#endif
@@ -50,10 +50,33 @@ struct ldt_struct {
 	 * call gates.  On native, we could merge the ldt_struct and LDT
 	 * allocations, but it's not worth trying to optimize.
 	 */
-	struct desc_struct *entries;
-	unsigned int nr_entries;
+	struct desc_struct	*entries;
+	unsigned int		nr_entries;
+
+	/*
+	 * If PTI is in use, then the entries array is not mapped while we're
+	 * in user mode.  The whole array will be aliased at the addressed
+	 * given by ldt_slot_va(slot).  We use two slots so that we can allocate
+	 * and map, and enable a new LDT without invalidating the mapping
+	 * of an older, still-in-use LDT.
+	 *
+	 * slot will be -1 if this LDT doesn't have an alias mapping.
+	 */
+	int			slot;
 };
 
+/* This is a multiple of PAGE_SIZE. */
+#define LDT_SLOT_STRIDE (LDT_ENTRIES * LDT_ENTRY_SIZE)
+
+static inline void *ldt_slot_va(int slot)
+{
+#ifdef CONFIG_X86_64
+	return (void *)(LDT_BASE_ADDR + LDT_SLOT_STRIDE * slot);
+#else
+	BUG();
+#endif
+}
+
 /*
  * Used for LDT copy/destruction.
  */
@@ -64,6 +87,7 @@ static inline void init_new_context_ldt(struct mm_struct *mm)
 }
 int ldt_dup_context(struct mm_struct *oldmm, struct mm_struct *mm);
 void destroy_context_ldt(struct mm_struct *mm);
+void ldt_arch_exit_mmap(struct mm_struct *mm);
 #else	/* CONFIG_MODIFY_LDT_SYSCALL */
 static inline void init_new_context_ldt(struct mm_struct *mm) { }
 static inline int ldt_dup_context(struct mm_struct *oldmm,
@@ -71,7 +95,8 @@ static inline int ldt_dup_context(struct mm_struct *oldmm,
 {
 	return 0;
 }
-static inline void destroy_context_ldt(struct mm_struct *mm) {}
+static inline void destroy_context_ldt(struct mm_struct *mm) { }
+static inline void ldt_arch_exit_mmap(struct mm_struct *mm) { }
 #endif
 
 static inline void load_mm_ldt(struct mm_struct *mm)
@@ -96,10 +121,31 @@ static inline void load_mm_ldt(struct mm_struct *mm)
 	 * that we can see.
 	 */
 
-	if (unlikely(ldt))
-		set_ldt(ldt->entries, ldt->nr_entries);
-	else
+	if (unlikely(ldt)) {
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			if (WARN_ON_ONCE((unsigned long)ldt->slot > 1)) {
+				/*
+				 * Whoops -- either the new LDT isn't mapped
+				 * (if slot == -1) or is mapped into a bogus
+				 * slot (if slot > 1).
+				 */
+				clear_LDT();
+				return;
+			}
+
+			/*
+			 * If page table isolation is enabled, ldt->entries
+			 * will not be mapped in the userspace pagetables.
+			 * Tell the CPU to access the LDT through the alias
+			 * at ldt_slot_va(ldt->slot).
+			 */
+			set_ldt(ldt_slot_va(ldt->slot), ldt->nr_entries);
+		} else {
+			set_ldt(ldt->entries, ldt->nr_entries);
+		}
+	} else {
 		clear_LDT();
+	}
 #else
 	clear_LDT();
 #endif
@@ -194,6 +240,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
 static inline void arch_exit_mmap(struct mm_struct *mm)
 {
 	paravirt_arch_exit_mmap(mm);
+	ldt_arch_exit_mmap(mm);
 }
 
 #ifdef CONFIG_X86_64
@@ -30,6 +30,17 @@ static inline void paravirt_release_p4d(unsigned long pfn) {}
  */
 extern gfp_t __userpte_alloc_gfp;
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Instead of one PGD, we acquire two PGDs.  Being order-1, it is
+ * both 8k in size and 8k-aligned.  That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+#define PGD_ALLOCATION_ORDER 1
+#else
+#define PGD_ALLOCATION_ORDER 0
+#endif
+
 /*
  * Allocate and free page tables.
 */
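The order-1 allocation is what makes the bit-12 trick work: an 8k-aligned, 8k-sized block always has bit 12 clear in its base address, so setting or clearing that bit moves a pointer between the kernel and user 4k halves. A userspace-style sketch of the arithmetic (illustrative only; the kernel allocates with __get_free_pages() and operates on pgd_t pointers):

#include <stdint.h>
#include <stdlib.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	/* Two pages, aligned to their combined size, like an order-1
	 * page allocation in the kernel. */
	void *pgd = aligned_alloc(2 * PAGE_SIZE, 2 * PAGE_SIZE);
	if (!pgd)
		return 1;

	uintptr_t kernel_half = (uintptr_t)pgd;           /* bit 12 clear */
	uintptr_t user_half   = kernel_half | PAGE_SIZE;  /* bit 12 set   */

	/* Flipping bit 12 toggles between the two 4k halves. */
	uintptr_t back = user_half & ~PAGE_SIZE;

	free(pgd);
	return back == kernel_half ? 0 : 1;
}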
@@ -28,6 +28,7 @@ extern pgd_t early_top_pgt[PTRS_PER_PGD];
 int __init __early_make_pgtable(unsigned long address, pmdval_t pmd);
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user);
 void ptdump_walk_pgd_level_checkwx(void);
 
 #ifdef CONFIG_DEBUG_WX
@@ -846,7 +847,12 @@ static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address)
 
 static inline int p4d_bad(p4d_t p4d)
 {
-	return (p4d_flags(p4d) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
+	unsigned long ignore_flags = _KERNPG_TABLE | _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (p4d_flags(p4d) & ~ignore_flags) != 0;
 }
 #endif  /* CONFIG_PGTABLE_LEVELS > 3 */
 
@@ -880,7 +886,12 @@ static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
 
 static inline int pgd_bad(pgd_t pgd)
 {
-	return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
+	unsigned long ignore_flags = _PAGE_USER;
+
+	if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		ignore_flags |= _PAGE_NX;
+
+	return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
 }
 
 static inline int pgd_none(pgd_t pgd)
@@ -909,7 +920,11 @@ static inline int pgd_none(pgd_t pgd)
  * pgd_offset() returns a (pgd_t *)
  * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
  */
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
+#define pgd_offset_pgd(pgd, address) (pgd + pgd_index((address)))
+/*
+ * a shortcut to get a pgd_t in a given mm
+ */
+#define pgd_offset(mm, address) pgd_offset_pgd((mm)->pgd, (address))
 /*
  * a shortcut which implies the use of the kernel's pgd, instead
  * of a process's
@@ -1111,7 +1126,14 @@ static inline int pud_write(pud_t pud)
  */
 static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
 {
-	memcpy(dst, src, count * sizeof(pgd_t));
+	memcpy(dst, src, count * sizeof(pgd_t));
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+	/* Clone the user space pgd as well */
+	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
+	       count * sizeof(pgd_t));
+#endif
 }
 
 #define PTE_SHIFT ilog2(PTRS_PER_PTE)
@@ -131,9 +131,97 @@ static inline pud_t native_pudp_get_and_clear(pud_t *xp)
 #endif
 }
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * All top-level PAGE_TABLE_ISOLATION page tables are order-1 pages
+ * (8k-aligned and 8k in size).  The kernel one is at the beginning 4k and
+ * the user one is in the last 4k.  To switch between them, you
+ * just need to flip the 12th bit in their addresses.
+ */
+#define PTI_PGTABLE_SWITCH_BIT	PAGE_SHIFT
+
+/*
+ * This generates better code than the inline assembly in
+ * __set_bit().
+ */
+static inline void *ptr_set_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr |= BIT(bit);
+	return (void *)__ptr;
+}
+static inline void *ptr_clear_bit(void *ptr, int bit)
+{
+	unsigned long __ptr = (unsigned long)ptr;
+
+	__ptr &= ~BIT(bit);
+	return (void *)__ptr;
+}
+
+static inline pgd_t *kernel_to_user_pgdp(pgd_t *pgdp)
+{
+	return ptr_set_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline pgd_t *user_to_kernel_pgdp(pgd_t *pgdp)
+{
+	return ptr_clear_bit(pgdp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *kernel_to_user_p4dp(p4d_t *p4dp)
+{
+	return ptr_set_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+
+static inline p4d_t *user_to_kernel_p4dp(p4d_t *p4dp)
+{
+	return ptr_clear_bit(p4dp, PTI_PGTABLE_SWITCH_BIT);
+}
+#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+/*
+ * Page table pages are page-aligned.  The lower half of the top
+ * level is used for userspace and the top half for the kernel.
+ *
+ * Returns true for parts of the PGD that map userspace and
+ * false for the parts that map the kernel.
+ */
+static inline bool pgdp_maps_userspace(void *__ptr)
+{
+	unsigned long ptr = (unsigned long)__ptr;
+
+	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
+}
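A quick standalone sanity check of pgdp_maps_userspace() above (illustrative only; the 4096-byte page and 8-byte entry sizes are the usual x86-64 values, assumed here):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

/* Same predicate as the patch, minus the kernel types. */
static bool pgdp_maps_userspace(void *__ptr)
{
	uintptr_t ptr = (uintptr_t)__ptr;

	return (ptr & ~PAGE_MASK) < (PAGE_SIZE / 2);
}

int main(void)
{
	static char buf[2 * PAGE_SIZE];
	/* Pretend 'base' is a page-aligned PGD. */
	char *base = (char *)(((uintptr_t)buf + PAGE_SIZE - 1) & PAGE_MASK);

	printf("entry 0:   %d\n", pgdp_maps_userspace(base));            /* 1 */
	printf("entry 255: %d\n", pgdp_maps_userspace(base + 255 * 8));  /* 1 */
	printf("entry 256: %d\n", pgdp_maps_userspace(base + 256 * 8));  /* 0 */
	return 0;
}

With 8-byte entries, indices 0-255 (offsets below 2048) cover the lower, userspace half of the address space, matching the comment above.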
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd);
+
+/*
+ * Take a PGD location (pgdp) and a pgd value that needs to be set there.
+ * Populates the user and returns the resulting PGD that must be set in
+ * the kernel copy of the page tables.
+ */
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return pgd;
+	return __pti_set_user_pgd(pgdp, pgd);
+}
+#else
+static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	return pgd;
+}
+#endif
+
 static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
 {
+#if defined(CONFIG_PAGE_TABLE_ISOLATION) && !defined(CONFIG_X86_5LEVEL)
+	p4dp->pgd = pti_set_user_pgd(&p4dp->pgd, p4d.pgd);
+#else
 	*p4dp = p4d;
+#endif
 }
 
 static inline void native_p4d_clear(p4d_t *p4d)
@@ -147,7 +235,11 @@ static inline void native_p4d_clear(p4d_t *p4d)
 
 static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
 {
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	*pgdp = pti_set_user_pgd(pgdp, pgd);
+#else
 	*pgdp = pgd;
+#endif
 }
 
 static inline void native_pgd_clear(pgd_t *pgd)
@@ -75,17 +75,27 @@ typedef struct { pteval_t pte; } pte_t;
 #define PGDIR_SIZE	(_AC(1, UL) << PGDIR_SHIFT)
 #define PGDIR_MASK	(~(PGDIR_SIZE - 1))
 
-/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
+/*
+ * See Documentation/x86/x86_64/mm.txt for a description of the memory map.
+ *
+ * Be very careful vs. KASLR when changing anything here. The KASLR address
+ * range must not overlap with anything except the KASAN shadow area, which
+ * is correct as KASAN disables KASLR.
+ */
 #define MAXMEM			_AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
 
 #ifdef CONFIG_X86_5LEVEL
-# define VMALLOC_SIZE_TB	_AC(16384, UL)
-# define __VMALLOC_BASE		_AC(0xff92000000000000, UL)
+# define VMALLOC_SIZE_TB	_AC(12800, UL)
+# define __VMALLOC_BASE		_AC(0xffa0000000000000, UL)
 # define __VMEMMAP_BASE		_AC(0xffd4000000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-112, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #else
 # define VMALLOC_SIZE_TB	_AC(32, UL)
 # define __VMALLOC_BASE		_AC(0xffffc90000000000, UL)
 # define __VMEMMAP_BASE		_AC(0xffffea0000000000, UL)
+# define LDT_PGD_ENTRY		_AC(-3, UL)
+# define LDT_BASE_ADDR		(LDT_PGD_ENTRY << PGDIR_SHIFT)
 #endif
 
 #ifdef CONFIG_RANDOMIZE_MEMORY
@@ -100,13 +110,13 @@ typedef struct { pteval_t pte; } pte_t;
 
 #define MODULES_VADDR		(__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-/* The module sections ends with the start of the fixmap */
-#define MODULES_END		__fix_to_virt(__end_of_fixed_addresses + 1)
+#define MODULES_END		_AC(0xffffffffff000000, UL)
 #define MODULES_LEN		(MODULES_END - MODULES_VADDR)
 
 #define ESPFIX_PGD_ENTRY	_AC(-2, UL)
 #define ESPFIX_BASE_ADDR	(ESPFIX_PGD_ENTRY << P4D_SHIFT)
 
-#define CPU_ENTRY_AREA_PGD	_AC(-3, UL)
+#define CPU_ENTRY_AREA_PGD	_AC(-4, UL)
 #define CPU_ENTRY_AREA_BASE	(CPU_ENTRY_AREA_PGD << P4D_SHIFT)
 
 #define EFI_VA_START		( -4 * (_AC(1, UL) << 30))
@@ -38,6 +38,11 @@
 #define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define X86_CR3_PTI_SWITCH_BIT	11
+#endif
+
 #else
 /*
  * CR3_ADDR_MASK needs at least bits 31:5 set on PAE systems, and we save
@@ -851,13 +851,22 @@ static inline void spin_lock_prefetch(const void *x)
 
 #else
 /*
- * User space process size. 47bits minus one guard page.  The guard
- * page is necessary on Intel CPUs: if a SYSCALL instruction is at
- * the highest possible canonical userspace address, then that
- * syscall will enter the kernel with a non-canonical return
- * address, and SYSRET will explode dangerously.  We avoid this
- * particular problem by preventing anything from being mapped
- * at the maximum canonical address.
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
  */
 #define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
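The arithmetic is easy to check by hand (illustrative only; 47 is __VIRTUAL_MASK_SHIFT for 4-level paging and 4096 is PAGE_SIZE, both assumed here):

#include <stdio.h>

int main(void)
{
	/* One guard page below the canonical boundary. */
	unsigned long task_size_max = (1UL << 47) - 4096;

	printf("TASK_SIZE_MAX = %#lx\n", task_size_max); /* 0x7ffffffff000 */
	return 0;
}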
arch/x86/include/asm/pti.h (new file, 14 lines)
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef _ASM_X86_PTI_H
+#define _ASM_X86_PTI_H
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+extern void pti_init(void);
+extern void pti_check_boottime_disable(void);
+#else
+static inline void pti_check_boottime_disable(void) { }
+#endif
+
+#endif /* __ASSEMBLY__ */
+#endif /* _ASM_X86_PTI_H */
@@ -10,38 +10,90 @@
 #include <asm/special_insns.h>
 #include <asm/smp.h>
 #include <asm/invpcid.h>
+#include <asm/pti.h>
+#include <asm/processor-flags.h>
 
-static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
-{
-	/*
-	 * Bump the generation count.  This also serves as a full barrier
-	 * that synchronizes with switch_mm(): callers are required to order
-	 * their read of mm_cpumask after their writes to the paging
-	 * structures.
-	 */
-	return atomic64_inc_return(&mm->context.tlb_gen);
-}
+/*
+ * The x86 feature is called PCID (Process Context IDentifier). It is similar
+ * to what is traditionally called ASID on the RISC processors.
+ *
+ * We don't use the traditional ASID implementation, where each process/mm gets
+ * its own ASID and flush/restart when we run out of ASID space.
+ *
+ * Instead we have a small per-cpu array of ASIDs and cache the last few mm's
+ * that came by on this CPU, allowing cheaper switch_mm between processes on
+ * this CPU.
+ *
+ * We end up with different spaces for different things. To avoid confusion we
+ * use different names for each of them:
+ *
+ * ASID  - [0, TLB_NR_DYN_ASIDS-1]
+ *         the canonical identifier for an mm
+ *
+ * kPCID - [1, TLB_NR_DYN_ASIDS]
+ *         the value we write into the PCID part of CR3; corresponds to the
+ *         ASID+1, because PCID 0 is special.
+ *
+ * uPCID - [2048 + 1, 2048 + TLB_NR_DYN_ASIDS]
+ *         for KPTI each mm has two address spaces and thus needs two
+ *         PCID values, but we can still do with a single ASID denomination
+ *         for each mm. Corresponds to kPCID + 2048.
+ *
+ */
 
 /* There are 12 bits of space for ASIDS in CR3 */
 #define CR3_HW_ASID_BITS		12
+
 /*
  * When enabled, PAGE_TABLE_ISOLATION consumes a single bit for
  * user/kernel switches
  */
-#define PTI_CONSUMED_ASID_BITS		0
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+# define PTI_CONSUMED_PCID_BITS	1
+#else
+# define PTI_CONSUMED_PCID_BITS	0
+#endif
 
-#define CR3_AVAIL_ASID_BITS	(CR3_HW_ASID_BITS - PTI_CONSUMED_ASID_BITS)
+#define CR3_AVAIL_PCID_BITS	(X86_CR3_PCID_BITS - PTI_CONSUMED_PCID_BITS)
+
 /*
  * ASIDs are zero-based: 0->MAX_AVAIL_ASID are valid. -1 below to account
- * for them being zero-based.  Another -1 is because ASID 0 is reserved for
+ * for them being zero-based.  Another -1 is because PCID 0 is reserved for
  * use by non-PCID-aware users.
  */
-#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_ASID_BITS) - 2)
+#define MAX_ASID_AVAILABLE ((1 << CR3_AVAIL_PCID_BITS) - 2)
+
+/*
+ * 6 because 6 should be plenty and struct tlb_state will fit in two cache
+ * lines.
+ */
+#define TLB_NR_DYN_ASIDS	6
 
+/*
+ * Given @asid, compute kPCID
+ */
 static inline u16 kern_pcid(u16 asid)
 {
 	VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	/*
+	 * Make sure that the dynamic ASID space does not confict with the
+	 * bit we are using to switch between user and kernel ASIDs.
+	 */
+	BUILD_BUG_ON(TLB_NR_DYN_ASIDS >= (1 << X86_CR3_PTI_SWITCH_BIT));
+
 	/*
+	 * The ASID being passed in here should have respected the
+	 * MAX_ASID_AVAILABLE and thus never have the switch bit set.
+	 */
+	VM_WARN_ON_ONCE(asid & (1 << X86_CR3_PTI_SWITCH_BIT));
+#endif
+	/*
+	 * The dynamically-assigned ASIDs that get passed in are small
+	 * (<TLB_NR_DYN_ASIDS).  They never have the high switch bit set,
+	 * so do not bother to clear it.
+	 *
 	 * If PCID is on, ASID-aware code paths put the ASID+1 into the
 	 * PCID bits.  This serves two purposes.  It prevents a nasty
 	 * situation in which PCID-unaware code saves CR3, loads some other
@@ -53,6 +105,18 @@ static inline u16 kern_pcid(u16 asid)
 	return asid + 1;
 }
 
+/*
+ * Given @asid, compute uPCID
+ */
+static inline u16 user_pcid(u16 asid)
+{
+	u16 ret = kern_pcid(asid);
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	ret |= 1 << X86_CR3_PTI_SWITCH_BIT;
+#endif
+	return ret;
+}
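A worked example of the mapping described above (illustrative arithmetic only, assuming the switch bit is 11 as defined earlier, so uPCID = kPCID + 2048, and MAX_ASID_AVAILABLE = (1 << 11) - 2 = 2046):

#include <stdint.h>
#include <stdio.h>

#define X86_CR3_PTI_SWITCH_BIT 11

/* Mirrors kern_pcid()/user_pcid() from the hunk above, minus the asserts. */
static uint16_t kern_pcid(uint16_t asid) { return asid + 1; }
static uint16_t user_pcid(uint16_t asid)
{
	return kern_pcid(asid) | (1u << X86_CR3_PTI_SWITCH_BIT);
}

int main(void)
{
	for (uint16_t asid = 0; asid < 6; asid++) /* TLB_NR_DYN_ASIDS == 6 */
		printf("ASID %u -> kPCID %u, uPCID %u\n",
		       asid, kern_pcid(asid), user_pcid(asid));
	return 0;
}

ASID 0 therefore yields kPCID 1 and uPCID 2049, matching the comment block at the top of this file.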
+
 struct pgd_t;
 static inline unsigned long build_cr3(pgd_t *pgd, u16 asid)
 {
@@ -95,12 +159,6 @@ static inline bool tlb_defer_switch_to_init_mm(void)
 	return !static_cpu_has(X86_FEATURE_PCID);
 }
 
-/*
- * 6 because 6 should be plenty and struct tlb_state will fit in
- * two cache lines.
- */
-#define TLB_NR_DYN_ASIDS	6
-
 struct tlb_context {
 	u64 ctx_id;
 	u64 tlb_gen;
@@ -134,6 +192,24 @@ struct tlb_state {
 	 */
 	bool is_lazy;
 
+	/*
+	 * If set we changed the page tables in such a way that we
+	 * needed an invalidation of all contexts (aka. PCIDs / ASIDs).
+	 * This tells us to go invalidate all the non-loaded ctxs[]
+	 * on the next context switch.
+	 *
+	 * The current ctx was kept up-to-date as it ran and does not
+	 * need to be invalidated.
+	 */
+	bool invalidate_other;
+
+	/*
+	 * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
+	 * the corresponding user PCID needs a flush next time we
+	 * switch to it; see SWITCH_TO_USER_CR3.
+	 */
+	unsigned short user_pcid_flush_mask;
+
 	/*
 	 * Access to this CR4 shadow and to H/W CR4 is protected by
 	 * disabling interrupts when modifying either one.
@@ -211,6 +287,14 @@ static inline unsigned long cr4_read_shadow(void)
 	return this_cpu_read(cpu_tlbstate.cr4);
 }
 
+/*
+ * Mark all other ASIDs as invalid, preserves the current.
+ */
+static inline void invalidate_other_asid(void)
+{
+	this_cpu_write(cpu_tlbstate.invalidate_other, true);
+}
+
 /*
  * Save some of cr4 feature set we're using (e.g.  Pentium 4MB
  * enable and PPro Global page enable), so that any CPU's that boot
@@ -230,19 +314,48 @@ static inline void cr4_set_bits_and_update_boot(unsigned long mask)
 
 extern void initialize_tlbstate_and_flush(void);
 
+/*
+ * Given an ASID, flush the corresponding user ASID.  We can delay this
+ * until the next time we switch to it.
+ *
+ * See SWITCH_TO_USER_CR3.
+ */
+static inline void invalidate_user_asid(u16 asid)
+{
+	/* There is no user ASID if address space separation is off */
+	if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
+		return;
+
+	/*
+	 * We only have a single ASID if PCID is off and the CR3
+	 * write will have flushed it.
+	 */
+	if (!cpu_feature_enabled(X86_FEATURE_PCID))
+		return;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	__set_bit(kern_pcid(asid),
+		  (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
+}
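The deferred flush above is just a per-cpu bitmask keyed by kPCID. A standalone sketch of the set-then-consume pattern (illustrative only; in the kernel the consuming side is the SWITCH_TO_USER_CR3 entry asm, not a C helper like the hypothetical needs_flush() here):

#include <stdbool.h>
#include <stdio.h>

static unsigned short user_pcid_flush_mask;

/* Mirrors invalidate_user_asid(): record that kPCID (asid+1) needs a flush. */
static void invalidate_user_asid(unsigned short asid)
{
	user_pcid_flush_mask |= 1u << (asid + 1);
}

/* Hypothetical stand-in for the test-and-clear done on the next user entry. */
static bool needs_flush(unsigned short asid)
{
	unsigned short bit = 1u << (asid + 1);
	bool ret = user_pcid_flush_mask & bit;

	user_pcid_flush_mask &= (unsigned short)~bit;
	return ret;
}

int main(void)
{
	invalidate_user_asid(2);
	printf("asid 2 flush? %d\n", needs_flush(2)); /* 1: flush once */
	printf("asid 2 again? %d\n", needs_flush(2)); /* 0: already consumed */
	return 0;
}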
+
 /*
  * flush the entire current user mapping
  */
 static inline void __native_flush_tlb(void)
 {
 	/*
-	 * If current->mm == NULL then we borrow a mm which may change during a
-	 * task switch and therefore we must not be preempted while we write CR3
-	 * back:
+	 * Preemption or interrupts must be disabled to protect the access
+	 * to the per CPU variable and to prevent being preempted between
+	 * read_cr3() and write_cr3().
 	 */
-	preempt_disable();
+	WARN_ON_ONCE(preemptible());
+
+	invalidate_user_asid(this_cpu_read(cpu_tlbstate.loaded_mm_asid));
+
+	/* If current->mm == NULL then the read_cr3() "borrows" an mm */
 	native_write_cr3(__native_read_cr3());
-	preempt_enable();
 }
@@ -256,6 +369,8 @@ static inline void __native_flush_tlb_global(void)
 		/*
 		 * Using INVPCID is considerably faster than a pair of writes
 		 * to CR4 sandwiched inside an IRQ flag save/restore.
+		 *
+		 * Note, this works with CR4.PCIDE=0 or 1.
 		 */
 		invpcid_flush_all();
 		return;
@@ -282,7 +397,21 @@
  */
 static inline void __native_flush_tlb_single(unsigned long addr)
 {
+	u32 loaded_mm_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
+
 	asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * Some platforms #GP if we call invpcid(type=1/2) before CR4.PCIDE=1.
+	 * Just use invalidate_user_asid() in case we are called early.
+	 */
+	if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE))
+		invalidate_user_asid(loaded_mm_asid);
+	else
+		invpcid_flush_one(user_pcid(loaded_mm_asid), addr);
 }
@@ -298,14 +427,6 @@ static inline void __flush_tlb_all(void)
 	 */
 	__flush_tlb();
-
-	/*
-	 * Note: if we somehow had PCID but not PGE, then this wouldn't work --
-	 * we'd end up flushing kernel translations for the current ASID but
-	 * we might fail to flush kernel translations for other cached ASIDs.
-	 *
-	 * To avoid this issue, we force PCID off if PGE is off.
-	 */
 }
@@ -315,6 +436,16 @@ static inline void __flush_tlb_one(unsigned long addr)
 {
 	count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
 	__flush_tlb_single(addr);
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	/*
+	 * __flush_tlb_single() will have cleared the TLB entry for this ASID,
+	 * but since kernel space is replicated across all, we must also
+	 * invalidate all others.
+	 */
+	invalidate_other_asid();
 }
 
 #define TLB_FLUSH_ALL	-1UL
@@ -375,6 +506,17 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
 void native_flush_tlb_others(const struct cpumask *cpumask,
 			     const struct flush_tlb_info *info);
 
+static inline u64 inc_mm_tlb_gen(struct mm_struct *mm)
+{
+	/*
+	 * Bump the generation count.  This also serves as a full barrier
+	 * that synchronizes with switch_mm(): callers are required to order
+	 * their read of mm_cpumask after their writes to the paging
+	 * structures.
+	 */
+	return atomic64_inc_return(&mm->context.tlb_gen);
+}
+
 static inline void arch_tlbbatch_add_mm(struct arch_tlbflush_unmap_batch *batch,
 					struct mm_struct *mm)
 {
@@ -56,18 +56,27 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 
 #if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
 /*
  * WARNING: The entire pt_regs may not be safe to dereference.  In some cases,
  * only the iret frame registers are accessible.  Use with caution!
+ *
+ * If 'partial' returns true, only the iret frame registers are valid.
  */
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	if (unwind_done(state))
 		return NULL;
 
+	if (partial) {
+#ifdef CONFIG_UNWINDER_ORC
+		*partial = !state->full_regs;
+#else
+		*partial = false;
+#endif
+	}
+
 	return state->regs;
 }
 #else
-static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
+static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state,
+						    bool *partial)
 {
 	return NULL;
 }
@@ -7,6 +7,7 @@
 
 #ifdef CONFIG_X86_VSYSCALL_EMULATION
 extern void map_vsyscall(void);
+extern void set_vsyscall_pgtable_user_bits(pgd_t *root);
 
 /*
  * Called on instruction fetch fault in vsyscall page.
@@ -78,7 +78,12 @@
 #define X86_CR3_PWT		_BITUL(X86_CR3_PWT_BIT)
 #define X86_CR3_PCD_BIT		4 /* Page Cache Disable */
 #define X86_CR3_PCD		_BITUL(X86_CR3_PCD_BIT)
-#define X86_CR3_PCID_MASK	_AC(0x00000fff,UL) /* PCID Mask */
+
+#define X86_CR3_PCID_BITS	12
+#define X86_CR3_PCID_MASK	(_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
+
+#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
+#define X86_CR3_PCID_NOFLUSH	_BITULL(X86_CR3_PCID_NOFLUSH_BIT)
 
 /*
  * Intel CPU features in CR4
@@ -17,6 +17,7 @@
 #include <asm/sigframe.h>
 #include <asm/bootparam.h>
 #include <asm/suspend.h>
+#include <asm/tlbflush.h>
 
 #ifdef CONFIG_XEN
 #include <xen/interface/xen.h>
@@ -94,6 +95,9 @@ void common(void) {
 	BLANK();
 	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 
+	/* TLB state for the entry code */
+	OFFSET(TLB_STATE_user_pcid_flush_mask, tlb_state, user_pcid_flush_mask);
+
 	/* Layout info for cpu_entry_area */
 	OFFSET(CPU_ENTRY_AREA_tss, cpu_entry_area, tss);
 	OFFSET(CPU_ENTRY_AREA_entry_trampoline, cpu_entry_area, entry_trampoline);
@@ -22,7 +22,7 @@ obj-y += common.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
 obj-y			+= bugs.o
-obj-$(CONFIG_CPU_FREQ)	+= aperfmperf.o
+obj-y			+= aperfmperf.o
 obj-y			+= cpuid-deps.o
 
 obj-$(CONFIG_PROC_FS)	+= proc.o
@@ -14,6 +14,8 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 
+#include "cpu.h"
+
 struct aperfmperf_sample {
 	unsigned int	khz;
 	ktime_t	time;
@@ -24,7 +26,7 @@ struct aperfmperf_sample {
 static DEFINE_PER_CPU(struct aperfmperf_sample, samples);
 
 #define APERFMPERF_CACHE_THRESHOLD_MS	10
-#define APERFMPERF_REFRESH_DELAY_MS	20
+#define APERFMPERF_REFRESH_DELAY_MS	10
 #define APERFMPERF_STALE_THRESHOLD_MS	1000
@@ -38,14 +40,8 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	u64 aperf, aperf_delta;
 	u64 mperf, mperf_delta;
 	struct aperfmperf_sample *s = this_cpu_ptr(&samples);
-	ktime_t now = ktime_get();
-	s64 time_delta = ktime_ms_delta(now, s->time);
 	unsigned long flags;
 
-	/* Don't bother re-computing within the cache threshold time. */
-	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
-		return;
-
 	local_irq_save(flags);
 	rdmsrl(MSR_IA32_APERF, aperf);
 	rdmsrl(MSR_IA32_MPERF, mperf);
@@ -61,31 +57,68 @@ static void aperfmperf_snapshot_khz(void *dummy)
 	if (mperf_delta == 0)
 		return;
 
-	s->time = now;
+	s->time = ktime_get();
 	s->aperf = aperf;
 	s->mperf = mperf;
-
-	/* If the previous iteration was too long ago, discard it. */
-	if (time_delta > APERFMPERF_STALE_THRESHOLD_MS)
-		s->khz = 0;
-	else
-		s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
+	s->khz = div64_u64((cpu_khz * aperf_delta), mperf_delta);
 }
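The khz computation above scales the base clock by the APERF/MPERF ratio. A worked example with hypothetical sample values (illustrative arithmetic only, not kernel code):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t cpu_khz = 2400000;	/* base clock: 2.4 GHz */
	uint64_t aperf_delta = 3000000;	/* cycles actually executed */
	uint64_t mperf_delta = 2000000;	/* reference cycles at base clock */

	/* Same formula as s->khz above. */
	uint64_t khz = cpu_khz * aperf_delta / mperf_delta;

	printf("effective freq = %llu kHz\n",
	       (unsigned long long)khz);	/* 3600000, i.e. 3.6 GHz turbo */
	return 0;
}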
 
-unsigned int arch_freq_get_on_cpu(int cpu)
+static bool aperfmperf_snapshot_cpu(int cpu, ktime_t now, bool wait)
 {
-	unsigned int khz;
+	s64 time_delta = ktime_ms_delta(now, per_cpu(samples.time, cpu));
+
+	/* Don't bother re-computing within the cache threshold time. */
+	if (time_delta < APERFMPERF_CACHE_THRESHOLD_MS)
+		return true;
+
+	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, wait);
+
+	/* Return false if the previous iteration was too long ago. */
+	return time_delta <= APERFMPERF_STALE_THRESHOLD_MS;
+}
 
+unsigned int aperfmperf_get_khz(int cpu)
+{
 	if (!cpu_khz)
 		return 0;
 
 	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
 		return 0;
 
-	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
-	khz = per_cpu(samples.khz, cpu);
-	if (khz)
-		return khz;
+	aperfmperf_snapshot_cpu(cpu, ktime_get(), true);
+	return per_cpu(samples.khz, cpu);
+}
+
+void arch_freq_prepare_all(void)
+{
+	ktime_t now = ktime_get();
+	bool wait = false;
+	int cpu;
+
+	if (!cpu_khz)
+		return;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return;
+
+	for_each_online_cpu(cpu)
+		if (!aperfmperf_snapshot_cpu(cpu, now, false))
+			wait = true;
+
+	if (wait)
+		msleep(APERFMPERF_REFRESH_DELAY_MS);
+}
+
+unsigned int arch_freq_get_on_cpu(int cpu)
+{
+	if (!cpu_khz)
+		return 0;
+
+	if (!static_cpu_has(X86_FEATURE_APERFMPERF))
+		return 0;
+
+	if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true))
+		return per_cpu(samples.khz, cpu);
 
 	msleep(APERFMPERF_REFRESH_DELAY_MS);
 	smp_call_function_single(cpu, aperfmperf_snapshot_khz, NULL, 1);
@@ -898,6 +898,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
 	}
 
 	setup_force_cpu_cap(X86_FEATURE_ALWAYS);
+
+	if (c->x86_vendor != X86_VENDOR_AMD)
+		setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
+
 	fpu__init_system(c);
 
 #ifdef CONFIG_X86_32
@@ -1335,7 +1339,10 @@ void syscall_init(void)
 		(entry_SYSCALL_64_trampoline - _entry_trampoline);
 
 	wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS);
-	wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+	if (static_cpu_has(X86_FEATURE_PTI))
+		wrmsrl(MSR_LSTAR, SYSCALL64_entry_trampoline);
+	else
+		wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64);
 
 #ifdef CONFIG_IA32_EMULATION
 	wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat);
@@ -47,4 +47,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
 
 extern void get_cpu_cap(struct cpuinfo_x86 *c);
 extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+
+unsigned int aperfmperf_get_khz(int cpu);
+
 #endif /* ARCH_X86_CPU_H */
@@ -470,6 +470,7 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 #define F14H_MPB_MAX_SIZE 1824
 #define F15H_MPB_MAX_SIZE 4096
 #define F16H_MPB_MAX_SIZE 3458
+#define F17H_MPB_MAX_SIZE 3200
 
 	switch (family) {
 	case 0x14:
@@ -481,6 +482,9 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size,
 	case 0x16:
 		max_size = F16H_MPB_MAX_SIZE;
 		break;
+	case 0x17:
+		max_size = F17H_MPB_MAX_SIZE;
+		break;
 	default:
 		max_size = F1XH_MPB_MAX_SIZE;
 		break;
@@ -5,6 +5,8 @@
 #include <linux/seq_file.h>
 #include <linux/cpufreq.h>
 
+#include "cpu.h"
+
 /*
  * Get CPU information for use by the procfs.
  */
@@ -78,8 +80,10 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "microcode\t: 0x%x\n", c->microcode);
 
 	if (cpu_has(c, X86_FEATURE_TSC)) {
-		unsigned int freq = cpufreq_quick_get(cpu);
+		unsigned int freq = aperfmperf_get_khz(cpu);
 
+		if (!freq)
+			freq = cpufreq_quick_get(cpu);
 		if (!freq)
 			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
@@ -76,12 +76,23 @@ void show_iret_regs(struct pt_regs *regs)
 		regs->sp, regs->flags);
 }
 
-static void show_regs_safe(struct stack_info *info, struct pt_regs *regs)
+static void show_regs_if_on_stack(struct stack_info *info, struct pt_regs *regs,
+				  bool partial)
 {
-	if (on_stack(info, regs, sizeof(*regs)))
+	/*
+	 * These on_stack() checks aren't strictly necessary: the unwind code
+	 * has already validated the 'regs' pointer.  The checks are done for
+	 * ordering reasons: if the registers are on the next stack, we don't
+	 * want to print them out yet.  Otherwise they'll be shown as part of
+	 * the wrong stack.  Later, when show_trace_log_lvl() switches to the
+	 * next stack, this function will be called again with the same regs so
+	 * they can be printed in the right context.
+	 */
+	if (!partial && on_stack(info, regs, sizeof(*regs))) {
 		__show_regs(regs, 0);
-	else if (on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
-			  IRET_FRAME_SIZE)) {
+
+	} else if (partial && on_stack(info, (void *)regs + IRET_FRAME_OFFSET,
+				       IRET_FRAME_SIZE)) {
 		/*
 		 * When an interrupt or exception occurs in entry code, the
 		 * full pt_regs might not have been saved yet.  In that case
@@ -98,11 +109,13 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	struct stack_info stack_info = {0};
 	unsigned long visit_mask = 0;
 	int graph_idx = 0;
+	bool partial;
 
 	printk("%sCall Trace:\n", log_lvl);
 
 	unwind_start(&state, task, regs, stack);
 	stack = stack ? : get_stack_pointer(task, regs);
+	regs = unwind_get_entry_regs(&state, &partial);
 
 	/*
 	 * Iterate through the stacks, starting with the current stack pointer.
|
||||
* - hardirq stack
|
||||
* - entry stack
|
||||
*/
|
||||
for (regs = NULL; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
|
||||
for ( ; stack; stack = PTR_ALIGN(stack_info.next_sp, sizeof(long))) {
|
||||
const char *stack_name;
|
||||
|
||||
if (get_stack_info(stack, task, &stack_info, &visit_mask)) {
|
||||
@ -140,7 +153,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
||||
printk("%s <%s>\n", log_lvl, stack_name);
|
||||
|
||||
if (regs)
|
||||
show_regs_safe(&stack_info, regs);
|
||||
show_regs_if_on_stack(&stack_info, regs, partial);
|
||||
|
||||
/*
|
||||
* Scan the stack, printing any text addresses we find. At the
|
||||
@@ -164,7 +177,7 @@ void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 
 			/*
 			 * Don't print regs->ip again if it was already printed
-			 * by show_regs_safe() below.
+			 * by show_regs_if_on_stack().
 			 */
 			if (regs && stack == &regs->ip)
 				goto next;
@@ -199,9 +212,9 @@ next:
 		unwind_next_frame(&state);
 
 		/* if the frame has entry regs, print them */
-		regs = unwind_get_entry_regs(&state);
+		regs = unwind_get_entry_regs(&state, &partial);
 		if (regs)
-			show_regs_safe(&stack_info, regs);
+			show_regs_if_on_stack(&stack_info, regs, partial);
 	}
 
 	if (stack_name)
@@ -297,11 +310,13 @@ int __die(const char *str, struct pt_regs *regs, long err)
 	unsigned long sp;
 #endif
 	printk(KERN_DEFAULT
-	       "%s: %04lx [#%d]%s%s%s%s\n", str, err & 0xffff, ++die_counter,
+	       "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter,
 	       IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT"         : "",
 	       IS_ENABLED(CONFIG_SMP)     ? " SMP"             : "",
 	       debug_pagealloc_enabled()  ? " DEBUG_PAGEALLOC" : "",
-	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "");
+	       IS_ENABLED(CONFIG_KASAN)   ? " KASAN"           : "",
+	       IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ?
+	       (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : "");
 
 	if (notify_die(DIE_OOPS, str, regs, err,
 			current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP)
@@ -341,6 +341,27 @@ GLOBAL(early_recursion_flag)
 	.balign	PAGE_SIZE; \
 GLOBAL(name)
 
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+/*
+ * Each PGD needs to be 8k long and 8k aligned.  We do not
+ * ever go out to userspace with these, so we do not
+ * strictly *need* the second page, but this allows us to
+ * have a single set_pgd() implementation that does not
+ * need to worry about whether it has 4k or 8k to work
+ * with.
+ *
+ * This ensures PGDs are 8k long:
+ */
+#define PTI_USER_PGD_FILL	512
+/* This ensures they are 8k-aligned: */
+#define NEXT_PGD_PAGE(name) \
+	.balign 2 * PAGE_SIZE; \
+GLOBAL(name)
+#else
+#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define PTI_USER_PGD_FILL	0
+#endif
+
 /* Automate the creation of 1 to 1 mapping pmd entries */
 #define PMDS(START, PERM, COUNT)			\
 	i = 0 ;						\
@@ -350,13 +371,14 @@ GLOBAL(name)
 	.endr
 
 __INITDATA
-NEXT_PAGE(early_top_pgt)
+NEXT_PGD_PAGE(early_top_pgt)
 	.fill	511,8,0
 #ifdef CONFIG_X86_5LEVEL
 	.quad	level4_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #else
 	.quad	level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
 #endif
+	.fill	PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(early_dynamic_pgts)
 	.fill	512*EARLY_DYNAMIC_PAGE_TABLES,8,0
@@ -364,13 +386,14 @@ NEXT_PAGE(early_dynamic_pgts)
 	.data
 
 #if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_START_KERNEL*8, 0
 	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
 	.quad   level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+	.fill	PTI_USER_PGD_FILL,8,0
 
 NEXT_PAGE(level3_ident_pgt)
 	.quad	level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -381,8 +404,9 @@ NEXT_PAGE(level2_ident_pgt)
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
 #else
-NEXT_PAGE(init_top_pgt)
+NEXT_PGD_PAGE(init_top_pgt)
 	.fill	512,8,0
+	.fill	PTI_USER_PGD_FILL,8,0
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
@@ -24,6 +24,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/ldt.h>
+#include <asm/tlb.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
 #include <asm/syscalls.h>
@@ -51,13 +52,11 @@ static void refresh_ldt_segments(void)
 static void flush_ldt(void *__mm)
 {
 	struct mm_struct *mm = __mm;
-	mm_context_t *pc;
 
 	if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
 		return;
 
-	pc = &mm->context;
-	set_ldt(pc->ldt->entries, pc->ldt->nr_entries);
+	load_mm_ldt(mm);
 
 	refresh_ldt_segments();
 }
@@ -94,10 +93,126 @@ static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
 		return NULL;
 	}
 
+	/* The new LDT isn't aliased for PTI yet. */
+	new_ldt->slot = -1;
+
 	new_ldt->nr_entries = num_entries;
 	return new_ldt;
 }
 
+/*
+ * If PTI is enabled, this maps the LDT into the kernelmode and
+ * usermode tables for the given mm.
+ *
+ * There is no corresponding unmap function.  Even if the LDT is freed, we
+ * leave the PTEs around until the slot is reused or the mm is destroyed.
+ * This is harmless: the LDT is always in ordinary memory, and no one will
+ * access the freed slot.
+ *
+ * If we wanted to unmap freed LDTs, we'd also need to do a flush to make
+ * it useful, and the flush would slow down modify_ldt().
+ */
+static int
+map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	bool is_vmalloc, had_top_level_entry;
+	unsigned long va;
+	spinlock_t *ptl;
+	pgd_t *pgd;
+	int i;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return 0;
+
+	/*
+	 * Any given ldt_struct should have map_ldt_struct() called at most
+	 * once.
+	 */
+	WARN_ON(ldt->slot != -1);
+
+	/*
+	 * Did we already have the top level entry allocated?  We can't
+	 * use pgd_none() for this because it doens't do anything on
+	 * 4-level page table kernels.
+	 */
+	pgd = pgd_offset(mm, LDT_BASE_ADDR);
+	had_top_level_entry = (pgd->pgd != 0);
+
+	is_vmalloc = is_vmalloc_addr(ldt->entries);
+
+	for (i = 0; i * PAGE_SIZE < ldt->nr_entries * LDT_ENTRY_SIZE; i++) {
+		unsigned long offset = i << PAGE_SHIFT;
+		const void *src = (char *)ldt->entries + offset;
+		unsigned long pfn;
+		pte_t pte, *ptep;
+
+		va = (unsigned long)ldt_slot_va(slot) + offset;
+		pfn = is_vmalloc ? vmalloc_to_pfn(src) :
+			page_to_pfn(virt_to_page(src));
+		/*
+		 * Treat the PTI LDT range as a *userspace* range.
+		 * get_locked_pte() will allocate all needed pagetables
+		 * and account for them in this mm.
+		 */
+		ptep = get_locked_pte(mm, va, &ptl);
+		if (!ptep)
+			return -ENOMEM;
+		/*
+		 * Map it RO so the easy to find address is not a primary
+		 * target via some kernel interface which misses a
+		 * permission check.
+		 */
+		pte = pfn_pte(pfn, __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL));
+		set_pte_at(mm, va, ptep, pte);
+		pte_unmap_unlock(ptep, ptl);
+	}
+
+	if (mm->context.ldt) {
+		/*
+		 * We already had an LDT.  The top-level entry should already
+		 * have been allocated and synchronized with the usermode
+		 * tables.
+		 */
+		WARN_ON(!had_top_level_entry);
+		if (static_cpu_has(X86_FEATURE_PTI))
+			WARN_ON(!kernel_to_user_pgdp(pgd)->pgd);
+	} else {
+		/*
+		 * This is the first time we're mapping an LDT for this process.
+		 * Sync the pgd to the usermode tables.
+		 */
+		WARN_ON(had_top_level_entry);
+		if (static_cpu_has(X86_FEATURE_PTI)) {
+			WARN_ON(kernel_to_user_pgdp(pgd)->pgd);
+			set_pgd(kernel_to_user_pgdp(pgd), *pgd);
+		}
+	}
+
+	va = (unsigned long)ldt_slot_va(slot);
+	flush_tlb_mm_range(mm, va, va + LDT_SLOT_STRIDE, 0);
+
+	ldt->slot = slot;
+#endif
+	return 0;
+}
+
+static void free_ldt_pgtables(struct mm_struct *mm)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	struct mmu_gather tlb;
+	unsigned long start = LDT_BASE_ADDR;
+	unsigned long end = start + (1UL << PGDIR_SHIFT);
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	tlb_gather_mmu(&tlb, mm, start, end);
+	free_pgd_range(&tlb, start, end, start, end);
+	tlb_finish_mmu(&tlb, start, end);
+#endif
+}
+
 /* After calling this, the LDT is immutable. */
 static void finalize_ldt_struct(struct ldt_struct *ldt)
 {
@@ -156,6 +271,12 @@ int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 		       new_ldt->nr_entries * LDT_ENTRY_SIZE);
 	finalize_ldt_struct(new_ldt);
 
+	retval = map_ldt_struct(mm, new_ldt, 0);
+	if (retval) {
+		free_ldt_pgtables(mm);
+		free_ldt_struct(new_ldt);
+		goto out_unlock;
+	}
 	mm->context.ldt = new_ldt;
 
 out_unlock:
@@ -174,6 +295,11 @@ void destroy_context_ldt(struct mm_struct *mm)
 	mm->context.ldt = NULL;
 }
 
+void ldt_arch_exit_mmap(struct mm_struct *mm)
+{
+	free_ldt_pgtables(mm);
+}
+
 static int read_ldt(void __user *ptr, unsigned long bytecount)
 {
 	struct mm_struct *mm = current->mm;
@@ -287,6 +413,25 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	new_ldt->entries[ldt_info.entry_number] = ldt;
 	finalize_ldt_struct(new_ldt);
 
+	/*
+	 * If we are using PTI, map the new LDT into the userspace pagetables.
+	 * If there is already an LDT, use the other slot so that other CPUs
+	 * will continue to use the old LDT until install_ldt() switches
+	 * them over to the new LDT.
+	 */
+	error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
+	if (error) {
+		/*
+		 * This only can fail for the first LDT setup. If an LDT is
+		 * already installed then the PTE page is already
+		 * populated. Mop up a half populated page table.
+		 */
+		if (!WARN_ON_ONCE(old_ldt))
+			free_ldt_pgtables(mm);
+		free_ldt_struct(new_ldt);
+		goto out_unlock;
+	}
+
 	install_ldt(mm, new_ldt);
 	free_ldt_struct(old_ldt);
 	error = 0;
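The slot argument above alternates between the two LDT aliases so remote CPUs keep a valid mapping until install_ldt() flips them over. A standalone illustration of just that expression (hypothetical values, not kernel code):

#include <stdio.h>

struct ldt_struct { int slot; };

/* Same expression as map_ldt_struct()'s 'slot' argument above. */
static int next_slot(const struct ldt_struct *old_ldt)
{
	return old_ldt ? !old_ldt->slot : 0;
}

int main(void)
{
	struct ldt_struct a = { .slot = 0 }, b = { .slot = 1 };

	printf("first LDT    -> slot %d\n", next_slot(NULL)); /* 0 */
	printf("after slot 0 -> slot %d\n", next_slot(&a));   /* 1 */
	printf("after slot 1 -> slot %d\n", next_slot(&b));   /* 0 */
	return 0;
}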
@@ -48,8 +48,6 @@ static void load_segments(void)
 		"\tmovl $"STR(__KERNEL_DS)",%%eax\n"
 		"\tmovl %%eax,%%ds\n"
 		"\tmovl %%eax,%%es\n"
-		"\tmovl %%eax,%%fs\n"
-		"\tmovl %%eax,%%gs\n"
 		"\tmovl %%eax,%%ss\n"
 		: : : "eax", "memory");
 #undef STR
@@ -232,8 +230,8 @@ void machine_kexec(struct kimage *image)
 	 * The gdt & idt are now invalid.
 	 * If you want to load them you must set up your own idt & gdt.
 	 */
-	set_gdt(phys_to_virt(0), 0);
 	idt_invalidate(phys_to_virt(0));
+	set_gdt(phys_to_virt(0), 0);
 
 	/* now call it */
 	image->start = relocate_kernel_ptr((unsigned long)image->head,
@@ -47,7 +47,7 @@
  * section. Since TSS's are completely CPU-local, we want them
  * on exact cacheline boundaries, to eliminate cacheline ping-pong.
  */
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss_rw) = {
+__visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
 	.x86_tss = {
 		/*
 		 * .sp0 is only used when entering ring 0 from a lower
@@ -128,25 +128,16 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip)
 	spin_lock_irqsave(&rtc_lock, flags);
 	CMOS_WRITE(0xa, 0xf);
 	spin_unlock_irqrestore(&rtc_lock, flags);
-	local_flush_tlb();
-	pr_debug("1.\n");
 	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) =
 							start_eip >> 4;
-	pr_debug("2.\n");
 	*((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) =
 							start_eip & 0xf;
-	pr_debug("3.\n");
 }
 
 static inline void smpboot_restore_warm_reset_vector(void)
 {
 	unsigned long flags;
 
-	/*
-	 * Install writable page 0 entry to set BIOS data area.
-	 */
-	local_flush_tlb();
-
 	/*
 	 * Paranoid: Set warm reset code and vector here back
 	 * to default values.
@@ -98,7 +98,7 @@ static int __save_stack_trace_reliable(struct stack_trace *trace,
 	for (unwind_start(&state, task, NULL, NULL); !unwind_done(&state);
 	     unwind_next_frame(&state)) {
 
-		regs = unwind_get_entry_regs(&state);
+		regs = unwind_get_entry_regs(&state, NULL);
 		if (regs) {
 			/*
 			 * Kernel mode registers on the stack indicate an
@@ -93,17 +93,10 @@ static void set_tls_desc(struct task_struct *p, int idx,
 	cpu = get_cpu();
 
 	while (n-- > 0) {
-		if (LDT_empty(info) || LDT_zero(info)) {
+		if (LDT_empty(info) || LDT_zero(info))
 			memset(desc, 0, sizeof(*desc));
-		} else {
+		else
 			fill_ldt(desc, info);
-
-			/*
-			 * Always set the accessed bit so that the CPU
-			 * doesn't try to write to the (read-only) GDT.
-			 */
-			desc->type |= 1;
-		}
 		++info;
 		++desc;
 	}
@@ -361,7 +361,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	 *
 	 * No need for ist_enter here because we don't use RCU.
 	 */
-	if (((long)regs->sp >> PGDIR_SHIFT) == ESPFIX_PGD_ENTRY &&
+	if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY &&
 		regs->cs == __KERNEL_CS &&
 		regs->ip == (unsigned long)native_irq_return_iret)
 	{
@@ -61,11 +61,17 @@ jiffies_64 = jiffies;
 		. = ALIGN(HPAGE_SIZE);				\
 		__end_rodata_hpage_align = .;
 
+#define ALIGN_ENTRY_TEXT_BEGIN	. = ALIGN(PMD_SIZE);
+#define ALIGN_ENTRY_TEXT_END	. = ALIGN(PMD_SIZE);
+
 #else
 
 #define X64_ALIGN_RODATA_BEGIN
 #define X64_ALIGN_RODATA_END
 
+#define ALIGN_ENTRY_TEXT_BEGIN
+#define ALIGN_ENTRY_TEXT_END
+
 #endif
 
 PHDRS {
@@ -102,8 +108,10 @@ SECTIONS
 		CPUIDLE_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
+		ALIGN_ENTRY_TEXT_BEGIN
 		ENTRY_TEXT
 		IRQENTRY_TEXT
+		ALIGN_ENTRY_TEXT_END
 		SOFTIRQENTRY_TEXT
 		*(.fixup)
 		*(.gnu.warning)
@@ -43,9 +43,10 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology.o
 obj-$(CONFIG_ACPI_NUMA)		+= srat.o
 obj-$(CONFIG_NUMA_EMU)		+= numa_emulation.o
 
-obj-$(CONFIG_X86_INTEL_MPX)	+= mpx.o
-obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS) += pkeys.o
-obj-$(CONFIG_RANDOMIZE_MEMORY) += kaslr.o
+obj-$(CONFIG_X86_INTEL_MPX)			+= mpx.o
+obj-$(CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS)	+= pkeys.o
+obj-$(CONFIG_RANDOMIZE_MEMORY)			+= kaslr.o
+obj-$(CONFIG_PAGE_TABLE_ISOLATION)		+= pti.o
 
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt.o
 obj-$(CONFIG_AMD_MEM_ENCRYPT)	+= mem_encrypt_boot.o
@@ -38,6 +38,32 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 	cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
 
+static void percpu_setup_debug_store(int cpu)
+{
+#ifdef CONFIG_CPU_SUP_INTEL
+	int npages;
+	void *cea;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_store;
+	npages = sizeof(struct debug_store) / PAGE_SIZE;
+	BUILD_BUG_ON(sizeof(struct debug_store) % PAGE_SIZE != 0);
+	cea_map_percpu_pages(cea, &per_cpu(cpu_debug_store, cpu), npages,
+			     PAGE_KERNEL);
+
+	cea = &get_cpu_entry_area(cpu)->cpu_debug_buffers;
+	/*
+	 * Force the population of PMDs for not yet allocated per cpu
+	 * memory like debug store buffers.
+	 */
+	npages = sizeof(struct debug_store_buffers) / PAGE_SIZE;
+	for (; npages; npages--, cea += PAGE_SIZE)
+		cea_set_pte(cea, 0, PAGE_NONE);
+#endif
+}
+
 /* Setup the fixmap mappings only once per-processor */
 static void __init setup_cpu_entry_area(int cpu)
 {
@@ -109,6 +135,7 @@ static void __init setup_cpu_entry_area(int cpu)
 	cea_set_pte(&get_cpu_entry_area(cpu)->entry_trampoline,
 		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
 #endif
+	percpu_setup_debug_store(cpu);
 }
 
 static __init void setup_cpu_entry_area_ptes(void)
@@ -5,7 +5,7 @@
 
 static int ptdump_show(struct seq_file *m, void *v)
 {
-	ptdump_walk_pgd_level(m, NULL);
+	ptdump_walk_pgd_level_debugfs(m, NULL, false);
 	return 0;
 }
@@ -22,21 +22,89 @@ static const struct file_operations ptdump_fops = {
 	.release	= single_release,
 };
 
-static struct dentry *pe;
+static int ptdump_show_curknl(struct seq_file *m, void *v)
+{
+	if (current->mm->pgd) {
+		down_read(&current->mm->mmap_sem);
+		ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, false);
+		up_read(&current->mm->mmap_sem);
+	}
+	return 0;
+}
+
+static int ptdump_open_curknl(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_curknl, NULL);
+}
+
+static const struct file_operations ptdump_curknl_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptdump_open_curknl,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+static struct dentry *pe_curusr;
+
+static int ptdump_show_curusr(struct seq_file *m, void *v)
+{
+	if (current->mm->pgd) {
+		down_read(&current->mm->mmap_sem);
+		ptdump_walk_pgd_level_debugfs(m, current->mm->pgd, true);
+		up_read(&current->mm->mmap_sem);
+	}
+	return 0;
+}
+
+static int ptdump_open_curusr(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, ptdump_show_curusr, NULL);
+}
+
+static const struct file_operations ptdump_curusr_fops = {
+	.owner		= THIS_MODULE,
+	.open		= ptdump_open_curusr,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+#endif
+
+static struct dentry *dir, *pe_knl, *pe_curknl;
 
 static int __init pt_dump_debug_init(void)
 {
-	pe = debugfs_create_file("kernel_page_tables", S_IRUSR, NULL, NULL,
-				 &ptdump_fops);
-	if (!pe)
+	dir = debugfs_create_dir("page_tables", NULL);
+	if (!dir)
 		return -ENOMEM;
 
+	pe_knl = debugfs_create_file("kernel", 0400, dir, NULL,
+				     &ptdump_fops);
+	if (!pe_knl)
+		goto err;
+
+	pe_curknl = debugfs_create_file("current_kernel", 0400,
+					dir, NULL, &ptdump_curknl_fops);
+	if (!pe_curknl)
+		goto err;
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	pe_curusr = debugfs_create_file("current_user", 0400,
+					dir, NULL, &ptdump_curusr_fops);
+	if (!pe_curusr)
+		goto err;
+#endif
 	return 0;
+err:
+	debugfs_remove_recursive(dir);
+	return -ENOMEM;
 }
 
 static void __exit pt_dump_debug_exit(void)
 {
-	debugfs_remove_recursive(pe);
+	debugfs_remove_recursive(dir);
 }
 
 module_init(pt_dump_debug_init);
@@ -52,6 +52,9 @@ enum address_markers_idx {
 	USER_SPACE_NR = 0,
 	KERNEL_SPACE_NR,
 	LOW_KERNEL_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && defined(CONFIG_X86_5LEVEL)
+	LDT_NR,
+#endif
 	VMALLOC_START_NR,
 	VMEMMAP_START_NR,
 #ifdef CONFIG_KASAN
@@ -59,6 +62,9 @@ enum address_markers_idx {
 	KASAN_SHADOW_END_NR,
 #endif
 	CPU_ENTRY_AREA_NR,
+#if defined(CONFIG_MODIFY_LDT_SYSCALL) && !defined(CONFIG_X86_5LEVEL)
+	LDT_NR,
+#endif
 #ifdef CONFIG_X86_ESPFIX64
 	ESPFIX_START_NR,
 #endif
@@ -81,6 +87,9 @@ static struct addr_marker address_markers[] = {
 #ifdef CONFIG_KASAN
 	[KASAN_SHADOW_START_NR]	= { KASAN_SHADOW_START,	"KASAN shadow" },
 	[KASAN_SHADOW_END_NR]	= { KASAN_SHADOW_END,	"KASAN shadow end" },
 #endif
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+	[LDT_NR]		= { LDT_BASE_ADDR,	"LDT remap" },
+#endif
 	[CPU_ENTRY_AREA_NR]	= { CPU_ENTRY_AREA_BASE,"CPU entry Area" },
 #ifdef CONFIG_X86_ESPFIX64
@@ -467,7 +476,7 @@ static inline bool is_hypervisor_range(int idx)
 }
 
 static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
-				       bool checkwx)
+				       bool checkwx, bool dmesg)
 {
 #ifdef CONFIG_X86_64
 	pgd_t *start = (pgd_t *) &init_top_pgt;
@@ -480,7 +489,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 	if (pgd) {
 		start = pgd;
-		st.to_dmesg = true;
+		st.to_dmesg = dmesg;
 	}
 
 	st.check_wx = checkwx;
@@ -518,13 +527,37 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd,
 
 void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd)
 {
-	ptdump_walk_pgd_level_core(m, pgd, false);
+	ptdump_walk_pgd_level_core(m, pgd, false, true);
 }
 
+void ptdump_walk_pgd_level_debugfs(struct seq_file *m, pgd_t *pgd, bool user)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	if (user && static_cpu_has(X86_FEATURE_PTI))
+		pgd = kernel_to_user_pgdp(pgd);
+#endif
+	ptdump_walk_pgd_level_core(m, pgd, false, false);
+}
+EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level_debugfs);
+
+static void ptdump_walk_user_pgd_level_checkwx(void)
+{
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+	pgd_t *pgd = (pgd_t *) &init_top_pgt;
+
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		return;
+
+	pr_info("x86/mm: Checking user space page tables\n");
+	pgd = kernel_to_user_pgdp(pgd);
+	ptdump_walk_pgd_level_core(NULL, pgd, true, false);
+#endif
+}
-EXPORT_SYMBOL_GPL(ptdump_walk_pgd_level);
 
 void ptdump_walk_pgd_level_checkwx(void)
 {
-	ptdump_walk_pgd_level_core(NULL, NULL, true);
+	ptdump_walk_pgd_level_core(NULL, NULL, true, false);
+	ptdump_walk_user_pgd_level_checkwx();
 }
 
 static int __init pt_dump_init(void)
@@ -20,6 +20,7 @@
 #include <asm/kaslr.h>
 #include <asm/hypervisor.h>
 #include <asm/cpufeature.h>
+#include <asm/pti.h>
 
 /*
  * We need to define the tracepoints somewhere, and tlb.c
@@ -161,6 +162,12 @@ struct map_range {
 
 static int page_size_mask;
 
+static void enable_global_pages(void)
+{
+	if (!static_cpu_has(X86_FEATURE_PTI))
+		__supported_pte_mask |= _PAGE_GLOBAL;
+}
+
 static void __init probe_page_size_mask(void)
 {
 	/*
@@ -179,11 +186,11 @@ static void __init probe_page_size_mask(void)
 		cr4_set_bits_and_update_boot(X86_CR4_PSE);
 
 	/* Enable PGE if available */
+	__supported_pte_mask &= ~_PAGE_GLOBAL;
 	if (boot_cpu_has(X86_FEATURE_PGE)) {
 		cr4_set_bits_and_update_boot(X86_CR4_PGE);
-		__supported_pte_mask |= _PAGE_GLOBAL;
-	} else
-		__supported_pte_mask &= ~_PAGE_GLOBAL;
+		enable_global_pages();
+	}
 
 	/* Enable 1 GB linear kernel mappings if available: */
 	if (direct_gbpages && boot_cpu_has(X86_FEATURE_GBPAGES)) {
@@ -196,34 +203,44 @@ static void __init probe_page_size_mask(void)

static void setup_pcid(void)
{
-#ifdef CONFIG_X86_64
-   if (boot_cpu_has(X86_FEATURE_PCID)) {
-       if (boot_cpu_has(X86_FEATURE_PGE)) {
-           /*
-            * This can't be cr4_set_bits_and_update_boot() --
-            * the trampoline code can't handle CR4.PCIDE and
-            * it wouldn't do any good anyway. Despite the name,
-            * cr4_set_bits_and_update_boot() doesn't actually
-            * cause the bits in question to remain set all the
-            * way through the secondary boot asm.
-            *
-            * Instead, we brute-force it and set CR4.PCIDE
-            * manually in start_secondary().
-            */
-           cr4_set_bits(X86_CR4_PCIDE);
-       } else {
-           /*
-            * flush_tlb_all(), as currently implemented, won't
-            * work if PCID is on but PGE is not. Since that
-            * combination doesn't exist on real hardware, there's
-            * no reason to try to fully support it, but it's
-            * polite to avoid corrupting data if we're on
-            * an improperly configured VM.
-            */
-           setup_clear_cpu_cap(X86_FEATURE_PCID);
-       }
+   if (!IS_ENABLED(CONFIG_X86_64))
+       return;
+
+   if (!boot_cpu_has(X86_FEATURE_PCID))
+       return;
+
+   if (boot_cpu_has(X86_FEATURE_PGE)) {
+       /*
+        * This can't be cr4_set_bits_and_update_boot() -- the
+        * trampoline code can't handle CR4.PCIDE and it wouldn't
+        * do any good anyway. Despite the name,
+        * cr4_set_bits_and_update_boot() doesn't actually cause
+        * the bits in question to remain set all the way through
+        * the secondary boot asm.
+        *
+        * Instead, we brute-force it and set CR4.PCIDE manually in
+        * start_secondary().
+        */
+       cr4_set_bits(X86_CR4_PCIDE);
+
+       /*
+        * INVPCID's single-context modes (2/3) only work if we set
+        * X86_CR4_PCIDE, *and* we have INVPCID support. It's unusable
+        * on systems that have X86_CR4_PCIDE clear, or that have
+        * no INVPCID support at all.
+        */
+       if (boot_cpu_has(X86_FEATURE_INVPCID))
+           setup_force_cpu_cap(X86_FEATURE_INVPCID_SINGLE);
+   } else {
+       /*
+        * flush_tlb_all(), as currently implemented, won't work if
+        * PCID is on but PGE is not. Since that combination
+        * doesn't exist on real hardware, there's no reason to try
+        * to fully support it, but it's polite to avoid corrupting
+        * data if we're on an improperly configured VM.
+        */
+       setup_clear_cpu_cap(X86_FEATURE_PCID);
+   }
-#endif
}

#ifdef CONFIG_X86_32
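setup_pcid() now reads as a ladder of early returns instead of nested #ifdef blocks, and the outcome space is small: no PCID on 32-bit or PCID-less CPUs, PCID disabled when PGE is missing (the odd-VM case), and INVPCID single-context flushes forced on only when both CR4.PCIDE and INVPCID exist. A hedged standalone sketch of the same ladder; decide_pcid and the enum are hypothetical names, not kernel API:

/* Userspace model of the setup_pcid() decision ladder -- illustrative only. */
#include <stdio.h>
#include <stdbool.h>

enum pcid_state { PCID_OFF, PCID_ON, PCID_ON_INVPCID_SINGLE };

static enum pcid_state decide_pcid(bool is_64bit, bool has_pcid,
                                   bool has_pge, bool has_invpcid)
{
    if (!is_64bit || !has_pcid)
        return PCID_OFF;
    if (!has_pge)
        return PCID_OFF;    /* odd VM config: be polite, disable */
    return has_invpcid ? PCID_ON_INVPCID_SINGLE : PCID_ON;
}

int main(void)
{
    printf("%d\n", decide_pcid(true, true, true, true));   /* 2 */
    printf("%d\n", decide_pcid(true, true, false, true));  /* 0 */
    return 0;
}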
@@ -624,6 +641,7 @@ void __init init_mem_mapping(void)
{
    unsigned long end;

+   pti_check_boottime_disable();
    probe_page_size_mask();
    setup_pcid();

@@ -847,12 +865,12 @@ void __init zone_sizes_init(void)
    free_area_init_nodes(max_zone_pfns);
}

-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = {
    .loaded_mm = &init_mm,
    .next_asid = 1,
    .cr4 = ~0UL,    /* fail hard if we screw up cr4 shadow initialization */
};
-EXPORT_SYMBOL_GPL(cpu_tlbstate);
+EXPORT_PER_CPU_SYMBOL(cpu_tlbstate);

void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{
@@ -34,25 +34,14 @@
#define TB_SHIFT 40

/*
- * Virtual address start and end range for randomization. The end changes base
- * on configuration to have the highest amount of space for randomization.
- * It increases the possible random position for each randomized region.
+ * Virtual address start and end range for randomization.
 *
- * You need to add an if/def entry if you introduce a new memory region
- * compatible with KASLR. Your entry must be in logical order with memory
- * layout. For example, ESPFIX is before EFI because its virtual address is
- * before. You also need to add a BUILD_BUG_ON() in kernel_randomize_memory() to
- * ensure that this order is correct and won't be changed.
+ * The end address could depend on more configuration options to make the
+ * highest amount of space for randomization available, but that's too hard
+ * to keep straight and caused issues already.
 */
static const unsigned long vaddr_start = __PAGE_OFFSET_BASE;
-
-#if defined(CONFIG_X86_ESPFIX64)
-static const unsigned long vaddr_end = ESPFIX_BASE_ADDR;
-#elif defined(CONFIG_EFI)
-static const unsigned long vaddr_end = EFI_VA_END;
-#else
-static const unsigned long vaddr_end = __START_KERNEL_map;
-#endif
+static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;

/* Default values */
unsigned long page_offset_base = __PAGE_OFFSET_BASE;
@@ -101,15 +90,12 @@ void __init kernel_randomize_memory(void)
    unsigned long remain_entropy;

    /*
-    * All these BUILD_BUG_ON checks ensures the memory layout is
-    * consistent with the vaddr_start/vaddr_end variables.
+    * These BUILD_BUG_ON checks ensure the memory layout is consistent
+    * with the vaddr_start/vaddr_end variables. These checks are very
+    * limited....
     */
    BUILD_BUG_ON(vaddr_start >= vaddr_end);
-   BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
-                vaddr_end >= EFI_VA_END);
-   BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
-                 IS_ENABLED(CONFIG_EFI)) &&
-                vaddr_end >= __START_KERNEL_map);
+   BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
+   BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);

    if (!kaslr_memory_enabled())
@@ -355,14 +355,15 @@ static inline void _pgd_free(pgd_t *pgd)
    kmem_cache_free(pgd_cache, pgd);
}
#else

static inline pgd_t *_pgd_alloc(void)
{
-   return (pgd_t *)__get_free_page(PGALLOC_GFP);
+   return (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ALLOCATION_ORDER);
}

static inline void _pgd_free(pgd_t *pgd)
{
-   free_page((unsigned long)pgd);
+   free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
}
#endif /* CONFIG_X86_PAE */

arch/x86/mm/pti.c (new file, 388 lines)
@@ -0,0 +1,388 @@
/*
 * Copyright(c) 2017 Intel Corporation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * This code is based in part on work published here:
 *
 *    https://github.com/IAIK/KAISER
 *
 * The original work was written and signed off for the Linux kernel by:
 *
 *  Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
 *  Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
 *  Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
 *  Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
 *
 * Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
 * Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
 * Andy Lutomirsky <luto@amacapital.net>
 */
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>

#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
#include <asm/vsyscall.h>
#include <asm/cmdline.h>
#include <asm/pti.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>

#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt

/* Backporting helper */
#ifndef __GFP_NOTRACK
#define __GFP_NOTRACK 0
#endif

static void __init pti_print_if_insecure(const char *reason)
{
    if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
        pr_info("%s\n", reason);
}

static void __init pti_print_if_secure(const char *reason)
{
    if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
        pr_info("%s\n", reason);
}

void __init pti_check_boottime_disable(void)
{
    char arg[5];
    int ret;

    if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
        pti_print_if_insecure("disabled on XEN PV.");
        return;
    }

    ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
    if (ret > 0) {
        if (ret == 3 && !strncmp(arg, "off", 3)) {
            pti_print_if_insecure("disabled on command line.");
            return;
        }
        if (ret == 2 && !strncmp(arg, "on", 2)) {
            pti_print_if_secure("force enabled on command line.");
            goto enable;
        }
        if (ret == 4 && !strncmp(arg, "auto", 4))
            goto autosel;
    }

    if (cmdline_find_option_bool(boot_command_line, "nopti")) {
        pti_print_if_insecure("disabled on command line.");
        return;
    }

autosel:
    if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
        return;
enable:
    setup_force_cpu_cap(X86_FEATURE_PTI);
}

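pti_check_boottime_disable() gives pti= three values with exact-length matches (the 5-byte buffer fits "auto" plus a NUL) and keeps nopti as a legacy off switch; on CPUs without the Meltdown bug, auto falls through to disabled. The precedence can be modeled in plain C; parse_pti below is a hypothetical, simplified stand-in that matches prefixes rather than exact-length values:

/* Minimal model of the pti= / nopti precedence -- not the kernel parser. */
#include <stdio.h>
#include <string.h>

enum pti_mode { PTI_AUTO, PTI_FORCE_ON, PTI_FORCE_OFF };

static enum pti_mode parse_pti(const char *cmdline)
{
    const char *p = strstr(cmdline, "pti=");

    if (p && (p == cmdline || p[-1] == ' ')) {
        if (!strncmp(p + 4, "off", 3))
            return PTI_FORCE_OFF;
        if (!strncmp(p + 4, "on", 2))
            return PTI_FORCE_ON;
        /* "auto" and unrecognized values fall through */
    } else if (strstr(cmdline, "nopti")) {
        return PTI_FORCE_OFF;
    }
    return PTI_AUTO;
}

int main(void)
{
    printf("%d\n", parse_pti("ro quiet pti=on"));   /* 1 */
    printf("%d\n", parse_pti("ro nopti"));          /* 2 */
    printf("%d\n", parse_pti("ro"));                /* 0 */
    return 0;
}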
pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
    /*
     * Changes to the high (kernel) portion of the kernelmode page
     * tables are not automatically propagated to the usermode tables.
     *
     * Users should keep in mind that, unlike the kernelmode tables,
     * there is no vmalloc_fault equivalent for the usermode tables.
     * Top-level entries added to init_mm's usermode pgd after boot
     * will not be automatically propagated to other mms.
     */
    if (!pgdp_maps_userspace(pgdp))
        return pgd;

    /*
     * The user page tables get the full PGD, accessible from
     * userspace:
     */
    kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;

    /*
     * If this is normal user memory, make it NX in the kernel
     * pagetables so that, if we somehow screw up and return to
     * usermode with the kernel CR3 loaded, we'll get a page fault
     * instead of allowing user code to execute with the wrong CR3.
     *
     * As exceptions, we don't set NX if:
     *  - _PAGE_USER is not set. This could be an executable
     *     EFI runtime mapping or something similar, and the kernel
     *     may execute from it
     *  - we don't have NX support
     *  - we're clearing the PGD (i.e. the new pgd is not present).
     */
    if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
        (__supported_pte_mask & _PAGE_NX))
        pgd.pgd |= _PAGE_NX;

    /* return the copy of the PGD we want the kernel to use: */
    return pgd;
}

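__pti_set_user_pgd() poisons the kernel-side copy with NX only when the entry is both present and user-accessible, and only when the CPU supports NX, so kernel-executable mappings such as EFI runtime code are left alone. A self-contained model of just that bit test; bit positions here are illustrative:

/* Standalone model of the __pti_set_user_pgd() NX decision. */
#include <stdio.h>
#include <stdint.h>

#define _PAGE_PRESENT (UINT64_C(1) << 0)
#define _PAGE_USER    (UINT64_C(1) << 2)
#define _PAGE_NX      (UINT64_C(1) << 63)

static uint64_t kernel_view_of(uint64_t pgd, uint64_t supported_mask)
{
    /* Only NX a PGD that is present *and* user-accessible; kernel
     * executable mappings (no _PAGE_USER) must stay executable. */
    if ((pgd & (_PAGE_USER | _PAGE_PRESENT)) == (_PAGE_USER | _PAGE_PRESENT) &&
        (supported_mask & _PAGE_NX))
        pgd |= _PAGE_NX;
    return pgd;
}

int main(void)
{
    uint64_t user_pgd = _PAGE_PRESENT | _PAGE_USER;
    uint64_t efi_pgd  = _PAGE_PRESENT;              /* no _PAGE_USER */

    printf("user: NX=%d\n", !!(kernel_view_of(user_pgd, UINT64_MAX) & _PAGE_NX));
    printf("efi:  NX=%d\n", !!(kernel_view_of(efi_pgd, UINT64_MAX) & _PAGE_NX));
    return 0;
}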
/*
 * Walk the user copy of the page tables (optionally) trying to allocate
 * page table pages on the way down.
 *
 * Returns a pointer to a P4D on success, or NULL on failure.
 */
static p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
{
    pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
    gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);

    if (address < PAGE_OFFSET) {
        WARN_ONCE(1, "attempt to walk user address\n");
        return NULL;
    }

    if (pgd_none(*pgd)) {
        unsigned long new_p4d_page = __get_free_page(gfp);
        if (!new_p4d_page)
            return NULL;

        if (pgd_none(*pgd)) {
            set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
            new_p4d_page = 0;
        }
        if (new_p4d_page)
            free_page(new_p4d_page);
    }
    BUILD_BUG_ON(pgd_large(*pgd) != 0);

    return p4d_offset(pgd, address);
}

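All of the pti_user_pagetable_walk_*() helpers share one populate idiom: allocate a zeroed page, re-check the slot, install on success, and free the allocation if the slot was filled in the meantime. At early boot there is no real concurrency, but the shape is kept. The idiom in miniature; populate_slot is a hypothetical stand-in:

/* The allocate / re-check / free-on-loss populate idiom, in miniature. */
#include <stdlib.h>
#include <stdio.h>

static void *slot;    /* the "page table entry" being populated */

static void *populate_slot(void)
{
    if (!slot) {
        void *fresh = calloc(1, 4096);
        if (!fresh)
            return NULL;

        if (!slot) {        /* still empty: install ours */
            slot = fresh;
            fresh = NULL;
        }
        if (fresh)          /* lost the (hypothetical) race */
            free(fresh);
    }
    return slot;
}

int main(void)
{
    printf("%p\n", populate_slot());
    free(slot);
    return 0;
}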
/*
 * Walk the user copy of the page tables (optionally) trying to allocate
 * page table pages on the way down.
 *
 * Returns a pointer to a PMD on success, or NULL on failure.
 */
static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
{
    gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
    p4d_t *p4d = pti_user_pagetable_walk_p4d(address);
    pud_t *pud;

    BUILD_BUG_ON(p4d_large(*p4d) != 0);
    if (p4d_none(*p4d)) {
        unsigned long new_pud_page = __get_free_page(gfp);
        if (!new_pud_page)
            return NULL;

        if (p4d_none(*p4d)) {
            set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
            new_pud_page = 0;
        }
        if (new_pud_page)
            free_page(new_pud_page);
    }

    pud = pud_offset(p4d, address);
    /* The user page tables do not use large mappings: */
    if (pud_large(*pud)) {
        WARN_ON(1);
        return NULL;
    }
    if (pud_none(*pud)) {
        unsigned long new_pmd_page = __get_free_page(gfp);
        if (!new_pmd_page)
            return NULL;

        if (pud_none(*pud)) {
            set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
            new_pmd_page = 0;
        }
        if (new_pmd_page)
            free_page(new_pmd_page);
    }

    return pmd_offset(pud, address);
}

#ifdef CONFIG_X86_VSYSCALL_EMULATION
/*
 * Walk the shadow copy of the page tables (optionally) trying to allocate
 * page table pages on the way down. Does not support large pages.
 *
 * Note: this is only used when mapping *new* kernel data into the
 * user/shadow page tables. It is never used for userspace data.
 *
 * Returns a pointer to a PTE on success, or NULL on failure.
 */
static __init pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
    gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
    pmd_t *pmd = pti_user_pagetable_walk_pmd(address);
    pte_t *pte;

    /* We can't do anything sensible if we hit a large mapping. */
    if (pmd_large(*pmd)) {
        WARN_ON(1);
        return NULL;
    }

    if (pmd_none(*pmd)) {
        unsigned long new_pte_page = __get_free_page(gfp);
        if (!new_pte_page)
            return NULL;

        if (pmd_none(*pmd)) {
            set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
            new_pte_page = 0;
        }
        if (new_pte_page)
            free_page(new_pte_page);
    }

    pte = pte_offset_kernel(pmd, address);
    if (pte_flags(*pte) & _PAGE_USER) {
        WARN_ONCE(1, "attempt to walk to user pte\n");
        return NULL;
    }
    return pte;
}

static void __init pti_setup_vsyscall(void)
{
    pte_t *pte, *target_pte;
    unsigned int level;

    pte = lookup_address(VSYSCALL_ADDR, &level);
    if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
        return;

    target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
    if (WARN_ON(!target_pte))
        return;

    *target_pte = *pte;
    set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
}
#else
static void __init pti_setup_vsyscall(void) { }
#endif

static void __init
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
{
    unsigned long addr;

    /*
     * Clone the populated PMDs which cover start to end. These PMD areas
     * can have holes.
     */
    for (addr = start; addr < end; addr += PMD_SIZE) {
        pmd_t *pmd, *target_pmd;
        pgd_t *pgd;
        p4d_t *p4d;
        pud_t *pud;

        pgd = pgd_offset_k(addr);
        if (WARN_ON(pgd_none(*pgd)))
            return;
        p4d = p4d_offset(pgd, addr);
        if (WARN_ON(p4d_none(*p4d)))
            return;
        pud = pud_offset(p4d, addr);
        if (pud_none(*pud))
            continue;
        pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd))
            continue;

        target_pmd = pti_user_pagetable_walk_pmd(addr);
        if (WARN_ON(!target_pmd))
            return;

        /*
         * Copy the PMD. That is, the kernelmode and usermode
         * tables will share the last-level page tables of this
         * address range.
         */
        *target_pmd = pmd_clear_flags(*pmd, clear);
    }
}

/*
 * Clone a single p4d (i.e. a top-level entry on 4-level systems and a
 * next-level entry on 5-level systems).
 */
static void __init pti_clone_p4d(unsigned long addr)
{
    p4d_t *kernel_p4d, *user_p4d;
    pgd_t *kernel_pgd;

    user_p4d = pti_user_pagetable_walk_p4d(addr);
    kernel_pgd = pgd_offset_k(addr);
    kernel_p4d = p4d_offset(kernel_pgd, addr);
    *user_p4d = *kernel_p4d;
}

/*
 * Clone the CPU_ENTRY_AREA into the user space visible page table.
 */
static void __init pti_clone_user_shared(void)
{
    pti_clone_p4d(CPU_ENTRY_AREA_BASE);
}

/*
 * Clone the ESPFIX P4D into the user space visible page table.
 */
static void __init pti_setup_espfix64(void)
{
#ifdef CONFIG_X86_ESPFIX64
    pti_clone_p4d(ESPFIX_BASE_ADDR);
#endif
}

/*
 * Clone the populated PMDs of the entry and irqentry text and force it RO.
 */
static void __init pti_clone_entry_text(void)
{
    pti_clone_pmds((unsigned long) __entry_text_start,
                   (unsigned long) __irqentry_text_end,
                   _PAGE_RW | _PAGE_GLOBAL);
}

/*
 * Initialize kernel page table isolation
 */
void __init pti_init(void)
{
    if (!static_cpu_has(X86_FEATURE_PTI))
        return;

    pr_info("enabled\n");

    pti_clone_user_shared();
    pti_clone_entry_text();
    pti_setup_espfix64();
    pti_setup_vsyscall();
}
@@ -28,6 +28,38 @@
 * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
 */

+/*
+ * We get here when we do something requiring a TLB invalidation
+ * but could not go invalidate all of the contexts. We do the
+ * necessary invalidation by clearing out the 'ctx_id' which
+ * forces a TLB flush when the context is loaded.
+ */
+void clear_asid_other(void)
+{
+   u16 asid;
+
+   /*
+    * This is only expected to be set if we have disabled
+    * kernel _PAGE_GLOBAL pages.
+    */
+   if (!static_cpu_has(X86_FEATURE_PTI)) {
+       WARN_ON_ONCE(1);
+       return;
+   }
+
+   for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
+       /* Do not need to flush the current asid */
+       if (asid == this_cpu_read(cpu_tlbstate.loaded_mm_asid))
+           continue;
+       /*
+        * Make sure the next time we go to switch to
+        * this asid, we do a flush:
+        */
+       this_cpu_write(cpu_tlbstate.ctxs[asid].ctx_id, 0);
+   }
+   this_cpu_write(cpu_tlbstate.invalidate_other, false);
+}
+
atomic64_t last_mm_ctx_id = ATOMIC64_INIT(1);

@@ -42,6 +74,9 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
        return;
    }

+   if (this_cpu_read(cpu_tlbstate.invalidate_other))
+       clear_asid_other();
+
    for (asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
        if (this_cpu_read(cpu_tlbstate.ctxs[asid].ctx_id) !=
            next->context.ctx_id)
@@ -65,6 +100,25 @@ static void choose_new_asid(struct mm_struct *next, u64 next_tlb_gen,
    *need_flush = true;
}

+static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush)
+{
+   unsigned long new_mm_cr3;
+
+   if (need_flush) {
+       invalidate_user_asid(new_asid);
+       new_mm_cr3 = build_cr3(pgdir, new_asid);
+   } else {
+       new_mm_cr3 = build_cr3_noflush(pgdir, new_asid);
+   }
+
+   /*
+    * Caution: many callers of this function expect
+    * that load_cr3() is serializing and orders TLB
+    * fills with respect to the mm_cpumask writes.
+    */
+   write_cr3(new_mm_cr3);
+}
+
void leave_mm(int cpu)
{
    struct mm_struct *loaded_mm = this_cpu_read(cpu_tlbstate.loaded_mm);
@@ -195,7 +249,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
    if (need_flush) {
        this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
        this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
-       write_cr3(build_cr3(next->pgd, new_asid));
+       load_new_mm_cr3(next->pgd, new_asid, true);

        /*
         * NB: This gets called via leave_mm() in the idle path
@@ -208,7 +262,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
    } else {
        /* The new ASID is already up to date. */
-       write_cr3(build_cr3_noflush(next->pgd, new_asid));
+       load_new_mm_cr3(next->pgd, new_asid, false);

        /* See above wrt _rcuidle. */
        trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0);
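clear_asid_other() is lazy invalidation: rather than flushing every inactive ASID immediately, it zeroes each stale ctx_id so that the next switch to that ASID cannot match and therefore triggers a flush. A toy model with plain arrays; names and sizes are illustrative, not the kernel's per-CPU state:

/* Toy model of clear_asid_other(): mark every inactive ASID stale. */
#include <stdio.h>
#include <stdint.h>

#define TLB_NR_DYN_ASIDS 6

struct ctx { uint64_t ctx_id; };

static struct ctx ctxs[TLB_NR_DYN_ASIDS];
static int loaded_asid;

static void clear_asid_other(void)
{
    for (int asid = 0; asid < TLB_NR_DYN_ASIDS; asid++) {
        if (asid == loaded_asid)
            continue;           /* the current context stays valid */
        ctxs[asid].ctx_id = 0;  /* 0 never matches a real mm: forces flush */
    }
}

int main(void)
{
    for (int i = 0; i < TLB_NR_DYN_ASIDS; i++)
        ctxs[i].ctx_id = 100 + i;
    loaded_asid = 2;
    clear_asid_other();
    for (int i = 0; i < TLB_NR_DYN_ASIDS; i++)
        printf("asid %d -> ctx_id %llu\n", i,
               (unsigned long long)ctxs[i].ctx_id);
    return 0;
}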
@@ -195,6 +195,9 @@ static pgd_t *efi_pgd;
 * because we want to avoid inserting EFI region mappings (EFI_VA_END
 * to EFI_VA_START) into the standard kernel page tables. Everything
 * else can be shared, see efi_sync_low_kernel_mappings().
+ *
+ * We don't want the pgd on the pgd_list and cannot use pgd_alloc() for the
+ * allocation.
 */
int __init efi_alloc_page_tables(void)
{
@@ -207,7 +210,7 @@ int __init efi_alloc_page_tables(void)
        return 0;

    gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO;
-   efi_pgd = (pgd_t *)__get_free_page(gfp_mask);
+   efi_pgd = (pgd_t *)__get_free_pages(gfp_mask, PGD_ALLOCATION_ORDER);
    if (!efi_pgd)
        return -ENOMEM;

@@ -592,7 +592,18 @@ static int qrk_capsule_setup_info(struct capsule_info *cap_info, void **pkbuff,
    /*
     * Update the first page pointer to skip over the CSH header.
     */
-   cap_info->pages[0] += csh->headersize;
+   cap_info->phys[0] += csh->headersize;
+
+   /*
+    * cap_info->capsule should point at a virtual mapping of the entire
+    * capsule, starting at the capsule header. Our image has the Quark
+    * security header prepended, so we cannot rely on the default vmap()
+    * mapping created by the generic capsule code.
+    * Given that the Quark firmware does not appear to care about the
+    * virtual mapping, let's just point cap_info->capsule at our copy
+    * of the capsule header.
+    */
+   cap_info->capsule = &cap_info->header;

    return 1;
}
@@ -12,22 +12,29 @@
#include "blk.h"

/*
- * Append a bio to a passthrough request.  Only works can be merged into
- * the request based on the driver constraints.
+ * Append a bio to a passthrough request.  Only works if the bio can be merged
+ * into the request based on the driver constraints.
 */
-int blk_rq_append_bio(struct request *rq, struct bio *bio)
+int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
-   blk_queue_bounce(rq->q, &bio);
+   struct bio *orig_bio = *bio;
+
+   blk_queue_bounce(rq->q, bio);

    if (!rq->bio) {
-       blk_rq_bio_prep(rq->q, rq, bio);
+       blk_rq_bio_prep(rq->q, rq, *bio);
    } else {
-       if (!ll_back_merge_fn(rq->q, rq, bio))
+       if (!ll_back_merge_fn(rq->q, rq, *bio)) {
+           if (orig_bio != *bio) {
+               bio_put(*bio);
+               *bio = orig_bio;
+           }
            return -EINVAL;
+       }

-       rq->biotail->bi_next = bio;
-       rq->biotail = bio;
-       rq->__data_len += bio->bi_iter.bi_size;
+       rq->biotail->bi_next = *bio;
+       rq->biotail = *bio;
+       rq->__data_len += (*bio)->bi_iter.bi_size;
    }

    return 0;
@@ -80,14 +87,12 @@ static int __blk_rq_map_user_iov(struct request *rq,
     * We link the bounce buffer in and could have to traverse it
     * later so we have to get a ref to prevent it from being freed
     */
-   ret = blk_rq_append_bio(rq, bio);
-   bio_get(bio);
+   ret = blk_rq_append_bio(rq, &bio);
    if (ret) {
        bio_endio(bio);
        __blk_rq_unmap_user(orig_bio);
        bio_put(bio);
        return ret;
    }
+   bio_get(bio);

    return 0;
}
@@ -220,7 +225,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
    int reading = rq_data_dir(rq) == READ;
    unsigned long addr = (unsigned long) kbuf;
    int do_copy = 0;
-   struct bio *bio;
+   struct bio *bio, *orig_bio;
    int ret;

    if (len > (queue_max_hw_sectors(q) << 9))
@@ -243,10 +248,11 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
    if (do_copy)
        rq->rq_flags |= RQF_COPY_USER;

-   ret = blk_rq_append_bio(rq, bio);
+   orig_bio = bio;
+   ret = blk_rq_append_bio(rq, &bio);
    if (unlikely(ret)) {
        /* request is too big */
-       bio_put(bio);
+       bio_put(orig_bio);
        return ret;
    }

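blk_rq_append_bio() now takes struct bio ** because blk_queue_bounce() may substitute a bounce bio: the caller must end up holding whichever bio actually got linked into the request, and a failed merge restores the original so cleanup frees the right object. The same double-pointer contract in a self-contained sketch; append_buf and struct buf are hypothetical names:

/* Why an API that may swap the caller's object takes a double pointer. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdbool.h>

struct buf { char tag[16]; };

/* May replace *b with a "bounce" copy; caller keeps a valid handle either way. */
static int append_buf(struct buf **b, bool need_bounce, bool merge_ok)
{
    struct buf *orig = *b;

    if (need_bounce) {
        struct buf *bounce = malloc(sizeof(*bounce));
        if (!bounce)
            return -1;
        strcpy(bounce->tag, "bounce");
        *b = bounce;
    }

    if (!merge_ok) {
        if (orig != *b) {    /* undo the swap before failing */
            free(*b);
            *b = orig;
        }
        return -1;
    }
    return 0;
}

int main(void)
{
    struct buf *b = malloc(sizeof(*b));

    if (!b)
        return 1;
    strcpy(b->tag, "orig");
    if (append_buf(&b, true, false) < 0)
        printf("failed, still holding '%s'\n", b->tag);    /* 'orig' */
    free(b);
    return 0;
}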
@@ -200,6 +200,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
    unsigned i = 0;
    bool bounce = false;
    int sectors = 0;
+   bool passthrough = bio_is_passthrough(*bio_orig);

    bio_for_each_segment(from, *bio_orig, iter) {
        if (i++ < BIO_MAX_PAGES)
@@ -210,13 +211,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
    if (!bounce)
        return;

-   if (sectors < bio_sectors(*bio_orig)) {
+   if (!passthrough && sectors < bio_sectors(*bio_orig)) {
        bio = bio_split(*bio_orig, sectors, GFP_NOIO, bounce_bio_split);
        bio_chain(bio, *bio_orig);
        generic_make_request(*bio_orig);
        *bio_orig = bio;
    }
-   bio = bio_clone_bioset(*bio_orig, GFP_NOIO, bounce_bio_set);
+   bio = bio_clone_bioset(*bio_orig, GFP_NOIO, passthrough ? NULL :
+           bounce_bio_set);

    bio_for_each_segment_all(to, bio, i) {
        struct page *page = to->bv_page;
@@ -610,6 +610,11 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,
                    algt->mask));
    if (IS_ERR(poly))
        return PTR_ERR(poly);
+   poly_hash = __crypto_hash_alg_common(poly);
+
+   err = -EINVAL;
+   if (poly_hash->digestsize != POLY1305_DIGEST_SIZE)
+       goto out_put_poly;

    err = -ENOMEM;
    inst = kzalloc(sizeof(*inst) + sizeof(*ctx), GFP_KERNEL);
@@ -618,7 +623,6 @@ static int chachapoly_create(struct crypto_template *tmpl, struct rtattr **tb,

    ctx = aead_instance_ctx(inst);
    ctx->saltlen = CHACHAPOLY_IV_SIZE - ivsize;
-   poly_hash = __crypto_hash_alg_common(poly);
    err = crypto_init_ahash_spawn(&ctx->poly, poly_hash,
                      aead_crypto_instance(inst));
    if (err)
@@ -254,6 +254,14 @@ static void pcrypt_aead_exit_tfm(struct crypto_aead *tfm)
    crypto_free_aead(ctx->child);
}

+static void pcrypt_free(struct aead_instance *inst)
+{
+   struct pcrypt_instance_ctx *ctx = aead_instance_ctx(inst);
+
+   crypto_drop_aead(&ctx->spawn);
+   kfree(inst);
+}
+
static int pcrypt_init_instance(struct crypto_instance *inst,
                struct crypto_alg *alg)
{
@@ -319,6 +327,8 @@ static int pcrypt_create_aead(struct crypto_template *tmpl, struct rtattr **tb,
    inst->alg.encrypt = pcrypt_aead_encrypt;
    inst->alg.decrypt = pcrypt_aead_decrypt;

+   inst->free = pcrypt_free;
+
    err = aead_register_instance(tmpl, inst);
    if (err)
        goto out_drop_aead;
@@ -349,14 +359,6 @@ static int pcrypt_create(struct crypto_template *tmpl, struct rtattr **tb)
        return -EINVAL;
}

-static void pcrypt_free(struct crypto_instance *inst)
-{
-   struct pcrypt_instance_ctx *ctx = crypto_instance_ctx(inst);
-
-   crypto_drop_aead(&ctx->spawn);
-   kfree(inst);
-}
-
static int pcrypt_cpumask_change_notify(struct notifier_block *self,
                    unsigned long val, void *data)
{
@@ -469,7 +471,6 @@ static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
static struct crypto_template pcrypt_tmpl = {
    .name = "pcrypt",
    .create = pcrypt_create,
-   .free = pcrypt_free,
    .module = THIS_MODULE,
};

@@ -186,6 +186,11 @@ static void cache_associativity(struct cacheinfo *this_leaf)
        this_leaf->ways_of_associativity = (size / nr_sets) / line_size;
}

+static bool cache_node_is_unified(struct cacheinfo *this_leaf)
+{
+   return of_property_read_bool(this_leaf->of_node, "cache-unified");
+}
+
static void cache_of_override_properties(unsigned int cpu)
{
    int index;
@@ -194,6 +199,14 @@ static void cache_of_override_properties(unsigned int cpu)

    for (index = 0; index < cache_leaves(cpu); index++) {
        this_leaf = this_cpu_ci->info_list + index;
+       /*
+        * init_cache_level must setup the cache level correctly
+        * overriding the architecturally specified levels, so
+        * if type is NONE at this stage, it should be unified
+        */
+       if (this_leaf->type == CACHE_TYPE_NOCACHE &&
+           cache_node_is_unified(this_leaf))
+           this_leaf->type = CACHE_TYPE_UNIFIED;
        cache_size(this_leaf);
        cache_get_line_size(this_leaf);
        cache_nr_sets(this_leaf);
@@ -178,6 +178,7 @@ static struct bus_type sunxi_rsb_bus = {
    .match = sunxi_rsb_device_match,
    .probe = sunxi_rsb_device_probe,
    .remove = sunxi_rsb_device_remove,
+   .uevent = of_device_uevent_modalias,
};

static void sunxi_rsb_dev_release(struct device *dev)
@@ -5,6 +5,7 @@ config CRYPTO_DEV_CHELSIO
    select CRYPTO_SHA256
    select CRYPTO_SHA512
    select CRYPTO_AUTHENC
+   select CRYPTO_GF128MUL
    ---help---
      The Chelsio Crypto Co-processor driver for T6 adapters.

@@ -1625,6 +1625,7 @@ static int queue_cache_init(void)
                      CWQ_ENTRY_SIZE, 0, NULL);
    if (!queue_cache[HV_NCS_QTYPE_CWQ - 1]) {
        kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
+       queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
        return -ENOMEM;
    }
    return 0;
@@ -1634,6 +1635,8 @@ static void queue_cache_destroy(void)
{
    kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_MAU - 1]);
    kmem_cache_destroy(queue_cache[HV_NCS_QTYPE_CWQ - 1]);
+   queue_cache[HV_NCS_QTYPE_MAU - 1] = NULL;
+   queue_cache[HV_NCS_QTYPE_CWQ - 1] = NULL;
}

static long spu_queue_register_workfn(void *arg)
@@ -20,10 +20,6 @@

#define NO_FURTHER_WRITE_ACTION -1

-#ifndef phys_to_page
-#define phys_to_page(x)     pfn_to_page((x) >> PAGE_SHIFT)
-#endif
-
/**
 * efi_free_all_buff_pages - free all previous allocated buffer pages
 * @cap_info: pointer to current instance of capsule_info structure
@@ -35,7 +31,7 @@
static void efi_free_all_buff_pages(struct capsule_info *cap_info)
{
    while (cap_info->index > 0)
-       __free_page(phys_to_page(cap_info->pages[--cap_info->index]));
+       __free_page(cap_info->pages[--cap_info->index]);

    cap_info->index = NO_FURTHER_WRITE_ACTION;
}
@@ -71,6 +67,14 @@ int __efi_capsule_setup_info(struct capsule_info *cap_info)

    cap_info->pages = temp_page;

+   temp_page = krealloc(cap_info->phys,
+                pages_needed * sizeof(phys_addr_t *),
+                GFP_KERNEL | __GFP_ZERO);
+   if (!temp_page)
+       return -ENOMEM;
+
+   cap_info->phys = temp_page;
+
    return 0;
}

@@ -105,9 +109,24 @@ int __weak efi_capsule_setup_info(struct capsule_info *cap_info, void *kbuff,
 **/
static ssize_t efi_capsule_submit_update(struct capsule_info *cap_info)
{
+   bool do_vunmap = false;
    int ret;

-   ret = efi_capsule_update(&cap_info->header, cap_info->pages);
+   /*
+    * cap_info->capsule may have been assigned already by a quirk
+    * handler, so only overwrite it if it is NULL
+    */
+   if (!cap_info->capsule) {
+       cap_info->capsule = vmap(cap_info->pages, cap_info->index,
+                    VM_MAP, PAGE_KERNEL);
+       if (!cap_info->capsule)
+           return -ENOMEM;
+       do_vunmap = true;
+   }
+
+   ret = efi_capsule_update(cap_info->capsule, cap_info->phys);
+   if (do_vunmap)
+       vunmap(cap_info->capsule);
    if (ret) {
        pr_err("capsule update failed\n");
        return ret;
@@ -165,10 +184,12 @@ static ssize_t efi_capsule_write(struct file *file, const char __user *buff,
            goto failed;
        }

-       cap_info->pages[cap_info->index++] = page_to_phys(page);
+       cap_info->pages[cap_info->index] = page;
+       cap_info->phys[cap_info->index] = page_to_phys(page);
        cap_info->page_bytes_remain = PAGE_SIZE;
+       cap_info->index++;
    } else {
-       page = phys_to_page(cap_info->pages[cap_info->index - 1]);
+       page = cap_info->pages[cap_info->index - 1];
    }

    kbuff = kmap(page);
@@ -252,6 +273,7 @@ static int efi_capsule_release(struct inode *inode, struct file *file)
    struct capsule_info *cap_info = file->private_data;

    kfree(cap_info->pages);
+   kfree(cap_info->phys);
    kfree(file->private_data);
    file->private_data = NULL;
    return 0;
@@ -281,6 +303,13 @@ static int efi_capsule_open(struct inode *inode, struct file *file)
        return -ENOMEM;
    }

+   cap_info->phys = kzalloc(sizeof(void *), GFP_KERNEL);
+   if (!cap_info->phys) {
+       kfree(cap_info->pages);
+       kfree(cap_info);
+       return -ENOMEM;
+   }
+
    file->private_data = cap_info;

    return 0;
@@ -1074,7 +1074,7 @@ void acpi_gpiochip_add(struct gpio_chip *chip)
    }

    if (!chip->names)
-       devprop_gpiochip_set_names(chip);
+       devprop_gpiochip_set_names(chip, dev_fwnode(chip->parent));

    acpi_gpiochip_request_regions(acpi_gpio);
    acpi_gpiochip_scan_gpios(acpi_gpio);

@@ -19,30 +19,27 @@
/**
 * devprop_gpiochip_set_names - Set GPIO line names using device properties
 * @chip: GPIO chip whose lines should be named, if possible
+ * @fwnode: Property Node containing the gpio-line-names property
 *
 * Looks for device property "gpio-line-names" and if it exists assigns
 * GPIO line names for the chip. The memory allocated for the assigned
 * names belong to the underlying firmware node and should not be released
 * by the caller.
 */
-void devprop_gpiochip_set_names(struct gpio_chip *chip)
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+               const struct fwnode_handle *fwnode)
{
    struct gpio_device *gdev = chip->gpiodev;
    const char **names;
    int ret, i;

-   if (!chip->parent) {
-       dev_warn(&gdev->dev, "GPIO chip parent is NULL\n");
-       return;
-   }
-
-   ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+   ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                        NULL, 0);
    if (ret < 0)
        return;

    if (ret != gdev->ngpio) {
-       dev_warn(chip->parent,
+       dev_warn(&gdev->dev,
             "names %d do not match number of GPIOs %d\n", ret,
             gdev->ngpio);
        return;
@@ -52,10 +49,10 @@ void devprop_gpiochip_set_names(struct gpio_chip *chip)
    if (!names)
        return;

-   ret = device_property_read_string_array(chip->parent, "gpio-line-names",
+   ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                        names, gdev->ngpio);
    if (ret < 0) {
-       dev_warn(chip->parent, "failed to read GPIO line names\n");
+       dev_warn(&gdev->dev, "failed to read GPIO line names\n");
        kfree(names);
        return;
    }

@@ -493,7 +493,8 @@ int of_gpiochip_add(struct gpio_chip *chip)

    /* If the chip defines names itself, these take precedence */
    if (!chip->names)
-       devprop_gpiochip_set_names(chip);
+       devprop_gpiochip_set_names(chip,
+                      of_fwnode_handle(chip->of_node));

    of_node_get(chip->of_node);

@@ -224,7 +224,8 @@ static inline int gpio_chip_hwgpio(const struct gpio_desc *desc)
    return desc - &desc->gdev->descs[0];
}

-void devprop_gpiochip_set_names(struct gpio_chip *chip);
+void devprop_gpiochip_set_names(struct gpio_chip *chip,
+               const struct fwnode_handle *fwnode);

/* With descriptor prefix */

@@ -6944,6 +6944,7 @@ enum {
#define  RESET_PCH_HANDSHAKE_ENABLE (1<<4)

#define GEN8_CHICKEN_DCPR_1     _MMIO(0x46430)
+#define   SKL_SELECT_ALTERNATE_DC_EXIT  (1<<30)
#define   MASK_WAKEMEM          (1<<13)

#define SKL_DFSM            _MMIO(0x51000)
@@ -8475,6 +8476,7 @@ enum skl_power_gate {
#define  BXT_CDCLK_CD2X_DIV_SEL_2   (2<<22)
#define  BXT_CDCLK_CD2X_DIV_SEL_4   (3<<22)
#define  BXT_CDCLK_CD2X_PIPE(pipe)  ((pipe)<<20)
+#define  CDCLK_DIVMUX_CD_OVERRIDE   (1<<19)
#define  BXT_CDCLK_CD2X_PIPE_NONE   BXT_CDCLK_CD2X_PIPE(3)
#define  BXT_CDCLK_SSA_PRECHARGE_ENABLE (1<<16)
#define  CDCLK_FREQ_DECIMAL_MASK    (0x7ff)

@@ -859,16 +859,10 @@ static void skl_set_preferred_cdclk_vco(struct drm_i915_private *dev_priv,

static void skl_dpll0_enable(struct drm_i915_private *dev_priv, int vco)
{
-   int min_cdclk = skl_calc_cdclk(0, vco);
    u32 val;

    WARN_ON(vco != 8100000 && vco != 8640000);

-   /* select the minimum CDCLK before enabling DPLL 0 */
-   val = CDCLK_FREQ_337_308 | skl_cdclk_decimal(min_cdclk);
-   I915_WRITE(CDCLK_CTL, val);
-   POSTING_READ(CDCLK_CTL);
-
    /*
     * We always enable DPLL0 with the lowest link rate possible, but still
     * taking into account the VCO required to operate the eDP panel at the
@@ -922,7 +916,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
{
    int cdclk = cdclk_state->cdclk;
    int vco = cdclk_state->vco;
-   u32 freq_select, pcu_ack;
+   u32 freq_select, pcu_ack, cdclk_ctl;
    int ret;

    WARN_ON((cdclk == 24000) != (vco == 0));
@@ -939,7 +933,7 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
        return;
    }

-   /* set CDCLK_CTL */
+   /* Choose frequency for this cdclk */
    switch (cdclk) {
    case 450000:
    case 432000:
@@ -967,10 +961,33 @@ static void skl_set_cdclk(struct drm_i915_private *dev_priv,
        dev_priv->cdclk.hw.vco != vco)
        skl_dpll0_disable(dev_priv);

+   cdclk_ctl = I915_READ(CDCLK_CTL);
+
+   if (dev_priv->cdclk.hw.vco != vco) {
+       /* Wa Display #1183: skl,kbl,cfl */
+       cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+       cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+       I915_WRITE(CDCLK_CTL, cdclk_ctl);
+   }
+
+   /* Wa Display #1183: skl,kbl,cfl */
+   cdclk_ctl |= CDCLK_DIVMUX_CD_OVERRIDE;
+   I915_WRITE(CDCLK_CTL, cdclk_ctl);
+   POSTING_READ(CDCLK_CTL);
+
    if (dev_priv->cdclk.hw.vco != vco)
        skl_dpll0_enable(dev_priv, vco);

-   I915_WRITE(CDCLK_CTL, freq_select | skl_cdclk_decimal(cdclk));
+   /* Wa Display #1183: skl,kbl,cfl */
+   cdclk_ctl &= ~(CDCLK_FREQ_SEL_MASK | CDCLK_FREQ_DECIMAL_MASK);
+   I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+   cdclk_ctl |= freq_select | skl_cdclk_decimal(cdclk);
+   I915_WRITE(CDCLK_CTL, cdclk_ctl);
+
+   /* Wa Display #1183: skl,kbl,cfl */
+   cdclk_ctl &= ~CDCLK_DIVMUX_CD_OVERRIDE;
+   I915_WRITE(CDCLK_CTL, cdclk_ctl);
    POSTING_READ(CDCLK_CTL);

    /* inform PCU of the change */

@@ -598,6 +598,11 @@ void gen9_enable_dc5(struct drm_i915_private *dev_priv)

    DRM_DEBUG_KMS("Enabling DC5\n");

+   /* Wa Display #1183: skl,kbl,cfl */
+   if (IS_GEN9_BC(dev_priv))
+       I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+              SKL_SELECT_ALTERNATE_DC_EXIT);
+
    gen9_set_dc_state(dev_priv, DC_STATE_EN_UPTO_DC5);
}

@@ -625,6 +630,11 @@ void skl_disable_dc6(struct drm_i915_private *dev_priv)
{
    DRM_DEBUG_KMS("Disabling DC6\n");

+   /* Wa Display #1183: skl,kbl,cfl */
+   if (IS_GEN9_BC(dev_priv))
+       I915_WRITE(GEN8_CHICKEN_DCPR_1, I915_READ(GEN8_CHICKEN_DCPR_1) |
+              SKL_SELECT_ALTERNATE_DC_EXIT);
+
    gen9_set_dc_state(dev_priv, DC_STATE_DISABLE);
}

@@ -1786,6 +1796,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv,
    GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS |     \
    BIT_ULL(POWER_DOMAIN_MODESET) |         \
    BIT_ULL(POWER_DOMAIN_AUX_A) |           \
+   BIT_ULL(POWER_DOMAIN_GMBUS) |           \
    BIT_ULL(POWER_DOMAIN_INIT))

#define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS (     \
@@ -386,6 +386,9 @@ int ib_open_shared_qp_security(struct ib_qp *qp, struct ib_device *dev)
    if (ret)
        return ret;

+   if (!qp->qp_sec)
+       return 0;
+
    mutex_lock(&real_qp->qp_sec->mutex);
    ret = check_qp_port_pkey_settings(real_qp->qp_sec->ports_pkeys,
                      qp->qp_sec);

@@ -2085,8 +2085,8 @@ int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file,
        return -EOPNOTSUPP;

    if (ucore->inlen > sizeof(cmd)) {
-       if (ib_is_udata_cleared(ucore, sizeof(cmd),
-                   ucore->inlen - sizeof(cmd)))
+       if (!ib_is_udata_cleared(ucore, sizeof(cmd),
+                    ucore->inlen - sizeof(cmd)))
            return -EOPNOTSUPP;
    }

@@ -1400,7 +1400,8 @@ int ib_close_qp(struct ib_qp *qp)
    spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);

    atomic_dec(&real_qp->usecnt);
-   ib_close_shared_qp_security(qp->qp_sec);
+   if (qp->qp_sec)
+       ib_close_shared_qp_security(qp->qp_sec);
    kfree(qp);

    return 0;
@@ -586,10 +586,10 @@ static int poll_cq(struct t4_wq *wq, struct t4_cq *cq, struct t4_cqe *cqe,
            ret = -EAGAIN;
            goto skip_cqe;
        }
-       if (unlikely((CQE_WRID_MSN(hw_cqe) != (wq->rq.msn)))) {
+       if (unlikely(!CQE_STATUS(hw_cqe) &&
+                CQE_WRID_MSN(hw_cqe) != wq->rq.msn)) {
            t4_set_wq_in_error(wq);
-           hw_cqe->header |= htonl(CQE_STATUS_V(T4_ERR_MSN));
-           goto proc_cqe;
+           hw_cqe->header |= cpu_to_be32(CQE_STATUS_V(T4_ERR_MSN));
        }
        goto proc_cqe;
    }
@@ -1129,7 +1129,6 @@ struct hfi1_devdata {
    u16 pcie_lnkctl;
    u16 pcie_devctl2;
    u32 pci_msix0;
-   u32 pci_lnkctl3;
    u32 pci_tph2;

    /*

@@ -411,15 +411,12 @@ int restore_pci_variables(struct hfi1_devdata *dd)
    if (ret)
        goto error;

-   ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                    dd->pci_lnkctl3);
-   if (ret)
-       goto error;
-
-   ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2, dd->pci_tph2);
-   if (ret)
-       goto error;
-
+   if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+       ret = pci_write_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                        dd->pci_tph2);
+       if (ret)
+           goto error;
+   }
    return 0;

error:
@@ -469,15 +466,12 @@ int save_pci_variables(struct hfi1_devdata *dd)
    if (ret)
        goto error;

-   ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_SPCIE1,
-                   &dd->pci_lnkctl3);
-   if (ret)
-       goto error;
-
-   ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2, &dd->pci_tph2);
-   if (ret)
-       goto error;
-
+   if (pci_find_ext_capability(dd->pcidev, PCI_EXT_CAP_ID_TPH)) {
+       ret = pci_read_config_dword(dd->pcidev, PCIE_CFG_TPH2,
+                       &dd->pci_tph2);
+       if (ret)
+           goto error;
+   }
    return 0;

error:
@@ -1415,6 +1415,7 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev,
    }

    INIT_LIST_HEAD(&context->vma_private_list);
+   mutex_init(&context->vma_private_list_mutex);
    INIT_LIST_HEAD(&context->db_page_list);
    mutex_init(&context->db_page_mutex);

@@ -1576,7 +1577,9 @@ static void mlx5_ib_vma_close(struct vm_area_struct *area)
     * mlx5_ib_disassociate_ucontext().
     */
    mlx5_ib_vma_priv_data->vma = NULL;
+   mutex_lock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
    list_del(&mlx5_ib_vma_priv_data->list);
+   mutex_unlock(mlx5_ib_vma_priv_data->vma_private_list_mutex);
    kfree(mlx5_ib_vma_priv_data);
}

@@ -1596,10 +1599,13 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
        return -ENOMEM;

    vma_prv->vma = vma;
+   vma_prv->vma_private_list_mutex = &ctx->vma_private_list_mutex;
    vma->vm_private_data = vma_prv;
    vma->vm_ops = &mlx5_ib_vm_ops;

+   mutex_lock(&ctx->vma_private_list_mutex);
    list_add(&vma_prv->list, vma_head);
+   mutex_unlock(&ctx->vma_private_list_mutex);

    return 0;
}
@@ -1642,6 +1648,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
     * mlx5_ib_vma_close.
     */
    down_write(&owning_mm->mmap_sem);
+   mutex_lock(&context->vma_private_list_mutex);
    list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
                 list) {
        vma = vma_private->vma;
@@ -1656,6 +1663,7 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
        list_del(&vma_private->list);
        kfree(vma_private);
    }
+   mutex_unlock(&context->vma_private_list_mutex);
    up_write(&owning_mm->mmap_sem);
    mmput(owning_mm);
    put_task_struct(owning_process);

@@ -115,6 +115,8 @@ enum {
struct mlx5_ib_vma_private_data {
    struct list_head list;
    struct vm_area_struct *vma;
+   /* protect vma_private_list add/del */
+   struct mutex *vma_private_list_mutex;
};

struct mlx5_ib_ucontext {
@@ -129,6 +131,8 @@ struct mlx5_ib_ucontext {
    /* Transport Domain number */
    u32 tdn;
    struct list_head vma_private_list;
+   /* protect vma_private_list add/del */
+   struct mutex vma_private_list_mutex;

    unsigned long upd_xlt_page;
    /* protect ODP/KSM */
@@ -1613,7 +1613,7 @@ static int elantech_set_properties(struct elantech_data *etd)
        case 5:
            etd->hw_version = 3;
            break;
-       case 6 ... 14:
+       case 6 ... 15:
            etd->hw_version = 4;
            break;
        default:
@@ -1611,13 +1611,15 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain)
    domain->pgsize_bitmap = pgtbl_cfg.pgsize_bitmap;
    domain->geometry.aperture_end = (1UL << ias) - 1;
    domain->geometry.force_aperture = true;
-   smmu_domain->pgtbl_ops = pgtbl_ops;

    ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
-   if (ret < 0)
+   if (ret < 0) {
        free_io_pgtable_ops(pgtbl_ops);
+       return ret;
+   }

-   return ret;
+   smmu_domain->pgtbl_ops = pgtbl_ops;
+   return 0;
}

static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -1644,7 +1646,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid)

static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
{
-   int i;
+   int i, j;
    struct arm_smmu_master_data *master = fwspec->iommu_priv;
    struct arm_smmu_device *smmu = master->smmu;

@@ -1652,6 +1654,13 @@ static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec)
        u32 sid = fwspec->ids[i];
        __le64 *step = arm_smmu_get_step_for_sid(smmu, sid);

+       /* Bridged PCI devices may end up with duplicated IDs */
+       for (j = 0; j < i; j++)
+           if (fwspec->ids[j] == sid)
+               break;
+       if (j < i)
+           continue;
+
        arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste);
    }
}
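The duplicated-stream-ID guard above is a quadratic scan: before installing the entry for ids[i], check whether the same value already occurred at any j < i, and skip if so. For the handful of IDs a bridged PCI device can carry, this is simpler than sorting or hashing. A standalone sketch of the same loop:

/* Skip duplicate entries the way the SMMU STE installer does. */
#include <stdio.h>
#include <stdint.h>

static void install_all(const uint32_t *ids, int n)
{
    for (int i = 0; i < n; i++) {
        int j;

        /* Has ids[i] been handled already? */
        for (j = 0; j < i; j++)
            if (ids[j] == ids[i])
                break;
        if (j < i)
            continue;    /* duplicate: already installed */

        printf("install STE for sid %u\n", ids[i]);
    }
}

int main(void)
{
    uint32_t ids[] = { 5, 7, 5, 9, 7 };
    install_all(ids, 5);    /* installs 5, 7, 9 once each */
    return 0;
}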
@@ -950,6 +950,7 @@ static void prepare_start_command(struct pxa3xx_nand_info *info, int command)

    switch (command) {
    case NAND_CMD_READ0:
+   case NAND_CMD_READOOB:
    case NAND_CMD_PAGEPROG:
        info->use_ecc = 1;
        break;
@@ -167,7 +167,7 @@ static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable)
    reg = reg_readl(priv, REG_SPHY_CNTRL);
    if (enable) {
        reg |= PHY_RESET;
-       reg &= ~(EXT_PWR_DOWN | IDDQ_BIAS | CK25_DIS);
+       reg &= ~(EXT_PWR_DOWN | IDDQ_BIAS | IDDQ_GLOBAL_PWR | CK25_DIS);
        reg_writel(priv, reg, REG_SPHY_CNTRL);
        udelay(21);
        reg = reg_readl(priv, REG_SPHY_CNTRL);
@@ -1875,7 +1875,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
         * here forever if we consistently cannot allocate
         * buffers.
         */
-       else if (rc == -ENOMEM)
+       else if (rc == -ENOMEM && budget)
            rx_pkts++;
        else if (rc == -EBUSY) /* partial completion */
            break;
@@ -1961,7 +1961,7 @@ static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget)
            cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR);

        rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &event);
-       if (likely(rc == -EIO))
+       if (likely(rc == -EIO) && budget)
            rx_pkts++;
        else if (rc == -EBUSY) /* partial completion */
            break;
@@ -14227,7 +14227,9 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
    /* Reset PHY, otherwise the read DMA engine will be in a mode that
     * breaks all requests to 256 bytes.
     */
-   if (tg3_asic_rev(tp) == ASIC_REV_57766)
+   if (tg3_asic_rev(tp) == ASIC_REV_57766 ||
+       tg3_asic_rev(tp) == ASIC_REV_5717 ||
+       tg3_asic_rev(tp) == ASIC_REV_5719)
        reset_phy = true;

    err = tg3_restart_hw(tp, reset_phy);
@@ -818,6 +818,12 @@ static void fec_enet_bd_init(struct net_device *dev)
        for (i = 0; i < txq->bd.ring_size; i++) {
            /* Initialize the BD for every fragment in the page. */
            bdp->cbd_sc = cpu_to_fec16(0);
+           if (bdp->cbd_bufaddr &&
+               !IS_TSO_HEADER(txq, fec32_to_cpu(bdp->cbd_bufaddr)))
+               dma_unmap_single(&fep->pdev->dev,
+                        fec32_to_cpu(bdp->cbd_bufaddr),
+                        fec16_to_cpu(bdp->cbd_datlen),
+                        DMA_TO_DEVICE);
            if (txq->tx_skbuff[i]) {
                dev_kfree_skb_any(txq->tx_skbuff[i]);
                txq->tx_skbuff[i] = NULL;
@@ -344,7 +344,8 @@ static int orion_mdio_probe(struct platform_device *pdev)
               dev->regs + MVMDIO_ERR_INT_MASK);

    } else if (dev->err_interrupt == -EPROBE_DEFER) {
-       return -EPROBE_DEFER;
+       ret = -EPROBE_DEFER;
+       goto out_mdio;
    }

    if (pdev->dev.of_node)
@@ -362,7 +362,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
    case MLX5_CMD_OP_QUERY_VPORT_COUNTER:
    case MLX5_CMD_OP_ALLOC_Q_COUNTER:
    case MLX5_CMD_OP_QUERY_Q_COUNTER:
-   case MLX5_CMD_OP_SET_RATE_LIMIT:
+   case MLX5_CMD_OP_SET_PP_RATE_LIMIT:
    case MLX5_CMD_OP_QUERY_RATE_LIMIT:
    case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
    case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
@@ -505,7 +505,7 @@ const char *mlx5_command_str(int command)
    MLX5_COMMAND_STR_CASE(ALLOC_Q_COUNTER);
    MLX5_COMMAND_STR_CASE(DEALLOC_Q_COUNTER);
    MLX5_COMMAND_STR_CASE(QUERY_Q_COUNTER);
-   MLX5_COMMAND_STR_CASE(SET_RATE_LIMIT);
+   MLX5_COMMAND_STR_CASE(SET_PP_RATE_LIMIT);
    MLX5_COMMAND_STR_CASE(QUERY_RATE_LIMIT);
    MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
    MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
@@ -590,6 +590,7 @@ struct mlx5e_channel {
    struct mlx5_core_dev      *mdev;
    struct mlx5e_tstamp       *tstamp;
    int                        ix;
+   int                        cpu;
};

struct mlx5e_channels {

@@ -71,11 +71,6 @@ struct mlx5e_channel_param {
    struct mlx5e_cq_param      icosq_cq;
};

-static int mlx5e_get_node(struct mlx5e_priv *priv, int ix)
-{
-   return pci_irq_get_node(priv->mdev->pdev, MLX5_EQ_VEC_COMP_BASE + ix);
-}
-
static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
{
    return MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -452,17 +447,16 @@ static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq,
    int wq_sz = mlx5_wq_ll_get_size(&rq->wq);
    int mtt_sz = mlx5e_get_wqe_mtt_sz();
    int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1;
-   int node = mlx5e_get_node(c->priv, c->ix);
    int i;

    rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info),
-                     GFP_KERNEL, node);
+                     GFP_KERNEL, cpu_to_node(c->cpu));
    if (!rq->mpwqe.info)
        goto err_out;

    /* We allocate more than mtt_sz as we will align the pointer */
-   rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz,
-                         GFP_KERNEL, node);
+   rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL,
+                         cpu_to_node(c->cpu));
    if (unlikely(!rq->mpwqe.mtt_no_align))
        goto err_free_wqe_info;

@@ -570,7 +564,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
    int err;
    int i;

-   rqp->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+   rqp->wq.db_numa_node = cpu_to_node(c->cpu);

    err = mlx5_wq_ll_create(mdev, &rqp->wq, rqc_wq, &rq->wq,
                &rq->wq_ctrl);
@@ -636,8 +630,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
    default: /* MLX5_WQ_TYPE_LINKED_LIST */
        rq->wqe.frag_info =
            kzalloc_node(wq_sz * sizeof(*rq->wqe.frag_info),
-                GFP_KERNEL,
-                mlx5e_get_node(c->priv, c->ix));
+                GFP_KERNEL, cpu_to_node(c->cpu));
        if (!rq->wqe.frag_info) {
            err = -ENOMEM;
            goto err_rq_wq_destroy;
@@ -1007,13 +1000,13 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
    sq->uar_map   = mdev->mlx5e_res.bfreg.map;
    sq->min_inline_mode = params->tx_min_inline_mode;

-   param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+   param->wq.db_numa_node = cpu_to_node(c->cpu);
    err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
    if (err)
        return err;
    sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

-   err = mlx5e_alloc_xdpsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+   err = mlx5e_alloc_xdpsq_db(sq, cpu_to_node(c->cpu));
    if (err)
        goto err_sq_wq_destroy;

@@ -1060,13 +1053,13 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c,
    sq->channel   = c;
    sq->uar_map   = mdev->mlx5e_res.bfreg.map;

-   param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+   param->wq.db_numa_node = cpu_to_node(c->cpu);
    err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
    if (err)
        return err;
    sq->wq.db = &sq->wq.db[MLX5_SND_DBR];

-   err = mlx5e_alloc_icosq_db(sq, mlx5e_get_node(c->priv, c->ix));
+   err = mlx5e_alloc_icosq_db(sq, cpu_to_node(c->cpu));
    if (err)
        goto err_sq_wq_destroy;

@@ -1132,13 +1125,13 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
    if (MLX5_IPSEC_DEV(c->priv->mdev))
        set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);

-   param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+   param->wq.db_numa_node = cpu_to_node(c->cpu);
    err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, &sq->wq, &sq->wq_ctrl);
    if (err)
        return err;
    sq->wq.db    = &sq->wq.db[MLX5_SND_DBR];

-   err = mlx5e_alloc_txqsq_db(sq, mlx5e_get_node(c->priv, c->ix));
+   err = mlx5e_alloc_txqsq_db(sq, cpu_to_node(c->cpu));
    if (err)
        goto err_sq_wq_destroy;

@@ -1510,8 +1503,8 @@ static int mlx5e_alloc_cq(struct mlx5e_channel *c,
    struct mlx5_core_dev *mdev = c->priv->mdev;
    int err;

-   param->wq.buf_numa_node = mlx5e_get_node(c->priv, c->ix);
-   param->wq.db_numa_node = mlx5e_get_node(c->priv, c->ix);
+   param->wq.buf_numa_node = cpu_to_node(c->cpu);
+   param->wq.db_numa_node = cpu_to_node(c->cpu);
    param->eq_ix   = c->ix;

    err = mlx5e_alloc_cq_common(mdev, param, cq);
@@ -1610,6 +1603,11 @@ static void mlx5e_close_cq(struct mlx5e_cq *cq)
    mlx5e_free_cq(cq);
}

+static int mlx5e_get_cpu(struct mlx5e_priv *priv, int ix)
+{
+   return cpumask_first(priv->mdev->priv.irq_info[ix].mask);
+}
+
static int mlx5e_open_tx_cqs(struct mlx5e_channel *c,
                 struct mlx5e_params *params,
                 struct mlx5e_channel_param *cparam)
@@ -1758,12 +1756,13 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
{
    struct mlx5e_cq_moder icocq_moder = {0, 0};
    struct net_device *netdev = priv->netdev;
+   int cpu = mlx5e_get_cpu(priv, ix);
    struct mlx5e_channel *c;
    unsigned int irq;
    int err;
    int eqn;

-   c = kzalloc_node(sizeof(*c), GFP_KERNEL, mlx5e_get_node(priv, ix));
+   c = kzalloc_node(sizeof(*c), GFP_KERNEL, cpu_to_node(cpu));
    if (!c)
        return -ENOMEM;

@@ -1771,6 +1770,7 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix,
    c->mdev     = priv->mdev;
    c->tstamp   = &priv->tstamp;
    c->ix       = ix;
+   c->cpu      = cpu;
    c->pdev     = &priv->mdev->pdev->dev;
    c->netdev   = priv->netdev;
    c->mkey_be  = cpu_to_be32(priv->mdev->mlx5e_res.mkey.key);
@@ -1859,8 +1859,7 @@ static void mlx5e_activate_channel(struct mlx5e_channel *c)
    for (tc = 0; tc < c->num_tc; tc++)
        mlx5e_activate_txqsq(&c->sq[tc]);
    mlx5e_activate_rq(&c->rq);
-   netif_set_xps_queue(c->netdev,
-               mlx5_get_vector_affinity(c->priv->mdev, c->ix), c->ix);
+   netif_set_xps_queue(c->netdev, get_cpu_mask(c->cpu), c->ix);
}

static void mlx5e_deactivate_channel(struct mlx5e_channel *c)
@@ -3554,6 +3553,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
                             struct sk_buff *skb,
                             netdev_features_t features)
{
+   unsigned int offset = 0;
    struct udphdr *udph;
    u8 proto;
    u16 port;
@@ -3563,7 +3563,7 @@ static netdev_features_t mlx5e_tunnel_features_check(struct mlx5e_priv *priv,
        proto = ip_hdr(skb)->protocol;
        break;
    case htons(ETH_P_IPV6):
-       proto = ipv6_hdr(skb)->nexthdr;
+       proto = ipv6_find_hdr(skb, &offset, -1, NULL, NULL);
        break;
    default:
        goto out;
Some files were not shown because too many files have changed in this diff.