396 lines
9.9 KiB
C
Raw Permalink Normal View History

/*
* Copyright(c) 2017 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* This code is based in part on work published here:
*
* https://github.com/IAIK/KAISER
*
* The original work was written by and and signed off by for the Linux
* kernel by:
*
* Signed-off-by: Richard Fellner <richard.fellner@student.tugraz.at>
* Signed-off-by: Moritz Lipp <moritz.lipp@iaik.tugraz.at>
* Signed-off-by: Daniel Gruss <daniel.gruss@iaik.tugraz.at>
* Signed-off-by: Michael Schwarz <michael.schwarz@iaik.tugraz.at>
*
* Major changes to the original code by: Dave Hansen <dave.hansen@intel.com>
* Mostly rewritten by Thomas Gleixner <tglx@linutronix.de> and
* Andy Lutomirsky <luto@amacapital.net>
*/
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
x86/speculation: Support 'mitigations=' cmdline option commit d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812 upstream Configure x86 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, Speculative Store Bypass, and L1TF. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86) Reviewed-by: Jiri Kosina <jkosina@suse.cz> Cc: Borislav Petkov <bp@alien8.de> Cc: "H . Peter Anvin" <hpa@zytor.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Jiri Kosina <jikos@kernel.org> Cc: Waiman Long <longman@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Jon Masters <jcm@redhat.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Tyler Hicks <tyhicks@canonical.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Steven Price <steven.price@arm.com> Cc: Phil Auld <pauld@redhat.com> Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-04-12 15:39:29 -05:00
#include <linux/cpu.h>
#include <asm/cpufeature.h>
#include <asm/hypervisor.h>
#include <asm/vsyscall.h>
#include <asm/cmdline.h>
#include <asm/pti.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/desc.h>
x86: Don't include linux/irq.h from asm/hardirq.h commit 447ae316670230d7d29430e2cbf1f5db4f49d14c upstream The next patch in this series will have to make the definition of irq_cpustat_t available to entering_irq(). Inclusion of asm/hardirq.h into asm/apic.h would cause circular header dependencies like asm/smp.h asm/apic.h asm/hardirq.h linux/irq.h linux/topology.h linux/smp.h asm/smp.h or linux/gfp.h linux/mmzone.h asm/mmzone.h asm/mmzone_64.h asm/smp.h asm/apic.h asm/hardirq.h linux/irq.h linux/irqdesc.h linux/kobject.h linux/sysfs.h linux/kernfs.h linux/idr.h linux/gfp.h and others. This causes compilation errors because of the header guards becoming effective in the second inclusion: symbols/macros that had been defined before wouldn't be available to intermediate headers in the #include chain anymore. A possible workaround would be to move the definition of irq_cpustat_t into its own header and include that from both, asm/hardirq.h and asm/apic.h. However, this wouldn't solve the real problem, namely asm/harirq.h unnecessarily pulling in all the linux/irq.h cruft: nothing in asm/hardirq.h itself requires it. Also, note that there are some other archs, like e.g. arm64, which don't have that #include in their asm/hardirq.h. Remove the linux/irq.h #include from x86' asm/hardirq.h. Fix resulting compilation errors by adding appropriate #includes to *.c files as needed. Note that some of these *.c files could be cleaned up a bit wrt. to their set of #includes, but that should better be done from separate patches, if at all. Signed-off-by: Nicolai Stange <nstange@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-07-29 12:15:33 +02:00
#include <asm/sections.h>
#undef pr_fmt
#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
/* Backporting helper */
#ifndef __GFP_NOTRACK
#define __GFP_NOTRACK 0
#endif
static void __init pti_print_if_insecure(const char *reason)
{
if (boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
pr_info("%s\n", reason);
}
static void __init pti_print_if_secure(const char *reason)
{
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
pr_info("%s\n", reason);
}
void __init pti_check_boottime_disable(void)
{
char arg[5];
int ret;
if (hypervisor_is_type(X86_HYPER_XEN_PV)) {
pti_print_if_insecure("disabled on XEN PV.");
return;
}
ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
if (ret > 0) {
if (ret == 3 && !strncmp(arg, "off", 3)) {
pti_print_if_insecure("disabled on command line.");
return;
}
if (ret == 2 && !strncmp(arg, "on", 2)) {
pti_print_if_secure("force enabled on command line.");
goto enable;
}
if (ret == 4 && !strncmp(arg, "auto", 4))
goto autosel;
}
x86/speculation: Support 'mitigations=' cmdline option commit d68be4c4d31295ff6ae34a8ddfaa4c1a8ff42812 upstream Configure x86 runtime CPU speculation bug mitigations in accordance with the 'mitigations=' cmdline option. This affects Meltdown, Spectre v2, Speculative Store Bypass, and L1TF. The default behavior is unchanged. Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Jiri Kosina <jkosina@suse.cz> (on x86) Reviewed-by: Jiri Kosina <jkosina@suse.cz> Cc: Borislav Petkov <bp@alien8.de> Cc: "H . Peter Anvin" <hpa@zytor.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Jiri Kosina <jikos@kernel.org> Cc: Waiman Long <longman@redhat.com> Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Jon Masters <jcm@redhat.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Michael Ellerman <mpe@ellerman.id.au> Cc: linuxppc-dev@lists.ozlabs.org Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: Heiko Carstens <heiko.carstens@de.ibm.com> Cc: linux-s390@vger.kernel.org Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Will Deacon <will.deacon@arm.com> Cc: linux-arm-kernel@lists.infradead.org Cc: linux-arch@vger.kernel.org Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Tyler Hicks <tyhicks@canonical.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Randy Dunlap <rdunlap@infradead.org> Cc: Steven Price <steven.price@arm.com> Cc: Phil Auld <pauld@redhat.com> Link: https://lkml.kernel.org/r/6616d0ae169308516cfdf5216bedd169f8a8291b.1555085500.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2019-04-12 15:39:29 -05:00
if (cmdline_find_option_bool(boot_command_line, "nopti") ||
cpu_mitigations_off()) {
pti_print_if_insecure("disabled on command line.");
return;
}
autosel:
if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
return;
enable:
setup_force_cpu_cap(X86_FEATURE_PTI);
}
pgd_t __pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd)
{
/*
* Changes to the high (kernel) portion of the kernelmode page
* tables are not automatically propagated to the usermode tables.
*
* Users should keep in mind that, unlike the kernelmode tables,
* there is no vmalloc_fault equivalent for the usermode tables.
* Top-level entries added to init_mm's usermode pgd after boot
* will not be automatically propagated to other mms.
*/
if (!pgdp_maps_userspace(pgdp))
return pgd;
/*
* The user page tables get the full PGD, accessible from
* userspace:
*/
kernel_to_user_pgdp(pgdp)->pgd = pgd.pgd;
/*
* If this is normal user memory, make it NX in the kernel
* pagetables so that, if we somehow screw up and return to
* usermode with the kernel CR3 loaded, we'll get a page fault
* instead of allowing user code to execute with the wrong CR3.
*
* As exceptions, we don't set NX if:
* - _PAGE_USER is not set. This could be an executable
* EFI runtime mapping or something similar, and the kernel
* may execute from it
* - we don't have NX support
* - we're clearing the PGD (i.e. the new pgd is not present).
*/
if ((pgd.pgd & (_PAGE_USER|_PAGE_PRESENT)) == (_PAGE_USER|_PAGE_PRESENT) &&
(__supported_pte_mask & _PAGE_NX))
pgd.pgd |= _PAGE_NX;
/* return the copy of the PGD we want the kernel to use: */
return pgd;
}
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
/*
* Walk the user copy of the page tables (optionally) trying to allocate
* page table pages on the way down.
*
* Returns a pointer to a P4D on success, or NULL on failure.
*/
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
static __init p4d_t *pti_user_pagetable_walk_p4d(unsigned long address)
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
{
pgd_t *pgd = kernel_to_user_pgdp(pgd_offset_k(address));
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
if (address < PAGE_OFFSET) {
WARN_ONCE(1, "attempt to walk user address\n");
return NULL;
}
if (pgd_none(*pgd)) {
unsigned long new_p4d_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_p4d_page))
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
return NULL;
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page)));
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
}
BUILD_BUG_ON(pgd_large(*pgd) != 0);
return p4d_offset(pgd, address);
}
/*
* Walk the user copy of the page tables (optionally) trying to allocate
* page table pages on the way down.
*
* Returns a pointer to a PMD on success, or NULL on failure.
*/
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
static __init pmd_t *pti_user_pagetable_walk_pmd(unsigned long address)
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
p4d_t *p4d;
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
pud_t *pud;
p4d = pti_user_pagetable_walk_p4d(address);
if (!p4d)
return NULL;
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
BUILD_BUG_ON(p4d_large(*p4d) != 0);
if (p4d_none(*p4d)) {
unsigned long new_pud_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_pud_page))
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
return NULL;
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
set_p4d(p4d, __p4d(_KERNPG_TABLE | __pa(new_pud_page)));
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
}
pud = pud_offset(p4d, address);
/* The user page tables do not use large mappings: */
if (pud_large(*pud)) {
WARN_ON(1);
return NULL;
}
if (pud_none(*pud)) {
unsigned long new_pmd_page = __get_free_page(gfp);
if (WARN_ON_ONCE(!new_pmd_page))
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
return NULL;
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
}
return pmd_offset(pud, address);
}
#ifdef CONFIG_X86_VSYSCALL_EMULATION
/*
* Walk the shadow copy of the page tables (optionally) trying to allocate
* page table pages on the way down. Does not support large pages.
*
* Note: this is only used when mapping *new* kernel data into the
* user/shadow page tables. It is never used for userspace data.
*
* Returns a pointer to a PTE on success, or NULL on failure.
*/
static pte_t *pti_user_pagetable_walk_pte(unsigned long address)
{
gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
pmd_t *pmd;
pte_t *pte;
pmd = pti_user_pagetable_walk_pmd(address);
if (!pmd)
return NULL;
/* We can't do anything sensible if we hit a large mapping. */
if (pmd_large(*pmd)) {
WARN_ON(1);
return NULL;
}
if (pmd_none(*pmd)) {
unsigned long new_pte_page = __get_free_page(gfp);
if (!new_pte_page)
return NULL;
x86/mm/pti: Remove dead logic in pti_user_pagetable_walk*() commit 8d56eff266f3e41a6c39926269c4c3f58f881a8e upstream. The following code contains dead logic: 162 if (pgd_none(*pgd)) { 163 unsigned long new_p4d_page = __get_free_page(gfp); 164 if (!new_p4d_page) 165 return NULL; 166 167 if (pgd_none(*pgd)) { 168 set_pgd(pgd, __pgd(_KERNPG_TABLE | __pa(new_p4d_page))); 169 new_p4d_page = 0; 170 } 171 if (new_p4d_page) 172 free_page(new_p4d_page); 173 } There can't be any difference between two pgd_none(*pgd) at L162 and L167, so it's always false at L171. Dave Hansen explained: Yes, the double-test was part of an optimization where we attempted to avoid using a global spinlock in the fork() path. We would check for unallocated mid-level page tables without the lock. The lock was only taken when we needed to *make* an entry to avoid collisions. Now that it is all single-threaded, there is no chance of a collision, no need for a lock, and no need for the re-check. As all these functions are only called during init, mark them __init as well. Fixes: 03f4424f348e ("x86/mm/pti: Add functions to clone kernel PMDs") Signed-off-by: Jike Song <albcamus@gmail.com> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Alan Cox <gnomes@lxorguk.ukuu.org.uk> Cc: Andi Kleen <ak@linux.intel.com> Cc: Tom Lendacky <thomas.lendacky@amd.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Tim Chen <tim.c.chen@linux.intel.com> Cc: Jiri Koshina <jikos@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Kees Cook <keescook@google.com> Cc: Andi Lutomirski <luto@amacapital.net> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Greg KH <gregkh@linux-foundation.org> Cc: David Woodhouse <dwmw@amazon.co.uk> Cc: Paul Turner <pjt@google.com> Link: https://lkml.kernel.org/r/20180108160341.3461-1-albcamus@gmail.com Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-01-09 00:03:41 +08:00
set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
}
pte = pte_offset_kernel(pmd, address);
if (pte_flags(*pte) & _PAGE_USER) {
WARN_ONCE(1, "attempt to walk to user pte\n");
return NULL;
}
return pte;
}
static void __init pti_setup_vsyscall(void)
{
pte_t *pte, *target_pte;
unsigned int level;
pte = lookup_address(VSYSCALL_ADDR, &level);
if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte))
return;
target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR);
if (WARN_ON(!target_pte))
return;
*target_pte = *pte;
set_vsyscall_pgtable_user_bits(kernel_to_user_pgdp(swapper_pg_dir));
}
#else
static void __init pti_setup_vsyscall(void) { }
#endif
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
static void __init
pti_clone_pmds(unsigned long start, unsigned long end, pmdval_t clear)
{
unsigned long addr;
/*
* Clone the populated PMDs which cover start to end. These PMD areas
* can have holes.
*/
for (addr = start; addr < end; addr += PMD_SIZE) {
pmd_t *pmd, *target_pmd;
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
x86/mm/pti: Add an overflow check to pti_clone_pmds() [ Upstream commit 935232ce28dfabff1171e5a7113b2d865fa9ee63 ] The addr counter will overflow if the last PMD of the address space is cloned, resulting in an endless loop. Check for that and bail out of the loop when it happens. Signed-off-by: Joerg Roedel <jroedel@suse.de> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Tested-by: Pavel Machek <pavel@ucw.cz> Cc: "H . Peter Anvin" <hpa@zytor.com> Cc: linux-mm@kvack.org Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andy Lutomirski <luto@kernel.org> Cc: Dave Hansen <dave.hansen@intel.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Jiri Kosina <jkosina@suse.cz> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Brian Gerst <brgerst@gmail.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Cc: Andrea Arcangeli <aarcange@redhat.com> Cc: Waiman Long <llong@redhat.com> Cc: "David H . Gutteridge" <dhgutteridge@sympatico.ca> Cc: joro@8bytes.org Link: https://lkml.kernel.org/r/1531906876-13451-25-git-send-email-joro@8bytes.org Signed-off-by: Sasha Levin <alexander.levin@microsoft.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2018-07-18 11:41:01 +02:00
/* Overflow check */
if (addr < start)
break;
x86/mm/pti: Add functions to clone kernel PMDs commit 03f4424f348e8be95eb1bbeba09461cd7b867828 upstream. Provide infrastructure to: - find a kernel PMD for a mapping which must be visible to user space for the entry/exit code to work. - walk an address range and share the kernel PMD with it. This reuses a small part of the original KAISER patches to populate the user space page table. [ tglx: Made it universally usable so it can be used for any kind of shared mapping. Add a mechanism to clear specific bits in the user space visible PMD entry. Folded Andys simplifactions ] Originally-by: Dave Hansen <dave.hansen@linux.intel.com> Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bpetkov@suse.de> Cc: Brian Gerst <brgerst@gmail.com> Cc: Dave Hansen <dave.hansen@linux.intel.com> Cc: David Laight <David.Laight@aculab.com> Cc: Denys Vlasenko <dvlasenk@redhat.com> Cc: Eduardo Valentin <eduval@amazon.com> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: H. Peter Anvin <hpa@zytor.com> Cc: Josh Poimboeuf <jpoimboe@redhat.com> Cc: Juergen Gross <jgross@suse.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Will Deacon <will.deacon@arm.com> Cc: aliguori@amazon.com Cc: daniel.gruss@iaik.tugraz.at Cc: hughd@google.com Cc: keescook@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
2017-12-04 15:07:42 +01:00
pgd = pgd_offset_k(addr);
if (WARN_ON(pgd_none(*pgd)))
return;
p4d = p4d_offset(pgd, addr);
if (WARN_ON(p4d_none(*p4d)))
return;
pud = pud_offset(p4d, addr);
if (pud_none(*pud))
continue;
pmd = pmd_offset(pud, addr);
if (pmd_none(*pmd))
continue;
target_pmd = pti_user_pagetable_walk_pmd(addr);
if (WARN_ON(!target_pmd))
return;
/*
* Copy the PMD. That is, the kernelmode and usermode
* tables will share the last-level page tables of this
* address range
*/
*target_pmd = pmd_clear_flags(*pmd, clear);
}
}
/*
* Clone a single p4d (i.e. a top-level entry on 4-level systems and a
* next-level entry on 5-level systems.
*/
static void __init pti_clone_p4d(unsigned long addr)
{
p4d_t *kernel_p4d, *user_p4d;
pgd_t *kernel_pgd;
user_p4d = pti_user_pagetable_walk_p4d(addr);
if (!user_p4d)
return;
kernel_pgd = pgd_offset_k(addr);
kernel_p4d = p4d_offset(kernel_pgd, addr);
*user_p4d = *kernel_p4d;
}
/*
* Clone the CPU_ENTRY_AREA into the user space visible page table.
*/
static void __init pti_clone_user_shared(void)
{
pti_clone_p4d(CPU_ENTRY_AREA_BASE);
}
/*
* Clone the ESPFIX P4D into the user space visinble page table
*/
static void __init pti_setup_espfix64(void)
{
#ifdef CONFIG_X86_ESPFIX64
pti_clone_p4d(ESPFIX_BASE_ADDR);
#endif
}
/*
* Clone the populated PMDs of the entry and irqentry text and force it RO.
*/
static void __init pti_clone_entry_text(void)
{
pti_clone_pmds((unsigned long) __entry_text_start,
(unsigned long) __irqentry_text_end,
_PAGE_RW | _PAGE_GLOBAL);
/*
* If CFI is enabled, also map jump tables, so the entry code can
* make indirect calls.
*/
if (IS_ENABLED(CONFIG_CFI_CLANG))
pti_clone_pmds((unsigned long) __cfi_jt_start,
(unsigned long) __cfi_jt_end,
_PAGE_RW | _PAGE_GLOBAL);
}
/*
* Initialize kernel page table isolation
*/
void __init pti_init(void)
{
if (!static_cpu_has(X86_FEATURE_PTI))
return;
pr_info("enabled\n");
pti_clone_user_shared();
pti_clone_entry_text();
pti_setup_espfix64();
pti_setup_vsyscall();
}