From 8232cdfd5643909f30abe17d8ad9d81459f7feae Mon Sep 17 00:00:00 2001 From: Eugeniy Paltsev Date: Mon, 25 Feb 2019 20:16:01 +0300 Subject: [PATCH 01/93] ARC: u-boot args: check that magic number is correct [ Upstream commit edb64bca50cd736c6894cc6081d5263c007ce005 ] In case of devboards we really often disable bootloader and load Linux image in memory via JTAG. Even if kernel tries to verify uboot_tag and uboot_arg there is sill a chance that we treat some garbage in registers as valid u-boot arguments in JTAG case. E.g. it is enough to have '1' in r0 to treat any value in r2 as a boot command line. So check that magic number passed from u-boot is correct and drop u-boot arguments otherwise. That helps to reduce the possibility of using garbage as u-boot arguments in JTAG case. We can safely check U-boot magic value (0x0) in linux passed via r1 register as U-boot pass it from the beginning. So there is no backward-compatibility issues. Signed-off-by: Eugeniy Paltsev Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/kernel/head.S | 1 + arch/arc/kernel/setup.c | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 1f945d0f40da..208bf2c9e7b0 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -107,6 +107,7 @@ ENTRY(stext) ; r2 = pointer to uboot provided cmdline or external DTB in mem ; These are handled later in handle_uboot_args() st r0, [@uboot_tag] + st r1, [@uboot_magic] st r2, [@uboot_arg] #endif diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 709649e5f9bc..6b8d106e0d53 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -35,6 +35,7 @@ unsigned int intr_to_DE_cnt; /* Part of U-boot ABI: see head.S */ int __initdata uboot_tag; +int __initdata uboot_magic; char __initdata *uboot_arg; const struct machine_desc *machine_desc; @@ -433,6 +434,8 @@ static inline bool uboot_arg_invalid(unsigned long addr) #define UBOOT_TAG_NONE 0 #define UBOOT_TAG_CMDLINE 1 #define UBOOT_TAG_DTB 2 +/* We always pass 0 as magic from U-boot */ +#define UBOOT_MAGIC_VALUE 0 void __init handle_uboot_args(void) { @@ -448,6 +451,11 @@ void __init handle_uboot_args(void) goto ignore_uboot_args; } + if (uboot_magic != UBOOT_MAGIC_VALUE) { + pr_warn(IGNORE_ARGS "non zero uboot magic\n"); + goto ignore_uboot_args; + } + if (uboot_tag != UBOOT_TAG_NONE && uboot_arg_invalid((unsigned long)uboot_arg)) { pr_warn(IGNORE_ARGS "invalid uboot arg: '%px'\n", uboot_arg); From 2d6453a3e9e08b40c93b1049a591c6caa5dfa06a Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 25 Feb 2019 09:45:38 +0000 Subject: [PATCH 02/93] arc: hsdk_defconfig: Enable CONFIG_BLK_DEV_RAM [ Upstream commit 0728aeb7ead99a9b0dac2f3c92b3752b4e02ff97 ] We have now a HSDK device in our kernelci lab, but kernel builded via the hsdk_defconfig lacks ramfs supports, so it cannot boot kernelci jobs yet. So this patch enable CONFIG_BLK_DEV_RAM in hsdk_defconfig. Signed-off-by: Corentin Labbe Acked-by: Alexey Brodkin Signed-off-by: Vineet Gupta Signed-off-by: Sasha Levin --- arch/arc/configs/hsdk_defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arc/configs/hsdk_defconfig b/arch/arc/configs/hsdk_defconfig index 083560e9e571..4dac1169f528 100644 --- a/arch/arc/configs/hsdk_defconfig +++ b/arch/arc/configs/hsdk_defconfig @@ -9,6 +9,7 @@ CONFIG_NAMESPACES=y # CONFIG_UTS_NS is not set # CONFIG_PID_NS is not set CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y CONFIG_EMBEDDED=y CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set From 103eb70cf4eb7fb263d1e337811217e2e4c53731 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 7 Mar 2019 10:52:33 -0800 Subject: [PATCH 03/93] perf/core: Restore mmap record type correctly [ Upstream commit d9c1bb2f6a2157b38e8eb63af437cb22701d31ee ] On mmap(), perf_events generates a RECORD_MMAP record and then checks which events are interested in this record. There are currently 2 versions of mmap records: RECORD_MMAP and RECORD_MMAP2. MMAP2 is larger. The event configuration controls which version the user level tool accepts. If the event->attr.mmap2=1 field then MMAP2 record is returned. The perf_event_mmap_output() takes care of this. It checks attr->mmap2 and corrects the record fields before putting it in the sampling buffer of the event. At the end the function restores the modified MMAP record fields. The problem is that the function restores the size but not the type. Thus, if a subsequent event only accepts MMAP type, then it would instead receive an MMAP2 record with a size of MMAP record. This patch fixes the problem by restoring the record type on exit. Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra (Intel) Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Fixes: 13d7a2410fa6 ("perf: Add attr->mmap2 attribute to an event") Link: http://lkml.kernel.org/r/20190307185233.225521-1-eranian@google.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- kernel/events/core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 92939b5397df..580616e6fcee 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6923,6 +6923,7 @@ static void perf_event_mmap_output(struct perf_event *event, struct perf_output_handle handle; struct perf_sample_data sample; int size = mmap_event->event_id.header.size; + u32 type = mmap_event->event_id.header.type; int ret; if (!perf_event_mmap_match(event, data)) @@ -6966,6 +6967,7 @@ static void perf_event_mmap_output(struct perf_event *event, perf_output_end(&handle); out: mmap_event->event_id.header.size = size; + mmap_event->event_id.header.type = type; } static void perf_event_mmap_event(struct perf_mmap_event *mmap_event) From 7a82651d06cbaf2447f4f1193f4a7b325c1ab53a Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:15:32 -0400 Subject: [PATCH 04/93] ext4: add missing brelse() in add_new_gdb_meta_bg() [ Upstream commit d64264d6218e6892edd832dc3a5a5857c2856c53 ] Currently in add_new_gdb_meta_bg() there is a missing brelse of gdb_bh in case ext4_journal_get_write_access() fails. Additionally kvfree() is missing in the same error path. Fix it by moving the ext4_journal_get_write_access() before the ext4 sb update as Ted suggested and release n_group_desc and gdb_bh in case it fails. Fixes: 61a9c11e5e7a ("ext4: add missing brelse() add_new_gdb_meta_bg()'s error path") Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 6f0acfe31418..fb9fbf993e22 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -907,11 +907,18 @@ static int add_new_gdb_meta_bg(struct super_block *sb, memcpy(n_group_desc, o_group_desc, EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = gdb_bh; + + BUFFER_TRACE(gdb_bh, "get_write_access"); + err = ext4_journal_get_write_access(handle, gdb_bh); + if (err) { + kvfree(n_group_desc); + brelse(gdb_bh); + return err; + } + EXT4_SB(sb)->s_group_desc = n_group_desc; EXT4_SB(sb)->s_gdb_count++; kvfree(o_group_desc); - BUFFER_TRACE(gdb_bh, "get_write_access"); - err = ext4_journal_get_write_access(handle, gdb_bh); return err; } From ce1bdbed2097bffb0cabc8381f3727f8698ff991 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 15 Mar 2019 00:22:28 -0400 Subject: [PATCH 05/93] ext4: report real fs size after failed resize [ Upstream commit 6c7328400e0488f7d49e19e02290ba343b6811b2 ] Currently when the file system resize using ext4_resize_fs() fails it will report into log that "resized filesystem to ". However this may not be true in the case of failure. Use the current block count as returned by ext4_blocks_count() to report the block count. Additionally, report a warning that "error occurred during file system resize" Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/resize.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index fb9fbf993e22..333fba05e1a5 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -2049,6 +2049,10 @@ out: free_flex_gd(flex_gd); if (resize_inode != NULL) iput(resize_inode); - ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", n_blocks_count); + if (err) + ext4_warning(sb, "error (%d) occurred during " + "file system resize", err); + ext4_msg(sb, KERN_INFO, "resized filesystem to %llu", + ext4_blocks_count(es)); return err; } From 7d831a0512987fc2af6b3a23ed44a4e96b79f26a Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 22:58:29 -0500 Subject: [PATCH 06/93] ALSA: echoaudio: add a check for ioremap_nocache [ Upstream commit 6ade657d6125ec3ec07f95fa51e28138aef6208f ] In case ioremap_nocache fails, the fix releases chip and returns an error code upstream to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/pci/echoaudio/echoaudio.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sound/pci/echoaudio/echoaudio.c b/sound/pci/echoaudio/echoaudio.c index d68f99e076a8..e1f0bcd45c37 100644 --- a/sound/pci/echoaudio/echoaudio.c +++ b/sound/pci/echoaudio/echoaudio.c @@ -1953,6 +1953,11 @@ static int snd_echo_create(struct snd_card *card, } chip->dsp_registers = (volatile u32 __iomem *) ioremap_nocache(chip->dsp_registers_phys, sz); + if (!chip->dsp_registers) { + dev_err(chip->card->dev, "ioremap failed\n"); + snd_echo_free(chip); + return -ENOMEM; + } if (request_irq(pci->irq, snd_echo_interrupt, IRQF_SHARED, KBUILD_MODNAME, chip)) { From b042245be24b0e663bcf45f0ae77910bb3f11829 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 14 Mar 2019 23:04:14 -0500 Subject: [PATCH 07/93] ALSA: sb8: add a check for request_region [ Upstream commit dcd0feac9bab901d5739de51b3f69840851f8919 ] In case request_region fails, the fix returns an error code to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/isa/sb/sb8.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sound/isa/sb/sb8.c b/sound/isa/sb/sb8.c index d77dcba276b5..1eb8b61a185b 100644 --- a/sound/isa/sb/sb8.c +++ b/sound/isa/sb/sb8.c @@ -111,6 +111,10 @@ static int snd_sb8_probe(struct device *pdev, unsigned int dev) /* block the 0x388 port to avoid PnP conflicts */ acard->fm_res = request_region(0x388, 4, "SoundBlaster FM"); + if (!acard->fm_res) { + err = -EBUSY; + goto _err; + } if (port[dev] != SNDRV_AUTO_PORT) { if ((err = snd_sbdsp_create(card, port[dev], irq[dev], From bfd5834e38886031625fe39dc6f3907b9c382729 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 12 Mar 2019 16:44:28 +0200 Subject: [PATCH 08/93] auxdisplay: hd44780: Fix memory leak on ->remove() [ Upstream commit 41c8d0adf3c4df1867d98cee4a2c4531352a33ad ] We have to free on ->remove() the allocated resources on ->probe(). Fixes: d47d88361fee ("auxdisplay: Add HD44780 Character LCD support") Reviewed-by: Geert Uytterhoeven Signed-off-by: Andy Shevchenko Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- drivers/auxdisplay/hd44780.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/auxdisplay/hd44780.c b/drivers/auxdisplay/hd44780.c index 036eec404289..2d927feb3db4 100644 --- a/drivers/auxdisplay/hd44780.c +++ b/drivers/auxdisplay/hd44780.c @@ -302,6 +302,8 @@ static int hd44780_remove(struct platform_device *pdev) struct charlcd *lcd = platform_get_drvdata(pdev); charlcd_unregister(lcd); + + kfree(lcd); return 0; } From e04dc07f620a11d73b201cb4d7bb4712c607856d Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 6 Mar 2019 19:17:56 +0200 Subject: [PATCH 09/93] IB/mlx4: Fix race condition between catas error reset and aliasguid flows [ Upstream commit 587443e7773e150ae29e643ee8f41a1eed226565 ] Code review revealed a race condition which could allow the catas error flow to interrupt the alias guid query post mechanism at random points. Thiis is fixed by doing cancel_delayed_work_sync() instead of cancel_delayed_work() during the alias guid mechanism destroy flow. Fixes: a0c64a17aba8 ("mlx4: Add alias_guid mechanism") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe Signed-off-by: Sasha Levin --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 155b4dfc0ae8..baab9afa9174 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -804,8 +804,8 @@ void mlx4_ib_destroy_alias_guid_service(struct mlx4_ib_dev *dev) unsigned long flags; for (i = 0 ; i < dev->num_ports; i++) { - cancel_delayed_work(&dev->sriov.alias_guid.ports_guid[i].alias_guid_work); det = &sriov->alias_guid.ports_guid[i]; + cancel_delayed_work_sync(&det->alias_guid_work); spin_lock_irqsave(&sriov->alias_guid.ag_work_lock, flags); while (!list_empty(&det->cb_list)) { cb_ctx = list_entry(det->cb_list.next, From eebec30d6b7ae4d60b1002e4dc31ffec38a0d4a5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Mar 2019 11:10:11 +0100 Subject: [PATCH 10/93] mmc: davinci: remove extraneous __init annotation [ Upstream commit 9ce58dd7d9da3ca0d7cb8c9568f1c6f4746da65a ] Building with clang finds a mistaken __init tag: WARNING: vmlinux.o(.text+0x5e4250): Section mismatch in reference from the function davinci_mmcsd_probe() to the function .init.text:init_mmcsd_host() The function davinci_mmcsd_probe() references the function __init init_mmcsd_host(). This is often because davinci_mmcsd_probe lacks a __init annotation or the annotation of init_mmcsd_host is wrong. Signed-off-by: Arnd Bergmann Acked-by: Wolfram Sang Reviewed-by: Nathan Chancellor Signed-off-by: Ulf Hansson Signed-off-by: Sasha Levin --- drivers/mmc/host/davinci_mmc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mmc/host/davinci_mmc.c b/drivers/mmc/host/davinci_mmc.c index 351330dfb954..1bd1819cca7d 100644 --- a/drivers/mmc/host/davinci_mmc.c +++ b/drivers/mmc/host/davinci_mmc.c @@ -1118,7 +1118,7 @@ static inline void mmc_davinci_cpufreq_deregister(struct mmc_davinci_host *host) { } #endif -static void __init init_mmcsd_host(struct mmc_davinci_host *host) +static void init_mmcsd_host(struct mmc_davinci_host *host) { mmc_davinci_reset_ctrl(host, 1); From 4467b4fdad62644814f13417b6fd54546db6a7ae Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Sun, 17 Mar 2019 23:21:24 +0000 Subject: [PATCH 11/93] ALSA: opl3: fix mismatch between snd_opl3_drum_switch definition and declaration [ Upstream commit b4748e7ab731e436cf5db4786358ada5dd2db6dd ] The function snd_opl3_drum_switch declaration in the header file has the order of the two arguments on_off and vel swapped when compared to the definition arguments of vel and on_off. Fix this by swapping them around to match the definition. This error predates the git history, so no idea when this error was introduced. Signed-off-by: Colin Ian King Signed-off-by: Takashi Iwai Signed-off-by: Sasha Levin --- sound/drivers/opl3/opl3_voice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sound/drivers/opl3/opl3_voice.h b/sound/drivers/opl3/opl3_voice.h index eaef435e0528..abf6c23a721c 100644 --- a/sound/drivers/opl3/opl3_voice.h +++ b/sound/drivers/opl3/opl3_voice.h @@ -41,7 +41,7 @@ void snd_opl3_timer_func(unsigned long data); /* Prototypes for opl3_drums.c */ void snd_opl3_load_drums(struct snd_opl3 *opl3); -void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int on_off, int vel, struct snd_midi_channel *chan); +void snd_opl3_drum_switch(struct snd_opl3 *opl3, int note, int vel, int on_off, struct snd_midi_channel *chan); /* Prototypes for opl3_oss.c */ #if IS_ENABLED(CONFIG_SND_SEQUENCER_OSS) From 643deb59cee632f326f1865dc3b5ea8e91ff8c35 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Sat, 19 Jan 2019 17:15:23 +0100 Subject: [PATCH 12/93] thermal/intel_powerclamp: fix __percpu declaration of worker_data [ Upstream commit aa36e3616532f82a920b5ebf4e059fbafae63d88 ] This variable is declared as: static struct powerclamp_worker_data * __percpu worker_data; In other words, a percpu pointer to struct ... But this variable not used like so but as a pointer to a percpu struct powerclamp_worker_data. So fix the declaration as: static struct powerclamp_worker_data __percpu *worker_data; This also quiets Sparse's warnings from __verify_pcpu_ptr(), like: 494:49: warning: incorrect type in initializer (different address spaces) 494:49: expected void const [noderef] *__vpp_verify 494:49: got struct powerclamp_worker_data * Signed-off-by: Luc Van Oostenryck Reviewed-by: Petr Mladek Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index d718cd179ddb..45d9840491bd 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -101,7 +101,7 @@ struct powerclamp_worker_data { bool clamping; }; -static struct powerclamp_worker_data * __percpu worker_data; +static struct powerclamp_worker_data __percpu *worker_data; static struct thermal_cooling_device *cooling_dev; static unsigned long *cpu_clamping_mask; /* bit map for tracking per cpu * clamping kthread worker From 6aa84e8a9fcba8aee8a1437bb4ab134cdc256d4a Mon Sep 17 00:00:00 2001 From: Phil Elwell Date: Tue, 29 Jan 2019 09:55:57 +0000 Subject: [PATCH 13/93] thermal: bcm2835: Fix crash in bcm2835_thermal_debugfs [ Upstream commit 35122495a8c6683e863acf7b05a7036b2be64c7a ] "cat /sys/kernel/debug/bcm2835_thermal/regset" causes a NULL pointer dereference in bcm2835_thermal_debugfs. The driver makes use of the implementation details of the thermal framework to retrieve a pointer to its private data from a struct thermal_zone_device, and gets it wrong - leading to the crash. Instead, store its private data as the drvdata and retrieve the thermal_zone_device pointer from it. Fixes: bcb7dd9ef206 ("thermal: bcm2835: add thermal driver for bcm2835 SoC") Signed-off-by: Phil Elwell Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/broadcom/bcm2835_thermal.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/thermal/broadcom/bcm2835_thermal.c b/drivers/thermal/broadcom/bcm2835_thermal.c index 24b006a95142..8646fb7425f2 100644 --- a/drivers/thermal/broadcom/bcm2835_thermal.c +++ b/drivers/thermal/broadcom/bcm2835_thermal.c @@ -128,8 +128,7 @@ static const struct debugfs_reg32 bcm2835_thermal_regs[] = { static void bcm2835_thermal_debugfs(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); struct debugfs_regset32 *regset; data->debugfsdir = debugfs_create_dir("bcm2835_thermal", NULL); @@ -275,7 +274,7 @@ static int bcm2835_thermal_probe(struct platform_device *pdev) data->tz = tz; - platform_set_drvdata(pdev, tz); + platform_set_drvdata(pdev, data); /* * Thermal_zone doesn't enable hwmon as default, @@ -299,8 +298,8 @@ err_clk: static int bcm2835_thermal_remove(struct platform_device *pdev) { - struct thermal_zone_device *tz = platform_get_drvdata(pdev); - struct bcm2835_thermal_data *data = tz->devdata; + struct bcm2835_thermal_data *data = platform_get_drvdata(pdev); + struct thermal_zone_device *tz = data->tz; debugfs_remove_recursive(data->debugfsdir); thermal_zone_of_sensor_unregister(&pdev->dev, tz); From f37079c7dc6aaa6fd392c1f5426f77c71361f15b Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:06 -0700 Subject: [PATCH 14/93] thermal/int340x_thermal: Add additional UUIDs [ Upstream commit 16fc8eca1975358111dbd7ce65e4ce42d1a848fb ] Add more supported DPTF policies than the driver currently exposes. Signed-off-by: Matthew Garrett Cc: Nisha Aram Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/int340x_thermal/int3400_thermal.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 43b90fd577e4..34dc4d6dda66 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -22,6 +22,13 @@ enum int3400_thermal_uuid { INT3400_THERMAL_PASSIVE_1, INT3400_THERMAL_ACTIVE, INT3400_THERMAL_CRITICAL, + INT3400_THERMAL_ADAPTIVE_PERFORMANCE, + INT3400_THERMAL_EMERGENCY_CALL_MODE, + INT3400_THERMAL_PASSIVE_2, + INT3400_THERMAL_POWER_BOSS, + INT3400_THERMAL_VIRTUAL_SENSOR, + INT3400_THERMAL_COOLING_MODE, + INT3400_THERMAL_HARDWARE_DUTY_CYCLING, INT3400_THERMAL_MAXIMUM_UUID, }; @@ -29,6 +36,13 @@ static char *int3400_thermal_uuids[INT3400_THERMAL_MAXIMUM_UUID] = { "42A441D6-AE6A-462b-A84B-4A8CE79027D3", "3A95C389-E4B8-4629-A526-C52C88626BAE", "97C68AE7-15FA-499c-B8C9-5DA81D606E0A", + "63BE270F-1C11-48FD-A6F7-3AF253FF3E2D", + "5349962F-71E6-431D-9AE8-0A635B710AEE", + "9E04115A-AE87-4D1C-9500-0F3E340BFE75", + "F5A35014-C209-46A4-993A-EB56DE7530A1", + "6ED722A7-9240-48A5-B479-31EEF723D7CF", + "16CAF1B7-DD38-40ED-B1C1-1B8A1913D531", + "BE84BABF-C4D4-403D-B495-3128FD44dAC1", }; struct int3400_thermal_priv { From 637f86d25097b565a5c9a17094bc4d06df3dc1dc Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 10 Oct 2018 01:30:07 -0700 Subject: [PATCH 15/93] thermal/int340x_thermal: fix mode setting [ Upstream commit 396ee4d0cd52c13b3f6421b8d324d65da5e7e409 ] int3400 only pushes the UUID into the firmware when the mode is flipped to "enable". The current code only exposes the mode flag if the firmware supports the PASSIVE_1 UUID, which not all machines do. Remove the restriction. Signed-off-by: Matthew Garrett Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/int340x_thermal/int3400_thermal.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c index 34dc4d6dda66..4a20f4d47b1d 100644 --- a/drivers/thermal/int340x_thermal/int3400_thermal.c +++ b/drivers/thermal/int340x_thermal/int3400_thermal.c @@ -316,10 +316,9 @@ static int int3400_thermal_probe(struct platform_device *pdev) platform_set_drvdata(pdev, priv); - if (priv->uuid_bitmap & 1 << INT3400_THERMAL_PASSIVE_1) { - int3400_thermal_ops.get_mode = int3400_thermal_get_mode; - int3400_thermal_ops.set_mode = int3400_thermal_set_mode; - } + int3400_thermal_ops.get_mode = int3400_thermal_get_mode; + int3400_thermal_ops.set_mode = int3400_thermal_set_mode; + priv->thermal = thermal_zone_device_register("INT3400 Thermal", 0, 0, priv, &int3400_thermal_ops, &int3400_thermal_params, 0, 0); From f1e062e8ce7934e2f4a80e53b2ca2fbbf71ce24d Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 18 Mar 2019 22:26:33 +0800 Subject: [PATCH 16/93] thermal/intel_powerclamp: fix truncated kthread name [ Upstream commit e925b5be5751f6a7286bbd9a4cbbc4ac90cc5fa6 ] kthread name only allows 15 characters (TASK_COMMON_LEN is 16). Thus rename the kthreads created by intel_powerclamp driver from "kidle_inject/ + decimal cpuid" to "kidle_inj/ + decimal cpuid" to avoid truncated kthead name for cpu 100 and later. Signed-off-by: Zhang Rui Signed-off-by: Sasha Levin --- drivers/thermal/intel_powerclamp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c index 45d9840491bd..c3293fa2bb1b 100644 --- a/drivers/thermal/intel_powerclamp.c +++ b/drivers/thermal/intel_powerclamp.c @@ -494,7 +494,7 @@ static void start_power_clamp_worker(unsigned long cpu) struct powerclamp_worker_data *w_data = per_cpu_ptr(worker_data, cpu); struct kthread_worker *worker; - worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inject/%ld", cpu); + worker = kthread_create_worker_on_cpu(cpu, 0, "kidle_inj/%ld", cpu); if (IS_ERR(worker)) return; From 58ad2f1b020a114d0ecdcc77fb4ff6766ad0c53e Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 28 Jan 2019 15:24:42 +0100 Subject: [PATCH 17/93] scsi: iscsi: flush running unbind operations when removing a session [ Upstream commit 165aa2bfb42904b1bec4bf2fa257c8c603c14a06 ] In some cases, the iscsi_remove_session() function is called while an unbind_work operation is still running. This may cause a situation where sysfs objects are removed in an incorrect order, triggering a kernel warning. [ 605.249442] ------------[ cut here ]------------ [ 605.259180] sysfs group 'power' not found for kobject 'target2:0:0' [ 605.321371] WARNING: CPU: 1 PID: 26794 at fs/sysfs/group.c:235 sysfs_remove_group+0x76/0x80 [ 605.341266] Modules linked in: dm_service_time target_core_user target_core_pscsi target_core_file target_core_iblock iscsi_target_mod target_core_mod nls_utf8 isofs ppdev bochs_drm nfit ttm libnvdimm drm_kms_helper syscopyarea sysfillrect sysimgblt joydev pcspkr fb_sys_fops drm i2c_piix4 sg parport_pc parport xfs libcrc32c dm_multipath sr_mod sd_mod cdrom ata_generic 8021q garp mrp ata_piix stp crct10dif_pclmul crc32_pclmul llc libata crc32c_intel virtio_net net_failover ghash_clmulni_intel serio_raw failover sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi [ 605.627479] CPU: 1 PID: 26794 Comm: kworker/u32:2 Not tainted 4.18.0-60.el8.x86_64 #1 [ 605.721401] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20180724_192412-buildhw-07.phx2.fedoraproject.org-1.fc29 04/01/2014 [ 605.823651] Workqueue: scsi_wq_2 __iscsi_unbind_session [scsi_transport_iscsi] [ 605.830940] RIP: 0010:sysfs_remove_group+0x76/0x80 [ 605.922907] Code: 48 89 df 5b 5d 41 5c e9 38 c4 ff ff 48 89 df e8 e0 bf ff ff eb cb 49 8b 14 24 48 8b 75 00 48 c7 c7 38 73 cb a7 e8 24 77 d7 ff <0f> 0b 5b 5d 41 5c c3 0f 1f 00 0f 1f 44 00 00 41 56 41 55 41 54 55 [ 606.122304] RSP: 0018:ffffbadcc8d1bda8 EFLAGS: 00010286 [ 606.218492] RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 [ 606.326381] RDX: ffff98bdfe85eb40 RSI: ffff98bdfe856818 RDI: ffff98bdfe856818 [ 606.514498] RBP: ffffffffa7ab73e0 R08: 0000000000000268 R09: 0000000000000007 [ 606.529469] R10: 0000000000000000 R11: ffffffffa860d9ad R12: ffff98bdf978e838 [ 606.630535] R13: ffff98bdc2cd4010 R14: ffff98bdc2cd3ff0 R15: ffff98bdc2cd4000 [ 606.824707] FS: 0000000000000000(0000) GS:ffff98bdfe840000(0000) knlGS:0000000000000000 [ 607.018333] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 607.117844] CR2: 00007f84b78ac024 CR3: 000000002c00a003 CR4: 00000000003606e0 [ 607.117844] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 607.420926] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 607.524236] Call Trace: [ 607.530591] device_del+0x56/0x350 [ 607.624393] ? ata_tlink_match+0x30/0x30 [libata] [ 607.727805] ? attribute_container_device_trigger+0xb4/0xf0 [ 607.829911] scsi_target_reap_ref_release+0x39/0x50 [ 607.928572] scsi_remove_target+0x1a2/0x1d0 [ 608.017350] __iscsi_unbind_session+0xb3/0x160 [scsi_transport_iscsi] [ 608.117435] process_one_work+0x1a7/0x360 [ 608.132917] worker_thread+0x30/0x390 [ 608.222900] ? pwq_unbound_release_workfn+0xd0/0xd0 [ 608.323989] kthread+0x112/0x130 [ 608.418318] ? kthread_bind+0x30/0x30 [ 608.513821] ret_from_fork+0x35/0x40 [ 608.613909] ---[ end trace 0b98c310c8a6138c ]--- Signed-off-by: Maurizio Lombardi Acked-by: Chris Leech Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin --- drivers/scsi/scsi_transport_iscsi.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index f6542c159ed6..b4d06bd9ed51 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2185,6 +2185,8 @@ void iscsi_remove_session(struct iscsi_cls_session *session) scsi_target_unblock(&session->dev, SDEV_TRANSPORT_OFFLINE); /* flush running scans then delete devices */ flush_work(&session->scan_work); + /* flush running unbind operations */ + flush_work(&session->unbind_work); __iscsi_unbind_session(&session->unbind_work); /* hw iscsi may not have removed all connections from session */ From 3fb2d0a276294a7625216395bc329e917e0643e2 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Mon, 18 Mar 2019 22:24:03 +0100 Subject: [PATCH 18/93] x86/mm: Don't leak kernel addresses [ Upstream commit a3151724437f54076cc10bc02b1c4f0003ae36cd ] Since commit: ad67b74d2469d9b8 ("printk: hash addresses printed with %p") at boot "____ptrval____" is printed instead of actual addresses: found SMP MP-table at [mem 0x000f5cc0-0x000f5ccf] mapped at [(____ptrval____)] Instead of changing the print to "%px", and leaking a kernel addresses, just remove the print completely, like in: 071929dbdd865f77 ("arm64: Stop printing the virtual memory layout"). Signed-off-by: Matteo Croce Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Sasha Levin --- arch/x86/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index bc6bc6689e68..1c52acaa5bec 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -596,8 +596,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length) mpf_base = base; mpf_found = true; - pr_info("found SMP MP-table at [mem %#010lx-%#010lx] mapped at [%p]\n", - base, base + sizeof(*mpf) - 1, mpf); + pr_info("found SMP MP-table at [mem %#010lx-%#010lx]\n", + base, base + sizeof(*mpf) - 1); memblock_reserve(base, sizeof(*mpf)); if (mpf->physptr) From 182d26496e045247939c8697c6448a4830f0dff6 Mon Sep 17 00:00:00 2001 From: David Arcari Date: Tue, 12 Feb 2019 09:34:39 -0500 Subject: [PATCH 19/93] tools/power turbostat: return the exit status of a command [ Upstream commit 2a95496634a017c19641f26f00907af75b962f01 ] turbostat failed to return a non-zero exit status even though the supplied command (turbostat ) failed. Currently when turbostat forks a command it returns zero instead of the actual exit status of the command. Modify the code to return the exit status. Signed-off-by: David Arcari Acked-by: Len Brown Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- tools/power/x86/turbostat/turbostat.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 7a1b20ec5216..d1b2348db0f9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4588,6 +4588,9 @@ int fork_it(char **argv) signal(SIGQUIT, SIG_IGN); if (waitpid(child_pid, &status, 0) == -1) err(status, "waitpid"); + + if (WIFEXITED(status)) + status = WEXITSTATUS(status); } /* * n.b. fork_it() does not check for errors from for_all_cpus() From 96f74822311f010c50f339e85d87c190419e8ee0 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:42 +0800 Subject: [PATCH 20/93] perf list: Don't forget to drop the reference to the allocated thread_map [ Upstream commit 39df730b09774bd860e39ea208a48d15078236cb ] Detected via gcc's ASan: Direct leak of 2048 byte(s) in 64 object(s) allocated from: 6 #0 0x7f606512e370 in __interceptor_realloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee370) 7 #1 0x556b0f1d7ddd in thread_map__realloc util/thread_map.c:43 8 #2 0x556b0f1d84c7 in thread_map__new_by_tid util/thread_map.c:85 9 #3 0x556b0f0e045e in is_event_supported util/parse-events.c:2250 10 #4 0x556b0f0e1aa1 in print_hwcache_events util/parse-events.c:2382 11 #5 0x556b0f0e3231 in print_events util/parse-events.c:2514 12 #6 0x556b0ee0a66e in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 13 #7 0x556b0f01e0ae in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 14 #8 0x556b0f01e859 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 15 #9 0x556b0f01edc8 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 16 #10 0x556b0f01f71f in main /home/changbin/work/linux/tools/perf/perf.c:520 17 #11 0x7f6062ccf09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 89896051f8da ("perf tools: Do not put a variable sized type not at the end of a struct") Link: http://lkml.kernel.org/r/20190316080556.3075-3-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/parse-events.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index d0b92d374ba9..ec3517326a68 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2109,6 +2109,7 @@ static bool is_event_supported(u8 type, unsigned config) perf_evsel__delete(evsel); } + thread_map__put(tmap); return ret; } From 0d85f280c1b3bc1747a8b83e9f764086db8a69fc Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:44 +0800 Subject: [PATCH 21/93] perf config: Fix an error in the config template documentation [ Upstream commit 9b40dff7ba3caaf0d1919f98e136fa3400bd34aa ] The option 'sort-order' should be 'sort_order'. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Milian Wolff Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 893c5c798be9 ("perf config: Show default report configuration in example and docs") Link: http://lkml.kernel.org/r/20190316080556.3075-5-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/Documentation/perf-config.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 5b4fff3adc4b..782a8966b721 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -114,7 +114,7 @@ Given a $HOME/.perfconfig like this: [report] # Defaults - sort-order = comm,dso,symbol + sort_order = comm,dso,symbol percent-limit = 0 queue-size = 0 children = true From b820793ac523ba28cda45f86fbc031a06feaf720 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:45 +0800 Subject: [PATCH 22/93] perf config: Fix a memory leak in collect_config() [ Upstream commit 54569ba4b06d5baedae4614bde33a25a191473ba ] Detected with gcc's ASan: Direct leak of 66 byte(s) in 5 object(s) allocated from: #0 0x7ff3b1f32070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x560c8761034d in collect_config util/config.c:597 #2 0x560c8760d9cb in get_value util/config.c:169 #3 0x560c8760dfd7 in perf_parse_file util/config.c:285 #4 0x560c8760e0d2 in perf_config_from_file util/config.c:476 #5 0x560c876108fd in perf_config_set__init util/config.c:661 #6 0x560c87610c72 in perf_config_set__new util/config.c:709 #7 0x560c87610d2f in perf_config__init util/config.c:718 #8 0x560c87610e5d in perf_config util/config.c:730 #9 0x560c875ddea0 in main /home/changbin/work/linux/tools/perf/perf.c:442 #10 0x7ff3afb8609a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Cc: Taeung Song Fixes: 20105ca1240c ("perf config: Introduce perf_config_set class") Link: http://lkml.kernel.org/r/20190316080556.3075-6-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/config.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4b893c622236..a0c9ff27c7bf 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -628,11 +628,10 @@ static int collect_config(const char *var, const char *value, } ret = set_value(item, value); - return ret; out_free: free(key); - return -1; + return ret; } int perf_config_set__collect(struct perf_config_set *set, const char *file_name, From 801a59019383eefb821b66b22ca50a11636045a2 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:46 +0800 Subject: [PATCH 23/93] perf build-id: Fix memory leak in print_sdt_events() [ Upstream commit 8bde8516893da5a5fdf06121f74d11b52ab92df5 ] Detected with gcc's ASan: Direct leak of 4356 byte(s) in 120 object(s) allocated from: #0 0x7ff1a2b5a070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x55719aef4814 in build_id_cache__origname util/build-id.c:215 #2 0x55719af649b6 in print_sdt_events util/parse-events.c:2339 #3 0x55719af66272 in print_events util/parse-events.c:2542 #4 0x55719ad1ecaa in cmd_list /home/changbin/work/linux/tools/perf/builtin-list.c:58 #5 0x55719aec745d in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #6 0x55719aec7d1a in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #7 0x55719aec8184 in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #8 0x55719aeca41a in main /home/changbin/work/linux/tools/perf/perf.c:520 #9 0x7ff1a07ae09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Masami Hiramatsu Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 40218daea1db ("perf list: Show SDT and pre-cached events") Link: http://lkml.kernel.org/r/20190316080556.3075-7-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/build-id.c | 1 + tools/perf/util/parse-events.c | 1 + 2 files changed, 2 insertions(+) diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 7f8553630c4d..69910deab6e0 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -185,6 +185,7 @@ char *build_id_cache__linkname(const char *sbuild_id, char *bf, size_t size) return bf; } +/* The caller is responsible to free the returned buffer. */ char *build_id_cache__origname(const char *sbuild_id) { char *linkname; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index ec3517326a68..29e2bb304168 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -2180,6 +2180,7 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, printf(" %-50s [%s]\n", buf, "SDT event"); free(buf); } + free(path); } else printf(" %-50s [%s]\n", nd->s, "SDT event"); if (nd2) { From dac5fedfc9070bf1fcb14b0dd9ef812aa5c5c2a6 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:48 +0800 Subject: [PATCH 24/93] perf top: Fix error handling in cmd_top() [ Upstream commit 70c819e4bf1c5f492768b399d898d458ccdad2b6 ] We should go to the cleanup path, to avoid leaks, detected using gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: http://lkml.kernel.org/r/20190316080556.3075-9-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/builtin-top.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 3103a33c13a8..133eb7949321 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -1345,8 +1345,9 @@ int cmd_top(int argc, const char **argv) goto out_delete_evlist; symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (symbol__init(NULL) < 0) - return -1; + status = symbol__init(NULL); + if (status < 0) + goto out_delete_evlist; sort__setup_elide(stdout); From ff22178b282675fa3542763f8e67cbb40f3f9bfd Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:49 +0800 Subject: [PATCH 25/93] perf hist: Add missing map__put() in error case [ Upstream commit cb6186aeffda4d27e56066c79e9579e7831541d3 ] We need to map__put() before returning from failure of sample__resolve_callchain(). Detected with gcc's ASan. Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Krister Johansen Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 9c68ae98c6f7 ("perf callchain: Reference count maps") Link: http://lkml.kernel.org/r/20190316080556.3075-10-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/hist.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 5d420209505e..5b8bc1fd943d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1040,8 +1040,10 @@ int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, err = sample__resolve_callchain(iter->sample, &callchain_cursor, &iter->parent, iter->evsel, al, max_stack_depth); - if (err) + if (err) { + map__put(alm); return err; + } err = iter->ops->prepare_entry(iter, al); if (err) From 952d449dbf3e753c009e04f55fe31dcc3b27a9b9 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 18 Mar 2019 16:41:28 -0300 Subject: [PATCH 26/93] perf evsel: Free evsel->counts in perf_evsel__exit() [ Upstream commit 42dfa451d825a2ad15793c476f73e7bbc0f9d312 ] Using gcc's ASan, Changbin reports: ================================================================= ==7494==ERROR: LeakSanitizer: detected memory leaks Direct leak of 48 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e5330a5e in zalloc util/util.h:23 #2 0x5625e5330a9b in perf_counts__new util/counts.c:10 #3 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #4 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #5 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #6 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #7 0x5625e51528e6 in run_test tests/builtin-test.c:358 #8 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #9 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #10 0x5625e515572f in cmd_test tests/builtin-test.c:722 #11 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #12 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #13 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #14 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #15 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 72 byte(s) in 1 object(s) allocated from: #0 0x7f0333a89138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x5625e532560d in zalloc util/util.h:23 #2 0x5625e532566b in xyarray__new util/xyarray.c:10 #3 0x5625e5330aba in perf_counts__new util/counts.c:15 #4 0x5625e5330ca0 in perf_evsel__alloc_counts util/counts.c:47 #5 0x5625e520d8e5 in __perf_evsel__read_on_cpu util/evsel.c:1505 #6 0x5625e517a985 in perf_evsel__read_on_cpu /home/work/linux/tools/perf/util/evsel.h:347 #7 0x5625e517ad1a in test__openat_syscall_event tests/openat-syscall.c:47 #8 0x5625e51528e6 in run_test tests/builtin-test.c:358 #9 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #10 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #11 0x5625e515572f in cmd_test tests/builtin-test.c:722 #12 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #13 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #14 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #15 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #16 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) His patch took care of evsel->prev_raw_counts, but the above backtraces are about evsel->counts, so fix that instead. Reported-by: Changbin Du Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Link: https://lkml.kernel.org/n/tip-hd1x13g59f0nuhe4anxhsmfp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 44c2f62b47a3..0cf6f537f980 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1229,6 +1229,7 @@ void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); assert(evsel->evlist == NULL); + perf_evsel__free_counts(evsel); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); perf_evsel__free_config_terms(evsel); From a0898f576b7a0ca83ca35faf4e25ab1d1198e5ab Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:54 +0800 Subject: [PATCH 27/93] perf tests: Fix a memory leak of cpu_map object in the openat_syscall_event_on_all_cpus test [ Upstream commit 93faa52e8371f0291ee1ff4994edae2b336b6233 ] ================================================================= ==7497==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f0333a88f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x5625e5326213 in cpu_map__trim_new util/cpumap.c:45 #2 0x5625e5326703 in cpu_map__read util/cpumap.c:103 #3 0x5625e53267ef in cpu_map__read_all_cpu_map util/cpumap.c:120 #4 0x5625e5326915 in cpu_map__new util/cpumap.c:135 #5 0x5625e517b355 in test__openat_syscall_event_on_all_cpus tests/openat-syscall-all-cpus.c:36 #6 0x5625e51528e6 in run_test tests/builtin-test.c:358 #7 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #8 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #9 0x5625e515572f in cmd_test tests/builtin-test.c:722 #10 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #11 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #12 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #13 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #14 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: f30a79b012e5 ("perf tools: Add reference counting for cpu_map object") Link: http://lkml.kernel.org/r/20190316080556.3075-15-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/openat-syscall-all-cpus.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index c531e6deb104..493ecb611540 100644 --- a/tools/perf/tests/openat-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -45,7 +45,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int if (IS_ERR(evsel)) { tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat"); pr_debug("%s\n", errbuf); - goto out_thread_map_delete; + goto out_cpu_map_delete; } if (perf_evsel__open(evsel, cpus, threads) < 0) { @@ -119,6 +119,8 @@ out_close_fd: perf_evsel__close_fd(evsel); out_evsel_delete: perf_evsel__delete(evsel); +out_cpu_map_delete: + cpu_map__put(cpus); out_thread_map_delete: thread_map__put(threads); return err; From 4c66a273f6b3829a641017084b029a58a5559493 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:55 +0800 Subject: [PATCH 28/93] perf tests: Fix memory leak by expr__find_other() in test__expr() [ Upstream commit f97a8991d3b998e518f56794d879f645964de649 ] ================================================================= ==7506==ERROR: LeakSanitizer: detected memory leaks Direct leak of 13 byte(s) in 3 object(s) allocated from: #0 0x7f03339d6070 in __interceptor_strdup (/usr/lib/x86_64-linux-gnu/libasan.so.5+0x3b070) #1 0x5625e53aaef0 in expr__find_other util/expr.y:221 #2 0x5625e51bcd3f in test__expr tests/expr.c:52 #3 0x5625e51528e6 in run_test tests/builtin-test.c:358 #4 0x5625e5152baf in test_and_print tests/builtin-test.c:388 #5 0x5625e51543fe in __cmd_test tests/builtin-test.c:583 #6 0x5625e515572f in cmd_test tests/builtin-test.c:722 #7 0x5625e51c3fb8 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #8 0x5625e51c44f7 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #9 0x5625e51c48fb in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #10 0x5625e51c5069 in main /home/changbin/work/linux/tools/perf/perf.c:520 #11 0x7f033214d09a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Signed-off-by: Changbin Du Cc: Alexei Starovoitov Cc: Andi Kleen Cc: Daniel Borkmann Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 075167363f8b ("perf tools: Add a simple expression parser for JSON") Link: http://lkml.kernel.org/r/20190316080556.3075-16-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/expr.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 01f0706995a9..9acc1e80b936 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -19,7 +19,7 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) const char *p; const char **other; double val; - int ret; + int i, ret; struct parse_ctx ctx; int num_other; @@ -56,6 +56,9 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused) TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ")); TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO")); TEST_ASSERT_VAL("find other", other[3] == NULL); + + for (i = 0; i < num_other; i++) + free((void *)other[i]); free((void *)other); return 0; From b1930a27c81bf14be77e5c30401e1f50345a3e99 Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Sat, 16 Mar 2019 16:05:56 +0800 Subject: [PATCH 29/93] perf tests: Fix a memory leak in test__perf_evsel__tp_sched_test() [ Upstream commit d982b33133284fa7efa0e52ae06b88f9be3ea764 ] ================================================================= ==20875==ERROR: LeakSanitizer: detected memory leaks Direct leak of 1160 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc84138 in calloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xee138) #1 0x55bd50005599 in zalloc util/util.h:23 #2 0x55bd500068f5 in perf_evsel__newtp_idx util/evsel.c:327 #3 0x55bd4ff810fc in perf_evsel__newtp /home/work/linux/tools/perf/util/evsel.h:216 #4 0x55bd4ff81608 in test__perf_evsel__tp_sched_test tests/evsel-tp-sched.c:69 #5 0x55bd4ff528e6 in run_test tests/builtin-test.c:358 #6 0x55bd4ff52baf in test_and_print tests/builtin-test.c:388 #7 0x55bd4ff543fe in __cmd_test tests/builtin-test.c:583 #8 0x55bd4ff5572f in cmd_test tests/builtin-test.c:722 #9 0x55bd4ffc4087 in run_builtin /home/changbin/work/linux/tools/perf/perf.c:302 #10 0x55bd4ffc45c6 in handle_internal_command /home/changbin/work/linux/tools/perf/perf.c:354 #11 0x55bd4ffc49ca in run_argv /home/changbin/work/linux/tools/perf/perf.c:398 #12 0x55bd4ffc5138 in main /home/changbin/work/linux/tools/perf/perf.c:520 #13 0x7f1b6e34809a in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x2409a) Indirect leak of 19 byte(s) in 1 object(s) allocated from: #0 0x7f1b6fc83f30 in __interceptor_malloc (/usr/lib/x86_64-linux-gnu/libasan.so.5+0xedf30) #1 0x7f1b6e3ac30f in vasprintf (/lib/x86_64-linux-gnu/libc.so.6+0x8830f) Signed-off-by: Changbin Du Reviewed-by: Jiri Olsa Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt (VMware) Fixes: 6a6cd11d4e57 ("perf test: Add test for the sched tracepoint format fields") Link: http://lkml.kernel.org/r/20190316080556.3075-17-changbin.du@gmail.com Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Sasha Levin --- tools/perf/tests/evsel-tp-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index d0406116c905..926a8e1b5e94 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -85,5 +85,6 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes if (perf_evsel__test_field(evsel, "target_cpu", 4, true)) ret = -1; + perf_evsel__delete(evsel); return ret; } From 04260753ba278303c9a9a551b56867cb6590d0a7 Mon Sep 17 00:00:00 2001 From: Jianguo Chen Date: Wed, 20 Mar 2019 18:54:21 +0000 Subject: [PATCH 30/93] irqchip/mbigen: Don't clear eventid when freeing an MSI [ Upstream commit fca269f201a8d9985c0a31fb60b15d4eb57cef80 ] mbigen_write_msg clears eventid bits of a mbigen register when free a interrupt, because msi_domain_deactivate memset struct msg to zero. Then multiple mbigen pins with zero eventid will report the same interrupt number. The eventid clear call trace: free_irq __free_irq irq_shutdown irq_domain_deactivate_irq __irq_domain_deactivate_irq __irq_domain_deactivate_irq msi_domain_deactivate platform_msi_write_msg mbigen_write_msg Signed-off-by: Jianguo Chen [maz: massaged subject] Signed-off-by: Marc Zyngier Signed-off-by: Sasha Levin --- drivers/irqchip/irq-mbigen.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 567b29c47608..98b6e1d4b1a6 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -161,6 +161,9 @@ static void mbigen_write_msg(struct msi_desc *desc, struct msi_msg *msg) void __iomem *base = d->chip_data; u32 val; + if (!msg->address_lo && !msg->address_hi) + return; + base += get_mbigen_vec_reg(d->hwirq); val = readl_relaxed(base); From c65812de901537e6dec45a66fd86f4f535925644 Mon Sep 17 00:00:00 2001 From: Aditya Pakki Date: Mon, 18 Mar 2019 21:19:56 -0500 Subject: [PATCH 31/93] x86/hpet: Prevent potential NULL pointer dereference [ Upstream commit 2e84f116afca3719c9d0a1a78b47b48f75fd5724 ] hpet_virt_address may be NULL when ioremap_nocache fail, but the code lacks a check. Add a check to prevent NULL pointer dereference. Signed-off-by: Aditya Pakki Signed-off-by: Thomas Gleixner Cc: kjlu@umn.edu Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Joe Perches Cc: Nicolai Stange Cc: Roland Dreier Link: https://lkml.kernel.org/r/20190319021958.17275-1-pakki001@umn.edu Signed-off-by: Sasha Levin --- arch/x86/kernel/hpet.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index afa1a204bc6d..df767e6de8dd 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -909,6 +909,8 @@ int __init hpet_enable(void) return 0; hpet_set_mapping(); + if (!hpet_virt_address) + return 0; /* * Read the period and check for a sane value: From 20afb90f730982882e65b01fb8bdfe83914339c5 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Thu, 14 Mar 2019 16:46:00 -0400 Subject: [PATCH 32/93] x86/cpu/cyrix: Use correct macros for Cyrix calls on Geode processors [ Upstream commit 18fb053f9b827bd98cfc64f2a35df8ab19745a1d ] There are comments in processor-cyrix.h advising you to _not_ make calls using the deprecated macros in this style: setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); This is because it expands the macro into a non-functioning calling sequence. The calling order must be: outb(CX86_CCR2, 0x22); inb(0x23); From the comments: * When using the old macros a line like * setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); * gets expanded to: * do { * outb((CX86_CCR2), 0x22); * outb((({ * outb((CX86_CCR2), 0x22); * inb(0x23); * }) | 0x88), 0x23); * } while (0); The new macros fix this problem, so use them instead. Tested on an actual Geode processor. Signed-off-by: Matthew Whitehead Signed-off-by: Thomas Gleixner Cc: luto@kernel.org Link: https://lkml.kernel.org/r/1552596361-8967-2-git-send-email-tedheadster@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/cpu/cyrix.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 8949b7ae6d92..fa61c870ada9 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -124,7 +124,7 @@ static void set_cx86_reorder(void) setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* Load/Store Serialize to mem access disable (=reorder it) */ - setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80); + setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); /* set load/store serialize from 1GB to 4GB */ ccr3 |= 0xe0; setCx86(CX86_CCR3, ccr3); @@ -135,11 +135,11 @@ static void set_cx86_memwb(void) pr_info("Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); /* CCR2 bit 2: unlock NW bit */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); /* set 'Not Write-through' */ write_cr0(read_cr0() | X86_CR0_NW); /* CCR2 bit 2: lock NW bit and set WT1 */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); } /* @@ -153,14 +153,14 @@ static void geode_configure(void) local_irq_save(flags); /* Suspend on halt power saving and enable #SUSP pin */ - setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88); + setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); ccr3 = getCx86(CX86_CCR3); setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ /* FPU fast, DTE cache, Mem bypass */ - setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38); + setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ set_cx86_memwb(); @@ -296,7 +296,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) /* GXm supports extended cpuid levels 'ala' AMD */ if (c->cpuid_level == 2) { /* Enable cxMMX extensions (GX1 Datasheet 54) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); /* * GXm : 0x30 ... 0x5f GXm datasheet 51 @@ -319,7 +319,7 @@ static void init_cyrix(struct cpuinfo_x86 *c) if (dir1 > 7) { dir0_msn++; /* M II */ /* Enable MMX extensions (App note 108) */ - setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1); + setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); } else { /* A 6x86MX - it has the bug. */ set_cpu_bug(c, X86_BUG_COMA); From 19525f7b031f1018a74ed3760b81a4f0f3940fa3 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 28 Feb 2019 20:24:59 +0800 Subject: [PATCH 33/93] drm/nouveau/debugfs: Fix check of pm_runtime_get_sync failure [ Upstream commit 909e9c9c428376e2a43d178ed4b0a2d5ba9cb7d3 ] pm_runtime_get_sync returns negative on failure. Fixes: eaeb9010bb4b ("drm/nouveau/debugfs: Wake up GPU before doing any reclocking") Signed-off-by: YueHaibing Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- drivers/gpu/drm/nouveau/nouveau_debugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c index 9109b69cd052..9635704a1d86 100644 --- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c +++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c @@ -161,7 +161,7 @@ nouveau_debugfs_pstate_set(struct file *file, const char __user *ubuf, } ret = pm_runtime_get_sync(drm->dev); - if (IS_ERR_VALUE(ret) && ret != -EACCES) + if (ret < 0 && ret != -EACCES) return ret; ret = nvif_mthd(ctrl, NVIF_CONTROL_PSTATE_USER, &args, sizeof(args)); pm_runtime_put_autosuspend(drm->dev); From 486d82206c3bfd21f9ea4a3b6bca8ca7c9060285 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 20 Mar 2019 09:58:33 +0800 Subject: [PATCH 34/93] iommu/vt-d: Check capability before disabling protected memory [ Upstream commit 5bb71fc790a88d063507dc5d445ab8b14e845591 ] The spec states in 10.4.16 that the Protected Memory Enable Register should be treated as read-only for implementations not supporting protected memory regions (PLMR and PHMR fields reported as Clear in the Capability register). Cc: Jacob Pan Cc: mark gross Suggested-by: Ashok Raj Fixes: f8bab73515ca5 ("intel-iommu: PMEN support") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/intel-iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 802ba7b16e09..fe935293fa7b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1646,6 +1646,9 @@ static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu) u32 pmen; unsigned long flags; + if (!cap_plmr(iommu->cap) && !cap_phmr(iommu->cap)) + return; + raw_spin_lock_irqsave(&iommu->register_lock, flags); pmen = readl(iommu->reg + DMAR_PMEN_REG); pmen &= ~DMA_PMEN_EPM; From 727344f1bb2d517b921cdf2e1e2508c4c4de2104 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 7 Mar 2019 14:27:56 -0700 Subject: [PATCH 35/93] x86/hw_breakpoints: Make default case in hw_breakpoint_arch_parse() return an error [ Upstream commit e898e69d6b9475bf123f99b3c5d1a67bb7cb2361 ] When building with -Wsometimes-uninitialized, Clang warns: arch/x86/kernel/hw_breakpoint.c:355:2: warning: variable 'align' is used uninitialized whenever switch default is taken [-Wsometimes-uninitialized] The default cannot be reached because arch_build_bp_info() initializes hw->len to one of the specified cases. Nevertheless the warning is valid and returning -EINVAL makes sure that this cannot be broken by future modifications. Suggested-by: Nick Desaulniers Signed-off-by: Nathan Chancellor Signed-off-by: Thomas Gleixner Reviewed-by: Nick Desaulniers Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: clang-built-linux@googlegroups.com Link: https://github.com/ClangBuiltLinux/linux/issues/392 Link: https://lkml.kernel.org/r/20190307212756.4648-1-natechancellor@gmail.com Signed-off-by: Sasha Levin --- arch/x86/kernel/hw_breakpoint.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 8771766d46b6..9954a604a822 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -352,6 +352,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) #endif default: WARN_ON_ONCE(1); + return -EINVAL; } /* From 329f34e8cddf66b21e3ad0082fcb410fdc5e61ac Mon Sep 17 00:00:00 2001 From: Steve French Date: Sun, 17 Mar 2019 15:58:38 -0500 Subject: [PATCH 36/93] fix incorrect error code mapping for OBJECTID_NOT_FOUND [ Upstream commit 85f9987b236cf46e06ffdb5c225cf1f3c0acb789 ] It was mapped to EIO which can be confusing when user space queries for an object GUID for an object for which the server file system doesn't support (or hasn't saved one). As Amir Goldstein suggested this is similar to ENOATTR (equivalently ENODATA in Linux errno definitions) so changing NT STATUS code mapping for OBJECTID_NOT_FOUND to ENODATA. Signed-off-by: Steve French CC: Amir Goldstein Signed-off-by: Sasha Levin --- fs/cifs/smb2maperror.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c index d7e839cb773f..92c9cdf4704d 100644 --- a/fs/cifs/smb2maperror.c +++ b/fs/cifs/smb2maperror.c @@ -1035,7 +1035,8 @@ static const struct status_to_posix_error smb2_error_map_table[] = { {STATUS_UNFINISHED_CONTEXT_DELETED, -EIO, "STATUS_UNFINISHED_CONTEXT_DELETED"}, {STATUS_NO_TGT_REPLY, -EIO, "STATUS_NO_TGT_REPLY"}, - {STATUS_OBJECTID_NOT_FOUND, -EIO, "STATUS_OBJECTID_NOT_FOUND"}, + /* Note that ENOATTTR and ENODATA are the same errno */ + {STATUS_OBJECTID_NOT_FOUND, -ENODATA, "STATUS_OBJECTID_NOT_FOUND"}, {STATUS_NO_IP_ADDRESSES, -EIO, "STATUS_NO_IP_ADDRESSES"}, {STATUS_WRONG_CREDENTIAL_HANDLE, -EIO, "STATUS_WRONG_CREDENTIAL_HANDLE"}, From 6fee657dfd593ad45c762fa2613c0c14406188ba Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 23 Mar 2019 12:10:29 -0400 Subject: [PATCH 37/93] ext4: prohibit fstrim in norecovery mode [ Upstream commit 18915b5873f07e5030e6fb108a050fa7c71c59fb ] The ext4 fstrim implementation uses the block bitmaps to find free space that can be discarded. If we haven't replayed the journal, the bitmaps will be stale and we absolutely *cannot* use stale metadata to zap the underlying storage. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin --- fs/ext4/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7917cc89ab21..3dbf4e414706 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -940,6 +940,13 @@ resizefs_out: if (!blk_queue_discard(q)) return -EOPNOTSUPP; + /* + * We haven't replayed the journal, so we cannot use our + * block-bitmap-guided storage zapping commands. + */ + if (test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) + return -EROFS; + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; From 15fbb1fe07a338d9228524873bc881813b83e834 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sat, 25 Aug 2018 10:44:17 +0200 Subject: [PATCH 38/93] gpio: pxa: handle corner case of unprobed device [ Upstream commit 9ce3ebe973bf4073426f35f282c6b955ed802765 ] In the corner case where the gpio driver probe fails, for whatever reason, the suspend and resume handlers will still be called as they have to be registered as syscore operations. This applies as well when no probe was called while the driver has been built in the kernel. Nicolas tracked this in : https://bugzilla.kernel.org/show_bug.cgi?id=200905 Therefore, add a failsafe in these function, and test if a proper probe succeeded and the driver is functional. Signed-off-by: Robert Jarzmik Reported-by: Nicolas Chauvet Signed-off-by: Linus Walleij Signed-off-by: Sasha Levin --- drivers/gpio/gpio-pxa.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/gpio/gpio-pxa.c b/drivers/gpio/gpio-pxa.c index 2943dfc4c470..822ad220f0af 100644 --- a/drivers/gpio/gpio-pxa.c +++ b/drivers/gpio/gpio-pxa.c @@ -776,6 +776,9 @@ static int pxa_gpio_suspend(void) struct pxa_gpio_bank *c; int gpio; + if (!pchip) + return 0; + for_each_gpio_bank(gpio, c, pchip) { c->saved_gplr = readl_relaxed(c->regbase + GPLR_OFFSET); c->saved_gpdr = readl_relaxed(c->regbase + GPDR_OFFSET); @@ -794,6 +797,9 @@ static void pxa_gpio_resume(void) struct pxa_gpio_bank *c; int gpio; + if (!pchip) + return; + for_each_gpio_bank(gpio, c, pchip) { /* restore level with set/clear */ writel_relaxed(c->saved_gplr, c->regbase + GPSR_OFFSET); From ad78e2e057ab8d914a2b5e3e6acf29c3c8a428a3 Mon Sep 17 00:00:00 2001 From: Siva Rebbagondla Date: Mon, 27 Aug 2018 17:05:15 +0530 Subject: [PATCH 39/93] rsi: improve kernel thread handling to fix kernel panic [ Upstream commit 4c62764d0fc21a34ffc44eec1210038c3a2e4473 ] While running regressions, observed below kernel panic when sdio disconnect called. This is because of, kthread_stop() is taking care of wait_for_completion() by default. When wait_for_completion triggered in kthread_stop and as it was done already, giving kernel panic. Hence, removing redundant wait_for_completion() from rsi_kill_thread(). ... skipping ... BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] exit_creds+0x1f/0x50 PGD 0 Oops: 0002 [#1] SMP CPU: 0 PID: 6502 Comm: rmmod Tainted: G OE 4.15.9-Generic #154-Ubuntu Hardware name: Dell Inc. Edge Gateway 3003/ , BIOS 01.00.00 04/17/2017 Stack: ffff88007392e600 ffff880075847dc0 ffffffff8108160a 0000000000000000 ffff88007392e600 ffff880075847de8 ffffffff810a484b ffff880076127000 ffff88003cd3a800 ffff880074f12a00 ffff880075847e28 ffffffffc09bed15 Call Trace: [] __put_task_struct+0x5a/0x140 [] kthread_stop+0x10b/0x110 [] rsi_disconnect+0x2f5/0x300 [ven_rsi_sdio] [] ? __pm_runtime_resume+0x5b/0x80 [] sdio_bus_remove+0x38/0x100 [] __device_release_driver+0xa4/0x150 [] driver_detach+0xb5/0xc0 [] bus_remove_driver+0x55/0xd0 [] driver_unregister+0x2c/0x50 [] sdio_unregister_driver+0x1a/0x20 [] rsi_module_exit+0x15/0x30 [ven_rsi_sdio] [] SyS_delete_module+0x1b8/0x210 [] entry_SYSCALL_64_fastpath+0x1c/0xbb Signed-off-by: Siva Rebbagondla Signed-off-by: Kalle Valo Signed-off-by: Sasha Levin --- drivers/net/wireless/rsi/rsi_common.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/rsi/rsi_common.h b/drivers/net/wireless/rsi/rsi_common.h index e579d694d13c..21986ba56a3c 100644 --- a/drivers/net/wireless/rsi/rsi_common.h +++ b/drivers/net/wireless/rsi/rsi_common.h @@ -74,7 +74,6 @@ static inline int rsi_kill_thread(struct rsi_thread *handle) atomic_inc(&handle->thread_done); rsi_set_event(&handle->event); - wait_for_completion(&handle->completion); return kthread_stop(handle->task); } From fdf0c593ec3e7924952f195933f676e53b319302 Mon Sep 17 00:00:00 2001 From: Gertjan Halkes Date: Wed, 5 Sep 2018 15:41:29 +0900 Subject: [PATCH 40/93] 9p: do not trust pdu content for stat item size [ Upstream commit 2803cf4379ed252894f046cb8812a48db35294e3 ] v9fs_dir_readdir() could deadloop if a struct was sent with a size set to -2 Link: http://lkml.kernel.org/r/1536134432-11997-1-git-send-email-asmadeus@codewreck.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=88021 Signed-off-by: Gertjan Halkes Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- fs/9p/vfs_dir.c | 8 +++----- net/9p/protocol.c | 3 ++- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 48db9a9f13f9..cb6c4031af55 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -105,7 +105,6 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) int err = 0; struct p9_fid *fid; int buflen; - int reclen = 0; struct p9_rdir *rdir; struct kvec kvec; @@ -138,11 +137,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, rdir->tail - rdir->head, &st); - if (err) { + if (err <= 0) { p9_debug(P9_DEBUG_VFS, "returned %d\n", err); return -EIO; } - reclen = st.size+2; over = !dir_emit(ctx, st.name, strlen(st.name), v9fs_qid2ino(&st.qid), dt_type(&st)); @@ -150,8 +148,8 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) if (over) return 0; - rdir->head += reclen; - ctx->pos += reclen; + rdir->head += err; + ctx->pos += err; } } } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 9743837aebc6..766d1ef4640a 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -570,9 +570,10 @@ int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st) if (ret) { p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); trace_9p_protocol_dump(clnt, &fake_pdu); + return ret; } - return ret; + return fake_pdu.offset; } EXPORT_SYMBOL(p9stat_read); From 85a2ad155826deef3cab4f5c704c620aeba3292b Mon Sep 17 00:00:00 2001 From: Dinu-Razvan Chis-Serban Date: Wed, 5 Sep 2018 16:44:12 +0900 Subject: [PATCH 41/93] 9p locks: add mount option for lock retry interval [ Upstream commit 5e172f75e51e3de1b4274146d9b990f803cb5c2a ] The default P9_LOCK_TIMEOUT can be too long for some users exporting a local file system to a guest VM (30s), make this configurable at mount time. Link: http://lkml.kernel.org/r/1536295827-3181-1-git-send-email-asmadeus@codewreck.org Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=195727 Signed-off-by: Dinu-Razvan Chis-Serban Signed-off-by: Dominique Martinet Signed-off-by: Sasha Levin --- fs/9p/v9fs.c | 21 +++++++++++++++++++++ fs/9p/v9fs.h | 1 + fs/9p/vfs_file.c | 6 +++++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 8fb89ddc6cc7..c52f10efdc9c 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -61,6 +61,8 @@ enum { Opt_cache_loose, Opt_fscache, Opt_mmap, /* Access options */ Opt_access, Opt_posixacl, + /* Lock timeout option */ + Opt_locktimeout, /* Error token */ Opt_err }; @@ -80,6 +82,7 @@ static const match_table_t tokens = { {Opt_cachetag, "cachetag=%s"}, {Opt_access, "access=%s"}, {Opt_posixacl, "posixacl"}, + {Opt_locktimeout, "locktimeout=%u"}, {Opt_err, NULL} }; @@ -187,6 +190,7 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #ifdef CONFIG_9P_FSCACHE v9ses->cachetag = NULL; #endif + v9ses->session_lock_timeout = P9_LOCK_TIMEOUT; if (!opts) return 0; @@ -360,6 +364,23 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) #endif break; + case Opt_locktimeout: + r = match_int(&args[0], &option); + if (r < 0) { + p9_debug(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + if (option < 1) { + p9_debug(P9_DEBUG_ERROR, + "locktimeout must be a greater than zero integer.\n"); + ret = -EINVAL; + continue; + } + v9ses->session_lock_timeout = (long)option * HZ; + break; + default: continue; } diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 982e017acadb..129e5243a6bf 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -116,6 +116,7 @@ struct v9fs_session_info { struct p9_client *clnt; /* 9p client */ struct list_head slist; /* list of sessions registered with v9fs */ struct rw_semaphore rename_sem; + long session_lock_timeout; /* retry interval for blocking locks */ }; /* cache_validity flags */ diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index af8cac975a74..89e69904976a 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -154,6 +154,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) uint8_t status = P9_LOCK_ERROR; int res = 0; unsigned char fl_type; + struct v9fs_session_info *v9ses; fid = filp->private_data; BUG_ON(fid == NULL); @@ -189,6 +190,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) if (IS_SETLKW(cmd)) flock.flags = P9_LOCK_FLAGS_BLOCK; + v9ses = v9fs_inode2v9ses(file_inode(filp)); + /* * if its a blocked request and we get P9_LOCK_BLOCKED as the status * for lock request, keep on trying @@ -202,7 +205,8 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) break; if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd)) break; - if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0) + if (schedule_timeout_interruptible(v9ses->session_lock_timeout) + != 0) break; /* * p9_client_lock_dotl overwrites flock.client_id with the From 40e8d128f5ed918bc9f19708e1320efea1c0c24a Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 6 Sep 2018 20:34:12 +0800 Subject: [PATCH 42/93] f2fs: fix to do sanity check with current segment number [ Upstream commit 042be0f849e5fc24116d0afecfaf926eed5cac63 ] https://bugzilla.kernel.org/show_bug.cgi?id=200219 Reproduction way: - mount image - run poc code - umount image F2FS-fs (loop1): Bitmap was wrongly set, blk:15364 ------------[ cut here ]------------ kernel BUG at /home/yuchao/git/devf2fs/segment.c:2061! invalid opcode: 0000 [#1] PREEMPT SMP CPU: 2 PID: 17686 Comm: umount Tainted: G W O 4.18.0-rc2+ #39 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 EIP: update_sit_entry+0x459/0x4e0 [f2fs] Code: e8 1c b5 fd ff 0f 0b 0f 0b 8b 45 e4 c7 44 24 08 9c 7a 6c f8 c7 44 24 04 bc 4a 6c f8 89 44 24 0c 8b 06 89 04 24 e8 f7 b4 fd ff <0f> 0b 8b 45 e4 0f b6 d2 89 54 24 10 c7 44 24 08 60 7a 6c f8 c7 44 EAX: 00000032 EBX: 000000f8 ECX: 00000002 EDX: 00000001 ESI: d7177000 EDI: f520fe68 EBP: d6477c6c ESP: d6477c34 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 EFLAGS: 00010282 CR0: 80050033 CR2: b7fbe000 CR3: 2a99b3c0 CR4: 000406f0 Call Trace: f2fs_allocate_data_block+0x124/0x580 [f2fs] do_write_page+0x78/0x150 [f2fs] f2fs_do_write_node_page+0x25/0xa0 [f2fs] __write_node_page+0x2bf/0x550 [f2fs] f2fs_sync_node_pages+0x60e/0x6d0 [f2fs] ? sync_inode_metadata+0x2f/0x40 ? f2fs_write_checkpoint+0x28f/0x7d0 [f2fs] ? up_write+0x1e/0x80 f2fs_write_checkpoint+0x2a9/0x7d0 [f2fs] ? mark_held_locks+0x5d/0x80 ? _raw_spin_unlock_irq+0x27/0x50 kill_f2fs_super+0x68/0x90 [f2fs] deactivate_locked_super+0x3d/0x70 deactivate_super+0x40/0x60 cleanup_mnt+0x39/0x70 __cleanup_mnt+0x10/0x20 task_work_run+0x81/0xa0 exit_to_usermode_loop+0x59/0xa7 do_fast_syscall_32+0x1f5/0x22c entry_SYSENTER_32+0x53/0x86 EIP: 0xb7f95c51 Code: c1 1e f7 ff ff 89 e5 8b 55 08 85 d2 8b 81 64 cd ff ff 74 02 89 02 5d c3 8b 0c 24 c3 8b 1c 24 c3 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d 76 00 58 b8 77 00 00 00 cd 80 90 8d 76 EAX: 00000000 EBX: 0871ab90 ECX: bfb2cd00 EDX: 00000000 ESI: 00000000 EDI: 0871ab90 EBP: 0871ab90 ESP: bfb2cd7c DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 007b EFLAGS: 00000246 Modules linked in: f2fs(O) crc32_generic bnep rfcomm bluetooth ecdh_generic snd_intel8x0 snd_ac97_codec ac97_bus snd_pcm snd_seq_midi snd_seq_midi_event snd_rawmidi snd_seq pcbc joydev aesni_intel snd_seq_device aes_i586 snd_timer crypto_simd snd cryptd soundcore mac_hid serio_raw video i2c_piix4 parport_pc ppdev lp parport hid_generic psmouse usbhid hid e1000 [last unloaded: f2fs] ---[ end trace d423f83982cfcdc5 ]--- The reason is, different log headers using the same segment, once one log's next block address is used by another log, it will cause panic as above. Main area: 24 segs, 24 secs 24 zones - COLD data: 0, 0, 0 - WARM data: 1, 1, 1 - HOT data: 20, 20, 20 - Dir dnode: 22, 22, 22 - File dnode: 22, 22, 22 - Indir nodes: 21, 21, 21 So this patch adds sanity check to detect such condition to avoid this issue. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim Signed-off-by: Sasha Levin --- fs/f2fs/super.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index fc5c41257e68..4c169ba50c0f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1959,7 +1959,7 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) unsigned int segment_count_main; unsigned int cp_pack_start_sum, cp_payload; block_t user_block_count; - int i; + int i, j; total = le32_to_cpu(raw_super->segment_count); fsmeta = le32_to_cpu(raw_super->segment_count_ckpt); @@ -2000,11 +2000,43 @@ int sanity_check_ckpt(struct f2fs_sb_info *sbi) if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_NODE_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_node_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Node segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) { if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs || le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg) return 1; + for (j = i + 1; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_data_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u, %u) has the same " + "segno: %u", i, j, + le32_to_cpu(ckpt->cur_data_segno[i])); + return 1; + } + } + } + for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { + for (j = i; j < NR_CURSEG_DATA_TYPE; j++) { + if (le32_to_cpu(ckpt->cur_node_segno[i]) == + le32_to_cpu(ckpt->cur_data_segno[j])) { + f2fs_msg(sbi->sb, KERN_ERR, + "Data segment (%u) and Data segment (%u)" + " has the same segno: %u", i, j, + le32_to_cpu(ckpt->cur_node_segno[i])); + return 1; + } + } } sit_bitmap_size = le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); From edbcdafac3e44346b8f8321a03b5434c26cddb80 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 4 Sep 2018 12:07:55 +0200 Subject: [PATCH 43/93] netfilter: xt_cgroup: shrink size of v2 path [ Upstream commit 0d704967f4a49cc2212350b3e4a8231f8b4283ed ] cgroup v2 path field is PATH_MAX which is too large, this is placing too much pressure on memory allocation for people with many rules doing cgroup v1 classid matching, side effects of this are bug reports like: https://bugzilla.kernel.org/show_bug.cgi?id=200639 This patch registers a new revision that shrinks the cgroup path to 512 bytes, which is the same approach we follow in similar extensions that have a path field. Cc: Tejun Heo Signed-off-by: Pablo Neira Ayuso Acked-by: Tejun Heo Signed-off-by: Sasha Levin --- include/uapi/linux/netfilter/xt_cgroup.h | 16 ++++++ net/netfilter/xt_cgroup.c | 72 ++++++++++++++++++++++++ 2 files changed, 88 insertions(+) diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h index e96dfa1b34f7..b74e370d6133 100644 --- a/include/uapi/linux/netfilter/xt_cgroup.h +++ b/include/uapi/linux/netfilter/xt_cgroup.h @@ -22,4 +22,20 @@ struct xt_cgroup_info_v1 { void *priv __attribute__((aligned(8))); }; +#define XT_CGROUP_PATH_MAX 512 + +struct xt_cgroup_info_v2 { + __u8 has_path; + __u8 has_classid; + __u8 invert_path; + __u8 invert_classid; + union { + char path[XT_CGROUP_PATH_MAX]; + __u32 classid; + }; + + /* kernel internal data */ + void *priv __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_CGROUP_H */ diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 891f4e7e8ea7..db18c0177b0f 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -66,6 +66,38 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) return 0; } +static int cgroup_mt_check_v2(const struct xt_mtchk_param *par) +{ + struct xt_cgroup_info_v2 *info = par->matchinfo; + struct cgroup *cgrp; + + if ((info->invert_path & ~1) || (info->invert_classid & ~1)) + return -EINVAL; + + if (!info->has_path && !info->has_classid) { + pr_info("xt_cgroup: no path or classid specified\n"); + return -EINVAL; + } + + if (info->has_path && info->has_classid) { + pr_info_ratelimited("path and classid specified\n"); + return -EINVAL; + } + + info->priv = NULL; + if (info->has_path) { + cgrp = cgroup_get_from_path(info->path); + if (IS_ERR(cgrp)) { + pr_info_ratelimited("invalid path, errno=%ld\n", + PTR_ERR(cgrp)); + return -EINVAL; + } + info->priv = cgrp; + } + + return 0; +} + static bool cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par) { @@ -95,6 +127,24 @@ static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) info->invert_classid; } +static bool cgroup_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_cgroup_info_v2 *info = par->matchinfo; + struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data; + struct cgroup *ancestor = info->priv; + struct sock *sk = skb->sk; + + if (!sk || !sk_fullsock(sk) || !net_eq(xt_net(par), sock_net(sk))) + return false; + + if (ancestor) + return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^ + info->invert_path; + else + return (info->classid == sock_cgroup_classid(skcd)) ^ + info->invert_classid; +} + static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) { struct xt_cgroup_info_v1 *info = par->matchinfo; @@ -103,6 +153,14 @@ static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par) cgroup_put(info->priv); } +static void cgroup_mt_destroy_v2(const struct xt_mtdtor_param *par) +{ + struct xt_cgroup_info_v2 *info = par->matchinfo; + + if (info->priv) + cgroup_put(info->priv); +} + static struct xt_match cgroup_mt_reg[] __read_mostly = { { .name = "cgroup", @@ -130,6 +188,20 @@ static struct xt_match cgroup_mt_reg[] __read_mostly = { (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), }, + { + .name = "cgroup", + .revision = 2, + .family = NFPROTO_UNSPEC, + .checkentry = cgroup_mt_check_v2, + .match = cgroup_mt_v2, + .matchsize = sizeof(struct xt_cgroup_info_v2), + .usersize = offsetof(struct xt_cgroup_info_v2, priv), + .destroy = cgroup_mt_destroy_v2, + .me = THIS_MODULE, + .hooks = (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_IN), + }, }; static int __init cgroup_mt_init(void) From 1fa5208a30e645355b8946bf8645710381d4e735 Mon Sep 17 00:00:00 2001 From: Michal Simek Date: Mon, 3 Sep 2018 15:10:49 +0200 Subject: [PATCH 44/93] serial: uartps: console_setup() can't be placed to init section [ Upstream commit 4bb1ce2350a598502b23088b169e16b43d4bc639 ] When console device is rebinded, console_setup() is called again. But marking it as __init means that function will be clear after boot is complete. If console device is binded again console_setup() is not found and error "Unable to handle kernel paging request at virtual address" is reported. Signed-off-by: Michal Simek Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/tty/serial/xilinx_uartps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c index f438a2158006..b0da63737aa1 100644 --- a/drivers/tty/serial/xilinx_uartps.c +++ b/drivers/tty/serial/xilinx_uartps.c @@ -1270,7 +1270,7 @@ static void cdns_uart_console_write(struct console *co, const char *s, * * Return: 0 on success, negative errno otherwise. */ -static int __init cdns_uart_console_setup(struct console *co, char *options) +static int cdns_uart_console_setup(struct console *co, char *options) { struct uart_port *port = &cdns_uart_port[co->index]; int baud = 9600; From 8b8fa9879d44d9b488ebbadee2ca1da4ac76e033 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 10 Sep 2018 09:57:00 -0500 Subject: [PATCH 45/93] powerpc/pseries: Remove prrn_work workqueue [ Upstream commit cd24e457fd8b2d087d9236700c8d2957054598bf ] When a PRRN event is received we are already running in a worker thread. Instead of spawning off another worker thread on the prrn_work workqueue to handle the PRRN event we can just call the PRRN handler routine directly. With this update we can also pass the scope variable for the PRRN event directly to the handler instead of it being a global variable. This patch fixes the following oops mnessage we are seeing in PRRN testing: Oops: Bad kernel stack pointer, sig: 6 [#1] SMP NR_CPUS=2048 NUMA pSeries Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache binfmt_misc reiserfs vfat fat rpadlpar_io(X) rpaphp(X) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag af_packet xfs libcrc32c dm_service_time ibmveth(X) ses enclosure scsi_transport_sas rtc_generic btrfs xor raid6_pq sd_mod ibmvscsi(X) scsi_transport_srp ipr(X) libata sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 Supported: Yes, External 54 CPU: 7 PID: 18967 Comm: kworker/u96:0 Tainted: G X 4.4.126-94.22-default #1 Workqueue: pseries hotplug workque pseries_hp_work_fn task: c000000775367790 ti: c00000001ebd4000 task.ti: c00000070d140000 NIP: 0000000000000000 LR: 000000001fb3d050 CTR: 0000000000000000 REGS: c00000001ebd7d40 TRAP: 0700 Tainted: G X (4.4.126-94.22-default) MSR: 8000000102081000 <41,VEC,ME5 CR: 28000002 XER: 20040018 4 CFAR: 000000001fb3d084 40 419 1 3 GPR00: 000000000000000040000000000010007 000000001ffff400 000000041fffe200 GPR04: 000000000000008050000000000000000 000000001fb15fa8 0000000500000500 GPR08: 000000000001f40040000000000000001 0000000000000000 000005:5200040002 GPR12: 00000000000000005c000000007a05400 c0000000000e89f8 000000001ed9f668 GPR16: 000000001fbeff944000000001fbeff94 000000001fb545e4 0000006000000060 GPR20: ffffffffffffffff4ffffffffffffffff 0000000000000000 0000000000000000 GPR24: 00000000000000005400000001fb3c000 0000000000000000 000000001fb1b040 GPR28: 000000001fb240004000000001fb440d8 0000000000000008 0000000000000000 NIP [0000000000000000] 5 (null) LR [000000001fb3d050] 031fb3d050 Call Trace: 4 Instruction dump: 4 5:47 12 2 XXXXXXXX XXXXXXXX XXXXX4XX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXX5XX XXXXXXXX 60000000 60000000 60000000 60000000 ---[ end trace aa5627b04a7d9d6b ]--- 3NMI watchdog: BUG: soft lockup - CPU#27 stuck for 23s! [kworker/27:0:13903] Modules linked in: nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace sunrpc fscache binfmt_misc reiserfs vfat fat rpadlpar_io(X) rpaphp(X) tcp_diag udp_diag inet_diag unix_diag af_packet_diag netlink_diag af_packet xfs libcrc32c dm_service_time ibmveth(X) ses enclosure scsi_transport_sas rtc_generic btrfs xor raid6_pq sd_mod ibmvscsi(X) scsi_transport_srp ipr(X) libata sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4 Supported: Yes, External CPU: 27 PID: 13903 Comm: kworker/27:0 Tainted: G D X 4.4.126-94.22-default #1 Workqueue: events prrn_work_fn task: c000000747cfa390 ti: c00000074712c000 task.ti: c00000074712c000 NIP: c0000000008002a8 LR: c000000000090770 CTR: 000000000032e088 REGS: c00000074712f7b0 TRAP: 0901 Tainted: G D X (4.4.126-94.22-default) MSR: 8000000100009033 CR: 22482044 XER: 20040000 CFAR: c0000000008002c4 SOFTE: 1 GPR00: c000000000090770 c00000074712fa30 c000000000f09800 c000000000fa1928 6:02 GPR04: c000000775f5e000 fffffffffffffffe 0000000000000001 c000000000f42db8 GPR08: 0000000000000001 0000000080000007 0000000000000000 0000000000000000 GPR12: 8006210083180000 c000000007a14400 NIP [c0000000008002a8] _raw_spin_lock+0x68/0xd0 LR [c000000000090770] mobility_rtas_call+0x50/0x100 Call Trace: 59 5 [c00000074712fa60] [c000000000090770] mobility_rtas_call+0x50/0x100 [c00000074712faf0] [c000000000090b08] pseries_devicetree_update+0xf8/0x530 [c00000074712fc20] [c000000000031ba4] prrn_work_fn+0x34/0x50 [c00000074712fc40] [c0000000000e0390] process_one_work+0x1a0/0x4e0 [c00000074712fcd0] [c0000000000e0870] worker_thread+0x1a0/0x6105:57 2 [c00000074712fd80] [c0000000000e8b18] kthread+0x128/0x150 [c00000074712fe30] [c0000000000096f8] ret_from_kernel_thread+0x5c/0x64 Instruction dump: 2c090000 40c20010 7d40192d 40c2fff0 7c2004ac 2fa90000 40de0018 5:540030 3 e8010010 ebe1fff8 7c0803a6 4e800020 <7c210b78> e92d0000 89290009 792affe3 Signed-off-by: John Allen Signed-off-by: Haren Myneni Signed-off-by: Nathan Fontenot Signed-off-by: Michael Ellerman Signed-off-by: Sasha Levin --- arch/powerpc/kernel/rtasd.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 0f0b1b2f3b60..7caeae73348d 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -274,27 +274,16 @@ void pSeries_log_error(char *buf, unsigned int err_type, int fatal) } #ifdef CONFIG_PPC_PSERIES -static s32 prrn_update_scope; - -static void prrn_work_fn(struct work_struct *work) +static void handle_prrn_event(s32 scope) { /* * For PRRN, we must pass the negative of the scope value in * the RTAS event. */ - pseries_devicetree_update(-prrn_update_scope); + pseries_devicetree_update(-scope); numa_update_cpu_topology(false); } -static DECLARE_WORK(prrn_work, prrn_work_fn); - -static void prrn_schedule_update(u32 scope) -{ - flush_work(&prrn_work); - prrn_update_scope = scope; - schedule_work(&prrn_work); -} - static void handle_rtas_event(const struct rtas_error_log *log) { if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) @@ -303,7 +292,7 @@ static void handle_rtas_event(const struct rtas_error_log *log) /* For PRRN Events the extended log length is used to denote * the scope for calling rtas update-nodes. */ - prrn_schedule_update(rtas_error_extended_log_length(log)); + handle_prrn_event(rtas_error_extended_log_length(log)); } #else From fec306b2ef690030a799d1643a2216c4df8d02ce Mon Sep 17 00:00:00 2001 From: Brad Love Date: Thu, 6 Sep 2018 17:07:48 -0400 Subject: [PATCH 46/93] media: au0828: cannot kfree dev before usb disconnect [ Upstream commit 4add7104919f9e94e0db03e234caeadbfcc02ea9 ] If au0828_analog_register fails, the dev is kfree'd and then flow jumps to done, which can call au0828_usb_disconnect. Since all USB error codes are negative, au0828_usb_disconnect will be called. The problem is au0828_usb_disconnect uses dev, if dev is NULL then there is immediate oops encountered. [ 7.454307] au0828: au0828_usb_probe() au0282_dev_register failed to register on V4L2 [ 7.454323] BUG: unable to handle kernel NULL pointer dereference at 0000000000000050 [ 7.454421] PGD 0 P4D 0 [ 7.454457] Oops: 0002 [#1] SMP PTI [ 7.454500] CPU: 1 PID: 262 Comm: systemd-udevd Tainted: P O 4.18.3 #1 [ 7.454584] Hardware name: Google Panther/Panther, BIOS MattDevo 04/27/2015 [ 7.454670] RIP: 0010:_raw_spin_lock_irqsave+0x2c/0x50 [ 7.454725] Code: 44 00 00 55 48 89 e5 41 54 53 48 89 fb 9c 58 0f 1f 44 00 00 49 89 c4 fa 66 0f 1f 44 00 00 e8 db 23 1b ff 31 c0 ba 01 00 00 00 0f b1 13 85 c0 75 08 4c 89 e0 5b 41 5c 5d c3 89 c6 48 89 df e8 [ 7.455004] RSP: 0018:ffff9130f53ef988 EFLAGS: 00010046 [ 7.455063] RAX: 0000000000000000 RBX: 0000000000000050 RCX: 0000000000000000 [ 7.455139] RDX: 0000000000000001 RSI: 0000000000000003 RDI: 0000000000000050 [ 7.455216] RBP: ffff9130f53ef998 R08: 0000000000000018 R09: 0000000000000090 [ 7.455292] R10: ffffed4cc53cb000 R11: ffffed4cc53cb108 R12: 0000000000000082 [ 7.455369] R13: ffff9130cf2c6188 R14: 0000000000000000 R15: 0000000000000018 [ 7.455447] FS: 00007f2ff8514cc0(0000) GS:ffff9130fcb00000(0000) knlGS:0000000000000000 [ 7.455535] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7.455597] CR2: 0000000000000050 CR3: 00000001753f0002 CR4: 00000000000606a0 [ 7.455675] Call Trace: [ 7.455713] __wake_up_common_lock+0x65/0xc0 [ 7.455764] __wake_up+0x13/0x20 [ 7.455808] ir_lirc_unregister+0x57/0xe0 [rc_core] [ 7.455865] rc_unregister_device+0xa0/0xc0 [rc_core] [ 7.455935] au0828_rc_unregister+0x25/0x40 [au0828] [ 7.455999] au0828_usb_disconnect+0x33/0x80 [au0828] [ 7.456064] au0828_usb_probe.cold.16+0x8d/0x2aa [au0828] [ 7.456130] usb_probe_interface+0xf1/0x300 [ 7.456184] driver_probe_device+0x2e3/0x460 [ 7.456235] __driver_attach+0xe4/0x110 [ 7.456282] ? driver_probe_device+0x460/0x460 [ 7.456335] bus_for_each_dev+0x74/0xb0 [ 7.456385] ? kmem_cache_alloc_trace+0x15d/0x1d0 [ 7.456441] driver_attach+0x1e/0x20 [ 7.456485] bus_add_driver+0x159/0x230 [ 7.456532] driver_register+0x70/0xc0 [ 7.456578] usb_register_driver+0x7f/0x140 [ 7.456626] ? 0xffffffffc0474000 [ 7.456674] au0828_init+0xbc/0x1000 [au0828] [ 7.456725] do_one_initcall+0x4a/0x1c9 [ 7.456771] ? _cond_resched+0x19/0x30 [ 7.456817] ? kmem_cache_alloc_trace+0x15d/0x1d0 [ 7.456873] do_init_module+0x60/0x210 [ 7.456918] load_module+0x221b/0x2710 [ 7.456966] ? vfs_read+0xf5/0x120 [ 7.457010] __do_sys_finit_module+0xbd/0x120 [ 7.457061] ? __do_sys_finit_module+0xbd/0x120 [ 7.457115] __x64_sys_finit_module+0x1a/0x20 [ 7.457166] do_syscall_64+0x5b/0x110 [ 7.457210] entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: Brad Love Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab Signed-off-by: Sasha Levin --- drivers/media/usb/au0828/au0828-core.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/media/usb/au0828/au0828-core.c b/drivers/media/usb/au0828/au0828-core.c index cd363a2100d4..257ae0d8cfe2 100644 --- a/drivers/media/usb/au0828/au0828-core.c +++ b/drivers/media/usb/au0828/au0828-core.c @@ -629,7 +629,6 @@ static int au0828_usb_probe(struct usb_interface *interface, pr_err("%s() au0282_dev_register failed to register on V4L2\n", __func__); mutex_unlock(&dev->lock); - kfree(dev); goto done; } From e058a6e56e38f90d649fbedb8bfaebb82aea46eb Mon Sep 17 00:00:00 2001 From: Julian Sax Date: Wed, 19 Sep 2018 11:46:23 +0200 Subject: [PATCH 47/93] HID: i2c-hid: override HID descriptors for certain devices [ Upstream commit 9ee3e06610fdb8a601cde59c92089fb6c1deb4aa ] A particular touchpad (SIPODEV SP1064) refuses to supply the HID descriptors. This patch provides the framework for overriding these descriptors based on DMI data. It also includes the descriptors for said touchpad, which were extracted by listening to the traffic of the windows filter driver, as well as the DMI data for the laptops known to use this device. Relevant Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1526312 Cc: Hans de Goede Reported-and-tested-by: ahormann@gmx.net Reported-and-tested-by: Bruno Jesus Reported-and-tested-by: Dietrich Reported-and-tested-by: kloxdami@yahoo.com Signed-off-by: Julian Sax Reviewed-by: Benjamin Tissoires Signed-off-by: Jiri Kosina Signed-off-by: Sasha Levin --- drivers/hid/i2c-hid/Makefile | 3 + .../hid/i2c-hid/{i2c-hid.c => i2c-hid-core.c} | 56 ++- drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c | 376 ++++++++++++++++++ drivers/hid/i2c-hid/i2c-hid.h | 20 + 4 files changed, 437 insertions(+), 18 deletions(-) rename drivers/hid/i2c-hid/{i2c-hid.c => i2c-hid-core.c} (96%) create mode 100644 drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c create mode 100644 drivers/hid/i2c-hid/i2c-hid.h diff --git a/drivers/hid/i2c-hid/Makefile b/drivers/hid/i2c-hid/Makefile index 832d8f9aaba2..099e1ce2f234 100644 --- a/drivers/hid/i2c-hid/Makefile +++ b/drivers/hid/i2c-hid/Makefile @@ -3,3 +3,6 @@ # obj-$(CONFIG_I2C_HID) += i2c-hid.o + +i2c-hid-objs = i2c-hid-core.o +i2c-hid-$(CONFIG_DMI) += i2c-hid-dmi-quirks.o diff --git a/drivers/hid/i2c-hid/i2c-hid.c b/drivers/hid/i2c-hid/i2c-hid-core.c similarity index 96% rename from drivers/hid/i2c-hid/i2c-hid.c rename to drivers/hid/i2c-hid/i2c-hid-core.c index 136a34dc31b8..7842d76aa813 100644 --- a/drivers/hid/i2c-hid/i2c-hid.c +++ b/drivers/hid/i2c-hid/i2c-hid-core.c @@ -43,6 +43,7 @@ #include #include "../hid-ids.h" +#include "i2c-hid.h" /* quirks to control the device */ #define I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV BIT(0) @@ -663,6 +664,7 @@ static int i2c_hid_parse(struct hid_device *hid) char *rdesc; int ret; int tries = 3; + char *use_override; i2c_hid_dbg(ihid, "entering %s\n", __func__); @@ -681,26 +683,37 @@ static int i2c_hid_parse(struct hid_device *hid) if (ret) return ret; - rdesc = kzalloc(rsize, GFP_KERNEL); + use_override = i2c_hid_get_dmi_hid_report_desc_override(client->name, + &rsize); - if (!rdesc) { - dbg_hid("couldn't allocate rdesc memory\n"); - return -ENOMEM; - } + if (use_override) { + rdesc = use_override; + i2c_hid_dbg(ihid, "Using a HID report descriptor override\n"); + } else { + rdesc = kzalloc(rsize, GFP_KERNEL); - i2c_hid_dbg(ihid, "asking HID report descriptor\n"); + if (!rdesc) { + dbg_hid("couldn't allocate rdesc memory\n"); + return -ENOMEM; + } - ret = i2c_hid_command(client, &hid_report_descr_cmd, rdesc, rsize); - if (ret) { - hid_err(hid, "reading report descriptor failed\n"); - kfree(rdesc); - return -EIO; + i2c_hid_dbg(ihid, "asking HID report descriptor\n"); + + ret = i2c_hid_command(client, &hid_report_descr_cmd, + rdesc, rsize); + if (ret) { + hid_err(hid, "reading report descriptor failed\n"); + kfree(rdesc); + return -EIO; + } } i2c_hid_dbg(ihid, "Report Descriptor: %*ph\n", rsize, rdesc); ret = hid_parse_report(hid, rdesc, rsize); - kfree(rdesc); + if (!use_override) + kfree(rdesc); + if (ret) { dbg_hid("parsing report descriptor failed\n"); return ret; @@ -827,12 +840,19 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid) int ret; /* i2c hid fetch using a fixed descriptor size (30 bytes) */ - i2c_hid_dbg(ihid, "Fetching the HID descriptor\n"); - ret = i2c_hid_command(client, &hid_descr_cmd, ihid->hdesc_buffer, - sizeof(struct i2c_hid_desc)); - if (ret) { - dev_err(&client->dev, "hid_descr_cmd failed\n"); - return -ENODEV; + if (i2c_hid_get_dmi_i2c_hid_desc_override(client->name)) { + i2c_hid_dbg(ihid, "Using a HID descriptor override\n"); + ihid->hdesc = + *i2c_hid_get_dmi_i2c_hid_desc_override(client->name); + } else { + i2c_hid_dbg(ihid, "Fetching the HID descriptor\n"); + ret = i2c_hid_command(client, &hid_descr_cmd, + ihid->hdesc_buffer, + sizeof(struct i2c_hid_desc)); + if (ret) { + dev_err(&client->dev, "hid_descr_cmd failed\n"); + return -ENODEV; + } } /* Validate the length of HID descriptor, the 4 first bytes: diff --git a/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c new file mode 100644 index 000000000000..1d645c9ab417 --- /dev/null +++ b/drivers/hid/i2c-hid/i2c-hid-dmi-quirks.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0+ + +/* + * Quirks for I2C-HID devices that do not supply proper descriptors + * + * Copyright (c) 2018 Julian Sax + * + */ + +#include +#include +#include + +#include "i2c-hid.h" + + +struct i2c_hid_desc_override { + union { + struct i2c_hid_desc *i2c_hid_desc; + uint8_t *i2c_hid_desc_buffer; + }; + uint8_t *hid_report_desc; + unsigned int hid_report_desc_size; + uint8_t *i2c_name; +}; + + +/* + * descriptors for the SIPODEV SP1064 touchpad + * + * This device does not supply any descriptors and on windows a filter + * driver operates between the i2c-hid layer and the device and injects + * these descriptors when the device is prompted. The descriptors were + * extracted by listening to the i2c-hid traffic that occurs between the + * windows filter driver and the windows i2c-hid driver. + */ + +static const struct i2c_hid_desc_override sipodev_desc = { + .i2c_hid_desc_buffer = (uint8_t []) + {0x1e, 0x00, /* Length of descriptor */ + 0x00, 0x01, /* Version of descriptor */ + 0xdb, 0x01, /* Length of report descriptor */ + 0x21, 0x00, /* Location of report descriptor */ + 0x24, 0x00, /* Location of input report */ + 0x1b, 0x00, /* Max input report length */ + 0x25, 0x00, /* Location of output report */ + 0x11, 0x00, /* Max output report length */ + 0x22, 0x00, /* Location of command register */ + 0x23, 0x00, /* Location of data register */ + 0x11, 0x09, /* Vendor ID */ + 0x88, 0x52, /* Product ID */ + 0x06, 0x00, /* Version ID */ + 0x00, 0x00, 0x00, 0x00 /* Reserved */ + }, + + .hid_report_desc = (uint8_t []) + {0x05, 0x01, /* Usage Page (Desktop), */ + 0x09, 0x02, /* Usage (Mouse), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x01, /* Report ID (1), */ + 0x09, 0x01, /* Usage (Pointer), */ + 0xA1, 0x00, /* Collection (Physical), */ + 0x05, 0x09, /* Usage Page (Button), */ + 0x19, 0x01, /* Usage Minimum (01h), */ + 0x29, 0x02, /* Usage Maximum (02h), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x02, /* Report Count (2), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x06, /* Report Count (6), */ + 0x81, 0x01, /* Input (Constant), */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x09, 0x30, /* Usage (X), */ + 0x09, 0x31, /* Usage (Y), */ + 0x15, 0x81, /* Logical Minimum (-127), */ + 0x25, 0x7F, /* Logical Maximum (127), */ + 0x75, 0x08, /* Report Size (8), */ + 0x95, 0x02, /* Report Count (2), */ + 0x81, 0x06, /* Input (Variable, Relative), */ + 0xC0, /* End Collection, */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x05, /* Usage (Touchpad), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x04, /* Report ID (4), */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x02, /* Collection (Logical), */ + 0x15, 0x00, /* Logical Minimum (0), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x09, 0x47, /* Usage (Touch Valid), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x95, 0x02, /* Report Count (2), */ + 0x75, 0x01, /* Report Size (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x01, /* Report Count (1), */ + 0x75, 0x03, /* Report Size (3), */ + 0x25, 0x05, /* Logical Maximum (5), */ + 0x09, 0x51, /* Usage (Contact Identifier), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x26, 0x44, 0x0A, /* Logical Maximum (2628), */ + 0x75, 0x10, /* Report Size (16), */ + 0x55, 0x0E, /* Unit Exponent (14), */ + 0x65, 0x11, /* Unit (Centimeter), */ + 0x09, 0x30, /* Usage (X), */ + 0x46, 0x1A, 0x04, /* Physical Maximum (1050), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x46, 0xBC, 0x02, /* Physical Maximum (700), */ + 0x26, 0x34, 0x05, /* Logical Maximum (1332), */ + 0x09, 0x31, /* Usage (Y), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x02, /* Collection (Logical), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x09, 0x47, /* Usage (Touch Valid), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x95, 0x02, /* Report Count (2), */ + 0x75, 0x01, /* Report Size (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x01, /* Report Count (1), */ + 0x75, 0x03, /* Report Size (3), */ + 0x25, 0x05, /* Logical Maximum (5), */ + 0x09, 0x51, /* Usage (Contact Identifier), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x26, 0x44, 0x0A, /* Logical Maximum (2628), */ + 0x75, 0x10, /* Report Size (16), */ + 0x09, 0x30, /* Usage (X), */ + 0x46, 0x1A, 0x04, /* Physical Maximum (1050), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x46, 0xBC, 0x02, /* Physical Maximum (700), */ + 0x26, 0x34, 0x05, /* Logical Maximum (1332), */ + 0x09, 0x31, /* Usage (Y), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x02, /* Collection (Logical), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x09, 0x47, /* Usage (Touch Valid), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x95, 0x02, /* Report Count (2), */ + 0x75, 0x01, /* Report Size (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x01, /* Report Count (1), */ + 0x75, 0x03, /* Report Size (3), */ + 0x25, 0x05, /* Logical Maximum (5), */ + 0x09, 0x51, /* Usage (Contact Identifier), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x26, 0x44, 0x0A, /* Logical Maximum (2628), */ + 0x75, 0x10, /* Report Size (16), */ + 0x09, 0x30, /* Usage (X), */ + 0x46, 0x1A, 0x04, /* Physical Maximum (1050), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x46, 0xBC, 0x02, /* Physical Maximum (700), */ + 0x26, 0x34, 0x05, /* Logical Maximum (1332), */ + 0x09, 0x31, /* Usage (Y), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x02, /* Collection (Logical), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x09, 0x47, /* Usage (Touch Valid), */ + 0x09, 0x42, /* Usage (Tip Switch), */ + 0x95, 0x02, /* Report Count (2), */ + 0x75, 0x01, /* Report Size (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x01, /* Report Count (1), */ + 0x75, 0x03, /* Report Size (3), */ + 0x25, 0x05, /* Logical Maximum (5), */ + 0x09, 0x51, /* Usage (Contact Identifier), */ + 0x81, 0x02, /* Input (Variable), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x03, /* Report Count (3), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x05, 0x01, /* Usage Page (Desktop), */ + 0x26, 0x44, 0x0A, /* Logical Maximum (2628), */ + 0x75, 0x10, /* Report Size (16), */ + 0x09, 0x30, /* Usage (X), */ + 0x46, 0x1A, 0x04, /* Physical Maximum (1050), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x46, 0xBC, 0x02, /* Physical Maximum (700), */ + 0x26, 0x34, 0x05, /* Logical Maximum (1332), */ + 0x09, 0x31, /* Usage (Y), */ + 0x81, 0x02, /* Input (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x55, 0x0C, /* Unit Exponent (12), */ + 0x66, 0x01, 0x10, /* Unit (Seconds), */ + 0x47, 0xFF, 0xFF, 0x00, 0x00,/* Physical Maximum (65535), */ + 0x27, 0xFF, 0xFF, 0x00, 0x00,/* Logical Maximum (65535), */ + 0x75, 0x10, /* Report Size (16), */ + 0x95, 0x01, /* Report Count (1), */ + 0x09, 0x56, /* Usage (Scan Time), */ + 0x81, 0x02, /* Input (Variable), */ + 0x09, 0x54, /* Usage (Contact Count), */ + 0x25, 0x7F, /* Logical Maximum (127), */ + 0x75, 0x08, /* Report Size (8), */ + 0x81, 0x02, /* Input (Variable), */ + 0x05, 0x09, /* Usage Page (Button), */ + 0x09, 0x01, /* Usage (01h), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x01, /* Report Count (1), */ + 0x81, 0x02, /* Input (Variable), */ + 0x95, 0x07, /* Report Count (7), */ + 0x81, 0x03, /* Input (Constant, Variable), */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x85, 0x02, /* Report ID (2), */ + 0x09, 0x55, /* Usage (Contact Count Maximum), */ + 0x09, 0x59, /* Usage (59h), */ + 0x75, 0x04, /* Report Size (4), */ + 0x95, 0x02, /* Report Count (2), */ + 0x25, 0x0F, /* Logical Maximum (15), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x85, 0x07, /* Report ID (7), */ + 0x09, 0x60, /* Usage (60h), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x01, /* Report Count (1), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0x95, 0x07, /* Report Count (7), */ + 0xB1, 0x03, /* Feature (Constant, Variable), */ + 0x85, 0x06, /* Report ID (6), */ + 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ + 0x09, 0xC5, /* Usage (C5h), */ + 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ + 0x75, 0x08, /* Report Size (8), */ + 0x96, 0x00, 0x01, /* Report Count (256), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0xC0, /* End Collection, */ + 0x06, 0x00, 0xFF, /* Usage Page (FF00h), */ + 0x09, 0x01, /* Usage (01h), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x0D, /* Report ID (13), */ + 0x26, 0xFF, 0x00, /* Logical Maximum (255), */ + 0x19, 0x01, /* Usage Minimum (01h), */ + 0x29, 0x02, /* Usage Maximum (02h), */ + 0x75, 0x08, /* Report Size (8), */ + 0x95, 0x02, /* Report Count (2), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0xC0, /* End Collection, */ + 0x05, 0x0D, /* Usage Page (Digitizer), */ + 0x09, 0x0E, /* Usage (Configuration), */ + 0xA1, 0x01, /* Collection (Application), */ + 0x85, 0x03, /* Report ID (3), */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x02, /* Collection (Logical), */ + 0x09, 0x52, /* Usage (Device Mode), */ + 0x25, 0x0A, /* Logical Maximum (10), */ + 0x95, 0x01, /* Report Count (1), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0xC0, /* End Collection, */ + 0x09, 0x22, /* Usage (Finger), */ + 0xA1, 0x00, /* Collection (Physical), */ + 0x85, 0x05, /* Report ID (5), */ + 0x09, 0x57, /* Usage (57h), */ + 0x09, 0x58, /* Usage (58h), */ + 0x75, 0x01, /* Report Size (1), */ + 0x95, 0x02, /* Report Count (2), */ + 0x25, 0x01, /* Logical Maximum (1), */ + 0xB1, 0x02, /* Feature (Variable), */ + 0x95, 0x06, /* Report Count (6), */ + 0xB1, 0x03, /* Feature (Constant, Variable),*/ + 0xC0, /* End Collection, */ + 0xC0 /* End Collection */ + }, + .hid_report_desc_size = 475, + .i2c_name = "SYNA3602:00" +}; + + +static const struct dmi_system_id i2c_hid_dmi_desc_override_table[] = { + { + .ident = "Teclast F6 Pro", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "F6 Pro"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Teclast F7", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TECLAST"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "F7"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Trekstor Primebook C13", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C13"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Trekstor Primebook C11", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "TREKSTOR"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Primebook C11"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Direkt-Tek DTLAPY116-2", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Direkt-Tek"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "DTLAPY116-2"), + }, + .driver_data = (void *)&sipodev_desc + }, + { + .ident = "Mediacom Flexbook Edge 11", + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "MEDIACOM"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "FlexBook edge11 - M-FBE11"), + }, + .driver_data = (void *)&sipodev_desc + } +}; + + +struct i2c_hid_desc *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name) +{ + struct i2c_hid_desc_override *override; + const struct dmi_system_id *system_id; + + system_id = dmi_first_match(i2c_hid_dmi_desc_override_table); + if (!system_id) + return NULL; + + override = system_id->driver_data; + if (strcmp(override->i2c_name, i2c_name)) + return NULL; + + return override->i2c_hid_desc; +} + +char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name, + unsigned int *size) +{ + struct i2c_hid_desc_override *override; + const struct dmi_system_id *system_id; + + system_id = dmi_first_match(i2c_hid_dmi_desc_override_table); + if (!system_id) + return NULL; + + override = system_id->driver_data; + if (strcmp(override->i2c_name, i2c_name)) + return NULL; + + *size = override->hid_report_desc_size; + return override->hid_report_desc; +} diff --git a/drivers/hid/i2c-hid/i2c-hid.h b/drivers/hid/i2c-hid/i2c-hid.h new file mode 100644 index 000000000000..a8c19aef5824 --- /dev/null +++ b/drivers/hid/i2c-hid/i2c-hid.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef I2C_HID_H +#define I2C_HID_H + + +#ifdef CONFIG_DMI +struct i2c_hid_desc *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name); +char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name, + unsigned int *size); +#else +static inline struct i2c_hid_desc + *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name) +{ return NULL; } +static inline char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name, + unsigned int *size) +{ return NULL; } +#endif + +#endif From ef9f432612e56651d2aa2984763ff1ba69621e82 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Fri, 28 Sep 2018 15:32:46 +0200 Subject: [PATCH 48/93] ARM: samsung: Limit SAMSUNG_PM_CHECK config option to non-Exynos platforms [ Upstream commit 6862fdf2201ab67cd962dbf0643d37db909f4860 ] "S3C2410 PM Suspend Memory CRC" feature (controlled by SAMSUNG_PM_CHECK config option) is incompatible with highmem (uses phys_to_virt() instead of proper mapping) which is used by the majority of Exynos boards. The issue manifests itself in OOPS on affected boards, i.e. on Odroid-U3 I got the following one: Unable to handle kernel paging request at virtual address f0000000 pgd = 1c0f9bb4 [f0000000] *pgd=00000000 Internal error: Oops: 5 [#1] PREEMPT SMP ARM [] (crc32_le) from [] (s3c_pm_makecheck+0x34/0x54) [] (s3c_pm_makecheck) from [] (s3c_pm_run_res+0x74/0x8c) [] (s3c_pm_run_res) from [] (s3c_pm_run_res+0x44/0x8c) [] (s3c_pm_run_res) from [] (exynos_suspend_enter+0x64/0x148) [] (exynos_suspend_enter) from [] (suspend_devices_and_enter+0x9ec/0xe74) [] (suspend_devices_and_enter) from [] (pm_suspend+0x770/0xc04) [] (pm_suspend) from [] (state_store+0x6c/0xcc) [] (state_store) from [] (kobj_attr_store+0x14/0x20) [] (kobj_attr_store) from [] (sysfs_kf_write+0x4c/0x50) [] (sysfs_kf_write) from [] (kernfs_fop_write+0xfc/0x1e4) [] (kernfs_fop_write) from [] (__vfs_write+0x2c/0x140) [] (__vfs_write) from [] (vfs_write+0xa4/0x160) [] (vfs_write) from [] (ksys_write+0x40/0x8c) [] (ksys_write) from [] (ret_fast_syscall+0x0/0x28) Add PLAT_S3C24XX, ARCH_S3C64XX and ARCH_S5PV210 dependencies to SAMSUNG_PM_CHECK config option to hide it on Exynos platforms. Signed-off-by: Bartlomiej Zolnierkiewicz Signed-off-by: Krzysztof Kozlowski Signed-off-by: Sasha Levin --- arch/arm/plat-samsung/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/plat-samsung/Kconfig b/arch/arm/plat-samsung/Kconfig index e8229b9fee4a..3265b8f86069 100644 --- a/arch/arm/plat-samsung/Kconfig +++ b/arch/arm/plat-samsung/Kconfig @@ -258,7 +258,7 @@ config S3C_PM_DEBUG_LED_SMDK config SAMSUNG_PM_CHECK bool "S3C2410 PM Suspend Memory CRC" - depends on PM + depends on PM && (PLAT_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210) select CRC32 help Enable the PM code's memory area checksum over sleep. This option From 0dd663753b0afcc47c9c6b93e36bb62530de68be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Thu, 20 Sep 2018 13:29:42 -0700 Subject: [PATCH 49/93] usbip: fix vhci_hcd controller counting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit e0a2e73e501c77037c8756137e87b12c7c3c9793 ] Without this usbip fails on a machine with devices that lexicographically come after vhci_hcd. ie. $ ls -l /sys/devices/platform ... drwxr-xr-x. 4 root root 0 Sep 19 16:21 serial8250 -rw-r--r--. 1 root root 4096 Sep 19 23:50 uevent drwxr-xr-x. 6 root root 0 Sep 20 13:15 vhci_hcd.0 drwxr-xr-x. 4 root root 0 Sep 19 16:22 w83627hf.656 Because it detects 'w83627hf.656' as another vhci_hcd controller, and then fails to be able to talk to it. Note: this doesn't actually fix usbip's support for multiple controllers... that's still broken for other reasons ("vhci_hcd.0" is hardcoded in a string macro), but is enough to actually make it work on the above machine. See also: https://bugzilla.redhat.com/show_bug.cgi?id=1631148 Cc: Jonathan Dieter Cc: Valentina Manea Cc: Shuah Khan Cc: linux-usb@vger.kernel.org Signed-off-by: Maciej Żenczykowski Acked-by: Shuah Khan (Samsung OSG) Tested-by: Jonathan Dieter Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- tools/usb/usbip/libsrc/vhci_driver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/usb/usbip/libsrc/vhci_driver.c b/tools/usb/usbip/libsrc/vhci_driver.c index ed8c9d360c0f..4225d462701d 100644 --- a/tools/usb/usbip/libsrc/vhci_driver.c +++ b/tools/usb/usbip/libsrc/vhci_driver.c @@ -150,7 +150,7 @@ static int get_nports(struct udev_device *hc_device) static int vhci_hcd_filter(const struct dirent *dirent) { - return strcmp(dirent->d_name, "vhci_hcd") >= 0; + return !strncmp(dirent->d_name, "vhci_hcd.", 9); } static int get_ncontrollers(void) From 00254adf3db21a6cfb6d7bdac367e774662198f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ronald=20Tschal=C3=A4r?= Date: Sun, 30 Sep 2018 19:52:51 -0700 Subject: [PATCH 50/93] ACPI / SBS: Fix GPE storm on recent MacBookPro's MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit ca1721c5bee77105829cbd7baab8ee0eab85b06d ] On Apple machines, plugging-in or unplugging the power triggers a GPE for the EC. Since these machines expose an SBS device, this GPE ends up triggering the acpi_sbs_callback(). This in turn tries to get the status of the SBS charger. However, on MBP13,* and MBP14,* machines, performing the smbus-read operation to get the charger's status triggers the EC's GPE again. The result is an endless re-triggering and handling of that GPE, consuming significant CPU resources (> 50% in irq). In the end this is quite similar to commit 3031cddea633 (ACPI / SBS: Don't assume the existence of an SBS charger), except that on the above machines a status of all 1's is returned. And like there, we just want ignore the charger here. Link: https://bugzilla.kernel.org/show_bug.cgi?id=198169 Signed-off-by: Ronald Tschalär Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/sbs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/sbs.c b/drivers/acpi/sbs.c index a2428e9462dd..3c092f07d7e3 100644 --- a/drivers/acpi/sbs.c +++ b/drivers/acpi/sbs.c @@ -441,9 +441,13 @@ static int acpi_ac_get_present(struct acpi_sbs *sbs) /* * The spec requires that bit 4 always be 1. If it's not set, assume - * that the implementation doesn't support an SBS charger + * that the implementation doesn't support an SBS charger. + * + * And on some MacBooks a status of 0xffff is always returned, no + * matter whether the charger is plugged in or not, which is also + * wrong, so ignore the SBS charger for those too. */ - if (!((status >> 4) & 0x1)) + if (!((status >> 4) & 0x1) || status == 0xffff) return -ENODEV; sbs->charger_present = (status >> 15) & 0x1; From 05e788500b46cb3c45231ea51a3e4b36c2ccd3c0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 22 Aug 2018 14:57:07 -0700 Subject: [PATCH 51/93] KVM: nVMX: restore host state in nested_vmx_vmexit for VMFail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit bd18bffca35397214ae68d85cf7203aca25c3c1d ] A VMEnter that VMFails (as opposed to VMExits) does not touch host state beyond registers that are explicitly noted in the VMFail path, e.g. EFLAGS. Host state does not need to be loaded because VMFail is only signaled for consistency checks that occur before the CPU starts to load guest state, i.e. there is no need to restore any state as nothing has been modified. But in the case where a VMFail is detected by hardware and not by KVM (due to deferring consistency checks to hardware), KVM has already loaded some amount of guest state. Luckily, "loaded" only means loaded to KVM's software model, i.e. vmcs01 has not been modified. So, unwind our software model to the pre-VMEntry host state. Not restoring host state in this VMFail path leads to a variety of failures because we end up with stale data in vcpu->arch, e.g. CR0, CR4, EFER, etc... will all be out of sync relative to vmcs01. Any significant delta in the stale data is all but guaranteed to crash L1, e.g. emulation of SMEP, SMAP, UMIP, WP, etc... will be wrong. An alternative to this "soft" reload would be to load host state from vmcs12 as if we triggered a VMExit (as opposed to VMFail), but that is wildly inconsistent with respect to the VMX architecture, e.g. an L1 VMM with separate VMExit and VMFail paths would explode. Note that this approach does not mean KVM is 100% accurate with respect to VMX hardware behavior, even at an architectural level (the exact order of consistency checks is microarchitecture specific). But 100% emulation accuracy isn't the goal (with this patch), rather the goal is to be consistent in the information delivered to L1, e.g. a VMExit should not fall-through VMENTER, and a VMFail should not jump to HOST_RIP. This technically reverts commit "5af4157388ad (KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure)", but retains the core aspects of that patch, just in an open coded form due to the need to pull state from vmcs01 instead of vmcs12. Restoring host state resolves a variety of issues introduced by commit "4f350c6dbcb9 (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly)", which remedied the incorrect behavior of treating VMFail like VMExit but in doing so neglected to restore arch state that had been modified prior to attempting nested VMEnter. A sample failure that occurs due to stale vcpu.arch state is a fault of some form while emulating an LGDT (due to emulated UMIP) from L1 after a failed VMEntry to L3, in this case when running the KVM unit test test_tpr_threshold_values in L1. L0 also hits a WARN in this case due to a stale arch.cr4.UMIP. L1: BUG: unable to handle kernel paging request at ffffc90000663b9e PGD 276512067 P4D 276512067 PUD 276513067 PMD 274efa067 PTE 8000000271de2163 Oops: 0009 [#1] SMP CPU: 5 PID: 12495 Comm: qemu-system-x86 Tainted: G W 4.18.0-rc2+ #2 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015 RIP: 0010:native_load_gdt+0x0/0x10 ... Call Trace: load_fixmap_gdt+0x22/0x30 __vmx_load_host_state+0x10e/0x1c0 [kvm_intel] vmx_switch_vmcs+0x2d/0x50 [kvm_intel] nested_vmx_vmexit+0x222/0x9c0 [kvm_intel] vmx_handle_exit+0x246/0x15a0 [kvm_intel] kvm_arch_vcpu_ioctl_run+0x850/0x1830 [kvm] kvm_vcpu_ioctl+0x3a1/0x5c0 [kvm] do_vfs_ioctl+0x9f/0x600 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4f/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 L0: WARNING: CPU: 2 PID: 3529 at arch/x86/kvm/vmx.c:6618 handle_desc+0x28/0x30 [kvm_intel] ... CPU: 2 PID: 3529 Comm: qemu-system-x86 Not tainted 4.17.2-coffee+ #76 Hardware name: Intel Corporation Kabylake Client platform/KBL S RIP: 0010:handle_desc+0x28/0x30 [kvm_intel] ... Call Trace: kvm_arch_vcpu_ioctl_run+0x863/0x1840 [kvm] kvm_vcpu_ioctl+0x3a1/0x5c0 [kvm] do_vfs_ioctl+0x9f/0x5e0 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x49/0xf0 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: 5af4157388ad (KVM: nVMX: Fix mmu context after VMLAUNCH/VMRESUME failure) Fixes: 4f350c6dbcb9 (kvm: nVMX: Handle deferred early VMLAUNCH/VMRESUME failure properly) Cc: Jim Mattson Cc: Krish Sadhukhan Cc: Paolo Bonzini Cc: Radim KrÄmář Cc: Wanpeng Li Signed-off-by: Sean Christopherson Signed-off-by: Paolo Bonzini Signed-off-by: Sasha Levin --- arch/x86/kvm/vmx.c | 173 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 153 insertions(+), 20 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4bd878c9f7d2..90b7eee6d0f9 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11846,24 +11846,6 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, kvm_clear_interrupt_queue(vcpu); } -static void load_vmcs12_mmu_host_state(struct kvm_vcpu *vcpu, - struct vmcs12 *vmcs12) -{ - u32 entry_failure_code; - - nested_ept_uninit_mmu_context(vcpu); - - /* - * Only PDPTE load can fail as the value of cr3 was checked on entry and - * couldn't have changed. - */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) - nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); - - if (!enable_ept) - vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; -} - /* * A part of what we need to when the nested L2 guest exits and we want to * run its L1 parent, is to reset L1's guest state to the host state specified @@ -11877,6 +11859,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; + u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -11903,7 +11886,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); vmx_set_cr4(vcpu, vmcs12->host_cr4); - load_vmcs12_mmu_host_state(vcpu, vmcs12); + nested_ept_uninit_mmu_context(vcpu); + + /* + * Only PDPTE load can fail as the value of cr3 was checked on entry and + * couldn't have changed. + */ + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); + + if (!enable_ept) + vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; if (enable_vpid) { /* @@ -11994,6 +11987,140 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); } +static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx) +{ + struct shared_msr_entry *efer_msr; + unsigned int i; + + if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER) + return vmcs_read64(GUEST_IA32_EFER); + + if (cpu_has_load_ia32_efer) + return host_efer; + + for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) { + if (vmx->msr_autoload.guest.val[i].index == MSR_EFER) + return vmx->msr_autoload.guest.val[i].value; + } + + efer_msr = find_msr_entry(vmx, MSR_EFER); + if (efer_msr) + return efer_msr->data; + + return host_efer; +} + +static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmx_msr_entry g, h; + struct msr_data msr; + gpa_t gpa; + u32 i, j; + + vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT); + + if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + /* + * L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set + * as vmcs01.GUEST_DR7 contains a userspace defined value + * and vcpu->arch.dr7 is not squirreled away before the + * nested VMENTER (not worth adding a variable in nested_vmx). + */ + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + kvm_set_dr(vcpu, 7, DR7_FIXED_1); + else + WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7))); + } + + /* + * Note that calling vmx_set_{efer,cr0,cr4} is important as they + * handle a variety of side effects to KVM's software model. + */ + vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx)); + + vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; + vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW)); + + vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); + vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); + + nested_ept_uninit_mmu_context(vcpu); + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); + + /* + * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs + * from vmcs01 (if necessary). The PDPTRs are not loaded on + * VMFail, like everything else we just need to ensure our + * software model is up-to-date. + */ + ept_save_pdptrs(vcpu); + + kvm_mmu_reset_context(vcpu); + + if (cpu_has_vmx_msr_bitmap()) + vmx_update_msr_bitmap(vcpu); + + /* + * This nasty bit of open coding is a compromise between blindly + * loading L1's MSRs using the exit load lists (incorrect emulation + * of VMFail), leaving the nested VM's MSRs in the software model + * (incorrect behavior) and snapshotting the modified MSRs (too + * expensive since the lists are unbound by hardware). For each + * MSR that was (prematurely) loaded from the nested VMEntry load + * list, reload it from the exit load list if it exists and differs + * from the guest value. The intent is to stuff host state as + * silently as possible, not to fully process the exit load list. + */ + msr.host_initiated = false; + for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) { + gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g)); + if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) { + pr_debug_ratelimited( + "%s read MSR index failed (%u, 0x%08llx)\n", + __func__, i, gpa); + goto vmabort; + } + + for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) { + gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h)); + if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) { + pr_debug_ratelimited( + "%s read MSR failed (%u, 0x%08llx)\n", + __func__, j, gpa); + goto vmabort; + } + if (h.index != g.index) + continue; + if (h.value == g.value) + break; + + if (nested_vmx_load_msr_check(vcpu, &h)) { + pr_debug_ratelimited( + "%s check failed (%u, 0x%x, 0x%x)\n", + __func__, j, h.index, h.reserved); + goto vmabort; + } + + msr.index = h.index; + msr.data = h.value; + if (kvm_set_msr(vcpu, &msr)) { + pr_debug_ratelimited( + "%s WRMSR failed (%u, 0x%x, 0x%llx)\n", + __func__, j, h.index, h.value); + goto vmabort; + } + } + } + + return; + +vmabort: + nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL); +} + /* * Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1 * and modify vmcs12 to make it see what it would expect to see there if @@ -12126,7 +12253,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - load_vmcs12_mmu_host_state(vcpu, vmcs12); + /* + * Restore L1's host state to KVM's software model. We're here + * because a consistency check was caught by hardware, which + * means some amount of guest state has been propagated to KVM's + * model and needs to be unwound to the host's state. + */ + nested_vmx_restore_host_state(vcpu); /* * The emulated instruction was already skipped in From 82017e26e51596ee577171a33f357377ec6513b5 Mon Sep 17 00:00:00 2001 From: "ndesaulniers@google.com" Date: Mon, 15 Oct 2018 10:22:21 -0700 Subject: [PATCH 52/93] compiler.h: update definition of unreachable() [ Upstream commit fe0640eb30b7da261ae84d252ed9ed3c7e68dfd8 ] Fixes the objtool warning seen with Clang: arch/x86/mm/fault.o: warning: objtool: no_context()+0x220: unreachable instruction Fixes commit 815f0ddb346c ("include/linux/compiler*.h: make compiler-*.h mutually exclusive") Josh noted that the fallback definition was meant to work around a pre-gcc-4.6 bug. GCC still needs to work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365, so compiler-gcc.h defines its own version of unreachable(). Clang and ICC can use this shared definition. Link: https://github.com/ClangBuiltLinux/linux/issues/204 Suggested-by: Andy Lutomirski Suggested-by: Josh Poimboeuf Tested-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Miguel Ojeda Signed-off-by: Sasha Levin --- include/linux/compiler.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index a704d032713b..67c3934fb9ed 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -119,7 +119,10 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define ASM_UNREACHABLE #endif #ifndef unreachable -# define unreachable() do { annotate_reachable(); do { } while (1); } while (0) +# define unreachable() do { \ + annotate_unreachable(); \ + __builtin_unreachable(); \ +} while (0) #endif /* From 1d41cd12307f96ebefbd856cfe26719c23d8d8ec Mon Sep 17 00:00:00 2001 From: Steve French Date: Fri, 19 Oct 2018 01:58:22 -0500 Subject: [PATCH 53/93] cifs: fallback to older infolevels on findfirst queryinfo retry [ Upstream commit 3b7960caceafdfc2cdfe2850487f8d091eb41144 ] In cases where queryinfo fails, we have cases in cifs (vers=1.0) where with backupuid mounts we retry the query info with findfirst. This doesn't work to some NetApp servers which don't support WindowsXP (and later) infolevel 261 (SMB_FIND_FILE_ID_FULL_DIR_INFO) so in this case use other info levels (in this case it will usually be level 257, SMB_FIND_FILE_DIRECTORY_INFO). (Also fixes some indentation) See kernel bugzilla 201435 Signed-off-by: Steve French Signed-off-by: Sasha Levin --- fs/cifs/inode.c | 67 +++++++++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 30 deletions(-) diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index a90a637ae79a..6fd4a6a75234 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -779,43 +779,50 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } else if ((rc == -EACCES) && backup_cred(cifs_sb) && (strcmp(server->vals->version_string, SMB1_VERSION_STRING) == 0)) { - /* - * For SMB2 and later the backup intent flag is already - * sent if needed on open and there is no path based - * FindFirst operation to use to retry with - */ + /* + * For SMB2 and later the backup intent flag is already + * sent if needed on open and there is no path based + * FindFirst operation to use to retry with + */ - srchinf = kzalloc(sizeof(struct cifs_search_info), - GFP_KERNEL); - if (srchinf == NULL) { - rc = -ENOMEM; - goto cgii_exit; - } + srchinf = kzalloc(sizeof(struct cifs_search_info), + GFP_KERNEL); + if (srchinf == NULL) { + rc = -ENOMEM; + goto cgii_exit; + } - srchinf->endOfSearch = false; + srchinf->endOfSearch = false; + if (tcon->unix_ext) + srchinf->info_level = SMB_FIND_FILE_UNIX; + else if ((tcon->ses->capabilities & + tcon->ses->server->vals->cap_nt_find) == 0) + srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD; + else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO; + else /* no srvino useful for fallback to some netapp */ + srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO; - srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | - CIFS_SEARCH_CLOSE_AT_END | - CIFS_SEARCH_BACKUP_SEARCH; + srchflgs = CIFS_SEARCH_CLOSE_ALWAYS | + CIFS_SEARCH_CLOSE_AT_END | + CIFS_SEARCH_BACKUP_SEARCH; - rc = CIFSFindFirst(xid, tcon, full_path, - cifs_sb, NULL, srchflgs, srchinf, false); - if (!rc) { - data = - (FILE_ALL_INFO *)srchinf->srch_entries_start; + rc = CIFSFindFirst(xid, tcon, full_path, + cifs_sb, NULL, srchflgs, srchinf, false); + if (!rc) { + data = (FILE_ALL_INFO *)srchinf->srch_entries_start; - cifs_dir_info_to_fattr(&fattr, - (FILE_DIRECTORY_INFO *)data, cifs_sb); - fattr.cf_uniqueid = le64_to_cpu( - ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); - validinum = true; + cifs_dir_info_to_fattr(&fattr, + (FILE_DIRECTORY_INFO *)data, cifs_sb); + fattr.cf_uniqueid = le64_to_cpu( + ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId); + validinum = true; - cifs_buf_release(srchinf->ntwrk_buf_start); - } - kfree(srchinf); - if (rc) - goto cgii_exit; + cifs_buf_release(srchinf->ntwrk_buf_start); + } + kfree(srchinf); + if (rc) + goto cgii_exit; } else goto cgii_exit; From 507f2f17e044b3efc6516ad0ae66fb244752c569 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Wed, 17 Oct 2018 13:23:55 +0200 Subject: [PATCH 54/93] kernel: hung_task.c: disable on suspend [ Upstream commit a1c6ca3c6de763459a6e93b644ec6518c890ba1c ] It is possible to observe hung_task complaints when system goes to suspend-to-idle state: # echo freeze > /sys/power/state PM: Syncing filesystems ... done. Freezing user space processes ... (elapsed 0.001 seconds) done. OOM killer disabled. Freezing remaining freezable tasks ... (elapsed 0.002 seconds) done. sd 0:0:0:0: [sda] Synchronizing SCSI cache INFO: task bash:1569 blocked for more than 120 seconds. Not tainted 4.19.0-rc3_+ #687 "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. bash D 0 1569 604 0x00000000 Call Trace: ? __schedule+0x1fe/0x7e0 schedule+0x28/0x80 suspend_devices_and_enter+0x4ac/0x750 pm_suspend+0x2c0/0x310 Register a PM notifier to disable the detector on suspend and re-enable back on wakeup. Signed-off-by: Vitaly Kuznetsov Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- kernel/hung_task.c | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/kernel/hung_task.c b/kernel/hung_task.c index f9aaf4994062..2e4869fa66c9 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -232,6 +233,28 @@ void reset_hung_task_detector(void) } EXPORT_SYMBOL_GPL(reset_hung_task_detector); +static bool hung_detector_suspended; + +static int hungtask_pm_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + switch (action) { + case PM_SUSPEND_PREPARE: + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + hung_detector_suspended = true; + break; + case PM_POST_SUSPEND: + case PM_POST_HIBERNATION: + case PM_POST_RESTORE: + hung_detector_suspended = false; + break; + default: + break; + } + return NOTIFY_OK; +} + /* * kthread which checks for tasks stuck in D state */ @@ -246,7 +269,8 @@ static int watchdog(void *dummy) long t = hung_timeout_jiffies(hung_last_checked, timeout); if (t <= 0) { - if (!atomic_xchg(&reset_hung_task, 0)) + if (!atomic_xchg(&reset_hung_task, 0) && + !hung_detector_suspended) check_hung_uninterruptible_tasks(timeout); hung_last_checked = jiffies; continue; @@ -260,6 +284,10 @@ static int watchdog(void *dummy) static int __init hung_task_init(void) { atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + + /* Disable hung task detector on suspend */ + pm_notifier(hungtask_pm_notify, 0); + watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); return 0; From 92562a9fc5dfab7dc2102ca81d96a067d104cccc Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 16 Feb 2019 14:51:25 +0100 Subject: [PATCH 55/93] crypto: sha256/arm - fix crash bug in Thumb2 build [ Upstream commit 69216a545cf81b2b32d01948f7039315abaf75a0 ] The SHA256 code we adopted from the OpenSSL project uses a rather peculiar way to take the address of the round constant table: it takes the address of the sha256_block_data_order() routine, and substracts a constant known quantity to arrive at the base of the table, which is emitted by the same assembler code right before the routine's entry point. However, recent versions of binutils have helpfully changed the behavior of references emitted via an ADR instruction when running in Thumb2 mode: it now takes the Thumb execution mode bit into account, which is bit 0 af the address. This means the produced table address also has bit 0 set, and so we end up with an address value pointing 1 byte past the start of the table, which results in crashes such as Unable to handle kernel paging request at virtual address bf825000 pgd = 42f44b11 [bf825000] *pgd=80000040206003, *pmd=5f1bd003, *pte=00000000 Internal error: Oops: 207 [#1] PREEMPT SMP THUMB2 Modules linked in: sha256_arm(+) sha1_arm_ce sha1_arm ... CPU: 7 PID: 396 Comm: cryptomgr_test Not tainted 5.0.0-rc6+ #144 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 PC is at sha256_block_data_order+0xaaa/0xb30 [sha256_arm] LR is at __this_module+0x17fd/0xffffe800 [sha256_arm] pc : [] lr : [] psr: 800b0033 sp : ebc8bbe8 ip : faaabe1c fp : 2fdd3433 r10: 4c5f1692 r9 : e43037df r8 : b04b0a5a r7 : c369d722 r6 : 39c3693e r5 : 7a013189 r4 : 1580d26b r3 : 8762a9b0 r2 : eea9c2cd r1 : 3e9ab536 r0 : 1dea4ae7 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment user Control: 70c5383d Table: 6b8467c0 DAC: dbadc0de Process cryptomgr_test (pid: 396, stack limit = 0x69e1fe23) Stack: (0xebc8bbe8 to 0xebc8c000) ... unwind: Unknown symbol address bf820bca unwind: Index not found bf820bca Code: 441a ea80 40f9 440a (f85e) 3b04 ---[ end trace e560cce92700ef8a ]--- Given that this affects older kernels as well, in case they are built with a recent toolchain, apply a minimal backportable fix, which is to emit another non-code label at the start of the routine, and reference that instead. (This is similar to the current upstream state of this file in OpenSSL) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/arm/crypto/sha256-armv4.pl | 3 ++- arch/arm/crypto/sha256-core.S_shipped | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index fac0533ea633..f64e8413ab9a 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -205,10 +205,11 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +.Lsha256_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order #else - adr r3,sha256_block_data_order + adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 555a1a8eec90..72c248081d27 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -86,10 +86,11 @@ K256: .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: +.Lsha256_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha256_block_data_order #else - adr r3,sha256_block_data_order + adr r3,.Lsha256_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap From 29b56343c85169c42e31fbf3b0facdb01d034aa1 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Sat, 16 Feb 2019 14:51:26 +0100 Subject: [PATCH 56/93] crypto: sha512/arm - fix crash bug in Thumb2 build [ Upstream commit c64316502008064c158fa40cc250665e461b0f2a ] The SHA512 code we adopted from the OpenSSL project uses a rather peculiar way to take the address of the round constant table: it takes the address of the sha256_block_data_order() routine, and substracts a constant known quantity to arrive at the base of the table, which is emitted by the same assembler code right before the routine's entry point. However, recent versions of binutils have helpfully changed the behavior of references emitted via an ADR instruction when running in Thumb2 mode: it now takes the Thumb execution mode bit into account, which is bit 0 af the address. This means the produced table address also has bit 0 set, and so we end up with an address value pointing 1 byte past the start of the table, which results in crashes such as Unable to handle kernel paging request at virtual address bf825000 pgd = 42f44b11 [bf825000] *pgd=80000040206003, *pmd=5f1bd003, *pte=00000000 Internal error: Oops: 207 [#1] PREEMPT SMP THUMB2 Modules linked in: sha256_arm(+) sha1_arm_ce sha1_arm ... CPU: 7 PID: 396 Comm: cryptomgr_test Not tainted 5.0.0-rc6+ #144 Hardware name: QEMU KVM Virtual Machine, BIOS 0.0.0 02/06/2015 PC is at sha256_block_data_order+0xaaa/0xb30 [sha256_arm] LR is at __this_module+0x17fd/0xffffe800 [sha256_arm] pc : [] lr : [] psr: 800b0033 sp : ebc8bbe8 ip : faaabe1c fp : 2fdd3433 r10: 4c5f1692 r9 : e43037df r8 : b04b0a5a r7 : c369d722 r6 : 39c3693e r5 : 7a013189 r4 : 1580d26b r3 : 8762a9b0 r2 : eea9c2cd r1 : 3e9ab536 r0 : 1dea4ae7 Flags: Nzcv IRQs on FIQs on Mode SVC_32 ISA Thumb Segment user Control: 70c5383d Table: 6b8467c0 DAC: dbadc0de Process cryptomgr_test (pid: 396, stack limit = 0x69e1fe23) Stack: (0xebc8bbe8 to 0xebc8c000) ... unwind: Unknown symbol address bf820bca unwind: Index not found bf820bca Code: 441a ea80 40f9 440a (f85e) 3b04 ---[ end trace e560cce92700ef8a ]--- Given that this affects older kernels as well, in case they are built with a recent toolchain, apply a minimal backportable fix, which is to emit another non-code label at the start of the routine, and reference that instead. (This is similar to the current upstream state of this file in OpenSSL) Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- arch/arm/crypto/sha512-armv4.pl | 3 ++- arch/arm/crypto/sha512-core.S_shipped | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index a2b11a844357..5fe336420bcf 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -267,10 +267,11 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order #else - adr r3,sha512_block_data_order + adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 3694c4d4ca2b..de9bd7f55242 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -134,10 +134,11 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) .global sha512_block_data_order .type sha512_block_data_order,%function sha512_block_data_order: +.Lsha512_block_data_order: #if __ARM_ARCH__<7 sub r3,pc,#8 @ sha512_block_data_order #else - adr r3,sha512_block_data_order + adr r3,.Lsha512_block_data_order #endif #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ldr r12,.LOPENSSL_armcap From e2f0e69fd9969b9940c3d8c48316dbc5ec16dd99 Mon Sep 17 00:00:00 2001 From: Julia Cartwright Date: Wed, 20 Feb 2019 16:46:31 +0000 Subject: [PATCH 57/93] iommu/dmar: Fix buffer overflow during PCI bus notification [ Upstream commit cffaaf0c816238c45cd2d06913476c83eb50f682 ] Commit 57384592c433 ("iommu/vt-d: Store bus information in RMRR PCI device path") changed the type of the path data, however, the change in path type was not reflected in size calculations. Update to use the correct type and prevent a buffer overflow. This bug manifests in systems with deep PCI hierarchies, and can lead to an overflow of the static allocated buffer (dmar_pci_notify_info_buf), or can lead to overflow of slab-allocated data. BUG: KASAN: global-out-of-bounds in dmar_alloc_pci_notify_info+0x1d5/0x2e0 Write of size 1 at addr ffffffff90445d80 by task swapper/0/1 CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.14.87-rt49-02406-gd0a0e96 #1 Call Trace: ? dump_stack+0x46/0x59 ? print_address_description+0x1df/0x290 ? dmar_alloc_pci_notify_info+0x1d5/0x2e0 ? kasan_report+0x256/0x340 ? dmar_alloc_pci_notify_info+0x1d5/0x2e0 ? e820__memblock_setup+0xb0/0xb0 ? dmar_dev_scope_init+0x424/0x48f ? __down_write_common+0x1ec/0x230 ? dmar_dev_scope_init+0x48f/0x48f ? dmar_free_unused_resources+0x109/0x109 ? cpumask_next+0x16/0x20 ? __kmem_cache_create+0x392/0x430 ? kmem_cache_create+0x135/0x2f0 ? e820__memblock_setup+0xb0/0xb0 ? intel_iommu_init+0x170/0x1848 ? _raw_spin_unlock_irqrestore+0x32/0x60 ? migrate_enable+0x27a/0x5b0 ? sched_setattr+0x20/0x20 ? migrate_disable+0x1fc/0x380 ? task_rq_lock+0x170/0x170 ? try_to_run_init_process+0x40/0x40 ? locks_remove_file+0x85/0x2f0 ? dev_prepare_static_identity_mapping+0x78/0x78 ? rt_spin_unlock+0x39/0x50 ? lockref_put_or_lock+0x2a/0x40 ? dput+0x128/0x2f0 ? __rcu_read_unlock+0x66/0x80 ? __fput+0x250/0x300 ? __rcu_read_lock+0x1b/0x30 ? mntput_no_expire+0x38/0x290 ? e820__memblock_setup+0xb0/0xb0 ? pci_iommu_init+0x25/0x63 ? pci_iommu_init+0x25/0x63 ? do_one_initcall+0x7e/0x1c0 ? initcall_blacklisted+0x120/0x120 ? kernel_init_freeable+0x27b/0x307 ? rest_init+0xd0/0xd0 ? kernel_init+0xf/0x120 ? rest_init+0xd0/0xd0 ? ret_from_fork+0x1f/0x40 The buggy address belongs to the variable: dmar_pci_notify_info_buf+0x40/0x60 Fixes: 57384592c433 ("iommu/vt-d: Store bus information in RMRR PCI device path") Signed-off-by: Julia Cartwright Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c0d1c4db5794..38d0128b8135 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -144,7 +144,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) for (tmp = dev; tmp; tmp = tmp->bus->self) level++; - size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); + size = sizeof(*info) + level * sizeof(info->path[0]); if (size <= sizeof(dmar_pci_notify_info_buf)) { info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; } else { From 201aee309a2964a3f8f8e82a57b7e29305c4b610 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 21 Oct 2018 21:36:14 +0300 Subject: [PATCH 58/93] soc/tegra: pmc: Drop locking from tegra_powergate_is_powered() [ Upstream commit b6e1fd17a38bd1d97c11d69fd3207b3ef9bfa4b3 ] This fixes splats like the one below if CONFIG_DEBUG_ATOMIC_SLEEP=y and machine (Tegra30) booted with SMP=n or all secondary CPU's are put offline. Locking isn't needed because it protects atomic operation. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:254 in_atomic(): 1, irqs_disabled(): 128, pid: 0, name: swapper/0 CPU: 0 PID: 0 Comm: swapper/0 Tainted: G C 4.18.0-next-20180821-00180-gc3ebb6544e44-dirty #823 Hardware name: NVIDIA Tegra SoC (Flattened Device Tree) [] (unwind_backtrace) from [] (show_stack+0x20/0x24) [] (show_stack) from [] (dump_stack+0x94/0xa8) [] (dump_stack) from [] (___might_sleep+0x13c/0x174) [] (___might_sleep) from [] (__might_sleep+0x70/0xa8) [] (__might_sleep) from [] (mutex_lock+0x2c/0x70) [] (mutex_lock) from [] (tegra_powergate_is_powered+0x44/0xa8) [] (tegra_powergate_is_powered) from [] (tegra30_cpu_rail_off_ready+0x30/0x74) [] (tegra30_cpu_rail_off_ready) from [] (tegra30_idle_lp2+0xa0/0x108) [] (tegra30_idle_lp2) from [] (cpuidle_enter_state+0x140/0x540) [] (cpuidle_enter_state) from [] (cpuidle_enter+0x40/0x4c) [] (cpuidle_enter) from [] (call_cpuidle+0x30/0x48) [] (call_cpuidle) from [] (do_idle+0x238/0x28c) [] (do_idle) from [] (cpu_startup_entry+0x28/0x2c) [] (cpu_startup_entry) from [] (rest_init+0xd8/0xdc) [] (rest_init) from [] (start_kernel+0x41c/0x430) Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Signed-off-by: Thierry Reding Signed-off-by: Sasha Levin --- drivers/soc/tegra/pmc.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/soc/tegra/pmc.c b/drivers/soc/tegra/pmc.c index 7e9ef3431bea..2422ed56895a 100644 --- a/drivers/soc/tegra/pmc.c +++ b/drivers/soc/tegra/pmc.c @@ -521,16 +521,10 @@ EXPORT_SYMBOL(tegra_powergate_power_off); */ int tegra_powergate_is_powered(unsigned int id) { - int status; - if (!tegra_powergate_is_valid(id)) return -EINVAL; - mutex_lock(&pmc->powergates_lock); - status = tegra_powergate_state(id); - mutex_unlock(&pmc->powergates_lock); - - return status; + return tegra_powergate_state(id); } /** From b035faf50706d45ce8a5967b4efc1ccf5559f655 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 7 Nov 2018 20:14:10 +0000 Subject: [PATCH 59/93] lkdtm: Print real addresses [ Upstream commit 4c411157a42f122051ae3469bee0b5cabe89e139 ] Today, when doing a lkdtm test before the readiness of the random generator, (ptrval) is printed instead of the address at which it perform the fault: [ 1597.337030] lkdtm: Performing direct entry EXEC_USERSPACE [ 1597.337142] lkdtm: attempting ok execution at (ptrval) [ 1597.337398] lkdtm: attempting bad execution at (ptrval) [ 1597.337460] kernel tried to execute user page (77858000) -exploit attempt? (uid: 0) [ 1597.344769] Unable to handle kernel paging request for instruction fetch [ 1597.351392] Faulting instruction address: 0x77858000 [ 1597.356312] Oops: Kernel access of bad area, sig: 11 [#1] If the lkdtm test is done later on, it prints an hashed address. In both cases this is pointless. The purpose of the test is to ensure the kernel generates an Oops at the expected address, so real addresses needs to be printed. This patch fixes that. Signed-off-by: Christophe Leroy Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/lkdtm_perms.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/drivers/misc/lkdtm_perms.c b/drivers/misc/lkdtm_perms.c index 53b85c9d16b8..fa54add6375a 100644 --- a/drivers/misc/lkdtm_perms.c +++ b/drivers/misc/lkdtm_perms.c @@ -47,7 +47,7 @@ static noinline void execute_location(void *dst, bool write) { void (*func)(void) = dst; - pr_info("attempting ok execution at %p\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing); do_nothing(); if (write == CODE_WRITE) { @@ -55,7 +55,7 @@ static noinline void execute_location(void *dst, bool write) flush_icache_range((unsigned long)dst, (unsigned long)dst + EXEC_SIZE); } - pr_info("attempting bad execution at %p\n", func); + pr_info("attempting bad execution at %px\n", func); func(); } @@ -66,14 +66,14 @@ static void execute_user_location(void *dst) /* Intentionally crossing kernel/user memory boundary. */ void (*func)(void) = dst; - pr_info("attempting ok execution at %p\n", do_nothing); + pr_info("attempting ok execution at %px\n", do_nothing); do_nothing(); copied = access_process_vm(current, (unsigned long)dst, do_nothing, EXEC_SIZE, FOLL_WRITE); if (copied < EXEC_SIZE) return; - pr_info("attempting bad execution at %p\n", func); + pr_info("attempting bad execution at %px\n", func); func(); } @@ -82,7 +82,7 @@ void lkdtm_WRITE_RO(void) /* Explicitly cast away "const" for the test. */ unsigned long *ptr = (unsigned long *)&rodata; - pr_info("attempting bad rodata write at %p\n", ptr); + pr_info("attempting bad rodata write at %px\n", ptr); *ptr ^= 0xabcd1234; } @@ -100,7 +100,7 @@ void lkdtm_WRITE_RO_AFTER_INIT(void) return; } - pr_info("attempting bad ro_after_init write at %p\n", ptr); + pr_info("attempting bad ro_after_init write at %px\n", ptr); *ptr ^= 0xabcd1234; } @@ -112,7 +112,7 @@ void lkdtm_WRITE_KERN(void) size = (unsigned long)do_overwritten - (unsigned long)do_nothing; ptr = (unsigned char *)do_overwritten; - pr_info("attempting bad %zu byte write at %p\n", size, ptr); + pr_info("attempting bad %zu byte write at %px\n", size, ptr); memcpy(ptr, (unsigned char *)do_nothing, size); flush_icache_range((unsigned long)ptr, (unsigned long)(ptr + size)); @@ -185,11 +185,11 @@ void lkdtm_ACCESS_USERSPACE(void) ptr = (unsigned long *)user_addr; - pr_info("attempting bad read at %p\n", ptr); + pr_info("attempting bad read at %px\n", ptr); tmp = *ptr; tmp += 0xc0dec0de; - pr_info("attempting bad write at %p\n", ptr); + pr_info("attempting bad write at %px\n", ptr); *ptr = tmp; vm_munmap(user_addr, PAGE_SIZE); From f2778b3522da89d5fd4a95aa95dc6d01a45a3710 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 14 Dec 2018 15:26:20 +0000 Subject: [PATCH 60/93] lkdtm: Add tests for NULL pointer dereference [ Upstream commit 59a12205d3c32aee4c13ca36889fdf7cfed31126 ] Introduce lkdtm tests for NULL pointer dereference: check access or exec at NULL address, since these errors tend to be reported differently from the general fault error text. For example from x86: pr_alert("BUG: unable to handle kernel %s at %px\n", address < PAGE_SIZE ? "NULL pointer dereference" : "paging request", (void *)address); Signed-off-by: Christophe Leroy Signed-off-by: Kees Cook Signed-off-by: Sasha Levin --- drivers/misc/lkdtm.h | 2 ++ drivers/misc/lkdtm_core.c | 2 ++ drivers/misc/lkdtm_perms.c | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+) diff --git a/drivers/misc/lkdtm.h b/drivers/misc/lkdtm.h index 687a0dbbe199..614612325332 100644 --- a/drivers/misc/lkdtm.h +++ b/drivers/misc/lkdtm.h @@ -45,7 +45,9 @@ void lkdtm_EXEC_KMALLOC(void); void lkdtm_EXEC_VMALLOC(void); void lkdtm_EXEC_RODATA(void); void lkdtm_EXEC_USERSPACE(void); +void lkdtm_EXEC_NULL(void); void lkdtm_ACCESS_USERSPACE(void); +void lkdtm_ACCESS_NULL(void); /* lkdtm_refcount.c */ void lkdtm_REFCOUNT_INC_OVERFLOW(void); diff --git a/drivers/misc/lkdtm_core.c b/drivers/misc/lkdtm_core.c index 981b3ef71e47..199271708aed 100644 --- a/drivers/misc/lkdtm_core.c +++ b/drivers/misc/lkdtm_core.c @@ -220,7 +220,9 @@ struct crashtype crashtypes[] = { CRASHTYPE(EXEC_VMALLOC), CRASHTYPE(EXEC_RODATA), CRASHTYPE(EXEC_USERSPACE), + CRASHTYPE(EXEC_NULL), CRASHTYPE(ACCESS_USERSPACE), + CRASHTYPE(ACCESS_NULL), CRASHTYPE(WRITE_RO), CRASHTYPE(WRITE_RO_AFTER_INIT), CRASHTYPE(WRITE_KERN), diff --git a/drivers/misc/lkdtm_perms.c b/drivers/misc/lkdtm_perms.c index fa54add6375a..62f76d506f04 100644 --- a/drivers/misc/lkdtm_perms.c +++ b/drivers/misc/lkdtm_perms.c @@ -164,6 +164,11 @@ void lkdtm_EXEC_USERSPACE(void) vm_munmap(user_addr, PAGE_SIZE); } +void lkdtm_EXEC_NULL(void) +{ + execute_location(NULL, CODE_AS_IS); +} + void lkdtm_ACCESS_USERSPACE(void) { unsigned long user_addr, tmp = 0; @@ -195,6 +200,19 @@ void lkdtm_ACCESS_USERSPACE(void) vm_munmap(user_addr, PAGE_SIZE); } +void lkdtm_ACCESS_NULL(void) +{ + unsigned long tmp; + unsigned long *ptr = (unsigned long *)NULL; + + pr_info("attempting bad read at %px\n", ptr); + tmp = *ptr; + tmp += 0xc0dec0de; + + pr_info("attempting bad write at %px\n", ptr); + *ptr = tmp; +} + void __init lkdtm_perms_init(void) { /* Make sure we can write to __ro_after_init values during __init */ From 61bea6ad5c99a3d1adb28b17d25b21d71998b65b Mon Sep 17 00:00:00 2001 From: "Hsin-Yi, Wang" Date: Wed, 9 Jan 2019 14:59:22 +0800 Subject: [PATCH 61/93] drm/panel: panel-innolux: set display off in innolux_panel_unprepare [ Upstream commit 46f3ceaffa81e846677bca8668e0ad40e643cffd ] Move mipi_dsi_dcs_set_display_off() from innolux_panel_disable() to innolux_panel_unprepare(), so they are consistent with innolux_panel_enable() and innolux_panel_prepare(). This also fixes some mode check and irq timeout issue in MTK dsi code. Since some dsi code (e.g. mtk_dsi) have following call trace: 1. drm_panel_disable(), which calls innolux_panel_disable() 2. switch to cmd mode 3. drm_panel_unprepare(), which calls innolux_panel_unprepare() However, mtk_dsi needs to be in cmd mode to be able to send commands (e.g. mipi_dsi_dcs_set_display_off() and mipi_dsi_dcs_enter_sleep_mode()), so we need these functions to be called after the switch to cmd mode happens, i.e. in innolux_panel_unprepare. Signed-off-by: Hsin-Yi, Wang Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/20190109065922.231753-1-hsinyi@chromium.org Signed-off-by: Sasha Levin --- drivers/gpu/drm/panel/panel-innolux-p079zca.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/gpu/drm/panel/panel-innolux-p079zca.c b/drivers/gpu/drm/panel/panel-innolux-p079zca.c index 6ba93449fcfb..58b67e0cc385 100644 --- a/drivers/gpu/drm/panel/panel-innolux-p079zca.c +++ b/drivers/gpu/drm/panel/panel-innolux-p079zca.c @@ -40,7 +40,6 @@ static inline struct innolux_panel *to_innolux_panel(struct drm_panel *panel) static int innolux_panel_disable(struct drm_panel *panel) { struct innolux_panel *innolux = to_innolux_panel(panel); - int err; if (!innolux->enabled) return 0; @@ -48,11 +47,6 @@ static int innolux_panel_disable(struct drm_panel *panel) innolux->backlight->props.power = FB_BLANK_POWERDOWN; backlight_update_status(innolux->backlight); - err = mipi_dsi_dcs_set_display_off(innolux->link); - if (err < 0) - DRM_DEV_ERROR(panel->dev, "failed to set display off: %d\n", - err); - innolux->enabled = false; return 0; @@ -66,6 +60,11 @@ static int innolux_panel_unprepare(struct drm_panel *panel) if (!innolux->prepared) return 0; + err = mipi_dsi_dcs_set_display_off(innolux->link); + if (err < 0) + DRM_DEV_ERROR(panel->dev, "failed to set display off: %d\n", + err); + err = mipi_dsi_dcs_enter_sleep_mode(innolux->link); if (err < 0) { DRM_DEV_ERROR(panel->dev, "failed to enter sleep mode: %d\n", From 56828309878bae0d8ff5e9851dca080cd7eea224 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Wed, 23 Jan 2019 12:59:42 +0100 Subject: [PATCH 62/93] crypto: axis - fix for recursive locking from bottom half [ Upstream commit c34a83820f59bb275e5f2d55cd5ea99c64f6ef23 ] Clients may submit a new requests from the completion callback context. The driver was not prepared to receive a request in this state because it already held the request queue lock and a recursive lock error is triggered. Now all completions are queued up until we are ready to drop the queue lock and then delivered. The fault was triggered by TCP over an IPsec connection in the LTP test suite: LTP: starting tcp4_ipsec02 (tcp_ipsec.sh -p ah -m transport -s "100 1000 65535") BUG: spinlock recursion on CPU#1, genload/943 lock: 0xbf3c3094, .magic: dead4ead, .owner: genload/943, .owner_cpu: 1 CPU: 1 PID: 943 Comm: genload Tainted: G O 4.9.62-axis5-devel #6 Hardware name: Axis ARTPEC-6 Platform (unwind_backtrace) from [<8010d134>] (show_stack+0x18/0x1c) (show_stack) from [<803a289c>] (dump_stack+0x84/0x98) (dump_stack) from [<8016e164>] (do_raw_spin_lock+0x124/0x128) (do_raw_spin_lock) from [<804de1a4>] (artpec6_crypto_submit+0x2c/0xa0) (artpec6_crypto_submit) from [<804def38>] (artpec6_crypto_prepare_submit_hash+0xd0/0x54c) (artpec6_crypto_prepare_submit_hash) from [<7f3165f0>] (ah_output+0x2a4/0x3dc [ah4]) (ah_output [ah4]) from [<805df9bc>] (xfrm_output_resume+0x178/0x4a4) (xfrm_output_resume) from [<805d283c>] (xfrm4_output+0xac/0xbc) (xfrm4_output) from [<80587928>] (ip_queue_xmit+0x140/0x3b4) (ip_queue_xmit) from [<805a13b4>] (tcp_transmit_skb+0x4c4/0x95c) (tcp_transmit_skb) from [<8059f218>] (tcp_rcv_state_process+0xdf4/0xdfc) (tcp_rcv_state_process) from [<805a7530>] (tcp_v4_do_rcv+0x64/0x1ac) (tcp_v4_do_rcv) from [<805a9724>] (tcp_v4_rcv+0xa34/0xb74) (tcp_v4_rcv) from [<80581d34>] (ip_local_deliver_finish+0x78/0x2b0) (ip_local_deliver_finish) from [<8058259c>] (ip_local_deliver+0xe4/0x104) (ip_local_deliver) from [<805d23ec>] (xfrm4_transport_finish+0xf4/0x144) (xfrm4_transport_finish) from [<805df564>] (xfrm_input+0x4f4/0x74c) (xfrm_input) from [<804de420>] (artpec6_crypto_task+0x208/0x38c) (artpec6_crypto_task) from [<801271b0>] (tasklet_action+0x60/0xec) (tasklet_action) from [<801266d4>] (__do_softirq+0xcc/0x3a4) (__do_softirq) from [<80126d20>] (irq_exit+0xf4/0x15c) (irq_exit) from [<801741e8>] (__handle_domain_irq+0x68/0xbc) (__handle_domain_irq) from [<801014f0>] (gic_handle_irq+0x50/0x94) (gic_handle_irq) from [<80657370>] (__irq_usr+0x50/0x80) Signed-off-by: Lars Persson Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin --- drivers/crypto/axis/artpec6_crypto.c | 30 ++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c index 6eb5cb92b986..9f82e14983f6 100644 --- a/drivers/crypto/axis/artpec6_crypto.c +++ b/drivers/crypto/axis/artpec6_crypto.c @@ -284,6 +284,7 @@ enum artpec6_crypto_hash_flags { struct artpec6_crypto_req_common { struct list_head list; + struct list_head complete_in_progress; struct artpec6_crypto_dma_descriptors *dma; struct crypto_async_request *req; void (*complete)(struct crypto_async_request *req); @@ -2046,7 +2047,8 @@ static int artpec6_crypto_prepare_aead(struct aead_request *areq) return artpec6_crypto_dma_map_descs(common); } -static void artpec6_crypto_process_queue(struct artpec6_crypto *ac) +static void artpec6_crypto_process_queue(struct artpec6_crypto *ac, + struct list_head *completions) { struct artpec6_crypto_req_common *req; @@ -2057,7 +2059,7 @@ static void artpec6_crypto_process_queue(struct artpec6_crypto *ac) list_move_tail(&req->list, &ac->pending); artpec6_crypto_start_dma(req); - req->req->complete(req->req, -EINPROGRESS); + list_add_tail(&req->complete_in_progress, completions); } /* @@ -2087,6 +2089,11 @@ static void artpec6_crypto_task(unsigned long data) struct artpec6_crypto *ac = (struct artpec6_crypto *)data; struct artpec6_crypto_req_common *req; struct artpec6_crypto_req_common *n; + struct list_head complete_done; + struct list_head complete_in_progress; + + INIT_LIST_HEAD(&complete_done); + INIT_LIST_HEAD(&complete_in_progress); if (list_empty(&ac->pending)) { pr_debug("Spurious IRQ\n"); @@ -2120,19 +2127,30 @@ static void artpec6_crypto_task(unsigned long data) pr_debug("Completing request %p\n", req); - list_del(&req->list); + list_move_tail(&req->list, &complete_done); artpec6_crypto_dma_unmap_all(req); artpec6_crypto_copy_bounce_buffers(req); ac->pending_count--; artpec6_crypto_common_destroy(req); + } + + artpec6_crypto_process_queue(ac, &complete_in_progress); + + spin_unlock_bh(&ac->queue_lock); + + /* Perform the completion callbacks without holding the queue lock + * to allow new request submissions from the callbacks. + */ + list_for_each_entry_safe(req, n, &complete_done, list) { req->complete(req->req); } - artpec6_crypto_process_queue(ac); - - spin_unlock_bh(&ac->queue_lock); + list_for_each_entry_safe(req, n, &complete_in_progress, + complete_in_progress) { + req->req->complete(req->req, -EINPROGRESS); + } } static void artpec6_crypto_complete_crypto(struct crypto_async_request *req) From 3e3adeb25d440147ae00eefbe9d62cff56dcc7c6 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Fri, 1 Feb 2019 14:13:41 +0800 Subject: [PATCH 63/93] Revert "ACPI / EC: Remove old CLEAR_ON_RESUME quirk" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit [ Upstream commit b6a3e1475b0220378ad32bdf4d8692f058b1fc03 ] On some Samsung hardware, it is necessary to clear events accumulated by the EC during sleep. These ECs stop reporting GPEs until they are manually polled, if too many events are accumulated. Thus the CLEAR_ON_RESUME quirk is introduced to send EC query commands unconditionally after resume to clear all the EC query events on those platforms. Later, commit 4c237371f290 ("ACPI / EC: Remove old CLEAR_ON_RESUME quirk") removes the CLEAR_ON_RESUME quirk because we thought the new EC IRQ polling logic should handle this case. Now it has been proved that the EC IRQ Polling logic does not fix the issue actually because we got regression report on these Samsung platforms after removing the quirk. Thus revert commit 4c237371f290 ("ACPI / EC: Remove old CLEAR_ON_RESUME quirk") to introduce back the Samsung quirk in this patch. Link: https://bugzilla.kernel.org/show_bug.cgi?id=44161 Tested-by: Ortwin Glück Tested-by: Francisco Cribari Tested-by: Balazs Varga Signed-off-by: Zhang Rui Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin --- drivers/acpi/ec.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 3d624c72c6c2..ebfc06f29f7b 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -194,6 +194,7 @@ static struct workqueue_struct *ec_query_wq; static int EC_FLAGS_QUERY_HANDSHAKE; /* Needs QR_EC issued when SCI_EVT set */ static int EC_FLAGS_CORRECT_ECDT; /* Needs ECDT port address correction */ static int EC_FLAGS_IGNORE_DSDT_GPE; /* Needs ECDT GPE as correction setting */ +static int EC_FLAGS_CLEAR_ON_RESUME; /* Needs acpi_ec_clear() on boot/resume */ /* -------------------------------------------------------------------------- * Logging/Debugging @@ -499,6 +500,26 @@ static inline void __acpi_ec_disable_event(struct acpi_ec *ec) ec_log_drv("event blocked"); } +/* + * Process _Q events that might have accumulated in the EC. + * Run with locked ec mutex. + */ +static void acpi_ec_clear(struct acpi_ec *ec) +{ + int i, status; + u8 value = 0; + + for (i = 0; i < ACPI_EC_CLEAR_MAX; i++) { + status = acpi_ec_query(ec, &value); + if (status || !value) + break; + } + if (unlikely(i == ACPI_EC_CLEAR_MAX)) + pr_warn("Warning: Maximum of %d stale EC events cleared\n", i); + else + pr_info("%d stale EC events cleared\n", i); +} + static void acpi_ec_enable_event(struct acpi_ec *ec) { unsigned long flags; @@ -507,6 +528,10 @@ static void acpi_ec_enable_event(struct acpi_ec *ec) if (acpi_ec_started(ec)) __acpi_ec_enable_event(ec); spin_unlock_irqrestore(&ec->lock, flags); + + /* Drain additional events if hardware requires that */ + if (EC_FLAGS_CLEAR_ON_RESUME) + acpi_ec_clear(ec); } #ifdef CONFIG_PM_SLEEP @@ -1802,6 +1827,31 @@ static int ec_flag_query_handshake(const struct dmi_system_id *id) } #endif +/* + * On some hardware it is necessary to clear events accumulated by the EC during + * sleep. These ECs stop reporting GPEs until they are manually polled, if too + * many events are accumulated. (e.g. Samsung Series 5/9 notebooks) + * + * https://bugzilla.kernel.org/show_bug.cgi?id=44161 + * + * Ideally, the EC should also be instructed NOT to accumulate events during + * sleep (which Windows seems to do somehow), but the interface to control this + * behaviour is not known at this time. + * + * Models known to be affected are Samsung 530Uxx/535Uxx/540Uxx/550Pxx/900Xxx, + * however it is very likely that other Samsung models are affected. + * + * On systems which don't accumulate _Q events during sleep, this extra check + * should be harmless. + */ +static int ec_clear_on_resume(const struct dmi_system_id *id) +{ + pr_debug("Detected system needing EC poll on resume.\n"); + EC_FLAGS_CLEAR_ON_RESUME = 1; + ec_event_clearing = ACPI_EC_EVT_TIMING_STATUS; + return 0; +} + /* * Some ECDTs contain wrong register addresses. * MSI MS-171F @@ -1851,6 +1901,9 @@ static const struct dmi_system_id ec_dmi_table[] __initconst = { ec_honor_ecdt_gpe, "ASUS X580VD", { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), DMI_MATCH(DMI_PRODUCT_NAME, "X580VD"),}, NULL}, + { + ec_clear_on_resume, "Samsung hardware", { + DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD.")}, NULL}, {}, }; From a68ee104ae03e4309f40de3443923773c97411db Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Tue, 5 Feb 2019 16:24:53 -0700 Subject: [PATCH 64/93] coresight: cpu-debug: Support for CA73 CPUs [ Upstream commit a0f890aba2be33377f4eb24e13633c4a76a68f38 ] This patch is to add the AMBA device ID for CA73 CPU, so that CPU debug module can be initialized successfully when a SoC contain CA73 CPUs. This patch has been verified on 96boards Hikey960. Signed-off-by: Leo Yan Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin --- drivers/hwtracing/coresight/coresight-cpu-debug.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/hwtracing/coresight/coresight-cpu-debug.c b/drivers/hwtracing/coresight/coresight-cpu-debug.c index 9cdb3fbc8c1f..2f6f46ea68e9 100644 --- a/drivers/hwtracing/coresight/coresight-cpu-debug.c +++ b/drivers/hwtracing/coresight/coresight-cpu-debug.c @@ -680,6 +680,10 @@ static const struct amba_id debug_ids[] = { .id = 0x000bbd08, .mask = 0x000fffff, }, + { /* Debug for Cortex-A73 */ + .id = 0x000bbd09, + .mask = 0x000fffff, + }, { 0, 0 }, }; From 03e67ed367d4763f0ff9a8fc80b8a880df6c666e Mon Sep 17 00:00:00 2001 From: Ilia Mirkin Date: Sun, 13 Jan 2019 17:50:10 -0500 Subject: [PATCH 65/93] drm/nouveau/volt/gf117: fix speedo readout register [ Upstream commit fc782242749fa4235592854fafe1a1297583c1fb ] GF117 appears to use the same register as GK104 (but still with the general Fermi readout mechanism). Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108980 Signed-off-by: Ilia Mirkin Signed-off-by: Ben Skeggs Signed-off-by: Sasha Levin --- .../drm/nouveau/include/nvkm/subdev/volt.h | 1 + .../gpu/drm/nouveau/nvkm/engine/device/base.c | 2 +- .../gpu/drm/nouveau/nvkm/subdev/volt/Kbuild | 1 + .../gpu/drm/nouveau/nvkm/subdev/volt/gf117.c | 60 +++++++++++++++++++ 4 files changed, 63 insertions(+), 1 deletion(-) create mode 100644 drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h index 8a0f85f5fc1a..6a765682fbfa 100644 --- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h +++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h @@ -38,6 +38,7 @@ int nvkm_volt_set_id(struct nvkm_volt *, u8 id, u8 min_id, u8 temp, int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gf100_volt_new(struct nvkm_device *, int, struct nvkm_volt **); +int gf117_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **); int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **); diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c index e096a5d9c292..f8dd78e21456 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c @@ -1612,7 +1612,7 @@ nvd7_chipset = { .pci = gf106_pci_new, .therm = gf119_therm_new, .timer = nv41_timer_new, - .volt = gf100_volt_new, + .volt = gf117_volt_new, .ce[0] = gf100_ce_new, .disp = gf119_disp_new, .dma = gf119_dma_new, diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild index bcd179ba11d0..146adcdd316a 100644 --- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild @@ -2,6 +2,7 @@ nvkm-y += nvkm/subdev/volt/base.o nvkm-y += nvkm/subdev/volt/gpio.o nvkm-y += nvkm/subdev/volt/nv40.o nvkm-y += nvkm/subdev/volt/gf100.o +nvkm-y += nvkm/subdev/volt/gf117.o nvkm-y += nvkm/subdev/volt/gk104.o nvkm-y += nvkm/subdev/volt/gk20a.o nvkm-y += nvkm/subdev/volt/gm20b.o diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c new file mode 100644 index 000000000000..547a58f0aeac --- /dev/null +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf117.c @@ -0,0 +1,60 @@ +/* + * Copyright 2019 Ilia Mirkin + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Ilia Mirkin + */ +#include "priv.h" + +#include + +static int +gf117_volt_speedo_read(struct nvkm_volt *volt) +{ + struct nvkm_device *device = volt->subdev.device; + struct nvkm_fuse *fuse = device->fuse; + + if (!fuse) + return -EINVAL; + + return nvkm_fuse_read(fuse, 0x3a8); +} + +static const struct nvkm_volt_func +gf117_volt = { + .oneinit = gf100_volt_oneinit, + .vid_get = nvkm_voltgpio_get, + .vid_set = nvkm_voltgpio_set, + .speedo_read = gf117_volt_speedo_read, +}; + +int +gf117_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt) +{ + struct nvkm_volt *volt; + int ret; + + ret = nvkm_volt_new_(&gf117_volt, device, index, &volt); + *pvolt = volt; + if (ret) + return ret; + + return nvkm_voltgpio_init(volt); +} From 5e3f6ba82ed450d62ac7910b0fe026d31eb2aa5b Mon Sep 17 00:00:00 2001 From: Yang Shi Date: Wed, 13 Feb 2019 17:14:23 +0100 Subject: [PATCH 66/93] ARM: 8839/1: kprobe: make patch_lock a raw_spinlock_t [ Upstream commit 143c2a89e0e5fda6c6fd08d7bc1126438c19ae90 ] When running kprobe on -rt kernel, the below bug is caught: |BUG: sleeping function called from invalid context at kernel/locking/rtmutex.c:931 |in_atomic(): 1, irqs_disabled(): 128, pid: 14, name: migration/0 |Preemption disabled at:[<802f2b98>] cpu_stopper_thread+0xc0/0x140 |CPU: 0 PID: 14 Comm: migration/0 Tainted: G O 4.8.3-rt2 #1 |Hardware name: Freescale LS1021A |[<8025a43c>] (___might_sleep) |[<80b5b324>] (rt_spin_lock) |[<80b5c31c>] (__patch_text_real) |[<80b5c3ac>] (patch_text_stop_machine) |[<802f2920>] (multi_cpu_stop) Since patch_text_stop_machine() is called in stop_machine() which disables IRQ, sleepable lock should be not used in this atomic context, so replace patch_lock to raw lock. Signed-off-by: Yang Shi Signed-off-by: Sebastian Andrzej Siewior Reviewed-by: Arnd Bergmann Signed-off-by: Russell King Signed-off-by: Sasha Levin --- arch/arm/kernel/patch.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c index a50dc00d79a2..d0a05a3bdb96 100644 --- a/arch/arm/kernel/patch.c +++ b/arch/arm/kernel/patch.c @@ -16,7 +16,7 @@ struct patch { unsigned int insn; }; -static DEFINE_SPINLOCK(patch_lock); +static DEFINE_RAW_SPINLOCK(patch_lock); static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) __acquires(&patch_lock) @@ -33,7 +33,7 @@ static void __kprobes *patch_map(void *addr, int fixmap, unsigned long *flags) return addr; if (flags) - spin_lock_irqsave(&patch_lock, *flags); + raw_spin_lock_irqsave(&patch_lock, *flags); else __acquire(&patch_lock); @@ -48,7 +48,7 @@ static void __kprobes patch_unmap(int fixmap, unsigned long *flags) clear_fixmap(fixmap); if (flags) - spin_unlock_irqrestore(&patch_lock, *flags); + raw_spin_unlock_irqrestore(&patch_lock, *flags); else __release(&patch_lock); } From 86c7f76a8930556ec515095363ddaa0604486cdd Mon Sep 17 00:00:00 2001 From: Kevin Wang Date: Fri, 22 Feb 2019 12:36:49 +0800 Subject: [PATCH 67/93] drm/amdkfd: use init_mqd function to allocate object for hid_mqd (CI) [ Upstream commit cac734c2dbd2514f14c8c6a17caba1990d83bf1d ] if use the legacy method to allocate object, when mqd_hiq need to run uninit code, it will be cause WARNING call trace. eg: (s3 suspend test) [ 34.918944] Call Trace: [ 34.918948] [] dump_stack+0x19/0x1b [ 34.918950] [] __warn+0xd8/0x100 [ 34.918951] [] warn_slowpath_null+0x1d/0x20 [ 34.918991] [] uninit_mqd_hiq_sdma+0x4e/0x50 [amdgpu] [ 34.919028] [] uninitialize+0x37/0xe0 [amdgpu] [ 34.919064] [] kernel_queue_uninit+0x16/0x30 [amdgpu] [ 34.919086] [] pm_uninit+0x12/0x20 [amdgpu] [ 34.919107] [] stop_nocpsch+0x15/0x20 [amdgpu] [ 34.919129] [] kgd2kfd_suspend.part.4+0x2e/0x50 [amdgpu] [ 34.919150] [] kgd2kfd_suspend+0x17/0x20 [amdgpu] [ 34.919171] [] amdgpu_amdkfd_suspend+0x1a/0x20 [amdgpu] [ 34.919187] [] amdgpu_device_suspend+0x88/0x3a0 [amdgpu] [ 34.919189] [] ? enqueue_entity+0x2ef/0xbe0 [ 34.919205] [] amdgpu_pmops_suspend+0x20/0x30 [amdgpu] [ 34.919207] [] pci_pm_suspend+0x6f/0x150 [ 34.919208] [] ? pci_pm_freeze+0xf0/0xf0 [ 34.919210] [] dpm_run_callback+0x46/0x90 [ 34.919212] [] __device_suspend+0xfb/0x2a0 [ 34.919213] [] async_suspend+0x1f/0xa0 [ 34.919214] [] async_run_entry_fn+0x3f/0x130 [ 34.919216] [] process_one_work+0x17f/0x440 [ 34.919217] [] worker_thread+0x126/0x3c0 [ 34.919218] [] ? manage_workers.isra.25+0x2a0/0x2a0 [ 34.919220] [] kthread+0xd1/0xe0 [ 34.919221] [] ? insert_kthread_work+0x40/0x40 [ 34.919222] [] ret_from_fork_nospec_begin+0x7/0x21 [ 34.919224] [] ? insert_kthread_work+0x40/0x40 [ 34.919224] ---[ end trace 38cd9f65c963adad ]--- Signed-off-by: Kevin Wang Reviewed-by: Oak Zeng Signed-off-by: Alex Deucher Signed-off-by: Sasha Levin --- .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 52 +------------------ 1 file changed, 1 insertion(+), 51 deletions(-) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index 164fa4b1f9a9..732b8fbbca68 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -285,57 +285,7 @@ static int init_mqd_hiq(struct mqd_manager *mm, void **mqd, struct kfd_mem_obj **mqd_mem_obj, uint64_t *gart_addr, struct queue_properties *q) { - uint64_t addr; - struct cik_mqd *m; - int retval; - - retval = kfd_gtt_sa_allocate(mm->dev, sizeof(struct cik_mqd), - mqd_mem_obj); - - if (retval != 0) - return -ENOMEM; - - m = (struct cik_mqd *) (*mqd_mem_obj)->cpu_ptr; - addr = (*mqd_mem_obj)->gpu_addr; - - memset(m, 0, ALIGN(sizeof(struct cik_mqd), 256)); - - m->header = 0xC0310800; - m->compute_pipelinestat_enable = 1; - m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF; - m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF; - - m->cp_hqd_persistent_state = DEFAULT_CP_HQD_PERSISTENT_STATE | - PRELOAD_REQ; - m->cp_hqd_quantum = QUANTUM_EN | QUANTUM_SCALE_1MS | - QUANTUM_DURATION(10); - - m->cp_mqd_control = MQD_CONTROL_PRIV_STATE_EN; - m->cp_mqd_base_addr_lo = lower_32_bits(addr); - m->cp_mqd_base_addr_hi = upper_32_bits(addr); - - m->cp_hqd_ib_control = DEFAULT_MIN_IB_AVAIL_SIZE; - - /* - * Pipe Priority - * Identifies the pipe relative priority when this queue is connected - * to the pipeline. The pipe priority is against the GFX pipe and HP3D. - * In KFD we are using a fixed pipe priority set to CS_MEDIUM. - * 0 = CS_LOW (typically below GFX) - * 1 = CS_MEDIUM (typically between HP3D and GFX - * 2 = CS_HIGH (typically above HP3D) - */ - m->cp_hqd_pipe_priority = 1; - m->cp_hqd_queue_priority = 15; - - *mqd = m; - if (gart_addr) - *gart_addr = addr; - retval = mm->update_mqd(mm, m, q); - - return retval; + return init_mqd(mm, mqd, mqd_mem_obj, gart_addr, q); } static int update_mqd_hiq(struct mqd_manager *mm, void *mqd, From 0ba1fa56351e6e9c2f8db4ffc823cb7057e4ea82 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 1 Mar 2019 10:57:57 +0800 Subject: [PATCH 68/93] appletalk: Fix use-after-free in atalk_proc_exit [ Upstream commit 6377f787aeb945cae7abbb6474798de129e1f3ac ] KASAN report this: BUG: KASAN: use-after-free in pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71 Read of size 8 at addr ffff8881f41fe5b0 by task syz-executor.0/2806 CPU: 0 PID: 2806 Comm: syz-executor.0 Not tainted 5.0.0-rc7+ #45 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xfa/0x1ce lib/dump_stack.c:113 print_address_description+0x65/0x270 mm/kasan/report.c:187 kasan_report+0x149/0x18d mm/kasan/report.c:317 pde_subdir_find+0x12d/0x150 fs/proc/generic.c:71 remove_proc_entry+0xe8/0x420 fs/proc/generic.c:667 atalk_proc_exit+0x18/0x820 [appletalk] atalk_exit+0xf/0x5a [appletalk] __do_sys_delete_module kernel/module.c:1018 [inline] __se_sys_delete_module kernel/module.c:961 [inline] __x64_sys_delete_module+0x3dc/0x5e0 kernel/module.c:961 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x462e99 Code: f7 d8 64 89 02 b8 ff ff ff ff c3 66 0f 1f 44 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 bc ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007fb2de6b9c58 EFLAGS: 00000246 ORIG_RAX: 00000000000000b0 RAX: ffffffffffffffda RBX: 000000000073bf00 RCX: 0000000000462e99 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000200001c0 RBP: 0000000000000002 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb2de6ba6bc R13: 00000000004bccaa R14: 00000000006f6bc8 R15: 00000000ffffffff Allocated by task 2806: set_track mm/kasan/common.c:85 [inline] __kasan_kmalloc.constprop.3+0xa0/0xd0 mm/kasan/common.c:496 slab_post_alloc_hook mm/slab.h:444 [inline] slab_alloc_node mm/slub.c:2739 [inline] slab_alloc mm/slub.c:2747 [inline] kmem_cache_alloc+0xcf/0x250 mm/slub.c:2752 kmem_cache_zalloc include/linux/slab.h:730 [inline] __proc_create+0x30f/0xa20 fs/proc/generic.c:408 proc_mkdir_data+0x47/0x190 fs/proc/generic.c:469 0xffffffffc10c01bb 0xffffffffc10c0166 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 2806: set_track mm/kasan/common.c:85 [inline] __kasan_slab_free+0x130/0x180 mm/kasan/common.c:458 slab_free_hook mm/slub.c:1409 [inline] slab_free_freelist_hook mm/slub.c:1436 [inline] slab_free mm/slub.c:2986 [inline] kmem_cache_free+0xa6/0x2a0 mm/slub.c:3002 pde_put+0x6e/0x80 fs/proc/generic.c:647 remove_proc_entry+0x1d3/0x420 fs/proc/generic.c:684 0xffffffffc10c031c 0xffffffffc10c0166 do_one_initcall+0xfa/0x5ca init/main.c:887 do_init_module+0x204/0x5f6 kernel/module.c:3460 load_module+0x66b2/0x8570 kernel/module.c:3808 __do_sys_finit_module+0x238/0x2a0 kernel/module.c:3902 do_syscall_64+0x147/0x600 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe The buggy address belongs to the object at ffff8881f41fe500 which belongs to the cache proc_dir_entry of size 256 The buggy address is located 176 bytes inside of 256-byte region [ffff8881f41fe500, ffff8881f41fe600) The buggy address belongs to the page: page:ffffea0007d07f80 count:1 mapcount:0 mapping:ffff8881f6e69a00 index:0x0 flags: 0x2fffc0000000200(slab) raw: 02fffc0000000200 dead000000000100 dead000000000200 ffff8881f6e69a00 raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881f41fe480: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc ffff8881f41fe500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb >ffff8881f41fe580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb ^ ffff8881f41fe600: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb ffff8881f41fe680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb It should check the return value of atalk_proc_init fails, otherwise atalk_exit will trgger use-after-free in pde_subdir_find while unload the module.This patch fix error cleanup path of atalk_init Reported-by: Hulk Robot Signed-off-by: YueHaibing Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/atalk.h | 2 +- net/appletalk/atalk_proc.c | 2 +- net/appletalk/ddp.c | 37 ++++++++++++++++++++++++++------ net/appletalk/sysctl_net_atalk.c | 5 ++++- 4 files changed, 37 insertions(+), 9 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 4d356e168692..212eb8c7fed6 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -151,7 +151,7 @@ extern int sysctl_aarp_retransmit_limit; extern int sysctl_aarp_resolve_time; #ifdef CONFIG_SYSCTL -extern void atalk_register_sysctl(void); +extern int atalk_register_sysctl(void); extern void atalk_unregister_sysctl(void); #else #define atalk_register_sysctl() do { } while(0) diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index af46bc49e1e9..b5f84f428aa6 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -293,7 +293,7 @@ out_interface: goto out; } -void __exit atalk_proc_exit(void) +void atalk_proc_exit(void) { remove_proc_entry("interface", atalk_proc_dir); remove_proc_entry("route", atalk_proc_dir); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 5d035c1f1156..d1b68cc7da89 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1912,12 +1912,16 @@ static const char atalk_err_snap[] __initconst = /* Called by proto.c on kernel start up */ static int __init atalk_init(void) { - int rc = proto_register(&ddp_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&ddp_proto, 0); + if (rc) goto out; - (void)sock_register(&atalk_family_ops); + rc = sock_register(&atalk_family_ops); + if (rc) + goto out_proto; + ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv); if (!ddp_dl) printk(atalk_err_snap); @@ -1925,12 +1929,33 @@ static int __init atalk_init(void) dev_add_pack(<alk_packet_type); dev_add_pack(&ppptalk_packet_type); - register_netdevice_notifier(&ddp_notifier); + rc = register_netdevice_notifier(&ddp_notifier); + if (rc) + goto out_sock; + aarp_proto_init(); - atalk_proc_init(); - atalk_register_sysctl(); + rc = atalk_proc_init(); + if (rc) + goto out_aarp; + + rc = atalk_register_sysctl(); + if (rc) + goto out_proc; out: return rc; +out_proc: + atalk_proc_exit(); +out_aarp: + aarp_cleanup_module(); + unregister_netdevice_notifier(&ddp_notifier); +out_sock: + dev_remove_pack(&ppptalk_packet_type); + dev_remove_pack(<alk_packet_type); + unregister_snap_client(ddp_dl); + sock_unregister(PF_APPLETALK); +out_proto: + proto_unregister(&ddp_proto); + goto out; } module_init(atalk_init); diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index c744a853fa5f..d945b7c0176d 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -45,9 +45,12 @@ static struct ctl_table atalk_table[] = { static struct ctl_table_header *atalk_table_header; -void atalk_register_sysctl(void) +int __init atalk_register_sysctl(void) { atalk_table_header = register_net_sysctl(&init_net, "net/appletalk", atalk_table); + if (!atalk_table_header) + return -ENOMEM; + return 0; } void atalk_unregister_sysctl(void) From 6dc75ccba5680b67a4bcc72e96ec851326e66690 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Thu, 7 Mar 2019 16:28:18 -0800 Subject: [PATCH 69/93] lib/div64.c: off by one in shift [ Upstream commit cdc94a37493135e355dfc0b0e086d84e3eadb50d ] fls counts bits starting from 1 to 32 (returns 0 for zero argument). If we add 1 we shift right one bit more and loose precision from divisor, what cause function incorect results with some numbers. Corrected code was tested in user-space, see bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=202391 Link: http://lkml.kernel.org/r/1548686944-11891-1-git-send-email-sgruszka@redhat.com Fixes: 658716d19f8f ("div64_u64(): improve precision on 32bit platforms") Signed-off-by: Stanislaw Gruszka Reported-by: Siarhei Volkau Tested-by: Siarhei Volkau Acked-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- lib/div64.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/div64.c b/lib/div64.c index 58e2a404097e..a2688b882461 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -103,7 +103,7 @@ u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) quot = div_u64_rem(dividend, divisor, &rem32); *remainder = rem32; } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) @@ -141,7 +141,7 @@ u64 div64_u64(u64 dividend, u64 divisor) if (high == 0) { quot = div_u64(dividend, divisor); } else { - int n = 1 + fls(high); + int n = fls(high); quot = div_u64(dividend >> n, divisor >> n); if (quot != 0) From c43e2bdd14941e969dac515efadc3d379f333fdf Mon Sep 17 00:00:00 2001 From: Pi-Hsun Shih Date: Wed, 13 Mar 2019 11:44:33 -0700 Subject: [PATCH 70/93] include/linux/swap.h: use offsetof() instead of custom __swapoffset macro [ Upstream commit a4046c06be50a4f01d435aa7fe57514818e6cc82 ] Use offsetof() to calculate offset of a field to take advantage of compiler built-in version when possible, and avoid UBSAN warning when compiling with Clang: UBSAN: Undefined behaviour in mm/swapfile.c:3010:38 member access within null pointer of type 'union swap_header' CPU: 6 PID: 1833 Comm: swapon Tainted: G S 4.19.23 #43 Call trace: dump_backtrace+0x0/0x194 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0x70/0x94 ubsan_epilogue+0x14/0x44 ubsan_type_mismatch_common+0xf4/0xfc __ubsan_handle_type_mismatch_v1+0x34/0x54 __se_sys_swapon+0x654/0x1084 __arm64_sys_swapon+0x1c/0x24 el0_svc_common+0xa8/0x150 el0_svc_compat_handler+0x2c/0x38 el0_svc_compat+0x8/0x18 Link: http://lkml.kernel.org/r/20190312081902.223764-1-pihsun@chromium.org Signed-off-by: Pi-Hsun Shih Acked-by: Michal Hocko Reviewed-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin --- include/linux/swap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/swap.h b/include/linux/swap.h index 4fd1ab9565ba..e643866912b7 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -155,9 +155,9 @@ struct swap_extent { /* * Max bad pages in the new format.. */ -#define __swapoffset(x) ((unsigned long)&((union swap_header *)0)->x) #define MAX_SWAP_BADPAGES \ - ((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int)) + ((offsetof(union swap_header, magic.magic) - \ + offsetof(union swap_header, info.badpages)) / sizeof(int)) enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ From 02c2de9be3d031d5c0ee953ec342e5e0cabb3ed5 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Mar 2019 15:54:43 +0100 Subject: [PATCH 71/93] bpf: fix use after free in bpf_evict_inode [ Upstream commit 1da6c4d9140cb7c13e87667dc4e1488d6c8fc10f ] syzkaller was able to generate the following UAF in bpf: BUG: KASAN: use-after-free in lookup_last fs/namei.c:2269 [inline] BUG: KASAN: use-after-free in path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 Read of size 1 at addr ffff8801c4865c47 by task syz-executor2/9423 CPU: 0 PID: 9423 Comm: syz-executor2 Not tainted 4.20.0-rc1-next-20181109+ #110 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x244/0x39d lib/dump_stack.c:113 print_address_description.cold.7+0x9/0x1ff mm/kasan/report.c:256 kasan_report_error mm/kasan/report.c:354 [inline] kasan_report.cold.8+0x242/0x309 mm/kasan/report.c:412 __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:430 lookup_last fs/namei.c:2269 [inline] path_lookupat.isra.43+0x9f8/0xc00 fs/namei.c:2318 filename_lookup+0x26a/0x520 fs/namei.c:2348 user_path_at_empty+0x40/0x50 fs/namei.c:2608 user_path include/linux/namei.h:62 [inline] do_mount+0x180/0x1ff0 fs/namespace.c:2980 ksys_mount+0x12d/0x140 fs/namespace.c:3258 __do_sys_mount fs/namespace.c:3272 [inline] __se_sys_mount fs/namespace.c:3269 [inline] __x64_sys_mount+0xbe/0x150 fs/namespace.c:3269 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457569 Code: fd b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 cb b3 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fde6ed96c78 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000457569 RDX: 0000000020000040 RSI: 0000000020000000 RDI: 0000000000000000 RBP: 000000000072bf00 R08: 0000000020000340 R09: 0000000000000000 R10: 0000000000200000 R11: 0000000000000246 R12: 00007fde6ed976d4 R13: 00000000004c2c24 R14: 00000000004d4990 R15: 00000000ffffffff Allocated by task 9424: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] kasan_kmalloc+0xc7/0xe0 mm/kasan/kasan.c:553 __do_kmalloc mm/slab.c:3722 [inline] __kmalloc_track_caller+0x157/0x760 mm/slab.c:3737 kstrdup+0x39/0x70 mm/util.c:49 bpf_symlink+0x26/0x140 kernel/bpf/inode.c:356 vfs_symlink+0x37a/0x5d0 fs/namei.c:4127 do_symlinkat+0x242/0x2d0 fs/namei.c:4154 __do_sys_symlink fs/namei.c:4173 [inline] __se_sys_symlink fs/namei.c:4171 [inline] __x64_sys_symlink+0x59/0x80 fs/namei.c:4171 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Freed by task 9425: save_stack+0x43/0xd0 mm/kasan/kasan.c:448 set_track mm/kasan/kasan.c:460 [inline] __kasan_slab_free+0x102/0x150 mm/kasan/kasan.c:521 kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528 __cache_free mm/slab.c:3498 [inline] kfree+0xcf/0x230 mm/slab.c:3817 bpf_evict_inode+0x11f/0x150 kernel/bpf/inode.c:565 evict+0x4b9/0x980 fs/inode.c:558 iput_final fs/inode.c:1550 [inline] iput+0x674/0xa90 fs/inode.c:1576 do_unlinkat+0x733/0xa30 fs/namei.c:4069 __do_sys_unlink fs/namei.c:4110 [inline] __se_sys_unlink fs/namei.c:4108 [inline] __x64_sys_unlink+0x42/0x50 fs/namei.c:4108 do_syscall_64+0x1b9/0x820 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe In this scenario path lookup under RCU is racing with the final unlink in case of symlinks. As Linus puts it in his analysis: [...] We actually RCU-delay the inode freeing itself, but when we do the final iput(), the "evict()" function is called synchronously. Now, the simple fix would seem to just RCU-delay the kfree() of the symlink data in bpf_evict_inode(). Maybe that's the right thing to do. [...] Al suggested to piggy-back on the ->destroy_inode() callback in order to implement RCU deferral there which can then kfree() the inode->i_link eventually right before putting inode back into inode cache. By reusing free_inode_nonrcu() from there we can avoid the need for our own inode cache and just reuse generic one as we currently do. And in-fact on top of all this we should just get rid of the bpf_evict_inode() entirely. This means truncate_inode_pages_final() and clear_inode() will then simply be called by the fs core via evict(). Dropping the reference should really only be done when inode is unhashed and nothing reachable anymore, so it's better also moved into the final ->destroy_inode() callback. Fixes: 0f98621bef5d ("bpf, inode: add support for symlinks and fix mtime/ctime") Reported-by: syzbot+fb731ca573367b7f6564@syzkaller.appspotmail.com Reported-by: syzbot+a13e5ead792d6df37818@syzkaller.appspotmail.com Reported-by: syzbot+7a8ba368b47fdefca61e@syzkaller.appspotmail.com Suggested-by: Al Viro Analyzed-by: Linus Torvalds Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Linus Torvalds Acked-by: Al Viro Link: https://lore.kernel.org/lkml/0000000000006946d2057bbd0eef@google.com/T/ Signed-off-by: Sasha Levin (Microsoft) --- kernel/bpf/inode.c | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index be1dde967208..ccf9ffd5da78 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -365,19 +365,6 @@ out: } EXPORT_SYMBOL_GPL(bpf_obj_get_user); -static void bpf_evict_inode(struct inode *inode) -{ - enum bpf_type type; - - truncate_inode_pages_final(&inode->i_data); - clear_inode(inode); - - if (S_ISLNK(inode->i_mode)) - kfree(inode->i_link); - if (!bpf_inode_type(inode, &type)) - bpf_any_put(inode->i_private, type); -} - /* * Display the mount options in /proc/mounts. */ @@ -390,11 +377,28 @@ static int bpf_show_options(struct seq_file *m, struct dentry *root) return 0; } +static void bpf_destroy_inode_deferred(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + enum bpf_type type; + + if (S_ISLNK(inode->i_mode)) + kfree(inode->i_link); + if (!bpf_inode_type(inode, &type)) + bpf_any_put(inode->i_private, type); + free_inode_nonrcu(inode); +} + +static void bpf_destroy_inode(struct inode *inode) +{ + call_rcu(&inode->i_rcu, bpf_destroy_inode_deferred); +} + static const struct super_operations bpf_super_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, .show_options = bpf_show_options, - .evict_inode = bpf_evict_inode, + .destroy_inode = bpf_destroy_inode, }; enum { From 8991f1af962d939c3f3456990f5a826c3aa628fd Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Wed, 5 Sep 2018 09:17:45 -0400 Subject: [PATCH 72/93] dm: disable CRYPTO_TFM_REQ_MAY_SLEEP to fix a GFP_KERNEL recursion deadlock [ Upstream commit 432061b3da64e488be3403124a72a9250bbe96d4 ] There's a XFS on dm-crypt deadlock, recursing back to itself due to the crypto subsystems use of GFP_KERNEL, reported here: https://bugzilla.kernel.org/show_bug.cgi?id=200835 * dm-crypt calls crypt_convert in xts mode * init_crypt from xts.c calls kmalloc(GFP_KERNEL) * kmalloc(GFP_KERNEL) recurses into the XFS filesystem, the filesystem tries to submit some bios and wait for them, causing a deadlock Fix this by updating both the DM crypt and integrity targets to no longer use the CRYPTO_TFM_REQ_MAY_SLEEP flag, which will change the crypto allocations from GFP_KERNEL to GFP_ATOMIC, therefore they can't recurse into a filesystem. A GFP_ATOMIC allocation can fail, but init_crypt() in xts.c handles the allocation failure gracefully - it will fall back to preallocated buffer if the allocation fails. The crypto API maintainer says that the crypto API only needs to allocate memory when dealing with unaligned buffers and therefore turning CRYPTO_TFM_REQ_MAY_SLEEP off is safe (see this discussion: https://www.redhat.com/archives/dm-devel/2018-August/msg00195.html ) Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin (Microsoft) --- drivers/md/dm-crypt.c | 10 +++++----- drivers/md/dm-integrity.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 0d2005e5b24c..94b8d81f6020 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -334,7 +334,7 @@ static int crypt_iv_essiv_init(struct crypt_config *cc) sg_init_one(&sg, cc->key, cc->key_size); ahash_request_set_tfm(req, essiv->hash_tfm); - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); + ahash_request_set_callback(req, 0, NULL, NULL); ahash_request_set_crypt(req, &sg, essiv->salt, cc->key_size); err = crypto_ahash_digest(req); @@ -609,7 +609,7 @@ static int crypt_iv_lmk_one(struct crypt_config *cc, u8 *iv, int i, r; desc->tfm = lmk->hash_tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; r = crypto_shash_init(desc); if (r) @@ -771,7 +771,7 @@ static int crypt_iv_tcw_whitening(struct crypt_config *cc, /* calculate crc32 for every 32bit part and xor it */ desc->tfm = tcw->crc32_tfm; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; for (i = 0; i < 4; i++) { r = crypto_shash_init(desc); if (r) @@ -1254,7 +1254,7 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc, * requests if driver request queue is full. */ skcipher_request_set_callback(ctx->r.req, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req)); } @@ -1271,7 +1271,7 @@ static void crypt_alloc_req_aead(struct crypt_config *cc, * requests if driver request queue is full. */ aead_request_set_callback(ctx->r.req_aead, - CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + CRYPTO_TFM_REQ_MAY_BACKLOG, kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead)); } diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c index da4baea9cf83..036379a23499 100644 --- a/drivers/md/dm-integrity.c +++ b/drivers/md/dm-integrity.c @@ -493,7 +493,7 @@ static void section_mac(struct dm_integrity_c *ic, unsigned section, __u8 result unsigned j, size; desc->tfm = ic->journal_mac; - desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP; + desc->flags = 0; r = crypto_shash_init(desc); if (unlikely(r)) { @@ -637,7 +637,7 @@ static void complete_journal_encrypt(struct crypto_async_request *req, int err) static bool do_crypt(bool encrypt, struct skcipher_request *req, struct journal_completion *comp) { int r; - skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP, + skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, complete_journal_encrypt, comp); if (likely(encrypt)) r = crypto_skcipher_encrypt(req); From 28356c21ac32d49d15a3ea7383b0a96052d15394 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 3 Apr 2019 18:39:01 +0000 Subject: [PATCH 73/93] bpf: reduce verifier memory consumption commit 638f5b90d46016372a8e3e0a434f199cc5e12b8c upstream. the verifier got progressively smarter over time and size of its internal state grew as well. Time to reduce the memory consumption. Before: sizeof(struct bpf_verifier_state) = 6520 After: sizeof(struct bpf_verifier_state) = 896 It's done by observing that majority of BPF programs use little to no stack whereas verifier kept all of 512 stack slots ready always. Instead dynamically reallocate struct verifier state when stack access is detected. Runtime difference before vs after is within a noise. The number of processed instructions stays the same. Cc: jakub.kicinski@netronome.com Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller [Backported to 4.14 by sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- .../net/ethernet/netronome/nfp/bpf/verifier.c | 9 +- include/linux/bpf_verifier.h | 16 +- kernel/bpf/verifier.c | 433 ++++++++++++------ 3 files changed, 304 insertions(+), 154 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 5b783a91b115..8793fa57f844 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -76,9 +76,9 @@ nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, static int nfp_bpf_check_exit(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env) + struct bpf_verifier_env *env) { - const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + const struct bpf_reg_state *reg0 = cur_regs(env) + BPF_REG_0; u64 imm; if (nfp_prog->act == NN_ACT_XDP) @@ -113,9 +113,10 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog, static int nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, - const struct bpf_verifier_env *env, u8 reg) + struct bpf_verifier_env *env, u8 reg_no) { - if (env->cur_state.regs[reg].type != PTR_TO_CTX) + const struct bpf_reg_state *reg = cur_regs(env) + reg_no; + if (reg->type != PTR_TO_CTX) return -EINVAL; return 0; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 8458cc5fbce5..0fc3d9afb8bf 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -91,14 +91,19 @@ enum bpf_stack_slot_type { #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +struct bpf_stack_state { + struct bpf_reg_state spilled_ptr; + u8 slot_type[BPF_REG_SIZE]; +}; + /* state of the program: * type of all registers and stack info */ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; struct bpf_verifier_state *parent; + int allocated_stack; + struct bpf_stack_state *stack; }; /* linked list of verifier states used to prune search */ @@ -133,7 +138,7 @@ struct bpf_verifier_env { struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ - struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ void *analyzer_priv; /* pointer to external analyzer's private data */ @@ -145,6 +150,11 @@ struct bpf_verifier_env { struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ }; +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return env->cur_state->regs; +} + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f6755fd5bae2..805bc218150d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -265,10 +265,11 @@ static void print_verifier_state(struct bpf_verifier_state *state) verbose(")"); } } - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] == STACK_SPILL) - verbose(" fp%d=%s", -MAX_BPF_STACK + i, - reg_type_str[state->spilled_regs[i / BPF_REG_SIZE].type]); + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] == STACK_SPILL) + verbose(" fp%d=%s", + -MAX_BPF_STACK + i * BPF_REG_SIZE, + reg_type_str[state->stack[i].spilled_ptr.type]); } verbose("\n"); } @@ -434,35 +435,123 @@ static void print_bpf_insn(const struct bpf_verifier_env *env, } } -static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) +static int copy_stack_state(struct bpf_verifier_state *dst, + const struct bpf_verifier_state *src) { - struct bpf_verifier_stack_elem *elem; - int insn_idx; + if (!src->stack) + return 0; + if (WARN_ON_ONCE(dst->allocated_stack < src->allocated_stack)) { + /* internal bug, make state invalid to reject the program */ + memset(dst, 0, sizeof(*dst)); + return -EFAULT; + } + memcpy(dst->stack, src->stack, + sizeof(*src->stack) * (src->allocated_stack / BPF_REG_SIZE)); + return 0; +} + +/* do_check() starts with zero-sized stack in struct bpf_verifier_state to + * make it consume minimal amount of memory. check_stack_write() access from + * the program calls into realloc_verifier_state() to grow the stack size. + * Note there is a non-zero 'parent' pointer inside bpf_verifier_state + * which this function copies over. It points to previous bpf_verifier_state + * which is never reallocated + */ +static int realloc_verifier_state(struct bpf_verifier_state *state, int size, + bool copy_old) +{ + u32 old_size = state->allocated_stack; + struct bpf_stack_state *new_stack; + int slot = size / BPF_REG_SIZE; + + if (size <= old_size || !size) { + if (copy_old) + return 0; + state->allocated_stack = slot * BPF_REG_SIZE; + if (!size && old_size) { + kfree(state->stack); + state->stack = NULL; + } + return 0; + } + new_stack = kmalloc_array(slot, sizeof(struct bpf_stack_state), + GFP_KERNEL); + if (!new_stack) + return -ENOMEM; + if (copy_old) { + if (state->stack) + memcpy(new_stack, state->stack, + sizeof(*new_stack) * (old_size / BPF_REG_SIZE)); + memset(new_stack + old_size / BPF_REG_SIZE, 0, + sizeof(*new_stack) * (size - old_size) / BPF_REG_SIZE); + } + state->allocated_stack = slot * BPF_REG_SIZE; + kfree(state->stack); + state->stack = new_stack; + return 0; +} + +static void free_verifier_state(struct bpf_verifier_state *state) +{ + kfree(state->stack); + kfree(state); +} + +/* copy verifier state from src to dst growing dst stack space + * when necessary to accommodate larger src stack + */ +static int copy_verifier_state(struct bpf_verifier_state *dst, + const struct bpf_verifier_state *src) +{ + int err; + + err = realloc_verifier_state(dst, src->allocated_stack, false); + if (err) + return err; + memcpy(dst, src, offsetof(struct bpf_verifier_state, allocated_stack)); + return copy_stack_state(dst, src); +} + +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, + int *insn_idx) +{ + struct bpf_verifier_state *cur = env->cur_state; + struct bpf_verifier_stack_elem *elem, *head = env->head; + int err; if (env->head == NULL) - return -1; + return -ENOENT; - memcpy(&env->cur_state, &env->head->st, sizeof(env->cur_state)); - insn_idx = env->head->insn_idx; + if (cur) { + err = copy_verifier_state(cur, &head->st); + if (err) + return err; + } + if (insn_idx) + *insn_idx = head->insn_idx; if (prev_insn_idx) - *prev_insn_idx = env->head->prev_insn_idx; - elem = env->head->next; - kfree(env->head); + *prev_insn_idx = head->prev_insn_idx; + elem = head->next; + kfree(head); env->head = elem; env->stack_size--; - return insn_idx; + return 0; } static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) { struct bpf_verifier_stack_elem *elem; + struct bpf_verifier_state *cur = env->cur_state; + int err; - elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); + elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; - memcpy(&elem->st, &env->cur_state, sizeof(env->cur_state)); + err = copy_verifier_state(&elem->st, cur); + if (err) + return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; @@ -475,7 +564,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, return &elem->st; err: /* pop all elements and return */ - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); return NULL; } @@ -671,7 +760,7 @@ static void mark_reg_read(const struct bpf_verifier_state *state, u32 regno) static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state->regs; if (regno >= MAX_BPF_REG) { verbose("R%d is invalid\n", regno); @@ -684,7 +773,7 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, verbose("R%d !read_ok\n", regno); return -EACCES; } - mark_reg_read(&env->cur_state, regno); + mark_reg_read(env->cur_state, regno); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@ -721,10 +810,21 @@ static int check_stack_write(struct bpf_verifier_env *env, struct bpf_verifier_state *state, int off, int size, int value_regno, int insn_idx) { - int i, spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err; + + err = realloc_verifier_state(state, round_up(slot + 1, BPF_REG_SIZE), + true); + if (err) + return err; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, * so it's aligned access and [off, off + size) are within stack limits */ + if (!env->allow_ptr_leaks && + state->stack[spi].slot_type[0] == STACK_SPILL && + size != BPF_REG_SIZE) { + verbose("attempt to corrupt spilled pointer on stack\n"); + return -EACCES; + } if (value_regno >= 0 && is_spillable_regtype(state->regs[value_regno].type)) { @@ -736,11 +836,11 @@ static int check_stack_write(struct bpf_verifier_env *env, } /* save register state */ - state->spilled_regs[spi] = state->regs[value_regno]; - state->spilled_regs[spi].live |= REG_LIVE_WRITTEN; + state->stack[spi].spilled_ptr = state->regs[value_regno]; + state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN; for (i = 0; i < BPF_REG_SIZE; i++) { - if (state->stack_slot_type[MAX_BPF_STACK + off + i] == STACK_MISC && + if (state->stack[spi].slot_type[i] == STACK_MISC && !env->allow_ptr_leaks) { int *poff = &env->insn_aux_data[insn_idx].sanitize_stack_off; int soff = (-spi - 1) * BPF_REG_SIZE; @@ -763,14 +863,15 @@ static int check_stack_write(struct bpf_verifier_env *env, } *poff = soff; } - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_SPILL; + state->stack[spi].slot_type[i] = STACK_SPILL; } } else { /* regular write of data into stack */ - state->spilled_regs[spi] = (struct bpf_reg_state) {}; + state->stack[spi].spilled_ptr = (struct bpf_reg_state) {}; for (i = 0; i < size; i++) - state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; + state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = + STACK_MISC; } return 0; } @@ -781,10 +882,10 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo while (parent) { /* if read wasn't screened by an earlier write ... */ - if (state->spilled_regs[slot].live & REG_LIVE_WRITTEN) + if (state->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) break; /* ... then we depend on parent's value */ - parent->spilled_regs[slot].live |= REG_LIVE_READ; + parent->stack[slot].spilled_ptr.live |= REG_LIVE_READ; state = parent; parent = state->parent; } @@ -793,34 +894,37 @@ static void mark_stack_slot_read(const struct bpf_verifier_state *state, int slo static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { - u8 *slot_type; - int i, spi; + int i, slot = -off - 1, spi = slot / BPF_REG_SIZE; + u8 *stype; - slot_type = &state->stack_slot_type[MAX_BPF_STACK + off]; + if (state->allocated_stack <= slot) { + verbose("invalid read from stack off %d+0 size %d\n", + off, size); + return -EACCES; + } + stype = state->stack[spi].slot_type; - if (slot_type[0] == STACK_SPILL) { + if (stype[0] == STACK_SPILL) { if (size != BPF_REG_SIZE) { verbose("invalid size of register spill\n"); return -EACCES; } for (i = 1; i < BPF_REG_SIZE; i++) { - if (slot_type[i] != STACK_SPILL) { + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) { verbose("corrupted spill memory\n"); return -EACCES; } } - spi = (MAX_BPF_STACK + off) / BPF_REG_SIZE; - if (value_regno >= 0) { /* restore register state from stack */ - state->regs[value_regno] = state->spilled_regs[spi]; + state->regs[value_regno] = state->stack[spi].spilled_ptr; mark_stack_slot_read(state, spi); } return 0; } else { for (i = 0; i < size; i++) { - if (slot_type[i] != STACK_MISC) { + if (stype[(slot - i) % BPF_REG_SIZE] != STACK_MISC) { verbose("invalid read from stack off %d+%d size %d\n", off, i, size); return -EACCES; @@ -837,7 +941,8 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_map *map = env->cur_state.regs[regno].map_ptr; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_map *map = regs[regno].map_ptr; if (off < 0 || size <= 0 || off + size > map->value_size) { verbose("invalid access to map value, value_size=%d off=%d size=%d\n", @@ -849,9 +954,9 @@ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, /* check read/write into a map element with possible variable offset */ static int check_map_access(struct bpf_verifier_env *env, u32 regno, - int off, int size) + int off, int size) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *reg = &state->regs[regno]; int err; @@ -924,7 +1029,7 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; if (off < 0 || size <= 0 || (u64)off + size > reg->range) { @@ -938,7 +1043,7 @@ static int __check_packet_access(struct bpf_verifier_env *env, u32 regno, static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); struct bpf_reg_state *reg = ®s[regno]; int err; @@ -1008,19 +1113,19 @@ static bool __is_pointer_value(bool allow_ptr_leaks, static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { - return __is_pointer_value(env->allow_ptr_leaks, &env->cur_state.regs[regno]); + return __is_pointer_value(env->allow_ptr_leaks, cur_regs(env) + regno); } static bool is_ctx_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; + const struct bpf_reg_state *reg = cur_regs(env) + regno; return reg->type == PTR_TO_CTX; } static bool is_pkt_reg(struct bpf_verifier_env *env, int regno) { - const struct bpf_reg_state *reg = &env->cur_state.regs[regno]; + const struct bpf_reg_state *reg = cur_regs(env) + regno; return reg->type == PTR_TO_PACKET; } @@ -1145,8 +1250,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn int off, int bpf_size, enum bpf_access_type t, int value_regno, bool strict_alignment_once) { - struct bpf_verifier_state *state = &env->cur_state; - struct bpf_reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = env->cur_state; + struct bpf_reg_state *regs = cur_regs(env); + struct bpf_reg_state *reg = regs + regno; int size, err = 0; size = bpf_size_to_bytes(bpf_size); @@ -1170,7 +1276,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(regs, value_regno); } else if (reg->type == PTR_TO_CTX) { enum bpf_reg_type reg_type = SCALAR_VALUE; @@ -1203,13 +1309,13 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn * the offset is zero. */ if (reg_type == SCALAR_VALUE) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(regs, value_regno); else - mark_reg_known_zero(state->regs, value_regno); - state->regs[value_regno].id = 0; - state->regs[value_regno].off = 0; - state->regs[value_regno].range = 0; - state->regs[value_regno].type = reg_type; + mark_reg_known_zero(regs, value_regno); + regs[value_regno].id = 0; + regs[value_regno].off = 0; + regs[value_regno].range = 0; + regs[value_regno].type = reg_type; } } else if (reg->type == PTR_TO_STACK) { @@ -1234,18 +1340,11 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn if (env->prog->aux->stack_depth < -off) env->prog->aux->stack_depth = -off; - if (t == BPF_WRITE) { - if (!env->allow_ptr_leaks && - state->stack_slot_type[MAX_BPF_STACK + off] == STACK_SPILL && - size != BPF_REG_SIZE) { - verbose("attempt to corrupt spilled pointer on stack\n"); - return -EACCES; - } + if (t == BPF_WRITE) err = check_stack_write(env, state, off, size, value_regno, insn_idx); - } else { + else err = check_stack_read(state, off, size, value_regno); - } } else if (reg->type == PTR_TO_PACKET) { if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) { verbose("cannot write into packet\n"); @@ -1258,7 +1357,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } err = check_packet_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) - mark_reg_unknown(state->regs, value_regno); + mark_reg_unknown(regs, value_regno); } else { verbose("R%d invalid mem access '%s'\n", regno, reg_type_str[reg->type]); @@ -1266,9 +1365,9 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && - state->regs[value_regno].type == SCALAR_VALUE) { + regs[value_regno].type == SCALAR_VALUE) { /* b/h/w load zero-extends, mark upper bits as known 0 */ - coerce_reg_to_size(&state->regs[value_regno], size); + coerce_reg_to_size(®s[value_regno], size); } return err; } @@ -1333,9 +1432,9 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = state->regs; - int off, i; + int off, i, slot, spi; if (regs[regno].type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ @@ -1376,7 +1475,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, } for (i = 0; i < access_size; i++) { - if (state->stack_slot_type[MAX_BPF_STACK + off + i] != STACK_MISC) { + slot = -(off + i) - 1; + spi = slot / BPF_REG_SIZE; + if (state->allocated_stack <= slot || + state->stack[spi].slot_type[slot % BPF_REG_SIZE] != + STACK_MISC) { verbose("invalid indirect read from stack off %d+%d size %d\n", off, i, access_size); return -EACCES; @@ -1389,7 +1492,7 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; switch (reg->type) { case PTR_TO_PACKET: @@ -1406,7 +1509,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; enum bpf_reg_type expected_type, type = reg->type; int err = 0; @@ -1678,7 +1781,7 @@ static int check_raw_mode(const struct bpf_func_proto *fn) */ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = env->cur_state; struct bpf_reg_state *regs = state->regs, *reg; int i; @@ -1687,10 +1790,10 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) regs[i].type == PTR_TO_PACKET_END) mark_reg_unknown(regs, i); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - reg = &state->spilled_regs[i / BPF_REG_SIZE]; + reg = &state->stack[i].spilled_ptr; if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_PACKET_END) continue; @@ -1700,9 +1803,8 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env) static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) { - struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1776,6 +1878,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id, int insn_idx) return err; } + regs = cur_regs(env); /* reset caller saved regs */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(regs, caller_saved[i]); @@ -1890,7 +1993,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg) { - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; + struct bpf_reg_state *regs = cur_regs(env), *dst_reg; bool known = tnum_is_const(off_reg->var_off); s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; @@ -2097,7 +2200,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); bool src_known, dst_known; s64 smin_val, smax_val; @@ -2345,7 +2448,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg, *src_reg; + struct bpf_reg_state *regs = cur_regs(env), *dst_reg, *src_reg; struct bpf_reg_state *ptr_reg = NULL, off_reg = {0}; u8 opcode = BPF_OP(insn->code); int rc; @@ -2419,12 +2522,12 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* Got here implies adding two SCALAR_VALUEs */ if (WARN_ON_ONCE(ptr_reg)) { - print_verifier_state(&env->cur_state); + print_verifier_state(env->cur_state); verbose("verifier internal error: unexpected ptr_reg\n"); return -EINVAL; } if (WARN_ON(!src_reg)) { - print_verifier_state(&env->cur_state); + print_verifier_state(env->cur_state); verbose("verifier internal error: no src_reg\n"); return -EINVAL; } @@ -2434,7 +2537,7 @@ static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* check validity of 32-bit and 64-bit arithmetic operations */ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 opcode = BPF_OP(insn->code); int err; @@ -2661,10 +2764,10 @@ static void find_good_pkt_pointers(struct bpf_verifier_state *state, /* keep the maximum range already checked */ regs[i].range = max(regs[i].range, new_range); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - reg = &state->spilled_regs[i / BPF_REG_SIZE]; + reg = &state->stack[i].spilled_ptr; if (reg->type == PTR_TO_PACKET && reg->id == dst_reg->id) reg->range = max(reg->range, new_range); } @@ -2914,17 +3017,17 @@ static void mark_map_regs(struct bpf_verifier_state *state, u32 regno, for (i = 0; i < MAX_BPF_REG; i++) mark_map_reg(regs, i, id, is_null); - for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { - if (state->stack_slot_type[i] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - mark_map_reg(state->spilled_regs, i / BPF_REG_SIZE, id, is_null); + mark_map_reg(&state->stack[i].spilled_ptr, 0, id, is_null); } } static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_verifier_state *other_branch, *this_branch = env->cur_state; struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -3087,7 +3190,7 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) /* verify BPF_LD_IMM64 instruction */ static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -3148,7 +3251,7 @@ static bool may_access_skb(enum bpf_prog_type type) */ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = cur_regs(env); u8 mode = BPF_MODE(insn->code); int i, err; @@ -3534,6 +3637,57 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, return false; } +static bool stacksafe(struct bpf_verifier_state *old, + struct bpf_verifier_state *cur, + struct idpair *idmap) +{ + int i, spi; + + /* if explored stack has more populated slots than current stack + * such stacks are not equivalent + */ + if (old->allocated_stack > cur->allocated_stack) + return false; + + /* walk slots of the explored stack and ignore any additional + * slots in the current stack, since explored(safe) state + * didn't use them + */ + for (i = 0; i < old->allocated_stack; i++) { + spi = i / BPF_REG_SIZE; + + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) + continue; + if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + /* Ex: old explored (safe) state has STACK_SPILL in + * this stack slot, but current has has STACK_MISC -> + * this verifier states are not equivalent, + * return false to continue verification of this path + */ + return false; + if (i % BPF_REG_SIZE) + continue; + if (old->stack[spi].slot_type[0] != STACK_SPILL) + continue; + if (!regsafe(&old->stack[spi].spilled_ptr, + &cur->stack[spi].spilled_ptr, + idmap)) + /* when explored and current stack slot are both storing + * spilled registers, check that stored pointers types + * are the same as well. + * Ex: explored safe path could have stored + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} + * but current path has stored: + * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} + * such verifier states are not equivalent. + * return false to continue verification of this path + */ + return false; + } + return true; +} + /* compare two verifier states * * all states stored in state_list are known to be valid, since @@ -3578,37 +3732,8 @@ static bool states_equal(struct bpf_verifier_env *env, goto out_free; } - for (i = 0; i < MAX_BPF_STACK; i++) { - if (old->stack_slot_type[i] == STACK_INVALID) - continue; - if (old->stack_slot_type[i] != cur->stack_slot_type[i]) - /* Ex: old explored (safe) state has STACK_SPILL in - * this stack slot, but current has has STACK_MISC -> - * this verifier states are not equivalent, - * return false to continue verification of this path - */ - goto out_free; - if (i % BPF_REG_SIZE) - continue; - if (old->stack_slot_type[i] != STACK_SPILL) - continue; - if (!regsafe(&old->spilled_regs[i / BPF_REG_SIZE], - &cur->spilled_regs[i / BPF_REG_SIZE], - idmap)) - /* when explored and current stack slot are both storing - * spilled registers, check that stored pointers types - * are the same as well. - * Ex: explored safe path could have stored - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8} - * but current path has stored: - * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16} - * such verifier states are not equivalent. - * return false to continue verification of this path - */ - goto out_free; - else - continue; - } + if (!stacksafe(old, cur, idmap)) + goto out_free; ret = true; out_free: kfree(idmap); @@ -3644,17 +3769,19 @@ static bool do_propagate_liveness(const struct bpf_verifier_state *state, } } /* ... and stack slots */ - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) { - if (parent->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) + for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && + i < parent->allocated_stack / BPF_REG_SIZE; i++) { + if (parent->stack[i].slot_type[0] != STACK_SPILL) continue; - if (state->stack_slot_type[i * BPF_REG_SIZE] != STACK_SPILL) + if (state->stack[i].slot_type[0] != STACK_SPILL) continue; - if (parent->spilled_regs[i].live & REG_LIVE_READ) + if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) continue; - if (writes && (state->spilled_regs[i].live & REG_LIVE_WRITTEN)) + if (writes && + (state->stack[i].spilled_ptr.live & REG_LIVE_WRITTEN)) continue; - if (state->spilled_regs[i].live & REG_LIVE_READ) { - parent->spilled_regs[i].live |= REG_LIVE_READ; + if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) { + parent->stack[i].spilled_ptr.live |= REG_LIVE_READ; touched = true; } } @@ -3684,6 +3811,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; + struct bpf_verifier_state *cur = env->cur_state; int i; sl = env->explored_states[insn_idx]; @@ -3694,7 +3822,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return 0; while (sl != STATE_LIST_MARK) { - if (states_equal(env, &sl->state, &env->cur_state)) { + if (states_equal(env, &sl->state, cur)) { /* reached equivalent register/stack state, * prune the search. * Registers read by the continuation are read by us. @@ -3705,7 +3833,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * they'll be immediately forgotten as we're pruning * this state and will pop a new one. */ - propagate_liveness(&sl->state, &env->cur_state); + propagate_liveness(&sl->state, cur); return 1; } sl = sl->next; @@ -3717,16 +3845,16 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * it will be rejected. Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); + new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL); if (!new_sl) return -ENOMEM; /* add new state to the head of linked list */ - memcpy(&new_sl->state, &env->cur_state, sizeof(env->cur_state)); + copy_verifier_state(&new_sl->state, cur); new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ - env->cur_state.parent = &new_sl->state; + cur->parent = &new_sl->state; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. * (There are no read marks in current state, because reads always mark @@ -3734,10 +3862,10 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) * explored_states can get read marks.) */ for (i = 0; i < BPF_REG_FP; i++) - env->cur_state.regs[i].live = REG_LIVE_NONE; - for (i = 0; i < MAX_BPF_STACK / BPF_REG_SIZE; i++) - if (env->cur_state.stack_slot_type[i * BPF_REG_SIZE] == STACK_SPILL) - env->cur_state.spilled_regs[i].live = REG_LIVE_NONE; + cur->regs[i].live = REG_LIVE_NONE; + for (i = 0; i < cur->allocated_stack / BPF_REG_SIZE; i++) + if (cur->stack[i].slot_type[0] == STACK_SPILL) + cur->stack[i].spilled_ptr.live = REG_LIVE_NONE; return 0; } @@ -3752,15 +3880,19 @@ static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, static int do_check(struct bpf_verifier_env *env) { - struct bpf_verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state; struct bpf_insn *insns = env->prog->insnsi; - struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; - init_reg_state(regs); + state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL); + if (!state) + return -ENOMEM; + env->cur_state = state; + init_reg_state(state->regs); state->parent = NULL; insn_idx = 0; for (;;) { @@ -3807,7 +3939,7 @@ static int do_check(struct bpf_verifier_env *env) else verbose("\nfrom %d to %d:", prev_insn_idx, insn_idx); - print_verifier_state(&env->cur_state); + print_verifier_state(env->cur_state); do_print_state = false; } @@ -3820,6 +3952,7 @@ static int do_check(struct bpf_verifier_env *env) if (err) return err; + regs = cur_regs(env); env->insn_aux_data[insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); @@ -3991,8 +4124,10 @@ static int do_check(struct bpf_verifier_env *env) } process_bpf_exit: - insn_idx = pop_stack(env, &prev_insn_idx); - if (insn_idx < 0) { + err = pop_stack(env, &prev_insn_idx, &insn_idx); + if (err < 0) { + if (err != -ENOENT) + return err; break; } else { do_print_state = true; @@ -4633,9 +4768,11 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); + free_verifier_state(env->cur_state); + env->cur_state = NULL; skip_full_check: - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); free_states(env); if (ret == 0) @@ -4741,9 +4878,11 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); + free_verifier_state(env->cur_state); + env->cur_state = NULL; skip_full_check: - while (pop_stack(env, NULL) >= 0); + while (!pop_stack(env, NULL, NULL)); free_states(env); mutex_unlock(&bpf_verifier_lock); From 534087e6c3375562635b734345dd7a00ae53ba31 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 3 Apr 2019 18:39:02 +0000 Subject: [PATCH 74/93] bpf: fix verifier memory leaks commit 1969db47f8d0e800397abd4ee4e8d27d2b578587 upstream. fix verifier memory leaks Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 805bc218150d..8293fc2f452a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -491,10 +491,12 @@ static int realloc_verifier_state(struct bpf_verifier_state *state, int size, return 0; } -static void free_verifier_state(struct bpf_verifier_state *state) +static void free_verifier_state(struct bpf_verifier_state *state, + bool free_self) { kfree(state->stack); - kfree(state); + if (free_self) + kfree(state); } /* copy verifier state from src to dst growing dst stack space @@ -532,6 +534,7 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, if (prev_insn_idx) *prev_insn_idx = head->prev_insn_idx; elem = head->next; + free_verifier_state(&head->st, false); kfree(head); env->head = elem; env->stack_size--; @@ -549,14 +552,14 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, if (!elem) goto err; - err = copy_verifier_state(&elem->st, cur); - if (err) - return NULL; elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; env->head = elem; env->stack_size++; + err = copy_verifier_state(&elem->st, cur); + if (err) + goto err; if (env->stack_size > BPF_COMPLEXITY_LIMIT_STACK) { verbose("BPF program is too complex\n"); goto err; @@ -3812,7 +3815,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; struct bpf_verifier_state *cur = env->cur_state; - int i; + int i, err; sl = env->explored_states[insn_idx]; if (!sl) @@ -3850,7 +3853,12 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return -ENOMEM; /* add new state to the head of linked list */ - copy_verifier_state(&new_sl->state, cur); + err = copy_verifier_state(&new_sl->state, cur); + if (err) { + free_verifier_state(&new_sl->state, false); + kfree(new_sl); + return err; + } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ @@ -4692,6 +4700,7 @@ static void free_states(struct bpf_verifier_env *env) if (sl) while (sl != STATE_LIST_MARK) { sln = sl->next; + free_verifier_state(&sl->state, false); kfree(sl); sl = sln; } @@ -4768,7 +4777,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state); + free_verifier_state(env->cur_state, true); env->cur_state = NULL; skip_full_check: @@ -4878,7 +4887,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state); + free_verifier_state(env->cur_state, true); env->cur_state = NULL; skip_full_check: From 86e5dd8cced7b1bffdae6154a1a7826ed8acf1f4 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Wed, 3 Apr 2019 18:39:03 +0000 Subject: [PATCH 75/93] bpf: fix verifier NULL pointer dereference commit 8c01c4f896aa3404af948880dcb29a2d51c833dc upstream. do_check() can fail early without allocating env->cur_state under memory pressure. Syzkaller found the stack below on the linux-next tree because of this. kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 27062 Comm: syz-executor5 Not tainted 4.14.0-rc7+ #106 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 task: ffff8801c2c74700 task.stack: ffff8801c3e28000 RIP: 0010:free_verifier_state kernel/bpf/verifier.c:347 [inline] RIP: 0010:bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 RSP: 0018:ffff8801c3e2f5c8 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: 00000000fffffff4 RCX: 0000000000000000 RDX: 0000000000000070 RSI: ffffffff817d5aa9 RDI: 0000000000000380 RBP: ffff8801c3e2f668 R08: 0000000000000000 R09: 1ffff100387c5d9f R10: 00000000218c4e80 R11: ffffffff85b34380 R12: ffff8801c4dc6a28 R13: 0000000000000000 R14: ffff8801c4dc6a00 R15: ffff8801c4dc6a20 FS: 00007f311079b700(0000) GS:ffff8801db300000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000004d4a24 CR3: 00000001cbcd0000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: bpf_prog_load+0xcbb/0x18e0 kernel/bpf/syscall.c:1166 SYSC_bpf kernel/bpf/syscall.c:1690 [inline] SyS_bpf+0xae9/0x4620 kernel/bpf/syscall.c:1652 entry_SYSCALL_64_fastpath+0x1f/0xbe RIP: 0033:0x452869 RSP: 002b:00007f311079abe8 EFLAGS: 00000212 ORIG_RAX: 0000000000000141 RAX: ffffffffffffffda RBX: 0000000000758020 RCX: 0000000000452869 RDX: 0000000000000030 RSI: 0000000020168000 RDI: 0000000000000005 RBP: 00007f311079aa20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000212 R12: 00000000004b7550 R13: 00007f311079ab58 R14: 00000000004b7560 R15: 0000000000000000 Code: df 48 c1 ea 03 80 3c 02 00 0f 85 e6 0b 00 00 4d 8b 6e 20 48 b8 00 00 00 00 00 fc ff df 49 8d bd 80 03 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 b6 0b 00 00 49 8b bd 80 03 00 00 e8 d6 0c 26 RIP: free_verifier_state kernel/bpf/verifier.c:347 [inline] RSP: ffff8801c3e2f5c8 RIP: bpf_check+0xcf4/0x19c0 kernel/bpf/verifier.c:4533 RSP: ffff8801c3e2f5c8 ---[ end trace c8d37f339dc64004 ]--- Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Fixes: 1969db47f8d0 ("bpf: fix verifier memory leaks") Signed-off-by: Craig Gallek Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 8293fc2f452a..68ff8ee42ba0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4777,8 +4777,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; + if (env->cur_state) { + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + } skip_full_check: while (!pop_stack(env, NULL, NULL)); @@ -4887,8 +4889,10 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); ret = do_check(env); - free_verifier_state(env->cur_state, true); - env->cur_state = NULL; + if (env->cur_state) { + free_verifier_state(env->cur_state, true); + env->cur_state = NULL; + } skip_full_check: while (!pop_stack(env, NULL, NULL)); From 85614894812adb00ddd1eb3e4b18ddb995dd61c1 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 3 Apr 2019 18:39:04 +0000 Subject: [PATCH 76/93] bpf: fix stack state printing in verifier log commit 12a3cc8424fe1237aaeb982dec4f0914ddd22f3e upstream. fix incorrect stack state prints in print_verifier_state() Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 68ff8ee42ba0..84e0338be0ad 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -268,7 +268,7 @@ static void print_verifier_state(struct bpf_verifier_state *state) for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { if (state->stack[i].slot_type[0] == STACK_SPILL) verbose(" fp%d=%s", - -MAX_BPF_STACK + i * BPF_REG_SIZE, + (-i - 1) * BPF_REG_SIZE, reg_type_str[state->stack[i].spilled_ptr.type]); } verbose("\n"); From 6a42c49482db5ab565a9dabb1642ff64ae0f45b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:05 +0000 Subject: [PATCH 77/93] bpf: move {prev_,}insn_idx into verifier env commit c08435ec7f2bc8f4109401f696fd55159b4b40cb upstream. Move prev_insn_idx and insn_idx from the do_check() function into the verifier environment, so they can be read inside the various helper functions for handling the instructions. It's easier to put this into the environment rather than changing all call-sites only to pass it along. insn_idx is useful in particular since this later on allows to hold state in env->insn_aux_data[env->insn_idx]. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Vallish Vaidyeshwara [Backported to 4.14 by sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 2 ++ kernel/bpf/verifier.c | 64 +++++++++++++++++------------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 0fc3d9afb8bf..3149a01d0f58 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -134,6 +134,8 @@ struct bpf_ext_analyzer_ops { * one verifier_env per bpf_check() call */ struct bpf_verifier_env { + u32 insn_idx; + u32 prev_insn_idx; struct bpf_prog *prog; /* eBPF program being verified */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 84e0338be0ad..be0edef26465 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3892,7 +3892,6 @@ static int do_check(struct bpf_verifier_env *env) struct bpf_insn *insns = env->prog->insnsi; struct bpf_reg_state *regs; int insn_cnt = env->prog->len; - int insn_idx, prev_insn_idx = 0; int insn_processed = 0; bool do_print_state = false; @@ -3902,19 +3901,18 @@ static int do_check(struct bpf_verifier_env *env) env->cur_state = state; init_reg_state(state->regs); state->parent = NULL; - insn_idx = 0; for (;;) { struct bpf_insn *insn; u8 class; int err; - if (insn_idx >= insn_cnt) { + if (env->insn_idx >= insn_cnt) { verbose("invalid insn idx %d insn_cnt %d\n", - insn_idx, insn_cnt); + env->insn_idx, insn_cnt); return -EFAULT; } - insn = &insns[insn_idx]; + insn = &insns[env->insn_idx]; class = BPF_CLASS(insn->code); if (++insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) { @@ -3923,7 +3921,7 @@ static int do_check(struct bpf_verifier_env *env) return -E2BIG; } - err = is_state_visited(env, insn_idx); + err = is_state_visited(env, env->insn_idx); if (err < 0) return err; if (err == 1) { @@ -3931,9 +3929,9 @@ static int do_check(struct bpf_verifier_env *env) if (log_level) { if (do_print_state) verbose("\nfrom %d to %d: safe\n", - prev_insn_idx, insn_idx); + env->prev_insn_idx, env->insn_idx); else - verbose("%d: safe\n", insn_idx); + verbose("%d: safe\n", env->insn_idx); } goto process_bpf_exit; } @@ -3943,25 +3941,25 @@ static int do_check(struct bpf_verifier_env *env) if (log_level > 1 || (log_level && do_print_state)) { if (log_level > 1) - verbose("%d:", insn_idx); + verbose("%d:", env->insn_idx); else verbose("\nfrom %d to %d:", - prev_insn_idx, insn_idx); + env->prev_insn_idx, env->insn_idx); print_verifier_state(env->cur_state); do_print_state = false; } if (log_level) { - verbose("%d: ", insn_idx); + verbose("%d: ", env->insn_idx); print_bpf_insn(env, insn); } - err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + err = ext_analyzer_insn_hook(env, env->insn_idx, env->prev_insn_idx); if (err) return err; regs = cur_regs(env); - env->insn_aux_data[insn_idx].seen = true; + env->insn_aux_data[env->insn_idx].seen = true; if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) @@ -3986,13 +3984,13 @@ static int do_check(struct bpf_verifier_env *env) /* check that memory (src_reg + off) is readable, * the state of dst_reg will be updated by this func */ - err = check_mem_access(env, insn_idx, insn->src_reg, insn->off, - BPF_SIZE(insn->code), BPF_READ, - insn->dst_reg, false); + err = check_mem_access(env, env->insn_idx, insn->src_reg, + insn->off, BPF_SIZE(insn->code), + BPF_READ, insn->dst_reg, false); if (err) return err; - prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_src_type == NOT_INIT) { /* saw a valid insn @@ -4019,10 +4017,10 @@ static int do_check(struct bpf_verifier_env *env) enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { - err = check_xadd(env, insn_idx, insn); + err = check_xadd(env, env->insn_idx, insn); if (err) return err; - insn_idx++; + env->insn_idx++; continue; } @@ -4038,13 +4036,13 @@ static int do_check(struct bpf_verifier_env *env) dst_reg_type = regs[insn->dst_reg].type; /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - insn->src_reg, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, insn->src_reg, false); if (err) return err; - prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type; if (*prev_dst_type == NOT_INIT) { *prev_dst_type = dst_reg_type; @@ -4073,9 +4071,9 @@ static int do_check(struct bpf_verifier_env *env) } /* check that memory (dst_reg + off) is writeable */ - err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, - BPF_SIZE(insn->code), BPF_WRITE, - -1, false); + err = check_mem_access(env, env->insn_idx, insn->dst_reg, + insn->off, BPF_SIZE(insn->code), + BPF_WRITE, -1, false); if (err) return err; @@ -4091,7 +4089,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - err = check_call(env, insn->imm, insn_idx); + err = check_call(env, insn->imm, env->insn_idx); if (err) return err; @@ -4104,7 +4102,7 @@ static int do_check(struct bpf_verifier_env *env) return -EINVAL; } - insn_idx += insn->off + 1; + env->insn_idx += insn->off + 1; continue; } else if (opcode == BPF_EXIT) { @@ -4132,7 +4130,7 @@ static int do_check(struct bpf_verifier_env *env) } process_bpf_exit: - err = pop_stack(env, &prev_insn_idx, &insn_idx); + err = pop_stack(env, &env->prev_insn_idx, &env->insn_idx); if (err < 0) { if (err != -ENOENT) return err; @@ -4142,7 +4140,7 @@ process_bpf_exit: continue; } } else { - err = check_cond_jmp_op(env, insn, &insn_idx); + err = check_cond_jmp_op(env, insn, &env->insn_idx); if (err) return err; } @@ -4159,8 +4157,8 @@ process_bpf_exit: if (err) return err; - insn_idx++; - env->insn_aux_data[insn_idx].seen = true; + env->insn_idx++; + env->insn_aux_data[env->insn_idx].seen = true; } else { verbose("invalid BPF_LD mode\n"); return -EINVAL; @@ -4170,7 +4168,7 @@ process_bpf_exit: return -EINVAL; } - insn_idx++; + env->insn_idx++; } verbose("processed %d insns, stack depth %d\n", From ee282977c63d1c195ee0de6df173b0f216d14c28 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:06 +0000 Subject: [PATCH 78/93] bpf: move tmp variable into ax register in interpreter commit 144cd91c4c2bced6eb8a7e25e590f6618a11e854 upstream. This change moves the on-stack 64 bit tmp variable in ___bpf_prog_run() into the hidden ax register. The latter is currently only used in JITs for constant blinding as a temporary scratch register, meaning the BPF interpreter will never see the use of ax. Therefore it is safe to use it for the cases where tmp has been used earlier. This is needed to later on allow restricted hidden use of ax in both interpreter and JITs. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [backported to 4.14 sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/filter.h | 3 ++- kernel/bpf/core.c | 31 ++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index 56d2cda9931b..f7880c716308 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -53,7 +53,8 @@ struct bpf_prog_aux; * constants. See JIT pre-step in bpf_jit_blind_constants(). */ #define BPF_REG_AX MAX_BPF_REG -#define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) +#define MAX_BPF_JIT_REG MAX_BPF_EXT_REG /* unused opcode to mark special call to bpf_tail_call() helper */ #define BPF_TAIL_CALL 0xf0 diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d203a5d6b726..e59a72c7d092 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -51,6 +51,7 @@ #define DST regs[insn->dst_reg] #define SRC regs[insn->src_reg] #define FP regs[BPF_REG_FP] +#define AX regs[BPF_REG_AX] #define ARG1 regs[BPF_REG_ARG1] #define CTX regs[BPF_REG_CTX] #define IMM insn->imm @@ -939,22 +940,22 @@ select_insn: ALU64_MOD_X: if (unlikely(SRC == 0)) return 0; - div64_u64_rem(DST, SRC, &tmp); - DST = tmp; + div64_u64_rem(DST, SRC, &AX); + DST = AX; CONT; ALU_MOD_X: if (unlikely((u32)SRC == 0)) return 0; - tmp = (u32) DST; - DST = do_div(tmp, (u32) SRC); + AX = (u32) DST; + DST = do_div(AX, (u32) SRC); CONT; ALU64_MOD_K: - div64_u64_rem(DST, IMM, &tmp); - DST = tmp; + div64_u64_rem(DST, IMM, &AX); + DST = AX; CONT; ALU_MOD_K: - tmp = (u32) DST; - DST = do_div(tmp, (u32) IMM); + AX = (u32) DST; + DST = do_div(AX, (u32) IMM); CONT; ALU64_DIV_X: if (unlikely(SRC == 0)) @@ -964,17 +965,17 @@ select_insn: ALU_DIV_X: if (unlikely((u32)SRC == 0)) return 0; - tmp = (u32) DST; - do_div(tmp, (u32) SRC); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) SRC); + DST = (u32) AX; CONT; ALU64_DIV_K: DST = div64_u64(DST, IMM); CONT; ALU_DIV_K: - tmp = (u32) DST; - do_div(tmp, (u32) IMM); - DST = (u32) tmp; + AX = (u32) DST; + do_div(AX, (u32) IMM); + DST = (u32) AX; CONT; ALU_END_TO_BE: switch (IMM) { @@ -1278,7 +1279,7 @@ STACK_FRAME_NON_STANDARD(___bpf_prog_run); /* jump table */ static unsigned int PROG_NAME(stack_size)(const void *ctx, const struct bpf_insn *insn) \ { \ u64 stack[stack_size / sizeof(u64)]; \ - u64 regs[MAX_BPF_REG]; \ + u64 regs[MAX_BPF_EXT_REG]; \ \ FP = (u64) (unsigned long) &stack[ARRAY_SIZE(stack)]; \ ARG1 = (u64) (unsigned long) ctx; \ From b101cf55c6da31b86ee8144ac0113b7ecfb7c06b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:07 +0000 Subject: [PATCH 79/93] bpf: enable access to ax register also from verifier rewrite commit 9b73bfdd08e73231d6a90ae6db4b46b3fbf56c30 upstream. Right now we are using BPF ax register in JIT for constant blinding as well as in interpreter as temporary variable. Verifier will not be able to use it simply because its use will get overridden from the former in bpf_jit_blind_insn(). However, it can be made to work in that blinding will be skipped if there is prior use in either source or destination register on the instruction. Taking constraints of ax into account, the verifier is then open to use it in rewrites under some constraints. Note, ax register already has mappings in every eBPF JIT. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [backported to 4.14 sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/filter.h | 7 +------ kernel/bpf/core.c | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/include/linux/filter.h b/include/linux/filter.h index f7880c716308..ac2272778f2e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -46,12 +46,7 @@ struct bpf_prog_aux; #define BPF_REG_X BPF_REG_7 #define BPF_REG_TMP BPF_REG_8 -/* Kernel hidden auxiliary/helper register for hardening step. - * Only used by eBPF JITs. It's nothing more than a temporary - * register that JITs use internally, only that here it's part - * of eBPF instructions that have been rewritten for blinding - * constants. See JIT pre-step in bpf_jit_blind_constants(). - */ +/* Kernel hidden auxiliary/helper register. */ #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_EXT_REG (MAX_BPF_REG + 1) #define MAX_BPF_JIT_REG MAX_BPF_EXT_REG diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index e59a72c7d092..e46106c6ac39 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -553,6 +553,26 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); BUILD_BUG_ON(MAX_BPF_REG + 1 != MAX_BPF_JIT_REG); + /* Constraints on AX register: + * + * AX register is inaccessible from user space. It is mapped in + * all JITs, and used here for constant blinding rewrites. It is + * typically "stateless" meaning its contents are only valid within + * the executed instruction, but not across several instructions. + * There are a few exceptions however which are further detailed + * below. + * + * Constant blinding is only used by JITs, not in the interpreter. + * The interpreter uses AX in some occasions as a local temporary + * register e.g. in DIV or MOD instructions. + * + * In restricted circumstances, the verifier can also use the AX + * register for rewrites as long as they do not interfere with + * the above cases! + */ + if (from->dst_reg == BPF_REG_AX || from->src_reg == BPF_REG_AX) + goto out; + if (from->imm == 0 && (from->code == (BPF_ALU | BPF_MOV | BPF_K) || from->code == (BPF_ALU64 | BPF_MOV | BPF_K))) { From afb711a6e8d003eead79432c026f6654ae303fcb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:08 +0000 Subject: [PATCH 80/93] bpf: restrict map value pointer arithmetic for unprivileged commit 0d6303db7970e6f56ae700fa07e11eb510cda125 upstream. Restrict map value pointer arithmetic for unprivileged users in that arithmetic itself must not go out of bounds as opposed to the actual access later on. Therefore after each adjust_ptr_min_max_vals() with a map value pointer as a destination it will simulate a check_map_access() of 1 byte on the destination and once that fails the program is rejected for unprivileged program loads. We use this later on for masking any pointer arithmetic with the remainder of the map value space. The likelihood of breaking any existing real-world unprivileged eBPF program is very small for this corner case. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index be0edef26465..096a15a884c9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2191,6 +2191,17 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, __update_reg_bounds(dst_reg); __reg_deduce_bounds(dst_reg); __reg_bound_offset(dst_reg); + + /* For unprivileged we require that resulting offset must be in bounds + * in order to be able to sanitize access later on. + */ + if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && + check_map_access(env, dst, dst_reg->off, 1, false)) { + verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", + dst); + return -EACCES; + } + return 0; } From ba9d2e0cc870d1a16195c801aa18b33cd70125b9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:09 +0000 Subject: [PATCH 81/93] bpf: restrict stack pointer arithmetic for unprivileged commit e4298d25830a866cc0f427d4bccb858e76715859 upstream. Restrict stack pointer arithmetic for unprivileged users in that arithmetic itself must not go out of bounds as opposed to the actual access later on. Therefore after each adjust_ptr_min_max_vals() with a stack pointer as a destination we simulate a check_stack_access() of 1 byte on the destination and once that fails the program is rejected for unprivileged program loads. This is analog to map value pointer arithmetic and needed for masking later on. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [backported to 4.14 sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 62 ++++++++++++++++++++++++++++--------------- 1 file changed, 41 insertions(+), 21 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 096a15a884c9..bc073789b693 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -940,6 +940,31 @@ static int check_stack_read(struct bpf_verifier_state *state, int off, int size, } } +static int check_stack_access(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, + int off, int size) +{ + /* Stack accesses must be at a fixed offset, so that we + * can determine what type of data were returned. See + * check_stack_read(). + */ + if (!tnum_is_const(reg->var_off)) { + char tn_buf[48]; + + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); + verbose("variable stack access var_off=%s off=%d size=%d", + tn_buf, off, size); + return -EACCES; + } + + if (off >= 0 || off < -MAX_BPF_STACK) { + verbose("invalid stack off=%d size=%d\n", off, size); + return -EACCES; + } + + return 0; +} + /* check read/write into map element returned by bpf_map_lookup_elem() */ static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) @@ -1322,23 +1347,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn } } else if (reg->type == PTR_TO_STACK) { - /* stack accesses must be at a fixed offset, so that we can - * determine what type of data were returned. - * See check_stack_read(). - */ - if (!tnum_is_const(reg->var_off)) { - char tn_buf[48]; - - tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); - verbose("variable stack access var_off=%s off=%d size=%d", - tn_buf, off, size); - return -EACCES; - } off += reg->var_off.value; - if (off >= 0 || off < -MAX_BPF_STACK) { - verbose("invalid stack off=%d size=%d\n", off, size); - return -EACCES; - } + err = check_stack_access(env, reg, off, size); + if (err) + return err; if (env->prog->aux->stack_depth < -off) env->prog->aux->stack_depth = -off; @@ -2195,11 +2207,19 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, /* For unprivileged we require that resulting offset must be in bounds * in order to be able to sanitize access later on. */ - if (!env->allow_ptr_leaks && dst_reg->type == PTR_TO_MAP_VALUE && - check_map_access(env, dst, dst_reg->off, 1, false)) { - verbose(env, "R%d pointer arithmetic of map value goes out of range, prohibited for !root\n", - dst); - return -EACCES; + if (!env->allow_ptr_leaks) { + if (dst_reg->type == PTR_TO_MAP_VALUE && + check_map_access(env, dst, dst_reg->off, 1)) { + verbose("R%d pointer arithmetic of map value goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } else if (dst_reg->type == PTR_TO_STACK && + check_stack_access(env, dst_reg, dst_reg->off + + dst_reg->var_off.value, 1)) { + verbose("R%d stack pointer arithmetic goes out of range, " + "prohibited for !root\n", dst); + return -EACCES; + } } return 0; From 17efa65350c5a251ba7d90a56abc65205a204a03 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:10 +0000 Subject: [PATCH 82/93] bpf: restrict unknown scalars of mixed signed bounds for unprivileged commit 9d7eceede769f90b66cfa06ad5b357140d5141ed upstream. For unknown scalars of mixed signed bounds, meaning their smin_value is negative and their smax_value is positive, we need to reject arithmetic with pointer to map value. For unprivileged the goal is to mask every map pointer arithmetic and this cannot reliably be done when it is unknown at verification time whether the scalar value is negative or positive. Given this is a corner case, the likelihood of breaking should be very small. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [backported to 4.14 sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index bc073789b693..c4b81d15fbc2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2014,8 +2014,8 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, smin_ptr = ptr_reg->smin_value, smax_ptr = ptr_reg->smax_value; u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; + u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); - u32 dst = insn->dst_reg; dst_reg = ®s[dst]; @@ -2189,6 +2189,13 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, verbose("R%d bitwise operator %s on pointer prohibited\n", dst, bpf_alu_string[opcode >> 4]); return -EACCES; + case PTR_TO_MAP_VALUE: + if (!env->allow_ptr_leaks && !known && (smin_val < 0) != (smax_val < 0)) { + verbose("R%d has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root\n", + off_reg == dst_reg ? dst : src); + return -EACCES; + } + /* fall-through */ default: /* other operators (e.g. MUL,LSH) produce non-pointer results */ if (!env->allow_ptr_leaks) From 4b756d9f7929f035e4e18cfa36124d9ae6b49cdf Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:11 +0000 Subject: [PATCH 83/93] bpf: fix check_map_access smin_value test when pointer contains offset commit b7137c4eab85c1cf3d46acdde90ce1163b28c873 upstream. In check_map_access() we probe actual bounds through __check_map_access() with offset of reg->smin_value + off for lower bound and offset of reg->umax_value + off for the upper bound. However, even though the reg->smin_value could have a negative value, the final result of the sum with off could be positive when pointer arithmetic with known and unknown scalars is combined. In this case we reject the program with an error such as "R min value is negative, either use unsigned index or do a if (index >=0) check." even though the access itself would be fine. Therefore extend the check to probe whether the actual resulting reg->smin_value + off is less than zero. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov [backported to 4.14 sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c4b81d15fbc2..6eb24942554c 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -994,13 +994,17 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, */ if (log_level) print_verifier_state(state); + /* The minimum value is only important with signed * comparisons where we can't assume the floor of a * value is 0. If we are using signed variables for our * index'es we need to make sure that whatever we use * will have a set floor within our range. */ - if (reg->smin_value < 0) { + if (reg->smin_value < 0 && + (reg->smin_value == S64_MIN || + (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) || + reg->smin_value + off < 0)) { verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno); return -EACCES; From ae03b6b1c880a03d4771257336dc3bca156dd51b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:12 +0000 Subject: [PATCH 84/93] bpf: prevent out of bounds speculation on pointer arithmetic commit 979d63d50c0c0f7bc537bf821e056cc9fe5abd38 upstream. Jann reported that the original commit back in b2157399cc98 ("bpf: prevent out-of-bounds speculation") was not sufficient to stop CPU from speculating out of bounds memory access: While b2157399cc98 only focussed on masking array map access for unprivileged users for tail calls and data access such that the user provided index gets sanitized from BPF program and syscall side, there is still a more generic form affected from BPF programs that applies to most maps that hold user data in relation to dynamic map access when dealing with unknown scalars or "slow" known scalars as access offset, for example: - Load a map value pointer into R6 - Load an index into R7 - Do a slow computation (e.g. with a memory dependency) that loads a limit into R8 (e.g. load the limit from a map for high latency, then mask it to make the verifier happy) - Exit if R7 >= R8 (mispredicted branch) - Load R0 = R6[R7] - Load R0 = R6[R0] For unknown scalars there are two options in the BPF verifier where we could derive knowledge from in order to guarantee safe access to the memory: i) While /<=/>= variants won't allow to derive any lower or upper bounds from the unknown scalar where it would be safe to add it to the map value pointer, it is possible through ==/!= test however. ii) another option is to transform the unknown scalar into a known scalar, for example, through ALU ops combination such as R &= followed by R |= or any similar combination where the original information from the unknown scalar would be destroyed entirely leaving R with a constant. The initial slow load still precedes the latter ALU ops on that register, so the CPU executes speculatively from that point. Once we have the known scalar, any compare operation would work then. A third option only involving registers with known scalars could be crafted as described in [0] where a CPU port (e.g. Slow Int unit) would be filled with many dependent computations such that the subsequent condition depending on its outcome has to wait for evaluation on its execution port and thereby executing speculatively if the speculated code can be scheduled on a different execution port, or any other form of mistraining as described in [1], for example. Given this is not limited to only unknown scalars, not only map but also stack access is affected since both is accessible for unprivileged users and could potentially be used for out of bounds access under speculation. In order to prevent any of these cases, the verifier is now sanitizing pointer arithmetic on the offset such that any out of bounds speculation would be masked in a way where the pointer arithmetic result in the destination register will stay unchanged, meaning offset masked into zero similar as in array_index_nospec() case. With regards to implementation, there are three options that were considered: i) new insn for sanitation, ii) push/pop insn and sanitation as inlined BPF, iii) reuse of ax register and sanitation as inlined BPF. Option i) has the downside that we end up using from reserved bits in the opcode space, but also that we would require each JIT to emit masking as native arch opcodes meaning mitigation would have slow adoption till everyone implements it eventually which is counter-productive. Option ii) and iii) have both in common that a temporary register is needed in order to implement the sanitation as inlined BPF since we are not allowed to modify the source register. While a push / pop insn in ii) would be useful to have in any case, it requires once again that every JIT needs to implement it first. While possible, amount of changes needed would also be unsuitable for a -stable patch. Therefore, the path which has fewer changes, less BPF instructions for the mitigation and does not require anything to be changed in the JITs is option iii) which this work is pursuing. The ax register is already mapped to a register in all JITs (modulo arm32 where it's mapped to stack as various other BPF registers there) and used in constant blinding for JITs-only so far. It can be reused for verifier rewrites under certain constraints. The interpreter's tmp "register" has therefore been remapped into extending the register set with hidden ax register and reusing that for a number of instructions that needed the prior temporary variable internally (e.g. div, mod). This allows for zero increase in stack space usage in the interpreter, and enables (restricted) generic use in rewrites otherwise as long as such a patchlet does not make use of these instructions. The sanitation mask is dynamic and relative to the offset the map value or stack pointer currently holds. There are various cases that need to be taken under consideration for the masking, e.g. such operation could look as follows: ptr += val or val += ptr or ptr -= val. Thus, the value to be sanitized could reside either in source or in destination register, and the limit is different depending on whether the ALU op is addition or subtraction and depending on the current known and bounded offset. The limit is derived as follows: limit := max_value_size - (smin_value + off). For subtraction: limit := umax_value + off. This holds because we do not allow any pointer arithmetic that would temporarily go out of bounds or would have an unknown value with mixed signed bounds where it is unclear at verification time whether the actual runtime value would be either negative or positive. For example, we have a derived map pointer value with constant offset and bounded one, so limit based on smin_value works because the verifier requires that statically analyzed arithmetic on the pointer must be in bounds, and thus it checks if resulting smin_value + off and umax_value + off is still within map value bounds at time of arithmetic in addition to time of access. Similarly, for the case of stack access we derive the limit as follows: MAX_BPF_STACK + off for subtraction and -off for the case of addition where off := ptr_reg->off + ptr_reg->var_off.value. Subtraction is a special case for the masking which can be in form of ptr += -val, ptr -= -val, or ptr -= val. In the first two cases where we know that the value is negative, we need to temporarily negate the value in order to do the sanitation on a positive value where we later swap the ALU op, and restore original source register if the value was in source. The sanitation of pointer arithmetic alone is still not fully sufficient as is, since a scenario like the following could happen ... PTR += 0x1000 (e.g. K-based imm) PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON PTR += 0x1000 PTR -= BIG_NUMBER_WITH_SLOW_COMPARISON [...] ... which under speculation could end up as ... PTR += 0x1000 PTR -= 0 [ truncated by mitigation ] PTR += 0x1000 PTR -= 0 [ truncated by mitigation ] [...] ... and therefore still access out of bounds. To prevent such case, the verifier is also analyzing safety for potential out of bounds access under speculative execution. Meaning, it is also simulating pointer access under truncation. We therefore "branch off" and push the current verification state after the ALU operation with known 0 to the verification stack for later analysis. Given the current path analysis succeeded it is likely that the one under speculation can be pruned. In any case, it is also subject to existing complexity limits and therefore anything beyond this point will be rejected. In terms of pruning, it needs to be ensured that the verification state from speculative execution simulation must never prune a non-speculative execution path, therefore, we mark verifier state accordingly at the time of push_stack(). If verifier detects out of bounds access under speculative execution from one of the possible paths that includes a truncation, it will reject such program. Given we mask every reg-based pointer arithmetic for unprivileged programs, we've been looking into how it could affect real-world programs in terms of size increase. As the majority of programs are targeted for privileged-only use case, we've unconditionally enabled masking (with its alu restrictions on top of it) for privileged programs for the sake of testing in order to check i) whether they get rejected in its current form, and ii) by how much the number of instructions and size will increase. We've tested this by using Katran, Cilium and test_l4lb from the kernel selftests. For Katran we've evaluated balancer_kern.o, Cilium bpf_lxc.o and an older test object bpf_lxc_opt_-DUNKNOWN.o and l4lb we've used test_l4lb.o as well as test_l4lb_noinline.o. We found that none of the programs got rejected by the verifier with this change, and that impact is rather minimal to none. balancer_kern.o had 13,904 bytes (1,738 insns) xlated and 7,797 bytes JITed before and after the change. Most complex program in bpf_lxc.o had 30,544 bytes (3,817 insns) xlated and 18,538 bytes JITed before and after and none of the other tail call programs in bpf_lxc.o had any changes either. For the older bpf_lxc_opt_-DUNKNOWN.o object we found a small increase from 20,616 bytes (2,576 insns) and 12,536 bytes JITed before to 20,664 bytes (2,582 insns) and 12,558 bytes JITed after the change. Other programs from that object file had similar small increase. Both test_l4lb.o had no change and remained at 6,544 bytes (817 insns) xlated and 3,401 bytes JITed and for test_l4lb_noinline.o constant at 5,080 bytes (634 insns) xlated and 3,313 bytes JITed. This can be explained in that LLVM typically optimizes stack based pointer arithmetic by using K-based operations and that use of dynamic map access is not overly frequent. However, in future we may decide to optimize the algorithm further under known guarantees from branch and value speculation. Latter seems also unclear in terms of prediction heuristics that today's CPUs apply as well as whether there could be collisions in e.g. the predictor's Value History/Pattern Table for triggering out of bounds access, thus masking is performed unconditionally at this point but could be subject to relaxation later on. We were generally also brainstorming various other approaches for mitigation, but the blocker was always lack of available registers at runtime and/or overhead for runtime tracking of limits belonging to a specific pointer. Thus, we found this to be minimally intrusive under given constraints. With that in place, a simple example with sanitized access on unprivileged load at post-verification time looks as follows: # bpftool prog dump xlated id 282 [...] 28: (79) r1 = *(u64 *)(r7 +0) 29: (79) r2 = *(u64 *)(r7 +8) 30: (57) r1 &= 15 31: (79) r3 = *(u64 *)(r0 +4608) 32: (57) r3 &= 1 33: (47) r3 |= 1 34: (2d) if r2 > r3 goto pc+19 35: (b4) (u32) r11 = (u32) 20479 | 36: (1f) r11 -= r2 | Dynamic sanitation for pointer 37: (4f) r11 |= r2 | arithmetic with registers 38: (87) r11 = -r11 | containing bounded or known 39: (c7) r11 s>>= 63 | scalars in order to prevent 40: (5f) r11 &= r2 | out of bounds speculation. 41: (0f) r4 += r11 | 42: (71) r4 = *(u8 *)(r4 +0) 43: (6f) r4 <<= r1 [...] For the case where the scalar sits in the destination register as opposed to the source register, the following code is emitted for the above example: [...] 16: (b4) (u32) r11 = (u32) 20479 17: (1f) r11 -= r2 18: (4f) r11 |= r2 19: (87) r11 = -r11 20: (c7) r11 s>>= 63 21: (5f) r2 &= r11 22: (0f) r2 += r0 23: (61) r0 = *(u32 *)(r2 +0) [...] JIT blinding example with non-conflicting use of r10: [...] d5: je 0x0000000000000106 _ d7: mov 0x0(%rax),%edi | da: mov $0xf153246,%r10d | Index load from map value and e0: xor $0xf153259,%r10 | (const blinded) mask with 0x1f. e7: and %r10,%rdi |_ ea: mov $0x2f,%r10d | f0: sub %rdi,%r10 | Sanitized addition. Both use r10 f3: or %rdi,%r10 | but do not interfere with each f6: neg %r10 | other. (Neither do these instructions f9: sar $0x3f,%r10 | interfere with the use of ax as temp fd: and %r10,%rdi | in interpreter.) 100: add %rax,%rdi |_ 103: mov 0x0(%rdi),%eax [...] Tested that it fixes Jann's reproducer, and also checked that test_verifier and test_progs suite with interpreter, JIT and JIT with hardening enabled on x86-64 and arm64 runs successfully. [0] Speculose: Analyzing the Security Implications of Speculative Execution in CPUs, Giorgi Maisuradze and Christian Rossow, https://arxiv.org/pdf/1801.04084.pdf [1] A Systematic Evaluation of Transient Execution Attacks and Defenses, Claudio Canella, Jo Van Bulck, Michael Schwarz, Moritz Lipp, Benjamin von Berg, Philipp Ortner, Frank Piessens, Dmitry Evtyushkin, Daniel Gruss, https://arxiv.org/pdf/1811.05441.pdf Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Vallish Vaidyeshwara [some checkpatch cleanups and backported to 4.14 by sblbir] Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 10 ++ kernel/bpf/verifier.c | 184 +++++++++++++++++++++++++++++++++-- 2 files changed, 188 insertions(+), 6 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3149a01d0f58..10d85378e690 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -104,6 +104,7 @@ struct bpf_verifier_state { struct bpf_verifier_state *parent; int allocated_stack; struct bpf_stack_state *stack; + bool speculative; }; /* linked list of verifier states used to prune search */ @@ -112,14 +113,23 @@ struct bpf_verifier_state_list { struct bpf_verifier_state_list *next; }; +/* Possible states for alu_state member. */ +#define BPF_ALU_SANITIZE_SRC 1U +#define BPF_ALU_SANITIZE_DST 2U +#define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ + BPF_ALU_SANITIZE_DST) + struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + u32 alu_limit; /* limit for add/sub register with pointer */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ + u8 alu_state; /* used in combination with alu_limit */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6eb24942554c..dc4b0607f2c5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -542,7 +542,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, } static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, - int insn_idx, int prev_insn_idx) + int insn_idx, int prev_insn_idx, + bool speculative) { struct bpf_verifier_stack_elem *elem; struct bpf_verifier_state *cur = env->cur_state; @@ -555,6 +556,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, elem->insn_idx = insn_idx; elem->prev_insn_idx = prev_insn_idx; elem->next = env->head; + elem->st.speculative |= speculative; env->head = elem; env->stack_size++; err = copy_verifier_state(&elem->st, cur); @@ -2002,6 +2004,102 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env, return true; } +static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env) +{ + return &env->insn_aux_data[env->insn_idx]; +} + +static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, + u32 *ptr_limit, u8 opcode, bool off_is_neg) +{ + bool mask_to_left = (opcode == BPF_ADD && off_is_neg) || + (opcode == BPF_SUB && !off_is_neg); + u32 off; + + switch (ptr_reg->type) { + case PTR_TO_STACK: + off = ptr_reg->off + ptr_reg->var_off.value; + if (mask_to_left) + *ptr_limit = MAX_BPF_STACK + off; + else + *ptr_limit = -off; + return 0; + case PTR_TO_MAP_VALUE: + if (mask_to_left) { + *ptr_limit = ptr_reg->umax_value + ptr_reg->off; + } else { + off = ptr_reg->smin_value + ptr_reg->off; + *ptr_limit = ptr_reg->map_ptr->value_size - off; + } + return 0; + default: + return -EINVAL; + } +} + +static int sanitize_ptr_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn, + const struct bpf_reg_state *ptr_reg, + struct bpf_reg_state *dst_reg, + bool off_is_neg) +{ + struct bpf_verifier_state *vstate = env->cur_state; + struct bpf_insn_aux_data *aux = cur_aux(env); + bool ptr_is_dst_reg = ptr_reg == dst_reg; + u8 opcode = BPF_OP(insn->code); + u32 alu_state, alu_limit; + struct bpf_reg_state tmp; + bool ret; + + if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + return 0; + + /* We already marked aux for masking from non-speculative + * paths, thus we got here in the first place. We only care + * to explore bad access from here. + */ + if (vstate->speculative) + goto do_sim; + + alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0; + alu_state |= ptr_is_dst_reg ? + BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST; + + if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) + return 0; + + /* If we arrived here from different branches with different + * limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + +do_sim: + /* Simulate and find potential out-of-bounds access under + * speculative execution from truncation as a result of + * masking when off was not within expected range. If off + * sits in dst, then we temporarily need to move ptr there + * to simulate dst (== 0) +/-= ptr. Needed, for example, + * for cases where we use K-based arithmetic in one direction + * and truncated reg-based in the other in order to explore + * bad access. + */ + if (!ptr_is_dst_reg) { + tmp = *dst_reg; + *dst_reg = *ptr_reg; + } + ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); + if (!ptr_is_dst_reg) + *dst_reg = tmp; + return !ret ? -EFAULT : 0; +} + /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off. * Caller should also handle BPF_MOV case separately. * If we return -EACCES, caller may want to try again treating pointer as a @@ -2020,6 +2118,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, umin_ptr = ptr_reg->umin_value, umax_ptr = ptr_reg->umax_value; u32 dst = insn->dst_reg, src = insn->src_reg; u8 opcode = BPF_OP(insn->code); + int ret; dst_reg = ®s[dst]; @@ -2071,6 +2170,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose("R%d tried to add from different maps or paths\n", dst); + return ret; + } /* We can take a fixed offset as long as it doesn't overflow * the s32 'off' field */ @@ -2121,6 +2225,11 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, } break; case BPF_SUB: + ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0); + if (ret < 0) { + verbose("R%d tried to sub from different maps or paths\n", dst); + return ret; + } if (dst_reg == off_reg) { /* scalar -= pointer. Creates an unknown scalar */ if (!env->allow_ptr_leaks) @@ -3132,7 +3241,8 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, } } - other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx); + other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, + false); if (!other_branch) return -EFAULT; @@ -3767,6 +3877,12 @@ static bool states_equal(struct bpf_verifier_env *env, bool ret = false; int i; + /* Verification state from speculative execution simulation + * must never prune a non-speculative execution one. + */ + if (old->speculative && !cur->speculative) + return false; + idmap = kcalloc(ID_MAP_SIZE, sizeof(struct idpair), GFP_KERNEL); /* If we failed to allocate the idmap, just say it's not safe */ if (!idmap) @@ -3970,8 +4086,10 @@ static int do_check(struct bpf_verifier_env *env) /* found equivalent state, can prune the search */ if (log_level) { if (do_print_state) - verbose("\nfrom %d to %d: safe\n", - env->prev_insn_idx, env->insn_idx); + verbose("\nfrom %d to %d%s: safe\n", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); else verbose("%d: safe\n", env->insn_idx); } @@ -3985,8 +4103,10 @@ static int do_check(struct bpf_verifier_env *env) if (log_level > 1) verbose("%d:", env->insn_idx); else - verbose("\nfrom %d to %d:", - env->prev_insn_idx, env->insn_idx); + verbose("\nfrom %d to %d%s:", + env->prev_insn_idx, env->insn_idx, + env->cur_state->speculative ? + " (speculative execution)" : ""); print_verifier_state(env->cur_state); do_print_state = false; } @@ -4585,6 +4705,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) struct bpf_prog *new_prog; struct bpf_map *map_ptr; int i, cnt, delta = 0; + struct bpf_insn_aux_data *aux; for (i = 0; i < insn_cnt; i++, insn++) { if (insn->code == (BPF_ALU | BPF_MOD | BPF_X) || @@ -4605,6 +4726,57 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) continue; } + if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || + insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) { + const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X; + const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X; + struct bpf_insn insn_buf[16]; + struct bpf_insn *patch = &insn_buf[0]; + bool issrc, isneg; + u32 off_reg; + + aux = &env->insn_aux_data[i + delta]; + if (!aux->alu_state) + continue; + + isneg = aux->alu_state & BPF_ALU_NEG_VALUE; + issrc = (aux->alu_state & BPF_ALU_SANITIZE) == + BPF_ALU_SANITIZE_SRC; + + off_reg = issrc ? insn->src_reg : insn->dst_reg; + if (isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1); + *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg); + *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0); + *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63); + if (issrc) { + *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, + off_reg); + insn->src_reg = BPF_REG_AX; + } else { + *patch++ = BPF_ALU64_REG(BPF_AND, off_reg, + BPF_REG_AX); + } + if (isneg) + insn->code = insn->code == code_add ? + code_sub : code_add; + *patch++ = *insn; + if (issrc && isneg) + *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1); + cnt = patch - insn_buf; + + new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + + delta += cnt - 1; + env->prog = prog = new_prog; + insn = new_prog->insnsi + i + delta; + continue; + } + if (insn->code != (BPF_JMP | BPF_CALL)) continue; From 6588a490bfe1b879f11b5e74724ef53a33b68641 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:13 +0000 Subject: [PATCH 85/93] bpf: fix sanitation of alu op with pointer / scalar type from different paths commit d3bd7413e0ca40b60cf60d4003246d067cafdeda upstream. While 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") took care of rejecting alu op on pointer when e.g. pointer came from two different map values with different map properties such as value size, Jann reported that a case was not covered yet when a given alu op is used in both "ptr_reg += reg" and "numeric_reg += reg" from different branches where we would incorrectly try to sanitize based on the pointer's limit. Catch this corner case and reject the program instead. Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Reported-by: Jann Horn Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov Signed-off-by: Vallish Vaidyeshwara Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- include/linux/bpf_verifier.h | 1 + kernel/bpf/verifier.c | 61 ++++++++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 13 deletions(-) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 10d85378e690..d8b3240cfe6e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -117,6 +117,7 @@ struct bpf_verifier_state_list { #define BPF_ALU_SANITIZE_SRC 1U #define BPF_ALU_SANITIZE_DST 2U #define BPF_ALU_NEG_VALUE (1U << 2) +#define BPF_ALU_NON_POINTER (1U << 3) #define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \ BPF_ALU_SANITIZE_DST) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index dc4b0607f2c5..ec949a4a7d89 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2037,6 +2037,40 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, } } +static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, + const struct bpf_insn *insn) +{ + return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K; +} + +static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, + u32 alu_state, u32 alu_limit) +{ + /* If we arrived here from different branches with different + * state or limits to sanitize, then this won't work. + */ + if (aux->alu_state && + (aux->alu_state != alu_state || + aux->alu_limit != alu_limit)) + return -EACCES; + + /* Corresponding fixup done in fixup_bpf_calls(). */ + aux->alu_state = alu_state; + aux->alu_limit = alu_limit; + return 0; +} + +static int sanitize_val_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_insn_aux_data *aux = cur_aux(env); + + if (can_skip_alu_sanitation(env, insn)) + return 0; + + return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0); +} + static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg, @@ -2051,7 +2085,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_reg_state tmp; bool ret; - if (env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K) + if (can_skip_alu_sanitation(env, insn)) return 0; /* We already marked aux for masking from non-speculative @@ -2067,19 +2101,8 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env, if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg)) return 0; - - /* If we arrived here from different branches with different - * limits to sanitize, then this won't work. - */ - if (aux->alu_state && - (aux->alu_state != alu_state || - aux->alu_limit != alu_limit)) + if (update_alu_sanitation_state(aux, alu_state, alu_limit)) return -EACCES; - - /* Corresponding fixup done in fixup_bpf_calls(). */ - aux->alu_state = alu_state; - aux->alu_limit = alu_limit; - do_sim: /* Simulate and find potential out-of-bounds access under * speculative execution from truncation as a result of @@ -2360,6 +2383,8 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, s64 smin_val, smax_val; u64 umin_val, umax_val; u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? 64 : 32; + u32 dst = insn->dst_reg; + int ret; if (insn_bitness == 32) { /* Relevant for 32-bit RSH: Information can propagate towards @@ -2394,6 +2419,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, switch (opcode) { case BPF_ADD: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose("R%d tried to add from different pointers or scalars\n", dst); + return ret; + } if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) { dst_reg->smin_value = S64_MIN; @@ -2413,6 +2443,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off); break; case BPF_SUB: + ret = sanitize_val_alu(env, insn); + if (ret < 0) { + verbose("R%d tried to sub from different pointers or scalars\n", dst); + return ret; + } if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) { /* Overflow possible, we know nothing */ From 0ed998d17cd421da18334745af7aee246abe4c70 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:14 +0000 Subject: [PATCH 86/93] bpf: fix inner map masking to prevent oob under speculation commit 9d5564ddcf2a0f5ba3fa1c3a1f8a1b59ad309553 upstream. During review I noticed that inner meta map setup for map in map is buggy in that it does not propagate all needed data from the reference map which the verifier is later accessing. In particular one such case is index masking to prevent out of bounds access under speculative execution due to missing the map's unpriv_array/index_mask field propagation. Fix this such that the verifier is generating the correct code for inlined lookups in case of unpriviledged use. Before patch (test_verifier's 'map in map access' dump): # bpftool prog dump xla id 3 0: (62) *(u32 *)(r10 -4) = 0 1: (bf) r2 = r10 2: (07) r2 += -4 3: (18) r1 = map[id:4] 5: (07) r1 += 272 | 6: (61) r0 = *(u32 *)(r2 +0) | 7: (35) if r0 >= 0x1 goto pc+6 | Inlined map in map lookup 8: (54) (u32) r0 &= (u32) 0 | with index masking for 9: (67) r0 <<= 3 | map->unpriv_array. 10: (0f) r0 += r1 | 11: (79) r0 = *(u64 *)(r0 +0) | 12: (15) if r0 == 0x0 goto pc+1 | 13: (05) goto pc+1 | 14: (b7) r0 = 0 | 15: (15) if r0 == 0x0 goto pc+11 16: (62) *(u32 *)(r10 -4) = 0 17: (bf) r2 = r10 18: (07) r2 += -4 19: (bf) r1 = r0 20: (07) r1 += 272 | 21: (61) r0 = *(u32 *)(r2 +0) | Index masking missing (!) 22: (35) if r0 >= 0x1 goto pc+3 | for inner map despite 23: (67) r0 <<= 3 | map->unpriv_array set. 24: (0f) r0 += r1 | 25: (05) goto pc+1 | 26: (b7) r0 = 0 | 27: (b7) r0 = 0 28: (95) exit After patch: # bpftool prog dump xla id 1 0: (62) *(u32 *)(r10 -4) = 0 1: (bf) r2 = r10 2: (07) r2 += -4 3: (18) r1 = map[id:2] 5: (07) r1 += 272 | 6: (61) r0 = *(u32 *)(r2 +0) | 7: (35) if r0 >= 0x1 goto pc+6 | Same inlined map in map lookup 8: (54) (u32) r0 &= (u32) 0 | with index masking due to 9: (67) r0 <<= 3 | map->unpriv_array. 10: (0f) r0 += r1 | 11: (79) r0 = *(u64 *)(r0 +0) | 12: (15) if r0 == 0x0 goto pc+1 | 13: (05) goto pc+1 | 14: (b7) r0 = 0 | 15: (15) if r0 == 0x0 goto pc+12 16: (62) *(u32 *)(r10 -4) = 0 17: (bf) r2 = r10 18: (07) r2 += -4 19: (bf) r1 = r0 20: (07) r1 += 272 | 21: (61) r0 = *(u32 *)(r2 +0) | 22: (35) if r0 >= 0x1 goto pc+4 | Now fixed inlined inner map 23: (54) (u32) r0 &= (u32) 0 | lookup with proper index masking 24: (67) r0 <<= 3 | for map->unpriv_array. 25: (0f) r0 += r1 | 26: (05) goto pc+1 | 27: (b7) r0 = 0 | 28: (b7) r0 = 0 29: (95) exit Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Signed-off-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Signed-off-by: Vallish Vaidyeshwara Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/map_in_map.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 1da574612bea..c0c494b7647b 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -12,6 +12,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) { struct bpf_map *inner_map, *inner_map_meta; + u32 inner_map_meta_size; struct fd f; f = fdget(inner_map_ufd); @@ -34,7 +35,12 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) return ERR_PTR(-EINVAL); } - inner_map_meta = kzalloc(sizeof(*inner_map_meta), GFP_USER); + inner_map_meta_size = sizeof(*inner_map_meta); + /* In some cases verifier needs to access beyond just base map. */ + if (inner_map->ops == &array_map_ops) + inner_map_meta_size = sizeof(struct bpf_array); + + inner_map_meta = kzalloc(inner_map_meta_size, GFP_USER); if (!inner_map_meta) { fdput(f); return ERR_PTR(-ENOMEM); @@ -44,9 +50,16 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->key_size = inner_map->key_size; inner_map_meta->value_size = inner_map->value_size; inner_map_meta->map_flags = inner_map->map_flags; - inner_map_meta->ops = inner_map->ops; inner_map_meta->max_entries = inner_map->max_entries; + /* Misc members not needed in bpf_map_meta_equal() check. */ + inner_map_meta->ops = inner_map->ops; + if (inner_map->ops == &array_map_ops) { + inner_map_meta->unpriv_array = inner_map->unpriv_array; + container_of(inner_map_meta, struct bpf_array, map)->index_mask = + container_of(inner_map, struct bpf_array, map)->index_mask; + } + fdput(f); return inner_map_meta; } From 12462c88e6e281cd136ae7c93e85fadfcf20722f Mon Sep 17 00:00:00 2001 From: Xu Yu Date: Wed, 3 Apr 2019 18:39:15 +0000 Subject: [PATCH 87/93] bpf: do not restore dst_reg when cur_state is freed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 0803278b0b4d8eeb2b461fb698785df65a725d9e upstream. Syzkaller hit 'KASAN: use-after-free Write in sanitize_ptr_alu' bug. Call trace: dump_stack+0xbf/0x12e print_address_description+0x6a/0x280 kasan_report+0x237/0x360 sanitize_ptr_alu+0x85a/0x8d0 adjust_ptr_min_max_vals+0x8f2/0x1ca0 adjust_reg_min_max_vals+0x8ed/0x22e0 do_check+0x1ca6/0x5d00 bpf_check+0x9ca/0x2570 bpf_prog_load+0xc91/0x1030 __se_sys_bpf+0x61e/0x1f00 do_syscall_64+0xc8/0x550 entry_SYSCALL_64_after_hwframe+0x49/0xbe Fault injection trace:  kfree+0xea/0x290  free_func_state+0x4a/0x60  free_verifier_state+0x61/0xe0  push_stack+0x216/0x2f0 <- inject failslab  sanitize_ptr_alu+0x2b1/0x8d0  adjust_ptr_min_max_vals+0x8f2/0x1ca0  adjust_reg_min_max_vals+0x8ed/0x22e0  do_check+0x1ca6/0x5d00  bpf_check+0x9ca/0x2570  bpf_prog_load+0xc91/0x1030  __se_sys_bpf+0x61e/0x1f00  do_syscall_64+0xc8/0x550  entry_SYSCALL_64_after_hwframe+0x49/0xbe When kzalloc() fails in push_stack(), free_verifier_state() will free current verifier state. As push_stack() returns, dst_reg was restored if ptr_is_dst_reg is false. However, as member of the cur_state, dst_reg is also freed, and error occurs when dereferencing dst_reg. Simply fix it by testing ret of push_stack() before restoring dst_reg. Fixes: 979d63d50c0c ("bpf: prevent out of bounds speculation on pointer arithmetic") Signed-off-by: Xu Yu Signed-off-by: Daniel Borkmann Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ec949a4a7d89..87b1293f20d6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2118,7 +2118,7 @@ do_sim: *dst_reg = *ptr_reg; } ret = push_stack(env, env->insn_idx + 1, env->insn_idx, true); - if (!ptr_is_dst_reg) + if (!ptr_is_dst_reg && ret) *dst_reg = tmp; return !ret ? -EFAULT : 0; } From a042c21a2341979614a2b48d1ec46de5301d3f78 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 Apr 2019 18:39:16 +0000 Subject: [PATCH 88/93] bpf: fix sanitation rewrite in case of non-pointers commit 3612af783cf52c74a031a2f11b82247b2599d3cd upstream. Marek reported that he saw an issue with the below snippet in that timing measurements where off when loaded as unpriv while results were reasonable when loaded as privileged: [...] uint64_t a = bpf_ktime_get_ns(); uint64_t b = bpf_ktime_get_ns(); uint64_t delta = b - a; if ((int64_t)delta > 0) { [...] Turns out there is a bug where a corner case is missing in the fix d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths"), namely fixup_bpf_calls() only checks whether aux has a non-zero alu_state, but it also needs to test for the case of BPF_ALU_NON_POINTER since in both occasions we need to skip the masking rewrite (as there is nothing to mask). Fixes: d3bd7413e0ca ("bpf: fix sanitation of alu op with pointer / scalar type from different paths") Reported-by: Marek Majkowski Reported-by: Arthur Fabre Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/netdev/CAJPywTJqP34cK20iLM5YmUMz9KXQOdu1-+BZrGMAGgLuBWz7fg@mail.gmail.com/T/ Acked-by: Song Liu Signed-off-by: Alexei Starovoitov Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- kernel/bpf/verifier.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 87b1293f20d6..a4875ff0bab1 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -4771,7 +4771,8 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) u32 off_reg; aux = &env->insn_aux_data[i + delta]; - if (!aux->alu_state) + if (!aux->alu_state || + aux->alu_state == BPF_ALU_NON_POINTER) continue; isneg = aux->alu_state & BPF_ALU_NEG_VALUE; From 03f11a51a196a9aadc20a70a73825a811a726218 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Wed, 3 Apr 2019 18:39:17 +0000 Subject: [PATCH 89/93] bpf: Fix selftests are changes for CVE 2019-7308 The changes to fix the CVE 2019-7308 make the bpf verifier stricter with respect to operations that were allowed earlier in unprivileged mode. Fixup the test cases so that the error messages now correctly reflect pointer arithmetic going out of range for tests. Signed-off-by: Balbir Singh Signed-off-by: Greg Kroah-Hartman --- tools/testing/selftests/bpf/test_verifier.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a0591d06c61b..913539aea645 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -1860,6 +1860,7 @@ static struct bpf_test tests[] = { }, .result = REJECT, .errstr = "invalid stack off=-79992 size=8", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "PTR_TO_STACK store/load - out of bounds high", @@ -2243,6 +2244,8 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN(), }, .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", }, { "unpriv: cmp of stack pointer", @@ -7013,6 +7016,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .errstr = "pointer offset 1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -7034,6 +7038,7 @@ static struct bpf_test tests[] = { }, .fixup_map1 = { 3 }, .errstr = "pointer offset -1073741822", + .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range", .result = REJECT }, { @@ -7203,6 +7208,7 @@ static struct bpf_test tests[] = { BPF_EXIT_INSN() }, .errstr = "fp pointer offset 1073741822", + .errstr_unpriv = "R1 stack pointer arithmetic goes out of range", .result = REJECT }, { From 5661213956662f592979925eab1e4dfff955ba05 Mon Sep 17 00:00:00 2001 From: Lars Persson Date: Mon, 15 Apr 2019 09:49:47 +0200 Subject: [PATCH 90/93] net: stmmac: Set dma ring length before enabling the DMA This was fixed in upstream by commit 7d9e6c5afab6 ("net: stmmac: Integrate XGMAC into main driver flow") that is a new feature commit. We found a race condition in the DMA init sequence that hits if the PHY already has link up during stmmac_hw_setup. Since the ring length was programmed after enabling the RX path, we might receive a packet before the correct ring length is programmed. When that happened we could not get reliable interrupts for DMA RX and the MTL complained about RX FIFO overrun. Signed-off-by: Lars Persson Cc: stable@vger.kernel.org # 4.14.x Cc: Giuseppe Cavallaro Cc: Alexandre Torgue Cc: Jose Abreu Signed-off-by: Greg Kroah-Hartman --- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 4a9dbee6f054..f2429ec07b57 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2536,9 +2536,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) netdev_warn(priv->dev, "%s: failed debugFS registration\n", __func__); #endif - /* Start the ball rolling... */ - stmmac_start_all_dma(priv); - priv->tx_lpi_timer = STMMAC_DEFAULT_TWT_LS; if ((priv->use_riwt) && (priv->hw->dma->rx_watchdog)) { @@ -2558,6 +2555,9 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp) priv->hw->dma->enable_tso(priv->ioaddr, 1, chan); } + /* Start the ball rolling... */ + stmmac_start_all_dma(priv); + return 0; } From 1e4bc57ecf60a1240e4cee2c048dedd1a10d4e32 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 9 Apr 2019 20:05:43 +0300 Subject: [PATCH 91/93] mm: hide incomplete nr_indirectly_reclaimable in sysfs In upstream branch this fixed by commit b29940c1abd7 ("mm: rename and change semantics of nr_indirectly_reclaimable_bytes"). This fixes /sys/devices/system/node/node*/vmstat format: ... nr_dirtied 6613155 nr_written 5796802 11089216 ... Cc: # 4.19.y Fixes: 7aaf77272358 ("mm: don't show nr_indirectly_reclaimable in /proc/vmstat") Signed-off-by: Konstantin Khlebnikov Cc: Roman Gushchin Cc: Vlastimil Babka Signed-off-by: Greg Kroah-Hartman --- drivers/base/node.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/base/node.c b/drivers/base/node.c index ee090ab9171c..5c39f14d15a5 100644 --- a/drivers/base/node.c +++ b/drivers/base/node.c @@ -197,11 +197,16 @@ static ssize_t node_read_vmstat(struct device *dev, sum_zone_numa_state(nid, i)); #endif - for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) + for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) { + /* Skip hidden vmstat items. */ + if (*vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + + NR_VM_NUMA_STAT_ITEMS] == '\0') + continue; n += sprintf(buf+n, "%s %lu\n", vmstat_text[i + NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_STAT_ITEMS], node_page_state(pgdat, i)); + } return n; } From 76da4272779ab127acdaa9f4aa799cd7e90a8b3f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Mar 2019 11:52:36 +0100 Subject: [PATCH 92/93] appletalk: Fix compile regression [ Upstream commit 27da0d2ef998e222a876c0cec72aa7829a626266 ] A bugfix just broke compilation of appletalk when CONFIG_SYSCTL is disabled: In file included from net/appletalk/ddp.c:65: net/appletalk/ddp.c: In function 'atalk_init': include/linux/atalk.h:164:34: error: expected expression before 'do' #define atalk_register_sysctl() do { } while(0) ^~ net/appletalk/ddp.c:1934:7: note: in expansion of macro 'atalk_register_sysctl' rc = atalk_register_sysctl(); This is easier to avoid by using conventional inline functions as stubs rather than macros. The header already has inline functions for other purposes, so I'm changing over all the macros for consistency. Fixes: 6377f787aeb9 ("appletalk: Fix use-after-free in atalk_proc_exit") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller Signed-off-by: Sasha Levin --- include/linux/atalk.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/include/linux/atalk.h b/include/linux/atalk.h index 212eb8c7fed6..03885e63f92b 100644 --- a/include/linux/atalk.h +++ b/include/linux/atalk.h @@ -154,16 +154,26 @@ extern int sysctl_aarp_resolve_time; extern int atalk_register_sysctl(void); extern void atalk_unregister_sysctl(void); #else -#define atalk_register_sysctl() do { } while(0) -#define atalk_unregister_sysctl() do { } while(0) +static inline int atalk_register_sysctl(void) +{ + return 0; +} +static inline void atalk_unregister_sysctl(void) +{ +} #endif #ifdef CONFIG_PROC_FS extern int atalk_proc_init(void); extern void atalk_proc_exit(void); #else -#define atalk_proc_init() ({ 0; }) -#define atalk_proc_exit() do { } while(0) +static inline int atalk_proc_init(void) +{ + return 0; +} +static inline void atalk_proc_exit(void) +{ +} #endif /* CONFIG_PROC_FS */ #endif /* __LINUX_ATALK_H__ */ From 68d7a45eec101bc1550294c0e675a490c047b2e5 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 20 Apr 2019 09:15:10 +0200 Subject: [PATCH 93/93] Linux 4.14.113 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 94673d2a6a27..fcfef30ca9a6 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 VERSION = 4 PATCHLEVEL = 14 -SUBLEVEL = 112 +SUBLEVEL = 113 EXTRAVERSION = NAME = Petit Gorille