From 874f2a7d412ef7a30179349e9dc480f0c2429289 Mon Sep 17 00:00:00 2001
From: Houston Yaroschoff <hstn@4ever3.net>
Date: Mon, 11 Jun 2018 12:39:09 +0200
Subject: [PATCH 01/62] usb: cdc_acm: Add quirk for Uniden UBC125 scanner

commit 4a762569a2722b8a48066c7bacf0e1dc67d17fa1 upstream.

Uniden UBC125 radio scanner has USB interface which fails to work
with cdc_acm driver:
  usb 1-1.5: new full-speed USB device number 4 using xhci_hcd
  cdc_acm 1-1.5:1.0: Zero length descriptor references
  cdc_acm: probe of 1-1.5:1.0 failed with error -22

Adding the NO_UNION_NORMAL quirk for the device fixes the issue:
  usb 1-4: new full-speed USB device number 15 using xhci_hcd
  usb 1-4: New USB device found, idVendor=1965, idProduct=0018
  usb 1-4: New USB device strings: Mfr=1, Product=2, SerialNumber=3
  usb 1-4: Product: UBC125XLT
  usb 1-4: Manufacturer: Uniden Corp.
  usb 1-4: SerialNumber: 0001
  cdc_acm 1-4:1.0: ttyACM0: USB ACM device

`lsusb -v` of the device:

  Bus 001 Device 015: ID 1965:0018 Uniden Corporation
  Device Descriptor:
    bLength                18
    bDescriptorType         1
    bcdUSB               2.00
    bDeviceClass            2 Communications
    bDeviceSubClass         0
    bDeviceProtocol         0
    bMaxPacketSize0        64
    idVendor           0x1965 Uniden Corporation
    idProduct          0x0018
    bcdDevice            0.01
    iManufacturer           1 Uniden Corp.
    iProduct                2 UBC125XLT
    iSerial                 3 0001
    bNumConfigurations      1
    Configuration Descriptor:
      bLength                 9
      bDescriptorType         2
      wTotalLength           48
      bNumInterfaces          2
      bConfigurationValue     1
      iConfiguration          0
      bmAttributes         0x80
        (Bus Powered)
      MaxPower              500mA
      Interface Descriptor:
        bLength                 9
        bDescriptorType         4
        bInterfaceNumber        0
        bAlternateSetting       0
        bNumEndpoints           1
        bInterfaceClass         2 Communications
        bInterfaceSubClass      2 Abstract (modem)
        bInterfaceProtocol      0 None
        iInterface              0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x87  EP 7 IN
          bmAttributes            3
            Transfer Type            Interrupt
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0008  1x 8 bytes
          bInterval              10
      Interface Descriptor:
        bLength                 9
        bDescriptorType         4
        bInterfaceNumber        1
        bAlternateSetting       0
        bNumEndpoints           2
        bInterfaceClass        10 CDC Data
        bInterfaceSubClass      0 Unused
        bInterfaceProtocol      0
        iInterface              0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x81  EP 1 IN
          bmAttributes            2
            Transfer Type            Bulk
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0040  1x 64 bytes
          bInterval               0
        Endpoint Descriptor:
          bLength                 7
          bDescriptorType         5
          bEndpointAddress     0x02  EP 2 OUT
          bmAttributes            2
            Transfer Type            Bulk
            Synch Type               None
            Usage Type               Data
          wMaxPacketSize     0x0040  1x 64 bytes
          bInterval               0
  Device Status:     0x0000
    (Bus Powered)

Signed-off-by: Houston Yaroschoff <hstn@4ever3.net>
Cc: stable <stable@vger.kernel.org>
Acked-by: Oliver Neukum <oneukum@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/class/cdc-acm.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 22952d70b981..3b9aadd007f5 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1771,6 +1771,9 @@ static const struct usb_device_id acm_ids[] = {
 	{ USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */
 	.driver_info = SINGLE_RX_URB,
 	},
+	{ USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */
+	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
+	},
 	{ USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */
 	.driver_info = NO_UNION_NORMAL, /* has no union descriptor */
 	},

From 15e449969537594f970c166bbe811889f2ec853b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Mon, 18 Jun 2018 10:24:03 +0200
Subject: [PATCH 02/62] USB: serial: cp210x: add CESINEL device ids

commit 24160628a34af962ac99f2f58e547ac3c4cbd26f upstream.

Add device ids for CESINEL products.

Reported-by: Carlos Barcala Lara <cabl@cesinel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index d0f00274d16c..31b8e303ca38 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -98,6 +98,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */
 	{ USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */
 	{ USB_DEVICE(0x10C4, 0x815F) }, /* Timewave HamLinkUSB */
+	{ USB_DEVICE(0x10C4, 0x817C) }, /* CESINEL MEDCAL N Power Quality Monitor */
+	{ USB_DEVICE(0x10C4, 0x817D) }, /* CESINEL MEDCAL NT Power Quality Monitor */
+	{ USB_DEVICE(0x10C4, 0x817E) }, /* CESINEL MEDCAL S Power Quality Monitor */
 	{ USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */
 	{ USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */
 	{ USB_DEVICE(0x10C4, 0x81A6) }, /* ThinkOptics WavIt */
@@ -115,6 +118,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x826B) }, /* Cygnal Integrated Products, Inc., Fasttrax GPS demonstration module */
 	{ USB_DEVICE(0x10C4, 0x8281) }, /* Nanotec Plug & Drive */
 	{ USB_DEVICE(0x10C4, 0x8293) }, /* Telegesis ETRX2USB */
+	{ USB_DEVICE(0x10C4, 0x82EF) }, /* CESINEL FALCO 6105 AC Power Supply */
+	{ USB_DEVICE(0x10C4, 0x82F1) }, /* CESINEL MEDCAL EFD Earth Fault Detector */
+	{ USB_DEVICE(0x10C4, 0x82F2) }, /* CESINEL MEDCAL ST Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x82F4) }, /* Starizona MicroTouch */
 	{ USB_DEVICE(0x10C4, 0x82F9) }, /* Procyon AVS */
 	{ USB_DEVICE(0x10C4, 0x8341) }, /* Siemens MC35PU GPRS Modem */
@@ -127,7 +133,9 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8470) }, /* Juniper Networks BX Series System Console */
 	{ USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
 	{ USB_DEVICE(0x10C4, 0x84B6) }, /* Starizona Hyperion */
+	{ USB_DEVICE(0x10C4, 0x851E) }, /* CESINEL MEDCAL PT Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x85A7) }, /* LifeScan OneTouch Verio IQ */
+	{ USB_DEVICE(0x10C4, 0x85B8) }, /* CESINEL ReCon T Energy Logger */
 	{ USB_DEVICE(0x10C4, 0x85EA) }, /* AC-Services IBUS-IF */
 	{ USB_DEVICE(0x10C4, 0x85EB) }, /* AC-Services CIS-IBUS */
 	{ USB_DEVICE(0x10C4, 0x85F8) }, /* Virtenio Preon32 */
@@ -137,10 +145,13 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8857) },	/* CEL EM357 ZigBee USB Stick */
 	{ USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
 	{ USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
+	{ USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */
+	{ USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */
 	{ USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
 	{ USB_DEVICE(0x10C4, 0x8962) }, /* Brim Brothers charging dock */
 	{ USB_DEVICE(0x10C4, 0x8977) },	/* CEL MeshWorks DevKit Device */
 	{ USB_DEVICE(0x10C4, 0x8998) }, /* KCF Technologies PRN */
+	{ USB_DEVICE(0x10C4, 0x89A4) }, /* CESINEL FTBC Flexible Thyristor Bridge Controller */
 	{ USB_DEVICE(0x10C4, 0x8A2A) }, /* HubZ dual ZigBee and Z-Wave dongle */
 	{ USB_DEVICE(0x10C4, 0x8A5E) }, /* CEL EM3588 ZigBee USB Stick Long Range */
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */

From e80add5223dda08db1e5453a0ed4bc1175f394e9 Mon Sep 17 00:00:00 2001
From: Karoly Pados <pados@pados.hu>
Date: Sat, 9 Jun 2018 13:26:08 +0200
Subject: [PATCH 03/62] USB: serial: cp210x: add Silicon Labs IDs for Windows
 Update

commit 2f839823382748664b643daa73f41ee0cc01ced6 upstream.

Silicon Labs defines alternative VID/PID pairs for some chips that when
used will automatically install drivers for Windows users without manual
intervention. Unfortunately, these IDs are not recognized by the Linux
module, so using these IDs improves user experience on one platform but
degrades it on Linux. This patch addresses this problem.

Signed-off-by: Karoly Pados <pados@pados.hu>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/serial/cp210x.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 31b8e303ca38..142a83e5974c 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -157,8 +157,11 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x10C4, 0x8B34) }, /* Qivicon ZigBee USB Radio Stick */
 	{ USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
+	{ USB_DEVICE(0x10C4, 0xEA63) }, /* Silicon Labs Windows Update (CP2101-4/CP2102N) */
 	{ USB_DEVICE(0x10C4, 0xEA70) }, /* Silicon Labs factory default */
 	{ USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
+	{ USB_DEVICE(0x10C4, 0xEA7A) }, /* Silicon Labs Windows Update (CP2105) */
+	{ USB_DEVICE(0x10C4, 0xEA7B) }, /* Silicon Labs Windows Update (CP2108) */
 	{ USB_DEVICE(0x10C4, 0xF001) }, /* Elan Digital Systems USBscope50 */
 	{ USB_DEVICE(0x10C4, 0xF002) }, /* Elan Digital Systems USBwave12 */
 	{ USB_DEVICE(0x10C4, 0xF003) }, /* Elan Digital Systems USBpulse100 */

From 447294efb995181076c2c34e9c6bee1703c74a9a Mon Sep 17 00:00:00 2001
From: William Wu <william.wu@rock-chips.com>
Date: Mon, 21 May 2018 18:12:00 +0800
Subject: [PATCH 04/62] usb: dwc2: fix the incorrect bitmaps for the ports of
 multi_tt hub

commit 8760675932ddb614e83702117d36ea644050c609 upstream.

The dwc2_get_ls_map() use ttport to reference into the
bitmap if we're on a multi_tt hub. But the bitmaps index
from 0 to (hub->maxchild - 1), while the ttport index from
1 to hub->maxchild. This will cause invalid memory access
when the number of ttport is hub->maxchild.

Without this patch, I can easily meet a Kernel panic issue
if connect a low-speed USB mouse with the max port of FE2.1
multi-tt hub (1a40:0201) on rk3288 platform.

Fixes: 9f9f09b048f5 ("usb: dwc2: host: Totally redo the microframe scheduler")
Cc: <stable@vger.kernel.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Acked-by: Minas Harutyunyan hminas@synopsys.com>
Signed-off-by: William Wu <william.wu@rock-chips.com>
Signed-off-by: Felipe Balbi <felipe.balbi@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/hcd_queue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/hcd_queue.c b/drivers/usb/dwc2/hcd_queue.c
index 3ae8b1bbaa55..7f51a77bc5cc 100644
--- a/drivers/usb/dwc2/hcd_queue.c
+++ b/drivers/usb/dwc2/hcd_queue.c
@@ -379,7 +379,7 @@ static unsigned long *dwc2_get_ls_map(struct dwc2_hsotg *hsotg,
 	/* Get the map and adjust if this is a multi_tt hub */
 	map = qh->dwc_tt->periodic_bitmaps;
 	if (qh->dwc_tt->usb_tt->multi)
-		map += DWC2_ELEMENTS_PER_LS_BITMAP * qh->ttport;
+		map += DWC2_ELEMENTS_PER_LS_BITMAP * (qh->ttport - 1);
 
 	return map;
 }

From f2e9a38558d8bbd670357922c3d06b845c8d92df Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 21 Jun 2018 16:43:17 +0300
Subject: [PATCH 05/62] acpi: Add helper for deactivating memory region

commit d2d2e3c46be5d6dd8001d0eebdf7cafb9bc7006b upstream.

Sometimes memory resource may be overlapping with
SystemMemory Operation Region by design, for example if the
memory region is used as a mailbox for communication with a
firmware in the system. One occasion of such mailboxes is
USB Type-C Connector System Software Interface (UCSI).

With regions like that, it is important that the driver is
able to map the memory with the requirements it has. For
example, the driver should be allowed to map the memory as
non-cached memory. However, if the operation region has been
accessed before the driver has mapped the memory, the memory
has been marked as write-back by the time the driver is
loaded. That means the driver will fail to map the memory
if it expects non-cached memory.

To work around the problem, introducing helper that the
drivers can use to temporarily deactivate (unmap)
SystemMemory Operation Regions that overlap with their
IO memory.

Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver")
Cc: stable@vger.kernel.org
Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/acpi/osl.c   | 72 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/acpi.h |  3 ++
 2 files changed, 75 insertions(+)

diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index db78d353bab1..191e86c62037 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -45,6 +45,8 @@
 #include <linux/uaccess.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 
+#include "acpica/accommon.h"
+#include "acpica/acnamesp.h"
 #include "internal.h"
 
 #define _COMPONENT		ACPI_OS_SERVICES
@@ -1477,6 +1479,76 @@ int acpi_check_region(resource_size_t start, resource_size_t n,
 }
 EXPORT_SYMBOL(acpi_check_region);
 
+static acpi_status acpi_deactivate_mem_region(acpi_handle handle, u32 level,
+					      void *_res, void **return_value)
+{
+	struct acpi_mem_space_context **mem_ctx;
+	union acpi_operand_object *handler_obj;
+	union acpi_operand_object *region_obj2;
+	union acpi_operand_object *region_obj;
+	struct resource *res = _res;
+	acpi_status status;
+
+	region_obj = acpi_ns_get_attached_object(handle);
+	if (!region_obj)
+		return AE_OK;
+
+	handler_obj = region_obj->region.handler;
+	if (!handler_obj)
+		return AE_OK;
+
+	if (region_obj->region.space_id != ACPI_ADR_SPACE_SYSTEM_MEMORY)
+		return AE_OK;
+
+	if (!(region_obj->region.flags & AOPOBJ_SETUP_COMPLETE))
+		return AE_OK;
+
+	region_obj2 = acpi_ns_get_secondary_object(region_obj);
+	if (!region_obj2)
+		return AE_OK;
+
+	mem_ctx = (void *)&region_obj2->extra.region_context;
+
+	if (!(mem_ctx[0]->address >= res->start &&
+	      mem_ctx[0]->address < res->end))
+		return AE_OK;
+
+	status = handler_obj->address_space.setup(region_obj,
+						  ACPI_REGION_DEACTIVATE,
+						  NULL, (void **)mem_ctx);
+	if (ACPI_SUCCESS(status))
+		region_obj->region.flags &= ~(AOPOBJ_SETUP_COMPLETE);
+
+	return status;
+}
+
+/**
+ * acpi_release_memory - Release any mappings done to a memory region
+ * @handle: Handle to namespace node
+ * @res: Memory resource
+ * @level: A level that terminates the search
+ *
+ * Walks through @handle and unmaps all SystemMemory Operation Regions that
+ * overlap with @res and that have already been activated (mapped).
+ *
+ * This is a helper that allows drivers to place special requirements on memory
+ * region that may overlap with operation regions, primarily allowing them to
+ * safely map the region as non-cached memory.
+ *
+ * The unmapped Operation Regions will be automatically remapped next time they
+ * are called, so the drivers do not need to do anything else.
+ */
+acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
+				u32 level)
+{
+	if (!(res->flags & IORESOURCE_MEM))
+		return AE_TYPE;
+
+	return acpi_walk_namespace(ACPI_TYPE_REGION, handle, level,
+				   acpi_deactivate_mem_region, NULL, res, NULL);
+}
+EXPORT_SYMBOL_GPL(acpi_release_memory);
+
 /*
  * Let drivers know whether the resource checks are effective
  */
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 502af53ec012..13c105121a18 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -441,6 +441,9 @@ int acpi_check_resource_conflict(const struct resource *res);
 int acpi_check_region(resource_size_t start, resource_size_t n,
 		      const char *name);
 
+acpi_status acpi_release_memory(acpi_handle handle, struct resource *res,
+				u32 level);
+
 int acpi_resources_are_enforced(void);
 
 #ifdef CONFIG_HIBERNATION

From 47adbb26373f2e89f8ef76524489a250e4608b04 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 21 Jun 2018 16:43:18 +0300
Subject: [PATCH 06/62] usb: typec: ucsi: acpi: Workaround for cache mode issue

commit 1f9f9d168ce619608572b01771c47a41b15429e6 upstream.

This fixes an issue where the driver fails with an error:

	ioremap error for 0x3f799000-0x3f79a000, requested 0x2, got 0x0

On some platforms the UCSI ACPI mailbox SystemMemory
Operation Region may be setup before the driver has been
loaded. That will lead into the driver failing to map the
mailbox region, as it has been already marked as write-back
memory. acpi_os_ioremap() for x86 uses ioremap_cache()
unconditionally.

When the issue happens, the embedded controller has a
pending query event for the UCSI notification right after
boot-up which causes the operation region to be setup before
UCSI driver has been loaded.

The fix is to notify acpi core that the driver is about to
access memory region which potentially overlaps with an
operation region right before mapping it.
acpi_release_memory() will check if the memory has already
been setup (mapped) by acpi core, and deactivate it (unmap)
if it has. The driver is then able to map the memory with
ioremap_nocache() and set the memtype to uncached for the
region.

Reported-by: Paul Menzel <pmenzel@molgen.mpg.de>
Fixes: 8243edf44152 ("usb: typec: ucsi: Add ACPI driver")
Cc: stable@vger.kernel.org
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi_acpi.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/typec/ucsi/ucsi_acpi.c b/drivers/usb/typec/ucsi/ucsi_acpi.c
index cabd47612b0a..494d2a49203a 100644
--- a/drivers/usb/typec/ucsi/ucsi_acpi.c
+++ b/drivers/usb/typec/ucsi/ucsi_acpi.c
@@ -82,6 +82,11 @@ static int ucsi_acpi_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
+	/* This will make sure we can use ioremap_nocache() */
+	status = acpi_release_memory(ACPI_HANDLE(&pdev->dev), res, 1);
+	if (ACPI_FAILURE(status))
+		return -ENOMEM;
+
 	/*
 	 * NOTE: The memory region for the data structures is used also in an
 	 * operation region, which means ACPI has already reserved it. Therefore

From 0a7db82ed9ccbf6a85f4269a454e06e438b41b99 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Thu, 21 Jun 2018 16:43:19 +0300
Subject: [PATCH 07/62] usb: typec: ucsi: Fix for incorrect status data issue

commit 68816e16b4789f2d05e77b6dcb77564cf5d6a8d8 upstream.

According to UCSI Specification, Connector Change Event only
means a change in the Connector Status and Operation Mode
fields of the STATUS data structure. So any other change
should create another event.

Unfortunately on some platforms the firmware acting as PPM
(platform policy manager - usually embedded controller
firmware) still does not report any other status changes if
there is a connector change event. So if the connector power
or data role was changed when a device was plugged to the
connector, the driver does not get any indication about
that. The port will show wrong roles if that happens.

To fix the issue, always checking the data and power role
together with a connector change event.

Fixes: c1b0bc2dabfa ("usb: typec: Add support for UCSI interface")
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index dd24c5c1534d..251f5d66651e 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -346,6 +346,19 @@ static void ucsi_connector_change(struct work_struct *work)
 	}
 
 	if (con->status.change & UCSI_CONSTAT_CONNECT_CHANGE) {
+		typec_set_pwr_role(con->port, con->status.pwr_dir);
+
+		switch (con->status.partner_type) {
+		case UCSI_CONSTAT_PARTNER_TYPE_UFP:
+			typec_set_data_role(con->port, TYPEC_HOST);
+			break;
+		case UCSI_CONSTAT_PARTNER_TYPE_DFP:
+			typec_set_data_role(con->port, TYPEC_DEVICE);
+			break;
+		default:
+			break;
+		}
+
 		if (con->status.connected)
 			ucsi_register_partner(con);
 		else

From 716382f1c1eaca706ab694a61215b5e3416a0ea6 Mon Sep 17 00:00:00 2001
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Date: Thu, 21 Jun 2018 16:19:42 +0300
Subject: [PATCH 08/62] xhci: Fix kernel oops in trace_xhci_free_virt_device

commit d850c1658328e757635a46763783c6fd56390dcb upstream.

commit 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device")
set dev->udev pointer to NULL in xhci_free_dev(), it will cause kernel
panic in trace_xhci_free_virt_device. This patch reimplement the trace
function trace_xhci_free_virt_device, remove dev->udev dereference and
added more useful parameters to show in the trace function,it also makes
sure dev->udev is not NULL before calling trace_xhci_free_virt_device.
This issue happened when xhci-hcd trace is enabled and USB devices hot
plug test. Original use-after-free patch went to stable so this needs so
be applied there as well.

[ 1092.022457] usb 2-4: USB disconnect, device number 6
[ 1092.092772] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[ 1092.101694] PGD 0 P4D 0
[ 1092.104601] Oops: 0000 [#1] SMP
[ 1092.207734] Workqueue: usb_hub_wq hub_event
[ 1092.212507] RIP: 0010:trace_event_raw_event_xhci_log_virt_dev+0x6c/0xf0
[ 1092.220050] RSP: 0018:ffff8c252e883d28 EFLAGS: 00010086
[ 1092.226024] RAX: ffff8c24af86fa84 RBX: 0000000000000003 RCX: ffff8c25255c2a01
[ 1092.234130] RDX: 0000000000000000 RSI: 00000000aef55009 RDI: ffff8c252e883d28
[ 1092.242242] RBP: ffff8c252550e2c0 R08: ffff8c24af86fa84 R09: 0000000000000a70
[ 1092.250364] R10: 0000000000000a70 R11: 0000000000000000 R12: ffff8c251f21a000
[ 1092.258468] R13: 000000000000000c R14: ffff8c251f21a000 R15: ffff8c251f432f60
[ 1092.266572] FS:  0000000000000000(0000) GS:ffff8c252e880000(0000) knlGS:0000000000000000
[ 1092.275757] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1092.282281] CR2: 0000000000000000 CR3: 0000000154209001 CR4: 00000000003606e0
[ 1092.290384] Call Trace:
[ 1092.293156]  <IRQ>
[ 1092.295439]  xhci_free_virt_device.part.34+0x182/0x1a0
[ 1092.301288]  handle_cmd_completion+0x7ac/0xfa0
[ 1092.306336]  ? trace_event_raw_event_xhci_log_trb+0x6e/0xa0
[ 1092.312661]  xhci_irq+0x3e8/0x1f60
[ 1092.316524]  __handle_irq_event_percpu+0x75/0x180
[ 1092.321876]  handle_irq_event_percpu+0x20/0x50
[ 1092.326922]  handle_irq_event+0x36/0x60
[ 1092.331273]  handle_edge_irq+0x6d/0x180
[ 1092.335644]  handle_irq+0x16/0x20
[ 1092.339417]  do_IRQ+0x41/0xc0
[ 1092.342782]  common_interrupt+0xf/0xf
[ 1092.346955]  </IRQ>

Fixes: 44a182b9d177 ("xhci: Fix use-after-free in xhci_free_virt_device")
Cc: <stable@vger.kernel.org>
Signed-off-by: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mem.c   |  4 ++--
 drivers/usb/host/xhci-trace.h | 36 ++++++++++++++++++++++++++++++-----
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index efd7e4882d66..00b710016d21 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -891,12 +891,12 @@ void xhci_free_virt_device(struct xhci_hcd *xhci, int slot_id)
 
 	dev = xhci->devs[slot_id];
 
-	trace_xhci_free_virt_device(dev);
-
 	xhci->dcbaa->dev_context_ptrs[slot_id] = 0;
 	if (!dev)
 		return;
 
+	trace_xhci_free_virt_device(dev);
+
 	if (dev->tt_info)
 		old_active_eps = dev->tt_info->active_eps;
 
diff --git a/drivers/usb/host/xhci-trace.h b/drivers/usb/host/xhci-trace.h
index f20753b99624..02a1164ca599 100644
--- a/drivers/usb/host/xhci-trace.h
+++ b/drivers/usb/host/xhci-trace.h
@@ -158,6 +158,37 @@ DEFINE_EVENT(xhci_log_trb, xhci_queue_trb,
 	TP_ARGS(ring, trb)
 );
 
+DECLARE_EVENT_CLASS(xhci_log_free_virt_dev,
+	TP_PROTO(struct xhci_virt_device *vdev),
+	TP_ARGS(vdev),
+	TP_STRUCT__entry(
+		__field(void *, vdev)
+		__field(unsigned long long, out_ctx)
+		__field(unsigned long long, in_ctx)
+		__field(u8, fake_port)
+		__field(u8, real_port)
+		__field(u16, current_mel)
+
+	),
+	TP_fast_assign(
+		__entry->vdev = vdev;
+		__entry->in_ctx = (unsigned long long) vdev->in_ctx->dma;
+		__entry->out_ctx = (unsigned long long) vdev->out_ctx->dma;
+		__entry->fake_port = (u8) vdev->fake_port;
+		__entry->real_port = (u8) vdev->real_port;
+		__entry->current_mel = (u16) vdev->current_mel;
+		),
+	TP_printk("vdev %p ctx %llx | %llx fake_port %d real_port %d current_mel %d",
+		__entry->vdev, __entry->in_ctx, __entry->out_ctx,
+		__entry->fake_port, __entry->real_port, __entry->current_mel
+	)
+);
+
+DEFINE_EVENT(xhci_log_free_virt_dev, xhci_free_virt_device,
+	TP_PROTO(struct xhci_virt_device *vdev),
+	TP_ARGS(vdev)
+);
+
 DECLARE_EVENT_CLASS(xhci_log_virt_dev,
 	TP_PROTO(struct xhci_virt_device *vdev),
 	TP_ARGS(vdev),
@@ -195,11 +226,6 @@ DEFINE_EVENT(xhci_log_virt_dev, xhci_alloc_virt_device,
 	TP_ARGS(vdev)
 );
 
-DEFINE_EVENT(xhci_log_virt_dev, xhci_free_virt_device,
-	TP_PROTO(struct xhci_virt_device *vdev),
-	TP_ARGS(vdev)
-);
-
 DEFINE_EVENT(xhci_log_virt_dev, xhci_setup_device,
 	TP_PROTO(struct xhci_virt_device *vdev),
 	TP_ARGS(vdev)

From d105fb8c88940765d9555ec921ea2e1267286628 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 26 May 2018 09:53:13 +0900
Subject: [PATCH 09/62] n_tty: Fix stall at n_tty_receive_char_special().

commit 3d63b7e4ae0dc5e02d28ddd2fa1f945defc68d81 upstream.

syzbot is reporting stalls at n_tty_receive_char_special() [1]. This is
because comparison is not working as expected since ldata->read_head can
change at any moment. Mitigate this by explicitly masking with buffer size
when checking condition for "while" loops.

[1] https://syzkaller.appspot.com/bug?id=3d7481a346958d9469bebbeb0537d5f056bdd6e8

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+18df353d7540aa6b5467@syzkaller.appspotmail.com>
Fixes: bc5a5e3f45d04784 ("n_tty: Don't wrap input buffer indices at buffer size")
Cc: stable <stable@vger.kernel.org>
Cc: Peter Hurley <peter@hurleysoftware.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 1c70541a1467..a8a7a13c8683 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -126,6 +126,8 @@ struct n_tty_data {
 	struct mutex output_lock;
 };
 
+#define MASK(x) ((x) & (N_TTY_BUF_SIZE - 1))
+
 static inline size_t read_cnt(struct n_tty_data *ldata)
 {
 	return ldata->read_head - ldata->read_tail;
@@ -980,14 +982,15 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 	}
 
 	seen_alnums = 0;
-	while (ldata->read_head != ldata->canon_head) {
+	while (MASK(ldata->read_head) != MASK(ldata->canon_head)) {
 		head = ldata->read_head;
 
 		/* erase a single possibly multibyte character */
 		do {
 			head--;
 			c = read_buf(ldata, head);
-		} while (is_continuation(c, tty) && head != ldata->canon_head);
+		} while (is_continuation(c, tty) &&
+			 MASK(head) != MASK(ldata->canon_head));
 
 		/* do not partially erase */
 		if (is_continuation(c, tty))
@@ -1029,7 +1032,7 @@ static void eraser(unsigned char c, struct tty_struct *tty)
 				 * This info is used to go back the correct
 				 * number of columns.
 				 */
-				while (tail != ldata->canon_head) {
+				while (MASK(tail) != MASK(ldata->canon_head)) {
 					tail--;
 					c = read_buf(ldata, tail);
 					if (c == '\t') {
@@ -1304,7 +1307,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c)
 			finish_erasing(ldata);
 			echo_char(c, tty);
 			echo_char_raw('\n', ldata);
-			while (tail != ldata->read_head) {
+			while (MASK(tail) != MASK(ldata->read_head)) {
 				echo_char(read_buf(ldata, tail), tty);
 				tail++;
 			}
@@ -2413,7 +2416,7 @@ static unsigned long inq_canon(struct n_tty_data *ldata)
 	tail = ldata->read_tail;
 	nr = head - tail;
 	/* Skip EOF-chars.. */
-	while (head != tail) {
+	while (MASK(head) != MASK(tail)) {
 		if (test_bit(tail & (N_TTY_BUF_SIZE - 1), ldata->read_flags) &&
 		    read_buf(ldata, tail) == __DISABLED_CHAR)
 			nr--;

From c034d161fa63f35e0107b9c03d4b3108d2401a08 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Sat, 26 May 2018 09:53:14 +0900
Subject: [PATCH 10/62] n_tty: Access echo_* variables carefully.

commit ebec3f8f5271139df618ebdf8427e24ba102ba94 upstream.

syzbot is reporting stalls at __process_echoes() [1]. This is because
since ldata->echo_commit < ldata->echo_tail becomes true for some reason,
the discard loop is serving as almost infinite loop. This patch tries to
avoid falling into ldata->echo_commit < ldata->echo_tail situation by
making access to echo_* variables more carefully.

Since reset_buffer_flags() is called without output_lock held, it should
not touch echo_* variables. And omit a call to reset_buffer_flags() from
n_tty_open() by using vzalloc().

Since add_echo_byte() is called without output_lock held, it needs memory
barrier between storing into echo_buf[] and incrementing echo_head counter.
echo_buf() needs corresponding memory barrier before reading echo_buf[].
Lack of handling the possibility of not-yet-stored multi-byte operation
might be the reason of falling into ldata->echo_commit < ldata->echo_tail
situation, for if I do WARN_ON(ldata->echo_commit == tail + 1) prior to
echo_buf(ldata, tail + 1), the WARN_ON() fires.

Also, explicitly masking with buffer for the former "while" loop, and
use ldata->echo_commit > tail for the latter "while" loop.

[1] https://syzkaller.appspot.com/bug?id=17f23b094cd80df750e5b0f8982c521ee6bcbf40

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+108696293d7a21ab688f@syzkaller.appspotmail.com>
Cc: Peter Hurley <peter@hurleysoftware.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index a8a7a13c8683..0475f9685a41 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -145,6 +145,7 @@ static inline unsigned char *read_buf_addr(struct n_tty_data *ldata, size_t i)
 
 static inline unsigned char echo_buf(struct n_tty_data *ldata, size_t i)
 {
+	smp_rmb(); /* Matches smp_wmb() in add_echo_byte(). */
 	return ldata->echo_buf[i & (N_TTY_BUF_SIZE - 1)];
 }
 
@@ -320,9 +321,7 @@ static inline void put_tty_queue(unsigned char c, struct n_tty_data *ldata)
 static void reset_buffer_flags(struct n_tty_data *ldata)
 {
 	ldata->read_head = ldata->canon_head = ldata->read_tail = 0;
-	ldata->echo_head = ldata->echo_tail = ldata->echo_commit = 0;
 	ldata->commit_head = 0;
-	ldata->echo_mark = 0;
 	ldata->line_start = 0;
 
 	ldata->erasing = 0;
@@ -621,12 +620,19 @@ static size_t __process_echoes(struct tty_struct *tty)
 	old_space = space = tty_write_room(tty);
 
 	tail = ldata->echo_tail;
-	while (ldata->echo_commit != tail) {
+	while (MASK(ldata->echo_commit) != MASK(tail)) {
 		c = echo_buf(ldata, tail);
 		if (c == ECHO_OP_START) {
 			unsigned char op;
 			int no_space_left = 0;
 
+			/*
+			 * Since add_echo_byte() is called without holding
+			 * output_lock, we might see only portion of multi-byte
+			 * operation.
+			 */
+			if (MASK(ldata->echo_commit) == MASK(tail + 1))
+				goto not_yet_stored;
 			/*
 			 * If the buffer byte is the start of a multi-byte
 			 * operation, get the next byte, which is either the
@@ -638,6 +644,8 @@ static size_t __process_echoes(struct tty_struct *tty)
 				unsigned int num_chars, num_bs;
 
 			case ECHO_OP_ERASE_TAB:
+				if (MASK(ldata->echo_commit) == MASK(tail + 2))
+					goto not_yet_stored;
 				num_chars = echo_buf(ldata, tail + 2);
 
 				/*
@@ -732,7 +740,8 @@ static size_t __process_echoes(struct tty_struct *tty)
 	/* If the echo buffer is nearly full (so that the possibility exists
 	 * of echo overrun before the next commit), then discard enough
 	 * data at the tail to prevent a subsequent overrun */
-	while (ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) {
+	while (ldata->echo_commit > tail &&
+	       ldata->echo_commit - tail >= ECHO_DISCARD_WATERMARK) {
 		if (echo_buf(ldata, tail) == ECHO_OP_START) {
 			if (echo_buf(ldata, tail + 1) == ECHO_OP_ERASE_TAB)
 				tail += 3;
@@ -742,6 +751,7 @@ static size_t __process_echoes(struct tty_struct *tty)
 			tail++;
 	}
 
+ not_yet_stored:
 	ldata->echo_tail = tail;
 	return old_space - space;
 }
@@ -752,6 +762,7 @@ static void commit_echoes(struct tty_struct *tty)
 	size_t nr, old, echoed;
 	size_t head;
 
+	mutex_lock(&ldata->output_lock);
 	head = ldata->echo_head;
 	ldata->echo_mark = head;
 	old = ldata->echo_commit - ldata->echo_tail;
@@ -760,10 +771,12 @@ static void commit_echoes(struct tty_struct *tty)
 	 * is over the threshold (and try again each time another
 	 * block is accumulated) */
 	nr = head - ldata->echo_tail;
-	if (nr < ECHO_COMMIT_WATERMARK || (nr % ECHO_BLOCK > old % ECHO_BLOCK))
+	if (nr < ECHO_COMMIT_WATERMARK ||
+	    (nr % ECHO_BLOCK > old % ECHO_BLOCK)) {
+		mutex_unlock(&ldata->output_lock);
 		return;
+	}
 
-	mutex_lock(&ldata->output_lock);
 	ldata->echo_commit = head;
 	echoed = __process_echoes(tty);
 	mutex_unlock(&ldata->output_lock);
@@ -814,7 +827,9 @@ static void flush_echoes(struct tty_struct *tty)
 
 static inline void add_echo_byte(unsigned char c, struct n_tty_data *ldata)
 {
-	*echo_buf_addr(ldata, ldata->echo_head++) = c;
+	*echo_buf_addr(ldata, ldata->echo_head) = c;
+	smp_wmb(); /* Matches smp_rmb() in echo_buf(). */
+	ldata->echo_head++;
 }
 
 /**
@@ -1883,30 +1898,21 @@ static int n_tty_open(struct tty_struct *tty)
 	struct n_tty_data *ldata;
 
 	/* Currently a malloc failure here can panic */
-	ldata = vmalloc(sizeof(*ldata));
+	ldata = vzalloc(sizeof(*ldata));
 	if (!ldata)
-		goto err;
+		return -ENOMEM;
 
 	ldata->overrun_time = jiffies;
 	mutex_init(&ldata->atomic_read_lock);
 	mutex_init(&ldata->output_lock);
 
 	tty->disc_data = ldata;
-	reset_buffer_flags(tty->disc_data);
-	ldata->column = 0;
-	ldata->canon_column = 0;
-	ldata->num_overrun = 0;
-	ldata->no_room = 0;
-	ldata->lnext = 0;
 	tty->closing = 0;
 	/* indicate buffer work may resume */
 	clear_bit(TTY_LDISC_HALTED, &tty->flags);
 	n_tty_set_termios(tty, NULL);
 	tty_unthrottle(tty);
-
 	return 0;
-err:
-	return -ENOMEM;
 }
 
 static inline int input_available_p(struct tty_struct *tty, int poll)

From 2a7a8556b3b4d56e60b7e2edc57754e701ddb788 Mon Sep 17 00:00:00 2001
From: Laura Abbott <labbott@redhat.com>
Date: Mon, 11 Jun 2018 11:06:53 -0700
Subject: [PATCH 11/62] staging: android: ion: Return an ERR_PTR in
 ion_map_kernel

commit 0a2bc00341dcfcc793c0dbf4f8d43adf60458b05 upstream.

The expected return value from ion_map_kernel is an ERR_PTR. The error
path for a vmalloc failure currently just returns NULL, triggering
a warning in ion_buffer_kmap_get. Encode the vmalloc failure as an ERR_PTR.

Reported-by: syzbot+55b1d9f811650de944c6@syzkaller.appspotmail.com
Signed-off-by: Laura Abbott <labbott@redhat.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/android/ion/ion_heap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/android/ion/ion_heap.c b/drivers/staging/android/ion/ion_heap.c
index 91faa7f035b9..babbd94c32d9 100644
--- a/drivers/staging/android/ion/ion_heap.c
+++ b/drivers/staging/android/ion/ion_heap.c
@@ -38,7 +38,7 @@ void *ion_heap_map_kernel(struct ion_heap *heap,
 	struct page **tmp = pages;
 
 	if (!pages)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	if (buffer->flags & ION_FLAG_CACHED)
 		pgprot = PAGE_KERNEL;

From 3ff8e558ba7b6f7fce1ea627c797dc03240bc40f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 6 Jun 2018 21:00:41 +0300
Subject: [PATCH 12/62] serial: 8250_pci: Remove stalled entries in blacklist

commit 20dcff436e9fcd2e106b0ccc48a52206bc176d70 upstream.

After the commit

  7d8905d06405 ("serial: 8250_pci: Enable device after we check black list")

pure serial multi-port cards, such as CH355, got blacklisted and thus
not being enumerated anymore. Previously, it seems, blacklisting them
was on purpose to shut up pciserial_init_one() about record duplication.

So, remove the entries from blacklist in order to get cards enumerated.

Fixes: 7d8905d06405 ("serial: 8250_pci: Enable device after we check black list")
Reported-by: Matt Turner <mattst88@gmail.com>
Cc: Sergej Pupykin <ml@sergej.pp.ru>
Cc: Alexandr Petrenko <petrenkoas83@gmail.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-and-Tested-by: Matt Turner <mattst88@gmail.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_pci.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
index 0d814a87acb2..4986b4aebe80 100644
--- a/drivers/tty/serial/8250/8250_pci.c
+++ b/drivers/tty/serial/8250/8250_pci.c
@@ -3345,9 +3345,7 @@ static const struct pci_device_id blacklist[] = {
 	/* multi-io cards handled by parport_serial */
 	{ PCI_DEVICE(0x4348, 0x7053), }, /* WCH CH353 2S1P */
 	{ PCI_DEVICE(0x4348, 0x5053), }, /* WCH CH353 1S1P */
-	{ PCI_DEVICE(0x4348, 0x7173), }, /* WCH CH355 4S */
 	{ PCI_DEVICE(0x1c00, 0x3250), }, /* WCH CH382 2S1P */
-	{ PCI_DEVICE(0x1c00, 0x3470), }, /* WCH CH384 4S */
 
 	/* Moxa Smartio MUE boards handled by 8250_moxa */
 	{ PCI_VDEVICE(MOXA, 0x1024), },

From b124a1c182fa933f579281cc650f60ac47c7026d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Wed, 13 Jun 2018 17:08:59 +0200
Subject: [PATCH 13/62] serdev: fix memleak on module unload

commit bc6cf3669d22371f573ab0305b3abf13887c0786 upstream.

Make sure to free all resources associated with the ida on module
exit.

Fixes: cd6484e1830b ("serdev: Introduce new bus for serial attached devices")
Cc: stable <stable@vger.kernel.org>	# 4.11
Signed-off-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serdev/core.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/tty/serdev/core.c b/drivers/tty/serdev/core.c
index 97db76afced2..ae2564ecddcd 100644
--- a/drivers/tty/serdev/core.c
+++ b/drivers/tty/serdev/core.c
@@ -482,6 +482,7 @@ EXPORT_SYMBOL_GPL(__serdev_device_driver_register);
 static void __exit serdev_exit(void)
 {
 	bus_unregister(&serdev_bus_type);
+	ida_destroy(&ctrl_ida);
 }
 module_exit(serdev_exit);
 

From b141de45e2dc73d6997e6bf7b8347b688bc7c5f7 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Thu, 14 Jun 2018 12:23:09 +0200
Subject: [PATCH 14/62] vt: prevent leaking uninitialized data to userspace via
 /dev/vcs*

commit 21eff69aaaa0e766ca0ce445b477698dc6a9f55a upstream.

KMSAN reported an infoleak when reading from /dev/vcs*:

  BUG: KMSAN: kernel-infoleak in vcs_read+0x18ba/0x1cc0
  Call Trace:
  ...
   kmsan_copy_to_user+0x7a/0x160 mm/kmsan/kmsan.c:1253
   copy_to_user ./include/linux/uaccess.h:184
   vcs_read+0x18ba/0x1cc0 drivers/tty/vt/vc_screen.c:352
   __vfs_read+0x1b2/0x9d0 fs/read_write.c:416
   vfs_read+0x36c/0x6b0 fs/read_write.c:452
  ...
  Uninit was created at:
   kmsan_save_stack_with_flags mm/kmsan/kmsan.c:279
   kmsan_internal_poison_shadow+0xb8/0x1b0 mm/kmsan/kmsan.c:189
   kmsan_kmalloc+0x94/0x100 mm/kmsan/kmsan.c:315
   __kmalloc+0x13a/0x350 mm/slub.c:3818
   kmalloc ./include/linux/slab.h:517
   vc_allocate+0x438/0x800 drivers/tty/vt/vt.c:787
   con_install+0x8c/0x640 drivers/tty/vt/vt.c:2880
   tty_driver_install_tty drivers/tty/tty_io.c:1224
   tty_init_dev+0x1b5/0x1020 drivers/tty/tty_io.c:1324
   tty_open_by_driver drivers/tty/tty_io.c:1959
   tty_open+0x17b4/0x2ed0 drivers/tty/tty_io.c:2007
   chrdev_open+0xc25/0xd90 fs/char_dev.c:417
   do_dentry_open+0xccc/0x1440 fs/open.c:794
   vfs_open+0x1b6/0x2f0 fs/open.c:908
  ...
  Bytes 0-79 of 240 are uninitialized

Consistently allocating |vc_screenbuf| with kzalloc() fixes the problem

Reported-by: syzbot+17a8efdf800000@syzkaller.appspotmail.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/vt/vt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
index de67abbda921..e77421e7bf46 100644
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -782,7 +782,7 @@ int vc_allocate(unsigned int currcons)	/* return 0 on success */
 	if (!*vc->vc_uni_pagedir_loc)
 		con_set_default_unimap(vc);
 
-	vc->vc_screenbuf = kmalloc(vc->vc_screenbuf_size, GFP_KERNEL);
+	vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL);
 	if (!vc->vc_screenbuf)
 		goto err_free;
 
@@ -869,7 +869,7 @@ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc,
 
 	if (new_screen_size > (4 << 20))
 		return -EINVAL;
-	newscreen = kmalloc(new_screen_size, GFP_USER);
+	newscreen = kzalloc(new_screen_size, GFP_USER);
 	if (!newscreen)
 		return -ENOMEM;
 

From ce686c42476ec140756d3f1f16cfe8251f8d7520 Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 10 Apr 2018 17:17:22 +0800
Subject: [PATCH 15/62] drm/amdgpu: Add APU support in vi_set_uvd_clocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 819a23f83e3b2513cffbef418458a47ca02c36b3 upstream.

fix the issue set uvd clock failed on CZ/ST
which lead 1s delay when boot up.

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Shirish S <shirish.s@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/vi.c | 46 +++++++++++++++++++++++++--------
 1 file changed, 35 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 4968b6bb9466..81b0bd7e79a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -729,33 +729,57 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
 		return r;
 
 	tmp = RREG32_SMC(cntl_reg);
-	tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
-		CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
+
+	if (adev->flags & AMD_IS_APU)
+		tmp &= ~CG_DCLK_CNTL__DCLK_DIVIDER_MASK;
+	else
+		tmp &= ~(CG_DCLK_CNTL__DCLK_DIR_CNTL_EN_MASK |
+				CG_DCLK_CNTL__DCLK_DIVIDER_MASK);
 	tmp |= dividers.post_divider;
 	WREG32_SMC(cntl_reg, tmp);
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(status_reg) & CG_DCLK_STATUS__DCLK_STATUS_MASK)
-			break;
+		tmp = RREG32_SMC(status_reg);
+		if (adev->flags & AMD_IS_APU) {
+			if (tmp & 0x10000)
+				break;
+		} else {
+			if (tmp & CG_DCLK_STATUS__DCLK_STATUS_MASK)
+				break;
+		}
 		mdelay(10);
 	}
 	if (i == 100)
 		return -ETIMEDOUT;
-
 	return 0;
 }
 
+#define ixGNB_CLK1_DFS_CNTL 0xD82200F0
+#define ixGNB_CLK1_STATUS   0xD822010C
+#define ixGNB_CLK2_DFS_CNTL 0xD8220110
+#define ixGNB_CLK2_STATUS   0xD822012C
+
 static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
 {
 	int r;
 
-	r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
-	if (r)
-		return r;
+	if (adev->flags & AMD_IS_APU) {
+		r = vi_set_uvd_clock(adev, vclk, ixGNB_CLK2_DFS_CNTL, ixGNB_CLK2_STATUS);
+		if (r)
+			return r;
 
-	r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
-	if (r)
-		return r;
+		r = vi_set_uvd_clock(adev, dclk, ixGNB_CLK1_DFS_CNTL, ixGNB_CLK1_STATUS);
+		if (r)
+			return r;
+	} else {
+		r = vi_set_uvd_clock(adev, vclk, ixCG_VCLK_CNTL, ixCG_VCLK_STATUS);
+		if (r)
+			return r;
+
+		r = vi_set_uvd_clock(adev, dclk, ixCG_DCLK_CNTL, ixCG_DCLK_STATUS);
+		if (r)
+			return r;
+	}
 
 	return 0;
 }

From 40e2064b8fc7025cf6def10ffc5d4ebc5dc8837e Mon Sep 17 00:00:00 2001
From: Rex Zhu <Rex.Zhu@amd.com>
Date: Tue, 10 Apr 2018 17:49:56 +0800
Subject: [PATCH 16/62] drm/amdgpu: Add APU support in vi_set_vce_clocks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 08ebb6e9f4fd7098c28e0ebbb42847cf0488ebb8 upstream.

1. fix set vce clocks failed on Cz/St
   which lead 1s delay when boot up.
2. remove the workaround in vce_v3_0.c

Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Huang Rui <ray.huang@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Acked-by: Shirish S <shirish.s@amd.com>
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/vce_v3_0.c |  4 ++--
 drivers/gpu/drm/amd/amdgpu/vi.c       | 31 +++++++++++++++++++++------
 2 files changed, 27 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index cf81065e3c5a..5183b46563f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -467,8 +467,8 @@ static int vce_v3_0_hw_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	vce_v3_0_override_vce_clock_gating(adev, true);
-	if (!(adev->flags & AMD_IS_APU))
-		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+
+	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 
 	for (i = 0; i < adev->vce.num_rings; i++)
 		adev->vce.ring[i].ready = false;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 81b0bd7e79a9..0327e0a6802b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -758,6 +758,8 @@ static int vi_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
 #define ixGNB_CLK1_STATUS   0xD822010C
 #define ixGNB_CLK2_DFS_CNTL 0xD8220110
 #define ixGNB_CLK2_STATUS   0xD822012C
+#define ixGNB_CLK3_DFS_CNTL 0xD8220130
+#define ixGNB_CLK3_STATUS   0xD822014C
 
 static int vi_set_uvd_clocks(struct amdgpu_device *adev, u32 vclk, u32 dclk)
 {
@@ -789,6 +791,22 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
 	int r, i;
 	struct atom_clock_dividers dividers;
 	u32 tmp;
+	u32 reg_ctrl;
+	u32 reg_status;
+	u32 status_mask;
+	u32 reg_mask;
+
+	if (adev->flags & AMD_IS_APU) {
+		reg_ctrl = ixGNB_CLK3_DFS_CNTL;
+		reg_status = ixGNB_CLK3_STATUS;
+		status_mask = 0x00010000;
+		reg_mask = CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+	} else {
+		reg_ctrl = ixCG_ECLK_CNTL;
+		reg_status = ixCG_ECLK_STATUS;
+		status_mask = CG_ECLK_STATUS__ECLK_STATUS_MASK;
+		reg_mask = CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK | CG_ECLK_CNTL__ECLK_DIVIDER_MASK;
+	}
 
 	r = amdgpu_atombios_get_clock_dividers(adev,
 					       COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
@@ -797,24 +815,25 @@ static int vi_set_vce_clocks(struct amdgpu_device *adev, u32 evclk, u32 ecclk)
 		return r;
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+		if (RREG32_SMC(reg_status) & status_mask)
 			break;
 		mdelay(10);
 	}
+
 	if (i == 100)
 		return -ETIMEDOUT;
 
-	tmp = RREG32_SMC(ixCG_ECLK_CNTL);
-	tmp &= ~(CG_ECLK_CNTL__ECLK_DIR_CNTL_EN_MASK |
-		CG_ECLK_CNTL__ECLK_DIVIDER_MASK);
+	tmp = RREG32_SMC(reg_ctrl);
+	tmp &= ~reg_mask;
 	tmp |= dividers.post_divider;
-	WREG32_SMC(ixCG_ECLK_CNTL, tmp);
+	WREG32_SMC(reg_ctrl, tmp);
 
 	for (i = 0; i < 100; i++) {
-		if (RREG32_SMC(ixCG_ECLK_STATUS) & CG_ECLK_STATUS__ECLK_STATUS_MASK)
+		if (RREG32_SMC(reg_status) & status_mask)
 			break;
 		mdelay(10);
 	}
+
 	if (i == 100)
 		return -ETIMEDOUT;
 

From dd19ea36f5963a7160dbc4526455d2fb4d9cb516 Mon Sep 17 00:00:00 2001
From: Huang Rui <ray.huang@amd.com>
Date: Wed, 23 May 2018 11:18:43 +0800
Subject: [PATCH 17/62] drm/amdgpu: fix the missed vcn fw version report
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit a0b2ac29415bb44d1c212184c1385a1abe68db5c upstream.

It missed vcn.fw_version setting when init vcn microcode, and it will be used to
report vcn ucode version via amdgpu_firmware_info sysfs interface.

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 041e0121590c..308a9755eae3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -85,6 +85,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 	}
 
 	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);
 	family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
 	version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
 	version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;

From 7d0ed747bc56ca05868423220c62e64ea525aad7 Mon Sep 17 00:00:00 2001
From: Jeremy Cline <jcline@redhat.com>
Date: Fri, 1 Jun 2018 16:05:32 -0400
Subject: [PATCH 18/62] drm/qxl: Call qxl_bo_unref outside atomic context

commit 889ad63d41eea20184b0483e7e585e5b20fb6cfe upstream.

"qxl_bo_unref" may sleep, but calling "qxl_release_map" causes
"preempt_disable()" to be called and "preempt_enable()" isn't called
until "qxl_release_unmap" is used. Move the call to "qxl_bo_unref" out
from in between the two to avoid sleeping from an atomic context.

This issue can be demonstrated on a kernel with CONFIG_LOCKDEP=y by
creating a VM using QXL, using a desktop environment using Xorg, then
moving the cursor on or off a window.

Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=1571128
Fixes: 9428088c90b6 ("drm/qxl: reapply cursor after resetting primary")
Cc: stable@vger.kernel.org
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Link: http://patchwork.freedesktop.org/patch/msgid/20180601200532.13619-1-jcline@redhat.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/qxl/qxl_display.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 9a9214ae0fb5..573bab222123 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -630,7 +630,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	struct qxl_cursor_cmd *cmd;
 	struct qxl_cursor *cursor;
 	struct drm_gem_object *obj;
-	struct qxl_bo *cursor_bo = NULL, *user_bo = NULL;
+	struct qxl_bo *cursor_bo = NULL, *user_bo = NULL, *old_cursor_bo = NULL;
 	int ret;
 	void *user_ptr;
 	int size = 64*64*4;
@@ -684,7 +684,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 							   cursor_bo, 0);
 		cmd->type = QXL_CURSOR_SET;
 
-		qxl_bo_unref(&qcrtc->cursor_bo);
+		old_cursor_bo = qcrtc->cursor_bo;
 		qcrtc->cursor_bo = cursor_bo;
 		cursor_bo = NULL;
 	} else {
@@ -704,6 +704,9 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
 	qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
 	qxl_release_fence_buffer_objects(release);
 
+	if (old_cursor_bo)
+		qxl_bo_unref(&old_cursor_bo);
+
 	qxl_bo_unref(&cursor_bo);
 
 	return;

From af597bb61370e337eec93ae655625d3dbd1fc0bb Mon Sep 17 00:00:00 2001
From: Stefan Agner <stefan@agner.ch>
Date: Sun, 17 Jun 2018 10:48:22 +0200
Subject: [PATCH 19/62] drm/atmel-hlcdc: check stride values in the first plane

commit 9fcf2b3c1c0276650fea537c71b513d27d929b05 upstream.

The statement always evaluates to true since the struct fields
are arrays. This has shown up as a warning when compiling with
clang:
  warning: address of array 'desc->layout.xstride' will always
      evaluate to 'true' [-Wpointer-bool-conversion]

Check for values in the first plane instead.

Fixes: 1a396789f65a ("drm: add Atmel HLCDC Display Controller support")
Cc: <stable@vger.kernel.org>
Signed-off-by: Stefan Agner <stefan@agner.ch>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180617084826.31885-1-stefan@agner.ch
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
index 703c2d13603f..eb7c4cf19bf6 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
@@ -889,7 +889,7 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane,
 		drm_object_attach_property(&plane->base.base,
 					   props->alpha, 255);
 
-	if (desc->layout.xstride && desc->layout.pstride) {
+	if (desc->layout.xstride[0] && desc->layout.pstride[0]) {
 		int ret;
 
 		ret = drm_plane_create_rotation_property(&plane->base,

From a1bf87cfb6ea83a262d218b2bc123a11b949caaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Fri, 8 Jun 2018 12:58:15 +0200
Subject: [PATCH 20/62] drm/amdgpu: Use kvmalloc_array for allocating VRAM
 manager nodes array
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 6fa39bc1e01dab8b4f54b23e95a181a2ed5a2d38 upstream.

It can be quite big, and there's no need for it to be physically
contiguous. This is less likely to fail under memory pressure (has
actually happened while running piglit).

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 26e900627971..2902f91bb0ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -140,7 +140,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
 	}
 
-	nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL);
+	nodes = kvmalloc_array(num_nodes, sizeof(*nodes),
+			       GFP_KERNEL | __GFP_ZERO);
 	if (!nodes)
 		return -ENOMEM;
 
@@ -195,7 +196,7 @@ error:
 		drm_mm_remove_node(&nodes[i]);
 	spin_unlock(&mgr->lock);
 
-	kfree(nodes);
+	kvfree(nodes);
 	return r == -ENOSPC ? 0 : r;
 }
 
@@ -234,7 +235,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
 	atomic64_sub(usage, &mgr->usage);
 	atomic64_sub(vis_usage, &mgr->vis_usage);
 
-	kfree(mem->mm_node);
+	kvfree(mem->mm_node);
 	mem->mm_node = NULL;
 }
 

From 78e7000fe63f32058e913daf0c9fa2cff23ea206 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Michel=20D=C3=A4nzer?= <michel.daenzer@amd.com>
Date: Tue, 12 Jun 2018 12:07:33 +0200
Subject: [PATCH 21/62] drm/amdgpu: Refactor amdgpu_vram_mgr_bo_invisible_size
 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit 5e9244ff585239630f15f8ad8e676bc91a94ca9e upstream.

Preparation for the following fix, no functional change intended.

Cc: stable@vger.kernel.org
Signed-off-by: Michel Dänzer <michel.daenzer@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c   |  6 ++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h      |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 16 ++++++++++++++++
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 4d08957d2108..1360a24d2ede 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -747,8 +747,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	}
 	if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
 		adev->vram_pin_size += amdgpu_bo_size(bo);
-		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			adev->invisible_pin_size += amdgpu_bo_size(bo);
+		adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo);
 	} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		adev->gart_pin_size += amdgpu_bo_size(bo);
 	}
@@ -786,8 +785,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 
 	if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
 		adev->vram_pin_size -= amdgpu_bo_size(bo);
-		if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-			adev->invisible_pin_size -= amdgpu_bo_size(bo);
+		adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo);
 	} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
 		adev->gart_pin_size -= amdgpu_bo_size(bo);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 43093bffa2cf..557829a84778 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -64,6 +64,7 @@ extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
 bool amdgpu_gtt_mgr_is_allocated(struct ttm_mem_reg *mem);
 uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
 
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 2902f91bb0ce..86d8a961518e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -101,6 +101,22 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
 		adev->mc.visible_vram_size : end) - start;
 }
 
+/**
+ * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo)
+{
+	if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
+		return amdgpu_bo_size(bo);
+
+	return 0;
+}
+
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *

From 1d795d1241d3269bd44d4ef8cfb4da3480ab744c Mon Sep 17 00:00:00 2001
From: Kenneth Graunke <kenneth@whitecape.org>
Date: Fri, 15 Jun 2018 20:06:05 +0100
Subject: [PATCH 22/62] drm/i915: Enable provoking vertex fix on Gen9 systems.

commit 7a3727f385dc64773db1c144f6b15c1e9d4735bb upstream.

The SF and clipper units mishandle the provoking vertex in some cases,
which can cause misrendering with shaders that use flat shaded inputs.

There are chicken bits in 3D_CHICKEN3 (for SF) and FF_SLICE_CHICKEN
(for the clipper) that work around the issue.  These registers are
unfortunately not part of the logical context (even the power context),
and so we must reload them every time we start executing in a context.

Bugzilla: https://bugs.freedesktop.org/103047
Signed-off-by: Kenneth Graunke <kenneth@whitecape.org>
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Link: https://patchwork.freedesktop.org/patch/msgid/20180615190605.16238-1-chris@chris-wilson.co.uk
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: stable@vger.kernel.org
(cherry picked from commit b77422f80337d363eed60c8c48db9cb6e33085c9)
Signed-off-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/i915_reg.h  |  5 +++++
 drivers/gpu/drm/i915/intel_lrc.c | 12 +++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 61a2203b75df..be813b2738c1 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2484,12 +2484,17 @@ enum i915_power_well_id {
 #define _3D_CHICKEN	_MMIO(0x2084)
 #define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB	(1 << 10)
 #define _3D_CHICKEN2	_MMIO(0x208c)
+
+#define FF_SLICE_CHICKEN	_MMIO(0x2088)
+#define  FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX	(1 << 1)
+
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED			(1 << 14)
 #define _3D_CHICKEN3	_MMIO(0x2090)
+#define  _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX		(1 << 12)
 #define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL		(1 << 10)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL		(1 << 5)
 #define  _3D_CHICKEN_SDE_LIMIT_FIFO_POLY_DEPTH(x)	((x)<<1) /* gen8+ */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6f972e6ec663..d638b641b760 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -1067,11 +1067,21 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
 	/* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
 	batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+	*batch++ = MI_LOAD_REGISTER_IMM(3);
+
 	/* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-	*batch++ = MI_LOAD_REGISTER_IMM(1);
 	*batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
 	*batch++ = _MASKED_BIT_DISABLE(
 			GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+
+	/* BSpec: 11391 */
+	*batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN);
+	*batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX);
+
+	/* BSpec: 11299 */
+	*batch++ = i915_mmio_reg_offset(_3D_CHICKEN3);
+	*batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX);
+
 	*batch++ = MI_NOOP;
 
 	/* WaClearSlmSpaceAtContextSwitch:kbl */

From 1ea5ed0cadcae75f733d7bd870fea90d05f42528 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 2 May 2018 14:07:42 +0200
Subject: [PATCH 23/62] netfilter: nf_tables: nft_compat: fix refcount leak on
 xt module

commit b8e9dc1c75714ceb53615743e1036f76e00f5a17 upstream.

Taehee Yoo reported following bug:
    iptables-compat -I OUTPUT -m cpu --cpu 0
    iptables-compat -F
    lsmod |grep xt_cpu
    xt_cpu                 16384  1

Quote:
"When above command is given, a netlink message has two expressions that
are the cpu compat and the nft_counter.
The nft_expr_type_get() in the nf_tables_expr_parse() successes
first expression then, calls select_ops callback.
(allocates memory and holds module)
But, second nft_expr_type_get() in the nf_tables_expr_parse()
returns -EAGAIN because of request_module().
In that point, by the 'goto err1',
the 'module_put(info[i].ops->type->owner)' is called.
There is no release routine."

The core problem is that unlike all other expression,
nft_compat select_ops has side effects.

1. it allocates dynamic memory which holds an nft ops struct.
   In all other expressions, ops has static storage duration.
2. It grabs references to the xt module that it is supposed to
   invoke.

Depending on where things go wrong, error unwinding doesn't
always do the right thing.

In the above scenario, a new nft_compat_expr is created and
xt_cpu module gets loaded with a refcount of 1.

Due to to -EAGAIN, the netlink messages get re-parsed.
When that happens, nft_compat finds that xt_cpu is already present
and increments module refcount again.

This fixes the problem by making select_ops to have no visible
side effects and removes all extra module_get/put.

When select_ops creates a new nft_compat expression, the new
expression has a refcount of 0, and the xt module gets its refcount
incremented.

When error happens, the next call finds existing entry, but will no
longer increase the reference count -- the presence of existing
nft_xt means we already hold a module reference.

Because nft_xt_put is only called from nft_compat destroy hook,
it will never see the initial zero reference count.
->destroy can only be called after ->init(), and that will increase the
refcount.

Lastly, we now free nft_xt struct with kfree_rcu.
Else, we get use-after free in nf_tables_rule_destroy:

  while (expr != nft_expr_last(rule) && expr->ops) {
    nf_tables_expr_destroy(ctx, expr);
    expr = nft_expr_next(expr); // here

nft_expr_next() dereferences expr->ops. This is safe
for all users, as ops have static storage duration.
In nft_compat case however, its ->destroy callback can
free the memory that hold the ops structure.

Tested-by: Taehee Yoo <ap420073@gmail.com>
Reported-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nft_compat.c | 92 ++++++++++++++++++++++++--------------
 1 file changed, 58 insertions(+), 34 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index b89f4f65b2a0..4e3030113c7a 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -27,14 +27,24 @@ struct nft_xt {
 	struct list_head	head;
 	struct nft_expr_ops	ops;
 	unsigned int		refcnt;
+
+	/* Unlike other expressions, ops doesn't have static storage duration.
+	 * nft core assumes they do.  We use kfree_rcu so that nft core can
+	 * can check expr->ops->size even after nft_compat->destroy() frees
+	 * the nft_xt struct that holds the ops structure.
+	 */
+	struct rcu_head		rcu_head;
 };
 
-static void nft_xt_put(struct nft_xt *xt)
+static bool nft_xt_put(struct nft_xt *xt)
 {
 	if (--xt->refcnt == 0) {
 		list_del(&xt->head);
-		kfree(xt);
+		kfree_rcu(xt, rcu_head);
+		return true;
 	}
+
+	return false;
 }
 
 static int nft_compat_chain_validate_dependency(const char *tablename,
@@ -226,6 +236,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_target *target = expr->ops->data;
 	struct xt_tgchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO]));
+	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -236,25 +247,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (ctx->nla[NFTA_RULE_COMPAT]) {
 		ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
 		if (ret < 0)
-			goto err;
+			return ret;
 	}
 
 	nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv);
 
 	ret = xt_check_target(&par, size, proto, inv);
 	if (ret < 0)
-		goto err;
+		return ret;
 
 	/* The standard target cannot be used */
-	if (target->target == NULL) {
-		ret = -EINVAL;
-		goto err;
-	}
+	if (!target->target)
+		return -EINVAL;
 
+	nft_xt = container_of(expr->ops, struct nft_xt, ops);
+	nft_xt->refcnt++;
 	return 0;
-err:
-	module_put(target->me);
-	return ret;
 }
 
 static void
@@ -271,8 +279,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	if (par.target->destroy != NULL)
 		par.target->destroy(&par);
 
-	nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-	module_put(target->me);
+	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+		module_put(target->me);
 }
 
 static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -411,6 +419,7 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
+	struct nft_xt *nft_xt;
 	u16 proto = 0;
 	bool inv = false;
 	union nft_entry e = {};
@@ -421,19 +430,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (ctx->nla[NFTA_RULE_COMPAT]) {
 		ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv);
 		if (ret < 0)
-			goto err;
+			return ret;
 	}
 
 	nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv);
 
 	ret = xt_check_match(&par, size, proto, inv);
 	if (ret < 0)
-		goto err;
+		return ret;
 
+	nft_xt = container_of(expr->ops, struct nft_xt, ops);
+	nft_xt->refcnt++;
 	return 0;
-err:
-	module_put(match->me);
-	return ret;
 }
 
 static void
@@ -450,8 +458,8 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	if (par.match->destroy != NULL)
 		par.match->destroy(&par);
 
-	nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
-	module_put(match->me);
+	if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops)))
+		module_put(match->me);
 }
 
 static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -654,13 +662,8 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	list_for_each_entry(nft_match, &nft_match_list, head) {
 		struct xt_match *match = nft_match->ops.data;
 
-		if (nft_match_cmp(match, mt_name, rev, family)) {
-			if (!try_module_get(match->me))
-				return ERR_PTR(-ENOENT);
-
-			nft_match->refcnt++;
+		if (nft_match_cmp(match, mt_name, rev, family))
 			return &nft_match->ops;
-		}
 	}
 
 	match = xt_request_find_match(family, mt_name, rev);
@@ -679,7 +682,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_match->refcnt = 1;
+	nft_match->refcnt = 0;
 	nft_match->ops.type = &nft_match_type;
 	nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
 	nft_match->ops.eval = nft_match_eval;
@@ -739,13 +742,8 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 	list_for_each_entry(nft_target, &nft_target_list, head) {
 		struct xt_target *target = nft_target->ops.data;
 
-		if (nft_target_cmp(target, tg_name, rev, family)) {
-			if (!try_module_get(target->me))
-				return ERR_PTR(-ENOENT);
-
-			nft_target->refcnt++;
+		if (nft_target_cmp(target, tg_name, rev, family))
 			return &nft_target->ops;
-		}
 	}
 
 	target = xt_request_find_target(family, tg_name, rev);
@@ -764,7 +762,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
 		goto err;
 	}
 
-	nft_target->refcnt = 1;
+	nft_target->refcnt = 0;
 	nft_target->ops.type = &nft_target_type;
 	nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
 	nft_target->ops.init = nft_target_init;
@@ -825,6 +823,32 @@ err_match:
 
 static void __exit nft_compat_module_exit(void)
 {
+	struct nft_xt *xt, *next;
+
+	/* list should be empty here, it can be non-empty only in case there
+	 * was an error that caused nft_xt expr to not be initialized fully
+	 * and noone else requested the same expression later.
+	 *
+	 * In this case, the lists contain 0-refcount entries that still
+	 * hold module reference.
+	 */
+	list_for_each_entry_safe(xt, next, &nft_target_list, head) {
+		struct xt_target *target = xt->ops.data;
+
+		if (WARN_ON_ONCE(xt->refcnt))
+			continue;
+		module_put(target->me);
+		kfree(xt);
+	}
+
+	list_for_each_entry_safe(xt, next, &nft_match_list, head) {
+		struct xt_match *match = xt->ops.data;
+
+		if (WARN_ON_ONCE(xt->refcnt))
+			continue;
+		module_put(match->me);
+		kfree(xt);
+	}
 	nfnetlink_subsys_unregister(&nfnl_compat_subsys);
 	nft_unregister_expr(&nft_target_type);
 	nft_unregister_expr(&nft_match_type);

From ea200cdd605662de24f7cd92e03aa6a335e0b4da Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 7 May 2018 15:22:35 +0200
Subject: [PATCH 24/62] netfilter: nft_compat: prepare for indirect info
 storage

commit 8bdf164744b2c7f63561846c01cff3db597f282d upstream.

Next patch will make it possible for *info to be stored in
a separate allocation instead of the expr private area.

This removes the 'expr priv area is info blob' assumption
from the match init/destroy/eval functions.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nft_compat.c | 47 +++++++++++++++++++++++++++++---------
 1 file changed, 36 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 4e3030113c7a..025eb37bd5c5 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -324,11 +324,11 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 	return 0;
 }
 
-static void nft_match_eval(const struct nft_expr *expr,
-			   struct nft_regs *regs,
-			   const struct nft_pktinfo *pkt)
+static void __nft_match_eval(const struct nft_expr *expr,
+			     struct nft_regs *regs,
+			     const struct nft_pktinfo *pkt,
+			     void *info)
 {
-	void *info = nft_expr_priv(expr);
 	struct xt_match *match = expr->ops->data;
 	struct sk_buff *skb = pkt->skb;
 	bool ret;
@@ -352,6 +352,13 @@ static void nft_match_eval(const struct nft_expr *expr,
 	}
 }
 
+static void nft_match_eval(const struct nft_expr *expr,
+			   struct nft_regs *regs,
+			   const struct nft_pktinfo *pkt)
+{
+	__nft_match_eval(expr, regs, pkt, nft_expr_priv(expr));
+}
+
 static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
 	[NFTA_MATCH_NAME]	= { .type = NLA_NUL_STRING },
 	[NFTA_MATCH_REV]	= { .type = NLA_U32 },
@@ -412,10 +419,10 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out)
 }
 
 static int
-nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
-		const struct nlattr * const tb[])
+__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		 const struct nlattr * const tb[],
+		 void *info)
 {
-	void *info = nft_expr_priv(expr);
 	struct xt_match *match = expr->ops->data;
 	struct xt_mtchk_param par;
 	size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO]));
@@ -444,11 +451,18 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	return 0;
 }
 
+static int
+nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+	       const struct nlattr * const tb[])
+{
+	return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
+}
+
 static void
-nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		    void *info)
 {
 	struct xt_match *match = expr->ops->data;
-	void *info = nft_expr_priv(expr);
 	struct xt_mtdtor_param par;
 
 	par.net = ctx->net;
@@ -462,9 +476,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 		module_put(match->me);
 }
 
-static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+static void
+nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
+}
+
+static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
+			    void *info)
 {
-	void *info = nft_expr_priv(expr);
 	struct xt_match *match = expr->ops->data;
 
 	if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) ||
@@ -478,6 +498,11 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	return __nft_match_dump(skb, expr, nft_expr_priv(expr));
+}
+
 static int nft_match_validate(const struct nft_ctx *ctx,
 			      const struct nft_expr *expr,
 			      const struct nft_data **data)

From 365e73e07fba4c343510adf49d18cb2b97337df8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 7 May 2018 15:22:36 +0200
Subject: [PATCH 25/62] netfilter: nft_compat: fix handling of large matchinfo
 size

commit 732a8049f365f514d0607e03938491bf6cb0d620 upstream.

currently matchinfo gets stored in the expression, but some xt matches
are very large.

To handle those we either need to switch nft core to kvmalloc and increase
size limit, or allocate the info blob of large matches separately.

This does the latter, this limits the scope of the changes to
nft_compat.

I picked a threshold of 192, this allows most matches to work as before and
handle only few ones via separate alloation (cgroup, u32, sctp, rt).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nft_compat.c | 64 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 63 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 025eb37bd5c5..3bd637eadc42 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -36,6 +36,13 @@ struct nft_xt {
 	struct rcu_head		rcu_head;
 };
 
+/* Used for matches where *info is larger than X byte */
+#define NFT_MATCH_LARGE_THRESH	192
+
+struct nft_xt_match_priv {
+	void *info;
+};
+
 static bool nft_xt_put(struct nft_xt *xt)
 {
 	if (--xt->refcnt == 0) {
@@ -352,6 +359,15 @@ static void __nft_match_eval(const struct nft_expr *expr,
 	}
 }
 
+static void nft_match_large_eval(const struct nft_expr *expr,
+				 struct nft_regs *regs,
+				 const struct nft_pktinfo *pkt)
+{
+	struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+	__nft_match_eval(expr, regs, pkt, priv->info);
+}
+
 static void nft_match_eval(const struct nft_expr *expr,
 			   struct nft_regs *regs,
 			   const struct nft_pktinfo *pkt)
@@ -458,6 +474,24 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr));
 }
 
+static int
+nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		     const struct nlattr * const tb[])
+{
+	struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+	struct xt_match *m = expr->ops->data;
+	int ret;
+
+	priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL);
+	if (!priv->info)
+		return -ENOMEM;
+
+	ret = __nft_match_init(ctx, expr, tb, priv->info);
+	if (ret)
+		kfree(priv->info);
+	return ret;
+}
+
 static void
 __nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		    void *info)
@@ -482,6 +516,15 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
 	__nft_match_destroy(ctx, expr, nft_expr_priv(expr));
 }
 
+static void
+nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
+{
+	struct nft_xt_match_priv *priv = nft_expr_priv(expr);
+
+	__nft_match_destroy(ctx, expr, priv->info);
+	kfree(priv->info);
+}
+
 static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr,
 			    void *info)
 {
@@ -503,6 +546,13 @@ static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr)
 	return __nft_match_dump(skb, expr, nft_expr_priv(expr));
 }
 
+static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e)
+{
+	struct nft_xt_match_priv *priv = nft_expr_priv(e);
+
+	return __nft_match_dump(skb, e, priv->info);
+}
+
 static int nft_match_validate(const struct nft_ctx *ctx,
 			      const struct nft_expr *expr,
 			      const struct nft_data **data)
@@ -670,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 {
 	struct nft_xt *nft_match;
 	struct xt_match *match;
+	unsigned int matchsize;
 	char *mt_name;
 	u32 rev, family;
 	int err;
@@ -709,7 +760,6 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 
 	nft_match->refcnt = 0;
 	nft_match->ops.type = &nft_match_type;
-	nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
 	nft_match->ops.eval = nft_match_eval;
 	nft_match->ops.init = nft_match_init;
 	nft_match->ops.destroy = nft_match_destroy;
@@ -717,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx,
 	nft_match->ops.validate = nft_match_validate;
 	nft_match->ops.data = match;
 
+	matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
+	if (matchsize > NFT_MATCH_LARGE_THRESH) {
+		matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv));
+
+		nft_match->ops.eval = nft_match_large_eval;
+		nft_match->ops.init = nft_match_large_init;
+		nft_match->ops.destroy = nft_match_large_destroy;
+		nft_match->ops.dump = nft_match_large_dump;
+	}
+
+	nft_match->ops.size = matchsize;
+
 	list_add(&nft_match->head, &nft_match_list);
 
 	return &nft_match->ops;

From 4ae6a7afae5362289cc99aa80f7d6229521888c3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sun, 6 May 2018 00:47:20 +0200
Subject: [PATCH 26/62] netfilter: nf_tables: don't assume chain stats are set
 when jumplabel is set

commit 009240940e84c1c089af88b454f7e804a4c5bd1b upstream.

nft_chain_stats_replace() and all other spots assume ->stats can be
NULL, but nft_update_chain_stats does not.  It must do this check,
just because the jump label is set doesn't mean all basechains have stats
assigned.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_core.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index dfd0bf3810d2..942702a2776f 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled);
 static noinline void nft_update_chain_stats(const struct nft_chain *chain,
 					    const struct nft_pktinfo *pkt)
 {
+	struct nft_base_chain *base_chain;
 	struct nft_stats *stats;
 
-	local_bh_disable();
-	stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats));
-	u64_stats_update_begin(&stats->syncp);
-	stats->pkts++;
-	stats->bytes += pkt->skb->len;
-	u64_stats_update_end(&stats->syncp);
-	local_bh_enable();
+	base_chain = nft_base_chain(chain);
+	if (!base_chain->stats)
+		return;
+
+	stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
+	if (stats) {
+		local_bh_disable();
+		u64_stats_update_begin(&stats->syncp);
+		stats->pkts++;
+		stats->bytes += pkt->skb->len;
+		u64_stats_update_end(&stats->syncp);
+		local_bh_enable();
+	}
 }
 
 struct nft_jumpstack {

From 2b93cb2861dedfc43d7eb82c37c4eafbc385fef8 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 8 May 2018 02:43:57 +0200
Subject: [PATCH 27/62] netfilter: nf_tables: bogus EBUSY in chain deletions

commit bb7b40aecbf778c0c83a5bd62b0f03ca9f49a618 upstream.

When removing a rule that jumps to chain and such chain in the same
batch, this bogusly hits EBUSY. Add activate and deactivate operations
to expression that can be called from the preparation and the
commit/abort phases.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/net/netfilter/nf_tables.h |  5 ++++
 net/netfilter/nf_tables_api.c     | 46 ++++++++++++++++++++++++++++---
 net/netfilter/nft_immediate.c     | 15 ++++++++--
 3 files changed, 59 insertions(+), 7 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 079c69cae2f6..59a4f50ffe8d 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -177,6 +177,7 @@ struct nft_data_desc {
 int nft_data_init(const struct nft_ctx *ctx,
 		  struct nft_data *data, unsigned int size,
 		  struct nft_data_desc *desc, const struct nlattr *nla);
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type);
 void nft_data_release(const struct nft_data *data, enum nft_data_types type);
 int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data,
 		  enum nft_data_types type, unsigned int len);
@@ -731,6 +732,10 @@ struct nft_expr_ops {
 	int				(*init)(const struct nft_ctx *ctx,
 						const struct nft_expr *expr,
 						const struct nlattr * const tb[]);
+	void				(*activate)(const struct nft_ctx *ctx,
+						    const struct nft_expr *expr);
+	void				(*deactivate)(const struct nft_ctx *ctx,
+						      const struct nft_expr *expr);
 	void				(*destroy)(const struct nft_ctx *ctx,
 						   const struct nft_expr *expr);
 	int				(*dump)(struct sk_buff *skb,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index cf30c440f7a7..8a2027d7aaa3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -220,6 +220,34 @@ static int nft_delchain(struct nft_ctx *ctx)
 	return err;
 }
 
+static void nft_rule_expr_activate(const struct nft_ctx *ctx,
+				   struct nft_rule *rule)
+{
+	struct nft_expr *expr;
+
+	expr = nft_expr_first(rule);
+	while (expr != nft_expr_last(rule) && expr->ops) {
+		if (expr->ops->activate)
+			expr->ops->activate(ctx, expr);
+
+		expr = nft_expr_next(expr);
+	}
+}
+
+static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
+				     struct nft_rule *rule)
+{
+	struct nft_expr *expr;
+
+	expr = nft_expr_first(rule);
+	while (expr != nft_expr_last(rule) && expr->ops) {
+		if (expr->ops->deactivate)
+			expr->ops->deactivate(ctx, expr);
+
+		expr = nft_expr_next(expr);
+	}
+}
+
 static int
 nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
 {
@@ -265,6 +293,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
 		nft_trans_destroy(trans);
 		return err;
 	}
+	nft_rule_expr_deactivate(ctx, rule);
 
 	return 0;
 }
@@ -2218,6 +2247,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
 	kfree(rule);
 }
 
+static void nf_tables_rule_release(const struct nft_ctx *ctx,
+				   struct nft_rule *rule)
+{
+	nft_rule_expr_deactivate(ctx, rule);
+	nf_tables_rule_destroy(ctx, rule);
+}
+
 #define NFT_RULE_MAXEXPRS	128
 
 static struct nft_expr_info *info;
@@ -2385,7 +2421,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk,
 	return 0;
 
 err2:
-	nf_tables_rule_destroy(&ctx, rule);
+	nf_tables_rule_release(&ctx, rule);
 err1:
 	for (i = 0; i < n; i++) {
 		if (info[i].ops != NULL)
@@ -4054,7 +4090,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk,
  *	NFT_GOTO verdicts. This function must be called on active data objects
  *	from the second phase of the commit protocol.
  */
-static void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
+void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
 {
 	if (type == NFT_DATA_VERDICT) {
 		switch (data->verdict.code) {
@@ -5221,10 +5257,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
 		case NFT_MSG_NEWRULE:
 			trans->ctx.chain->use--;
 			list_del_rcu(&nft_trans_rule(trans)->list);
+			nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans));
 			break;
 		case NFT_MSG_DELRULE:
 			trans->ctx.chain->use++;
 			nft_clear(trans->ctx.net, nft_trans_rule(trans));
+			nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSET:
@@ -5798,7 +5836,7 @@ int __nft_release_basechain(struct nft_ctx *ctx)
 	list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
 		list_del(&rule->list);
 		ctx->chain->use--;
-		nf_tables_rule_destroy(ctx, rule);
+		nf_tables_rule_release(ctx, rule);
 	}
 	list_del(&ctx->chain->list);
 	ctx->table->use--;
@@ -5832,7 +5870,7 @@ static void __nft_release_afinfo(struct net *net, struct nft_af_info *afi)
 			list_for_each_entry_safe(rule, nr, &chain->rules, list) {
 				list_del(&rule->list);
 				chain->use--;
-				nf_tables_rule_destroy(&ctx, rule);
+				nf_tables_rule_release(&ctx, rule);
 			}
 		}
 		list_for_each_entry_safe(set, ns, &table->sets, list) {
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 4717d7796927..aa87ff8beae8 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -69,8 +69,16 @@ err1:
 	return err;
 }
 
-static void nft_immediate_destroy(const struct nft_ctx *ctx,
-				  const struct nft_expr *expr)
+static void nft_immediate_activate(const struct nft_ctx *ctx,
+				   const struct nft_expr *expr)
+{
+	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+
+	return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
+}
+
+static void nft_immediate_deactivate(const struct nft_ctx *ctx,
+				     const struct nft_expr *expr)
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
@@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = {
 	.size		= NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)),
 	.eval		= nft_immediate_eval,
 	.init		= nft_immediate_init,
-	.destroy	= nft_immediate_destroy,
+	.activate	= nft_immediate_activate,
+	.deactivate	= nft_immediate_deactivate,
 	.dump		= nft_immediate_dump,
 	.validate	= nft_immediate_validate,
 };

From 491b1a866e4ad2f5b6f5d60a23e16ae77ec37d5c Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 17 May 2018 22:49:49 +0900
Subject: [PATCH 28/62] netfilter: nft_meta: fix wrong value dereference in
 nft_meta_set_eval

commit 97a0549b15a0b466c47f6a0143a490a082c64b4e upstream.

In the nft_meta_set_eval, nftrace value is dereferenced as u32 from sreg.
But correct type is u8. so that sometimes incorrect value is dereferenced.

Steps to reproduce:

   %nft add table ip filter
   %nft add chain ip filter input { type filter hook input priority 4\; }
   %nft add rule ip filter input nftrace set 0
   %nft monitor

Sometimes, we can see trace messages.

   trace id 16767227 ip filter input packet: iif "enp2s0"
   ether saddr xx:xx:xx:xx:xx:xx ether daddr xx:xx:xx:xx:xx:xx
   ip saddr 192.168.0.1 ip daddr 255.255.255.255 ip dscp cs0
   ip ecn not-ect ip
   trace id 16767227 ip filter input rule nftrace set 0 (verdict continue)
   trace id 16767227 ip filter input verdict continue
   trace id 16767227 ip filter input

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nft_meta.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5a60eb23a7ed..c71184d4eac1 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -229,7 +229,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 	struct sk_buff *skb = pkt->skb;
 	u32 *sreg = &regs->data[meta->sreg];
 	u32 value = *sreg;
-	u8 pkt_type;
+	u8 value8;
 
 	switch (meta->key) {
 	case NFT_META_MARK:
@@ -239,15 +239,17 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 		skb->priority = value;
 		break;
 	case NFT_META_PKTTYPE:
-		pkt_type = nft_reg_load8(sreg);
+		value8 = nft_reg_load8(sreg);
 
-		if (skb->pkt_type != pkt_type &&
-		    skb_pkt_type_ok(pkt_type) &&
+		if (skb->pkt_type != value8 &&
+		    skb_pkt_type_ok(value8) &&
 		    skb_pkt_type_ok(skb->pkt_type))
-			skb->pkt_type = pkt_type;
+			skb->pkt_type = value8;
 		break;
 	case NFT_META_NFTRACE:
-		skb->nf_trace = !!value;
+		value8 = nft_reg_load8(sreg);
+
+		skb->nf_trace = !!value8;
 		break;
 	default:
 		WARN_ON(1);

From d3a9b8a511812fce32be5253e8ef2a3d54b34b4d Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 27 May 2018 21:08:13 +0200
Subject: [PATCH 29/62] netfilter: nf_tables: disable preemption in
 nft_update_chain_stats()

commit ad9d9e85072b668731f356be0a3750a3ba22a607 upstream.

This patch fixes the following splat.

[118709.054937] BUG: using smp_processor_id() in preemptible [00000000] code: test/1571
[118709.054970] caller is nft_update_chain_stats.isra.4+0x53/0x97 [nf_tables]
[118709.054980] CPU: 2 PID: 1571 Comm: test Not tainted 4.17.0-rc6+ #335
[...]
[118709.054992] Call Trace:
[118709.055011]  dump_stack+0x5f/0x86
[118709.055026]  check_preemption_disabled+0xd4/0xe4

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 942702a2776f..40e744572283 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -126,15 +126,15 @@ static noinline void nft_update_chain_stats(const struct nft_chain *chain,
 	if (!base_chain->stats)
 		return;
 
+	local_bh_disable();
 	stats = this_cpu_ptr(rcu_dereference(base_chain->stats));
 	if (stats) {
-		local_bh_disable();
 		u64_stats_update_begin(&stats->syncp);
 		stats->pkts++;
 		stats->bytes += pkt->skb->len;
 		u64_stats_update_end(&stats->syncp);
-		local_bh_enable();
 	}
+	local_bh_enable();
 }
 
 struct nft_jumpstack {

From 174757e28b7bc261f8ddd12467786f286717a108 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Tue, 29 May 2018 01:14:12 +0900
Subject: [PATCH 30/62] netfilter: nf_tables: increase nft_counters_enabled in
 nft_chain_stats_replace()

commit bbb8c61f97e3a2dd91b30d3e57b7964a67569d11 upstream.

When a chain is updated, a counter can be attached. if so,
the nft_counters_enabled should be increased.

test commands:

   %nft add table ip filter
   %nft add chain ip filter input { type filter hook input priority 4\; }
   %iptables-compat -Z input
   %nft delete chain ip filter input

we can see below messages.

[  286.443720] jump label: negative count!
[  286.448278] WARNING: CPU: 0 PID: 1459 at kernel/jump_label.c:197 __static_key_slow_dec_cpuslocked+0x6f/0xf0
[  286.449144] Modules linked in: nf_tables nfnetlink ip_tables x_tables
[  286.449144] CPU: 0 PID: 1459 Comm: nft Tainted: G        W         4.17.0-rc2+ #12
[  286.449144] RIP: 0010:__static_key_slow_dec_cpuslocked+0x6f/0xf0
[  286.449144] RSP: 0018:ffff88010e5176f0 EFLAGS: 00010286
[  286.449144] RAX: 000000000000001b RBX: ffffffffc0179500 RCX: ffffffffb8a82522
[  286.449144] RDX: 0000000000000001 RSI: 0000000000000008 RDI: ffff88011b7e5eac
[  286.449144] RBP: 0000000000000000 R08: ffffed00236fce5c R09: ffffed00236fce5b
[  286.449144] R10: ffffffffc0179503 R11: ffffed00236fce5c R12: 0000000000000000
[  286.449144] R13: ffff88011a28e448 R14: ffff88011a28e470 R15: dffffc0000000000
[  286.449144] FS:  00007f0384328700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000
[  286.449144] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  286.449144] CR2: 00007f038394bf10 CR3: 0000000104a86000 CR4: 00000000001006f0
[  286.449144] Call Trace:
[  286.449144]  static_key_slow_dec+0x6a/0x70
[  286.449144]  nf_tables_chain_destroy+0x19d/0x210 [nf_tables]
[  286.449144]  nf_tables_commit+0x1891/0x1c50 [nf_tables]
[  286.449144]  nfnetlink_rcv+0x1148/0x13d0 [nfnetlink]
[ ... ]

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_api.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8a2027d7aaa3..f70c5329ee6a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1266,8 +1266,10 @@ static void nft_chain_stats_replace(struct nft_base_chain *chain,
 		rcu_assign_pointer(chain->stats, newstats);
 		synchronize_rcu();
 		free_percpu(oldstats);
-	} else
+	} else {
 		rcu_assign_pointer(chain->stats, newstats);
+		static_branch_inc(&nft_counters_enabled);
+	}
 }
 
 static void nf_tables_chain_destroy(struct nft_chain *chain)

From 082711fa317845b535faa0aeea7ee3ff82a08301 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 9 May 2018 13:22:56 +0100
Subject: [PATCH 31/62] netfilter: nf_tables: fix memory leak on error exit
 return

commit f0dfd7a2b35b02030949100247d851b793cb275f upstream.

Currently the -EBUSY error return path is not free'ing resources
allocated earlier, leaving a memory leak. Fix this by exiting via the
error exit label err5 that performs the necessary resource clean
up.

Detected by CoverityScan, CID#1432975 ("Resource leak")

Fixes: 9744a6fcefcb ("netfilter: nf_tables: check if same extensions are set when adding elements")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_api.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f70c5329ee6a..155b8098731e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3999,8 +3999,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^
 			    nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) ||
 			    nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^
-			    nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF))
-				return -EBUSY;
+			    nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) {
+				err = -EBUSY;
+				goto err5;
+			}
 			if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) &&
 			     nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) &&
 			     memcmp(nft_set_ext_data(ext),

From 44956f98fd8b4f235f8f40a367955f55f0477b37 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 20 Mar 2018 15:25:37 +0100
Subject: [PATCH 32/62] netfilter: nf_tables: add missing netlink attrs to
 policies

commit 467697d289e7e6e1b15910d99096c0da08c56d5b upstream.

Fixes: 8aeff920dcc9 ("netfilter: nf_tables: add stateful object reference to set elements")
Fixes: f25ad2e907f1 ("netfilter: nf_tables: prepare for expressions associated to set elements")
Fixes: 1a94e38d254b ("netfilter: nf_tables: add NFTA_RULE_ID attribute")
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_api.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 155b8098731e..60936bca3181 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1978,6 +1978,7 @@ static const struct nla_policy nft_rule_policy[NFTA_RULE_MAX + 1] = {
 	[NFTA_RULE_POSITION]	= { .type = NLA_U64 },
 	[NFTA_RULE_USERDATA]	= { .type = NLA_BINARY,
 				    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_RULE_ID]		= { .type = NLA_U32 },
 };
 
 static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net,
@@ -3412,6 +3413,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
 	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
 					    .len = NFT_USERDATA_MAXLEN },
+	[NFTA_SET_ELEM_EXPR]		= { .type = NLA_NESTED },
+	[NFTA_SET_ELEM_OBJREF]		= { .type = NLA_STRING },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {

From b8d8cde449fd51db0bb407dece03d36ced1acb83 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Tue, 29 May 2018 01:13:45 +0900
Subject: [PATCH 33/62] netfilter: nf_tables: fix NULL-ptr in
 nf_tables_dump_obj()

commit 360cc79d9d299ce297b205508276285ceffc5fa8 upstream.

The table field in nft_obj_filter is not an array. In order to check
tablename, we should check if the pointer is set.

Test commands:

   %nft add table ip filter
   %nft add counter ip filter ct1
   %nft reset counters

Splat looks like:

[  306.510504] kasan: CONFIG_KASAN_INLINE enabled
[  306.516184] kasan: GPF could be caused by NULL-ptr deref or user memory access
[  306.524775] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI
[  306.528284] Modules linked in: nft_objref nft_counter nf_tables nfnetlink ip_tables x_tables
[  306.528284] CPU: 0 PID: 1488 Comm: nft Not tainted 4.17.0-rc4+ #17
[  306.528284] Hardware name: To be filled by O.E.M. To be filled by O.E.M./Aptio CRB, BIOS 5.6.5 07/08/2015
[  306.528284] RIP: 0010:nf_tables_dump_obj+0x52c/0xa70 [nf_tables]
[  306.528284] RSP: 0018:ffff8800b6cb7520 EFLAGS: 00010246
[  306.528284] RAX: 0000000000000000 RBX: ffff8800b6c49820 RCX: 0000000000000000
[  306.528284] RDX: 0000000000000000 RSI: dffffc0000000000 RDI: ffffed0016d96e9a
[  306.528284] RBP: ffff8800b6cb75c0 R08: ffffed00236fce7c R09: ffffed00236fce7b
[  306.528284] R10: ffffffff9f6241e8 R11: ffffed00236fce7c R12: ffff880111365108
[  306.528284] R13: 0000000000000000 R14: ffff8800b6c49860 R15: ffff8800b6c49860
[  306.528284] FS:  00007f838b007700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000
[  306.528284] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  306.528284] CR2: 00007ffeafabcf78 CR3: 00000000b6cbe000 CR4: 00000000001006f0
[  306.528284] Call Trace:
[  306.528284]  netlink_dump+0x470/0xa20
[  306.528284]  __netlink_dump_start+0x5ae/0x690
[  306.528284]  ? nf_tables_getobj+0x1b3/0x740 [nf_tables]
[  306.528284]  nf_tables_getobj+0x2f5/0x740 [nf_tables]
[  306.528284]  ? nft_obj_notify+0x100/0x100 [nf_tables]
[  306.528284]  ? nf_tables_getobj+0x740/0x740 [nf_tables]
[  306.528284]  ? nf_tables_dump_flowtable_done+0x70/0x70 [nf_tables]
[  306.528284]  ? nft_obj_notify+0x100/0x100 [nf_tables]
[  306.528284]  nfnetlink_rcv_msg+0x8ff/0x932 [nfnetlink]
[  306.528284]  ? nfnetlink_rcv_msg+0x216/0x932 [nfnetlink]
[  306.528284]  netlink_rcv_skb+0x1c9/0x2f0
[  306.528284]  ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink]
[  306.528284]  ? debug_check_no_locks_freed+0x270/0x270
[  306.528284]  ? netlink_ack+0x7a0/0x7a0
[  306.528284]  ? ns_capable_common+0x6e/0x110
[ ... ]

Fixes: e46abbcc05aa8 ("netfilter: nf_tables: Allow table names of up to 255 chars")
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_api.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 60936bca3181..85b549e84104 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4614,7 +4614,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb)
 				if (idx > s_idx)
 					memset(&cb->args[1], 0,
 					       sizeof(cb->args) - sizeof(cb->args[0]));
-				if (filter && filter->table[0] &&
+				if (filter && filter->table &&
 				    strcmp(filter->table, table->name))
 					goto cont;
 				if (filter &&

From e44e4cf3a8dbfee3f6e078f254c26e5a5168c6a2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 19 Oct 2017 12:17:16 +1100
Subject: [PATCH 34/62] md: always hold reconfig_mutex when calling
 mddev_suspend()

commit 4d5324f760aacaefeb721b172aa14bf66045c332 upstream.

Most often mddev_suspend() is called with
reconfig_mutex held.  Make this a requirement in
preparation a subsequent patch.  Also require
reconfig_mutex to be held for mddev_resume(),
partly for symmetry and partly to guarantee
no races with incr/decr of mddev->suspend.

Taking the mutex in r5c_disable_writeback_async() is
a little tricky as this is called from a work queue
via log->disable_writeback_work, and flush_work()
is called on that while holding ->reconfig_mutex.
If the work item hasn't run before flush_work()
is called, the work function will not be able to
get the mutex.

So we use mddev_trylock() inside the wait_event() call, and have that
abort when conf->log is set to NULL, which happens before
flush_work() is called.
We wait in mddev->sb_wait and ensure this is woken
when any of the conditions change.  This requires
waking mddev->sb_wait in mddev_unlock().  This is only
like to trigger extra wake_ups of threads that needn't
be woken when metadata is being written, and that
doesn't happen often enough that the cost would be
noticeable.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/dm-raid.c     | 10 ++++++++--
 drivers/md/md.c          |  3 +++
 drivers/md/raid5-cache.c | 18 +++++++++++++-----
 3 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 33834db7c0a0..38a2ac24428e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3637,8 +3637,11 @@ static void raid_postsuspend(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
-	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+	if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+		mddev_lock_nointr(&rs->md);
 		mddev_suspend(&rs->md);
+		mddev_unlock(&rs->md);
+	}
 
 	rs->md.ro = 1;
 }
@@ -3898,8 +3901,11 @@ static void raid_resume(struct dm_target *ti)
 	if (!(rs->ctr_flags & RESUME_STAY_FROZEN_FLAGS))
 		clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
 
-	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
+	if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
+		mddev_lock_nointr(mddev);
 		mddev_resume(mddev);
+		mddev_unlock(mddev);
+	}
 }
 
 static struct target_type raid_target = {
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 7143c8b9284b..757f12d49540 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -344,6 +344,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 void mddev_suspend(struct mddev *mddev)
 {
 	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
+	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
@@ -357,6 +358,7 @@ EXPORT_SYMBOL_GPL(mddev_suspend);
 
 void mddev_resume(struct mddev *mddev)
 {
+	lockdep_assert_held(&mddev->reconfig_mutex);
 	if (--mddev->suspended)
 		return;
 	wake_up(&mddev->sb_wait);
@@ -663,6 +665,7 @@ void mddev_unlock(struct mddev *mddev)
 	 */
 	spin_lock(&pers_lock);
 	md_wakeup_thread(mddev->thread);
+	wake_up(&mddev->sb_wait);
 	spin_unlock(&pers_lock);
 }
 EXPORT_SYMBOL_GPL(mddev_unlock);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 9a340728b846..79d812717406 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -693,6 +693,8 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 	struct r5l_log *log = container_of(work, struct r5l_log,
 					   disable_writeback_work);
 	struct mddev *mddev = log->rdev->mddev;
+	struct r5conf *conf = mddev->private;
+	int locked = 0;
 
 	if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH)
 		return;
@@ -701,11 +703,15 @@ static void r5c_disable_writeback_async(struct work_struct *work)
 
 	/* wait superblock change before suspend */
 	wait_event(mddev->sb_wait,
-		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
-
-	mddev_suspend(mddev);
-	log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
-	mddev_resume(mddev);
+		   conf->log == NULL ||
+		   (!test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) &&
+		    (locked = mddev_trylock(mddev))));
+	if (locked) {
+		mddev_suspend(mddev);
+		log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH;
+		mddev_resume(mddev);
+		mddev_unlock(mddev);
+	}
 }
 
 static void r5l_submit_current_io(struct r5l_log *log)
@@ -3161,6 +3167,8 @@ void r5l_exit_log(struct r5conf *conf)
 	conf->log = NULL;
 	synchronize_rcu();
 
+	/* Ensure disable_writeback_work wakes up and exits */
+	wake_up(&conf->mddev->sb_wait);
 	flush_work(&log->disable_writeback_work);
 	md_unregister_thread(&log->reclaim_thread);
 	mempool_destroy(log->meta_pool);

From cc091f3fbbdb117d819536d6249e34322f991899 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Tue, 17 Oct 2017 13:46:43 +1100
Subject: [PATCH 35/62] md: don't call bitmap_create() while array is quiesced.

commit 52a0d49de3d592a3118e13f35985e3d99eaf43df upstream.

bitmap_create() allocates memory with GFP_KERNEL and
so can wait for IO.
If called while the array is quiesced, it could wait indefinitely
for write out to the array - deadlock.
So call bitmap_create() before quiescing the array.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 757f12d49540..ac27fe200ecd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -6645,22 +6645,26 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 		return -ENOENT; /* cannot remove what isn't there */
 	err = 0;
 	if (mddev->pers) {
-		mddev->pers->quiesce(mddev, 1);
 		if (fd >= 0) {
 			struct bitmap *bitmap;
 
 			bitmap = bitmap_create(mddev, -1);
+			mddev->pers->quiesce(mddev, 1);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				err = bitmap_load(mddev);
 			} else
 				err = PTR_ERR(bitmap);
-		}
-		if (fd < 0 || err) {
+			if (err) {
+				bitmap_destroy(mddev);
+				fd = -1;
+			}
+			mddev->pers->quiesce(mddev, 0);
+		} else if (fd < 0) {
+			mddev->pers->quiesce(mddev, 1);
 			bitmap_destroy(mddev);
-			fd = -1; /* make sure to put the file */
+			mddev->pers->quiesce(mddev, 0);
 		}
-		mddev->pers->quiesce(mddev, 0);
 	}
 	if (fd < 0) {
 		struct file *f = mddev->bitmap_info.file;
@@ -6944,8 +6948,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				mddev->bitmap_info.default_offset;
 			mddev->bitmap_info.space =
 				mddev->bitmap_info.default_space;
-			mddev->pers->quiesce(mddev, 1);
 			bitmap = bitmap_create(mddev, -1);
+			mddev->pers->quiesce(mddev, 1);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				rv = bitmap_load(mddev);

From feabea21655961e6b0f87ad7351a4f99515c6b09 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Tue, 17 Oct 2017 13:46:43 +1100
Subject: [PATCH 36/62] md: move suspend_hi/lo handling into core md code

commit b3143b9a38d5039bcd1f2d1c94039651bfba8043 upstream.

responding to ->suspend_lo and ->suspend_hi is similar
to responding to ->suspended.  It is best to wait in
the common core code without incrementing ->active_io.
This allows mddev_suspend()/mddev_resume() to work while
requests are waiting for suspend_lo/hi to change.
This is will be important after a subsequent patch
which uses mddev_suspend() to synchronize updating for
suspend_lo/hi.

So move the code for testing suspend_lo/hi out of raid1.c
and raid5.c, and place it in md.c

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c    | 29 +++++++++++++++++++++++------
 drivers/md/raid1.c | 14 +++++---------
 drivers/md/raid5.c | 22 ----------------------
 3 files changed, 28 insertions(+), 37 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index ac27fe200ecd..06ad5d798d73 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -266,16 +266,31 @@ static DEFINE_SPINLOCK(all_mddevs_lock);
  * call has finished, the bio has been linked into some internal structure
  * and so is visible to ->quiesce(), so we don't need the refcount any more.
  */
+static bool is_suspended(struct mddev *mddev, struct bio *bio)
+{
+	if (mddev->suspended)
+		return true;
+	if (bio_data_dir(bio) != WRITE)
+		return false;
+	if (mddev->suspend_lo >= mddev->suspend_hi)
+		return false;
+	if (bio->bi_iter.bi_sector >= mddev->suspend_hi)
+		return false;
+	if (bio_end_sector(bio) < mddev->suspend_lo)
+		return false;
+	return true;
+}
+
 void md_handle_request(struct mddev *mddev, struct bio *bio)
 {
 check_suspended:
 	rcu_read_lock();
-	if (mddev->suspended) {
+	if (is_suspended(mddev, bio)) {
 		DEFINE_WAIT(__wait);
 		for (;;) {
 			prepare_to_wait(&mddev->sb_wait, &__wait,
 					TASK_UNINTERRUPTIBLE);
-			if (!mddev->suspended)
+			if (!is_suspended(mddev, bio))
 				break;
 			rcu_read_unlock();
 			schedule();
@@ -4849,10 +4864,11 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 		goto unlock;
 	old = mddev->suspend_lo;
 	mddev->suspend_lo = new;
-	if (new >= old)
+	if (new >= old) {
 		/* Shrinking suspended region */
+		wake_up(&mddev->sb_wait);
 		mddev->pers->quiesce(mddev, 2);
-	else {
+	} else {
 		/* Expanding suspended region - need to wait */
 		mddev->pers->quiesce(mddev, 1);
 		mddev->pers->quiesce(mddev, 0);
@@ -4892,10 +4908,11 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 		goto unlock;
 	old = mddev->suspend_hi;
 	mddev->suspend_hi = new;
-	if (new <= old)
+	if (new <= old) {
 		/* Shrinking suspended region */
+		wake_up(&mddev->sb_wait);
 		mddev->pers->quiesce(mddev, 2);
-	else {
+	} else {
 		/* Expanding suspended region - need to wait */
 		mddev->pers->quiesce(mddev, 1);
 		mddev->pers->quiesce(mddev, 0);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index e4e01d3bab81..bd5976aefb55 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1298,11 +1298,9 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 	 */
 
 
-	if ((bio_end_sector(bio) > mddev->suspend_lo &&
-	    bio->bi_iter.bi_sector < mddev->suspend_hi) ||
-	    (mddev_is_clustered(mddev) &&
+	if (mddev_is_clustered(mddev) &&
 	     md_cluster_ops->area_resyncing(mddev, WRITE,
-		     bio->bi_iter.bi_sector, bio_end_sector(bio)))) {
+		     bio->bi_iter.bi_sector, bio_end_sector(bio))) {
 
 		/*
 		 * As the suspend_* range is controlled by userspace, we want
@@ -1313,12 +1311,10 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
 			sigset_t full, old;
 			prepare_to_wait(&conf->wait_barrier,
 					&w, TASK_INTERRUPTIBLE);
-			if ((bio_end_sector(bio) <= mddev->suspend_lo ||
-			     bio->bi_iter.bi_sector >= mddev->suspend_hi) &&
-			    (!mddev_is_clustered(mddev) ||
-			     !md_cluster_ops->area_resyncing(mddev, WRITE,
+			if (!mddev_is_clustered(mddev) ||
+			    !md_cluster_ops->area_resyncing(mddev, WRITE,
 							bio->bi_iter.bi_sector,
-							bio_end_sector(bio))))
+							bio_end_sector(bio)))
 				break;
 			sigfillset(&full);
 			sigprocmask(SIG_BLOCK, &full, &old);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index de1ef6264ee7..30c1dc17d5af 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5686,28 +5686,6 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 				goto retry;
 			}
 
-			if (rw == WRITE &&
-			    logical_sector >= mddev->suspend_lo &&
-			    logical_sector < mddev->suspend_hi) {
-				raid5_release_stripe(sh);
-				/* As the suspend_* range is controlled by
-				 * userspace, we want an interruptible
-				 * wait.
-				 */
-				prepare_to_wait(&conf->wait_for_overlap,
-						&w, TASK_INTERRUPTIBLE);
-				if (logical_sector >= mddev->suspend_lo &&
-				    logical_sector < mddev->suspend_hi) {
-					sigset_t full, old;
-					sigfillset(&full);
-					sigprocmask(SIG_BLOCK, &full, &old);
-					schedule();
-					sigprocmask(SIG_SETMASK, &old, NULL);
-					do_prepare = true;
-				}
-				goto retry;
-			}
-
 			if (test_bit(STRIPE_EXPANDING, &sh->state) ||
 			    !add_stripe_bio(sh, bi, dd_idx, rw, previous)) {
 				/* Stripe is busy expanding or

From 7c435e22453038ea29e6467be9afe05225d53de2 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Tue, 17 Oct 2017 13:46:43 +1100
Subject: [PATCH 37/62] md: use mddev_suspend/resume instead of ->quiesce()

commit 9e1cc0a54556a6c63dc0cfb7cd7d60d43337bba6 upstream.

mddev_suspend() is a more general interface than
calling ->quiesce() and is so more extensible.  A
future patch will make use of this.

Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 06ad5d798d73..3d1650db2064 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4870,8 +4870,8 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->pers->quiesce(mddev, 2);
 	} else {
 		/* Expanding suspended region - need to wait */
-		mddev->pers->quiesce(mddev, 1);
-		mddev->pers->quiesce(mddev, 0);
+		mddev_suspend(mddev);
+		mddev_resume(mddev);
 	}
 	err = 0;
 unlock:
@@ -4914,8 +4914,8 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->pers->quiesce(mddev, 2);
 	} else {
 		/* Expanding suspended region - need to wait */
-		mddev->pers->quiesce(mddev, 1);
-		mddev->pers->quiesce(mddev, 0);
+		mddev_suspend(mddev);
+		mddev_resume(mddev);
 	}
 	err = 0;
 unlock:
@@ -6666,7 +6666,7 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 			struct bitmap *bitmap;
 
 			bitmap = bitmap_create(mddev, -1);
-			mddev->pers->quiesce(mddev, 1);
+			mddev_suspend(mddev);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				err = bitmap_load(mddev);
@@ -6676,11 +6676,11 @@ static int set_bitmap_file(struct mddev *mddev, int fd)
 				bitmap_destroy(mddev);
 				fd = -1;
 			}
-			mddev->pers->quiesce(mddev, 0);
+			mddev_resume(mddev);
 		} else if (fd < 0) {
-			mddev->pers->quiesce(mddev, 1);
+			mddev_suspend(mddev);
 			bitmap_destroy(mddev);
-			mddev->pers->quiesce(mddev, 0);
+			mddev_resume(mddev);
 		}
 	}
 	if (fd < 0) {
@@ -6966,7 +6966,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 			mddev->bitmap_info.space =
 				mddev->bitmap_info.default_space;
 			bitmap = bitmap_create(mddev, -1);
-			mddev->pers->quiesce(mddev, 1);
+			mddev_suspend(mddev);
 			if (!IS_ERR(bitmap)) {
 				mddev->bitmap = bitmap;
 				rv = bitmap_load(mddev);
@@ -6974,7 +6974,7 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				rv = PTR_ERR(bitmap);
 			if (rv)
 				bitmap_destroy(mddev);
-			mddev->pers->quiesce(mddev, 0);
+			mddev_resume(mddev);
 		} else {
 			/* remove the bitmap */
 			if (!mddev->bitmap) {
@@ -6997,9 +6997,9 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
 				mddev->bitmap_info.nodes = 0;
 				md_cluster_ops->leave(mddev);
 			}
-			mddev->pers->quiesce(mddev, 1);
+			mddev_suspend(mddev);
 			bitmap_destroy(mddev);
-			mddev->pers->quiesce(mddev, 0);
+			mddev_resume(mddev);
 			mddev->bitmap_info.offset = 0;
 		}
 	}

From ce57466d323b224bc817bbb07791b4ca111bd53e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Tue, 17 Oct 2017 13:46:43 +1100
Subject: [PATCH 38/62] md: allow metadata update while suspending.

commit 35bfc52187f6df8779d0f1cebdb52b7f797baf4e upstream.

There are various deadlocks that can occur
when a thread holds reconfig_mutex and calls
->quiesce(mddev, 1).
As some write request block waiting for
metadata to be updated (e.g. to record device
failure), and as the md thread updates the metadata
while the reconfig mutex is held, holding the mutex
can stop write requests completing, and this prevents
->quiesce(mddev, 1) from completing.

->quiesce() is now usually called from mddev_suspend(),
and it is always called with reconfig_mutex held.  So
at this time it is safe for the thread to update metadata
without explicitly taking the lock.

So add 2 new flags, one which says the unlocked updates is
allowed, and one which ways it is happening.  Then allow it
while the quiesce completes, and then wait for it to finish.

Reported-and-tested-by: Xiao Ni <xni@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md.c | 14 ++++++++++++++
 drivers/md/md.h |  6 ++++++
 2 files changed, 20 insertions(+)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3d1650db2064..b74cca273e38 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -364,8 +364,12 @@ void mddev_suspend(struct mddev *mddev)
 		return;
 	synchronize_rcu();
 	wake_up(&mddev->sb_wait);
+	set_bit(MD_ALLOW_SB_UPDATE, &mddev->flags);
+	smp_mb__after_atomic();
 	wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
 	mddev->pers->quiesce(mddev, 1);
+	clear_bit_unlock(MD_ALLOW_SB_UPDATE, &mddev->flags);
+	wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
 
 	del_timer_sync(&mddev->safemode_timer);
 }
@@ -8882,6 +8886,16 @@ void md_check_recovery(struct mddev *mddev)
 	unlock:
 		wake_up(&mddev->sb_wait);
 		mddev_unlock(mddev);
+	} else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+		/* Write superblock - thread that called mddev_suspend()
+		 * holds reconfig_mutex for us.
+		 */
+		set_bit(MD_UPDATING_SB, &mddev->flags);
+		smp_mb__after_atomic();
+		if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+			md_update_sb(mddev, 0);
+		clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+		wake_up(&mddev->sb_wait);
 	}
 }
 EXPORT_SYMBOL(md_check_recovery);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 9b0a896890ef..37c19b7b5df9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -237,6 +237,12 @@ enum mddev_flags {
 				 */
 	MD_HAS_PPL,		/* The raid array has PPL feature set */
 	MD_HAS_MULTIPLE_PPLS,	/* The raid array has multiple PPLs feature set */
+	MD_ALLOW_SB_UPDATE,	/* md_check_recovery is allowed to update
+				 * the metadata without taking reconfig_mutex.
+				 */
+	MD_UPDATING_SB,		/* md_check_recovery is updating the metadata
+				 * without explicitly holding reconfig_mutex.
+				 */
 };
 
 enum mddev_sb_flags {

From 2fc45ef962879d29f9567202e3a183fab5a7fd37 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.com>
Date: Thu, 19 Oct 2017 12:49:15 +1100
Subject: [PATCH 39/62] md: remove special meaning of ->quiesce(.., 2)

commit b03e0ccb5ab9df3efbe51c87843a1ffbecbafa1f upstream.

The '2' argument means "wake up anything that is waiting".
This is an inelegant part of the design and was added
to help support management of suspend_lo/suspend_hi setting.
Now that suspend_lo/hi is managed in mddev_suspend/resume,
that need is gone.
These is still a couple of places where we call 'quiesce'
with an argument of '2', but they can safely be changed to
call ->quiesce(.., 1); ->quiesce(.., 0) which
achieve the same result at the small cost of pausing IO
briefly.

This removes a small "optimization" from suspend_{hi,lo}_store,
but it isn't clear that optimization served a useful purpose.
The code now is a lot clearer.

Suggested-by: Shaohua Li <shli@kernel.org>
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Signed-off-by: Jack Wang <jinpu.wang@profitbricks.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/md/md-cluster.c  |  6 +++---
 drivers/md/md.c          | 34 ++++++++++------------------------
 drivers/md/md.h          |  9 ++++-----
 drivers/md/raid0.c       |  2 +-
 drivers/md/raid1.c       | 13 +++----------
 drivers/md/raid10.c      | 10 +++-------
 drivers/md/raid5-cache.c | 12 ++++++------
 drivers/md/raid5-log.h   |  2 +-
 drivers/md/raid5.c       | 18 ++++++------------
 9 files changed, 37 insertions(+), 69 deletions(-)

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 03082e17c65c..72ce0bccc865 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -442,10 +442,11 @@ static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
 static void remove_suspend_info(struct mddev *mddev, int slot)
 {
 	struct md_cluster_info *cinfo = mddev->cluster_info;
+	mddev->pers->quiesce(mddev, 1);
 	spin_lock_irq(&cinfo->suspend_lock);
 	__remove_suspend_info(cinfo, slot);
 	spin_unlock_irq(&cinfo->suspend_lock);
-	mddev->pers->quiesce(mddev, 2);
+	mddev->pers->quiesce(mddev, 0);
 }
 
 
@@ -492,13 +493,12 @@ static void process_suspend_info(struct mddev *mddev,
 	s->lo = lo;
 	s->hi = hi;
 	mddev->pers->quiesce(mddev, 1);
-	mddev->pers->quiesce(mddev, 0);
 	spin_lock_irq(&cinfo->suspend_lock);
 	/* Remove existing entry (if exists) before adding */
 	__remove_suspend_info(cinfo, slot);
 	list_add(&s->list, &cinfo->suspend_list);
 	spin_unlock_irq(&cinfo->suspend_lock);
-	mddev->pers->quiesce(mddev, 2);
+	mddev->pers->quiesce(mddev, 0);
 }
 
 static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index b74cca273e38..11a67eac55b1 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -4850,7 +4850,7 @@ suspend_lo_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 {
-	unsigned long long old, new;
+	unsigned long long new;
 	int err;
 
 	err = kstrtoull(buf, 10, &new);
@@ -4866,17 +4866,10 @@ suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
 	if (mddev->pers == NULL ||
 	    mddev->pers->quiesce == NULL)
 		goto unlock;
-	old = mddev->suspend_lo;
+	mddev_suspend(mddev);
 	mddev->suspend_lo = new;
-	if (new >= old) {
-		/* Shrinking suspended region */
-		wake_up(&mddev->sb_wait);
-		mddev->pers->quiesce(mddev, 2);
-	} else {
-		/* Expanding suspended region - need to wait */
-		mddev_suspend(mddev);
-		mddev_resume(mddev);
-	}
+	mddev_resume(mddev);
+
 	err = 0;
 unlock:
 	mddev_unlock(mddev);
@@ -4894,7 +4887,7 @@ suspend_hi_show(struct mddev *mddev, char *page)
 static ssize_t
 suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 {
-	unsigned long long old, new;
+	unsigned long long new;
 	int err;
 
 	err = kstrtoull(buf, 10, &new);
@@ -4907,20 +4900,13 @@ suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
 	if (err)
 		return err;
 	err = -EINVAL;
-	if (mddev->pers == NULL ||
-	    mddev->pers->quiesce == NULL)
+	if (mddev->pers == NULL)
 		goto unlock;
-	old = mddev->suspend_hi;
+
+	mddev_suspend(mddev);
 	mddev->suspend_hi = new;
-	if (new <= old) {
-		/* Shrinking suspended region */
-		wake_up(&mddev->sb_wait);
-		mddev->pers->quiesce(mddev, 2);
-	} else {
-		/* Expanding suspended region - need to wait */
-		mddev_suspend(mddev);
-		mddev_resume(mddev);
-	}
+	mddev_resume(mddev);
+
 	err = 0;
 unlock:
 	mddev_unlock(mddev);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 37c19b7b5df9..11696aba94e3 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -546,12 +546,11 @@ struct md_personality
 	int (*check_reshape) (struct mddev *mddev);
 	int (*start_reshape) (struct mddev *mddev);
 	void (*finish_reshape) (struct mddev *mddev);
-	/* quiesce moves between quiescence states
-	 * 0 - fully active
-	 * 1 - no new requests allowed
-	 * others - reserved
+	/* quiesce suspends or resumes internal processing.
+	 * 1 - stop new actions and wait for action io to complete
+	 * 0 - return to normal behaviour
 	 */
-	void (*quiesce) (struct mddev *mddev, int state);
+	void (*quiesce) (struct mddev *mddev, int quiesce);
 	/* takeover is used to transition an array from one
 	 * personality to another.  The new personality must be able
 	 * to handle the data in the current layout.
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 5a00fc118470..5ecba9eef441 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -768,7 +768,7 @@ static void *raid0_takeover(struct mddev *mddev)
 	return ERR_PTR(-EINVAL);
 }
 
-static void raid0_quiesce(struct mddev *mddev, int state)
+static void raid0_quiesce(struct mddev *mddev, int quiesce)
 {
 }
 
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index bd5976aefb55..029ecba60727 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3276,21 +3276,14 @@ static int raid1_reshape(struct mddev *mddev)
 	return 0;
 }
 
-static void raid1_quiesce(struct mddev *mddev, int state)
+static void raid1_quiesce(struct mddev *mddev, int quiesce)
 {
 	struct r1conf *conf = mddev->private;
 
-	switch(state) {
-	case 2: /* wake for suspend */
-		wake_up(&conf->wait_barrier);
-		break;
-	case 1:
+	if (quiesce)
 		freeze_array(conf, 0);
-		break;
-	case 0:
+	else
 		unfreeze_array(conf);
-		break;
-	}
 }
 
 static void *raid1_takeover(struct mddev *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 5fb31ef52945..b20c23f970f4 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3838,18 +3838,14 @@ static void raid10_free(struct mddev *mddev, void *priv)
 	kfree(conf);
 }
 
-static void raid10_quiesce(struct mddev *mddev, int state)
+static void raid10_quiesce(struct mddev *mddev, int quiesce)
 {
 	struct r10conf *conf = mddev->private;
 
-	switch(state) {
-	case 1:
+	if (quiesce)
 		raise_barrier(conf, 0);
-		break;
-	case 0:
+	else
 		lower_barrier(conf);
-		break;
-	}
 }
 
 static int raid10_resize(struct mddev *mddev, sector_t sectors)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 79d812717406..0d535b40cb3b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1589,21 +1589,21 @@ void r5l_wake_reclaim(struct r5l_log *log, sector_t space)
 	md_wakeup_thread(log->reclaim_thread);
 }
 
-void r5l_quiesce(struct r5l_log *log, int state)
+void r5l_quiesce(struct r5l_log *log, int quiesce)
 {
 	struct mddev *mddev;
-	if (!log || state == 2)
+	if (!log)
 		return;
-	if (state == 0)
-		kthread_unpark(log->reclaim_thread->tsk);
-	else if (state == 1) {
+
+	if (quiesce) {
 		/* make sure r5l_write_super_and_discard_space exits */
 		mddev = log->rdev->mddev;
 		wake_up(&mddev->sb_wait);
 		kthread_park(log->reclaim_thread->tsk);
 		r5l_wake_reclaim(log, MaxSector);
 		r5l_do_reclaim(log);
-	}
+	} else
+		kthread_unpark(log->reclaim_thread->tsk);
 }
 
 bool r5l_log_disk_error(struct r5conf *conf)
diff --git a/drivers/md/raid5-log.h b/drivers/md/raid5-log.h
index 7f9ad5f7cda0..284578b0a349 100644
--- a/drivers/md/raid5-log.h
+++ b/drivers/md/raid5-log.h
@@ -9,7 +9,7 @@ extern void r5l_write_stripe_run(struct r5l_log *log);
 extern void r5l_flush_stripe_to_raid(struct r5l_log *log);
 extern void r5l_stripe_write_finished(struct stripe_head *sh);
 extern int r5l_handle_flush_request(struct r5l_log *log, struct bio *bio);
-extern void r5l_quiesce(struct r5l_log *log, int state);
+extern void r5l_quiesce(struct r5l_log *log, int quiesce);
 extern bool r5l_log_disk_error(struct r5conf *conf);
 extern bool r5c_is_writeback(struct r5l_log *log);
 extern int
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 30c1dc17d5af..07ca2fd10189 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -8003,16 +8003,12 @@ static void raid5_finish_reshape(struct mddev *mddev)
 	}
 }
 
-static void raid5_quiesce(struct mddev *mddev, int state)
+static void raid5_quiesce(struct mddev *mddev, int quiesce)
 {
 	struct r5conf *conf = mddev->private;
 
-	switch(state) {
-	case 2: /* resume for a suspend */
-		wake_up(&conf->wait_for_overlap);
-		break;
-
-	case 1: /* stop all writes */
+	if (quiesce) {
+		/* stop all writes */
 		lock_all_device_hash_locks_irq(conf);
 		/* '2' tells resync/reshape to pause so that all
 		 * active stripes can drain
@@ -8028,17 +8024,15 @@ static void raid5_quiesce(struct mddev *mddev, int state)
 		unlock_all_device_hash_locks_irq(conf);
 		/* allow reshape to continue */
 		wake_up(&conf->wait_for_overlap);
-		break;
-
-	case 0: /* re-enable writes */
+	} else {
+		/* re-enable writes */
 		lock_all_device_hash_locks_irq(conf);
 		conf->quiesce = 0;
 		wake_up(&conf->wait_for_quiescent);
 		wake_up(&conf->wait_for_overlap);
 		unlock_all_device_hash_locks_irq(conf);
-		break;
 	}
-	r5l_quiesce(conf->log, state);
+	r5l_quiesce(conf->log, quiesce);
 }
 
 static void *raid45_takeover_raid0(struct mddev *mddev, int level)

From 3f8e85fbbaa55c9981e27ae9e5182569b393189b Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 15 Feb 2018 00:23:05 +0100
Subject: [PATCH 40/62] netfilter: don't set F_IFACE on ipv6 fib lookups

commit 47b7e7f82802dced3ac73658bf4b77584a63063f upstream.

"fib" starts to behave strangely when an ipv6 default route is
added - the FIB lookup returns a route using 'oif' in this case.

This behaviour was inherited from ip6tables rpfilter so change
this as well.

Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1221
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/netfilter/ip6t_rpfilter.c |  4 ----
 net/ipv6/netfilter/nft_fib_ipv6.c  | 12 ++----------
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index b12e61b7b16c..3119e720a6c8 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -48,10 +48,6 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 	}
 
 	fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
-	if ((flags & XT_RPFILTER_LOOSE) == 0) {
-		fl6.flowi6_oif = dev->ifindex;
-		lookup_flags |= RT6_LOOKUP_F_IFACE;
-	}
 
 	rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
 	if (rt->dst.error)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 54b5899543ef..fd9a45cbd709 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -182,7 +182,6 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	}
 
 	*dest = 0;
- again:
 	rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
 	if (rt->dst.error)
 		goto put_rt_err;
@@ -191,15 +190,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
 	if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL))
 		goto put_rt_err;
 
-	if (oif && oif != rt->rt6i_idev->dev) {
-		/* multipath route? Try again with F_IFACE */
-		if ((lookup_flags & RT6_LOOKUP_F_IFACE) == 0) {
-			lookup_flags |= RT6_LOOKUP_F_IFACE;
-			fl6.flowi6_oif = oif->ifindex;
-			ip6_rt_put(rt);
-			goto again;
-		}
-	}
+	if (oif && oif != rt->rt6i_idev->dev)
+		goto put_rt_err;
 
 	switch (priv->result) {
 	case NFT_FIB_RESULT_OIF:

From 5acd64888e020f0ce758fa14e65a75692dcc6e37 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <vincent@bernat.im>
Date: Sun, 20 May 2018 13:03:38 +0200
Subject: [PATCH 41/62] netfilter: ip6t_rpfilter: provide input interface for
 route lookup

commit cede24d1b21d68d84ac5a36c44f7d37daadcc258 upstream.

In commit 47b7e7f82802, this bit was removed at the same time the
RT6_LOOKUP_F_IFACE flag was removed. However, it is needed when
link-local addresses are used, which is a very common case: when
packets are routed, neighbor solicitations are done using link-local
addresses. For example, the following neighbor solicitation is not
matched by "-m rpfilter":

    IP6 fe80::5254:33ff:fe00:1 > ff02::1:ff00:3: ICMP6, neighbor
    solicitation, who has 2001:db8::5254:33ff:fe00:3, length 32

Commit 47b7e7f82802 doesn't quite explain why we shouldn't use
RT6_LOOKUP_F_IFACE in the rpfilter case. I suppose the interface check
later in the function would make it redundant. However, the remaining
of the routing code is using RT6_LOOKUP_F_IFACE when there is no
source address (which matches rpfilter's case with a non-unicast
destination, like with neighbor solicitation).

Signed-off-by: Vincent Bernat <vincent@bernat.im>
Fixes: 47b7e7f82802 ("netfilter: don't set F_IFACE on ipv6 fib lookups")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/netfilter/ip6t_rpfilter.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 3119e720a6c8..1c4a5de3f301 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -48,6 +48,8 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
 	}
 
 	fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+	if ((flags & XT_RPFILTER_LOOSE) == 0)
+		fl6.flowi6_oif = dev->ifindex;
 
 	rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
 	if (rt->dst.error)

From 259cc05cce439944121a7c26e439713b873af8b2 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Mon, 11 Jun 2018 22:16:33 +0900
Subject: [PATCH 42/62] netfilter: nf_tables: use WARN_ON_ONCE instead of
 BUG_ON in nft_do_chain()

commit adc972c5b88829d38ede08b1069718661c7330ae upstream.

When depth of chain is bigger than NFT_JUMP_STACK_SIZE, the nft_do_chain
crashes. But there is no need to crash hard here.

Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nf_tables_core.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 40e744572283..32b7896929f3 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -208,7 +208,8 @@ next_rule:
 
 	switch (regs.verdict.code) {
 	case NFT_JUMP:
-		BUG_ON(stackptr >= NFT_JUMP_STACK_SIZE);
+		if (WARN_ON_ONCE(stackptr >= NFT_JUMP_STACK_SIZE))
+			return NF_DROP;
 		jumpstack[stackptr].chain = chain;
 		jumpstack[stackptr].rule  = rule;
 		jumpstack[stackptr].rulenum = rulenum;

From 0ed70f20644959caf01a89b17fccebd19b204d92 Mon Sep 17 00:00:00 2001
From: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Date: Tue, 22 May 2018 19:45:09 +0200
Subject: [PATCH 43/62] ARM: dts: imx6q: Use correct SDMA script for SPI5 core

commit df07101e1c4a29e820df02f9989a066988b160e6 upstream.

According to the reference manual the shp_2_mcu / mcu_2_shp
scripts must be used for devices connected through the SPBA.

This fixes an issue we saw with DMA transfers.
Sometimes the SPI controller RX FIFO was not empty after a DMA
transfer and the driver got stuck in the next PIO transfer when
it read one word more than expected.

commit dd4b487b32a35 ("ARM: dts: imx6: Use correct SDMA script
for SPI cores") is fixing the same issue but only for SPI1 - 4.

Fixes: 677940258dd8e ("ARM: dts: imx6q: enable dma for ecspi5")
Signed-off-by: Sean Nyekjaer <sean.nyekjaer@prevas.dk>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/arm/boot/dts/imx6q.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6q.dtsi b/arch/arm/boot/dts/imx6q.dtsi
index 90a741732f60..4747ede61acd 100644
--- a/arch/arm/boot/dts/imx6q.dtsi
+++ b/arch/arm/boot/dts/imx6q.dtsi
@@ -96,7 +96,7 @@
 					clocks = <&clks IMX6Q_CLK_ECSPI5>,
 						 <&clks IMX6Q_CLK_ECSPI5>;
 					clock-names = "ipg", "per";
-					dmas = <&sdma 11 7 1>, <&sdma 12 7 2>;
+					dmas = <&sdma 11 8 1>, <&sdma 12 8 2>;
 					dma-names = "rx", "tx";
 					status = "disabled";
 				};

From 693d06dffb436ae22c899e0ffd7c28064dd8ec32 Mon Sep 17 00:00:00 2001
From: Abhishek Sahu <absahu@codeaurora.org>
Date: Wed, 13 Jun 2018 14:32:36 +0530
Subject: [PATCH 44/62] mtd: rawnand: fix return value check for bad block
 status

commit e9893e6fa932f42c90c4ac5849fa9aa0f0f00a34 upstream.

Positive return value from read_oob() is making false BAD
blocks. For some of the NAND controllers, OOB bytes will be
protected with ECC and read_oob() will return number of bitflips.
If there is any bitflip in ECC protected OOB bytes for BAD block
status page, then that block is getting treated as BAD.

Fixes: c120e75e0e7d ("mtd: nand: use read_oob() instead of cmdfunc() for bad block check")
Cc: <stable@vger.kernel.org>
Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
[backported to 4.14.y]
Signed-off-by: Abhishek Sahu <absahu@codeaurora.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mtd/nand/nand_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 528e04f96c13..d410de331854 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -440,7 +440,7 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs)
 
 	for (; page < page_end; page++) {
 		res = chip->ecc.read_oob(mtd, chip, page);
-		if (res)
+		if (res < 0)
 			return res;
 
 		bad = chip->oob_poi[chip->badblockpos];

From 4cf1fbcdef7f04605333a1786f619122c135c0fe Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 12 May 2018 02:49:30 -0700
Subject: [PATCH 45/62] xfrm6: avoid potential infinite loop in
 _decode_session6()

[ Upstream commit d9f92772e8ec388d070752ee8f187ef8fa18621f ]

syzbot found a way to trigger an infinitie loop by overflowing
@offset variable that has been forced to use u16 for some very
obscure reason in the past.

We probably want to look at NEXTHDR_FRAGMENT handling which looks
wrong, in a separate patch.

In net-next, we shall try to use skb_header_pointer() instead of
pskb_may_pull().

watchdog: BUG: soft lockup - CPU#1 stuck for 134s! [syz-executor738:4553]
Modules linked in:
irq event stamp: 13885653
hardirqs last  enabled at (13885652): [<ffffffff878009d5>] restore_regs_and_return_to_kernel+0x0/0x2b
hardirqs last disabled at (13885653): [<ffffffff87800905>] interrupt_entry+0xb5/0xf0 arch/x86/entry/entry_64.S:625
softirqs last  enabled at (13614028): [<ffffffff84df0809>] tun_napi_alloc_frags drivers/net/tun.c:1478 [inline]
softirqs last  enabled at (13614028): [<ffffffff84df0809>] tun_get_user+0x1dd9/0x4290 drivers/net/tun.c:1825
softirqs last disabled at (13614032): [<ffffffff84df1b6f>] tun_get_user+0x313f/0x4290 drivers/net/tun.c:1942
CPU: 1 PID: 4553 Comm: syz-executor738 Not tainted 4.17.0-rc3+ #40
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:check_kcov_mode kernel/kcov.c:67 [inline]
RIP: 0010:__sanitizer_cov_trace_pc+0x20/0x50 kernel/kcov.c:101
RSP: 0018:ffff8801d8cfe250 EFLAGS: 00000246 ORIG_RAX: ffffffffffffff13
RAX: ffff8801d88a8080 RBX: ffff8801d7389e40 RCX: 0000000000000006
RDX: 0000000000000000 RSI: ffffffff868da4ad RDI: ffff8801c8a53277
RBP: ffff8801d8cfe250 R08: ffff8801d88a8080 R09: ffff8801d8cfe3e8
R10: ffffed003b19fc87 R11: ffff8801d8cfe43f R12: ffff8801c8a5327f
R13: 0000000000000000 R14: ffff8801c8a4e5fe R15: ffff8801d8cfe3e8
FS:  0000000000d88940(0000) GS:ffff8801daf00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffffffffff600400 CR3: 00000001acab3000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 _decode_session6+0xc1d/0x14f0 net/ipv6/xfrm6_policy.c:150
 __xfrm_decode_session+0x71/0x140 net/xfrm/xfrm_policy.c:2368
 xfrm_decode_session_reverse include/net/xfrm.h:1213 [inline]
 icmpv6_route_lookup+0x395/0x6e0 net/ipv6/icmp.c:372
 icmp6_send+0x1982/0x2da0 net/ipv6/icmp.c:551
 icmpv6_send+0x17a/0x300 net/ipv6/ip6_icmp.c:43
 ip6_input_finish+0x14e1/0x1a30 net/ipv6/ip6_input.c:305
 NF_HOOK include/linux/netfilter.h:288 [inline]
 ip6_input+0xe1/0x5e0 net/ipv6/ip6_input.c:327
 dst_input include/net/dst.h:450 [inline]
 ip6_rcv_finish+0x29c/0xa10 net/ipv6/ip6_input.c:71
 NF_HOOK include/linux/netfilter.h:288 [inline]
 ipv6_rcv+0xeb8/0x2040 net/ipv6/ip6_input.c:208
 __netif_receive_skb_core+0x2468/0x3650 net/core/dev.c:4646
 __netif_receive_skb+0x2c/0x1e0 net/core/dev.c:4711
 netif_receive_skb_internal+0x126/0x7b0 net/core/dev.c:4785
 napi_frags_finish net/core/dev.c:5226 [inline]
 napi_gro_frags+0x631/0xc40 net/core/dev.c:5299
 tun_get_user+0x3168/0x4290 drivers/net/tun.c:1951
 tun_chr_write_iter+0xb9/0x154 drivers/net/tun.c:1996
 call_write_iter include/linux/fs.h:1784 [inline]
 do_iter_readv_writev+0x859/0xa50 fs/read_write.c:680
 do_iter_write+0x185/0x5f0 fs/read_write.c:959
 vfs_writev+0x1c7/0x330 fs/read_write.c:1004
 do_writev+0x112/0x2f0 fs/read_write.c:1039
 __do_sys_writev fs/read_write.c:1112 [inline]
 __se_sys_writev fs/read_write.c:1109 [inline]
 __x64_sys_writev+0x75/0xb0 fs/read_write.c:1109
 do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: syzbot+0053c8...@syzkaller.appspotmail.com
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/xfrm6_policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 17e95a0386b3..d6b012295b45 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -123,7 +123,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	struct flowi6 *fl6 = &fl->u.ip6;
 	int onlyproto = 0;
 	const struct ipv6hdr *hdr = ipv6_hdr(skb);
-	u16 offset = sizeof(*hdr);
+	u32 offset = sizeof(*hdr);
 	struct ipv6_opt_hdr *exthdr;
 	const unsigned char *nh = skb_network_header(skb);
 	u16 nhoff = IP6CB(skb)->nhoff;

From e36bc9930d85f4ae29b67eb71bc66ae49e217b71 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 16 May 2018 21:25:46 +0100
Subject: [PATCH 46/62] afs: Fix directory permissions check

[ Upstream commit 378831e4daec75fbba6d3612bcf3b4dd00ddbf08 ]

Doing faccessat("/afs/some/directory", 0) triggers a BUG in the permissions
check code.

Fix this by just removing the BUG section.  If no permissions are asked
for, just return okay if the file exists.

Also:

 (1) Split up the directory check so that it has separate if-statements
     rather than if-else-if (e.g. checking for MAY_EXEC shouldn't skip the
     check for MAY_READ and MAY_WRITE).

 (2) Check for MAY_CHDIR as MAY_EXEC.

Without the main fix, the following BUG may occur:

 kernel BUG at fs/afs/security.c:386!
 invalid opcode: 0000 [#1] SMP PTI
 ...
 RIP: 0010:afs_permission+0x19d/0x1a0 [kafs]
 ...
 Call Trace:
  ? inode_permission+0xbe/0x180
  ? do_faccessat+0xdc/0x270
  ? do_syscall_64+0x60/0x1f0
  ? entry_SYSCALL_64_after_hwframe+0x49/0xbe

Fixes: 00d3b7a4533e ("[AFS]: Add security support.")
Reported-by: Jonathan Billings <jsbillings@jsbillings.org>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/afs/security.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/afs/security.c b/fs/afs/security.c
index faca66227ecf..859096e25f2c 100644
--- a/fs/afs/security.c
+++ b/fs/afs/security.c
@@ -323,18 +323,14 @@ int afs_permission(struct inode *inode, int mask)
 	       mask, access, S_ISDIR(inode->i_mode) ? "dir" : "file");
 
 	if (S_ISDIR(inode->i_mode)) {
-		if (mask & MAY_EXEC) {
+		if (mask & (MAY_EXEC | MAY_READ | MAY_CHDIR)) {
 			if (!(access & AFS_ACE_LOOKUP))
 				goto permission_denied;
-		} else if (mask & MAY_READ) {
-			if (!(access & AFS_ACE_LOOKUP))
-				goto permission_denied;
-		} else if (mask & MAY_WRITE) {
+		}
+		if (mask & MAY_WRITE) {
 			if (!(access & (AFS_ACE_DELETE | /* rmdir, unlink, rename from */
 					AFS_ACE_INSERT))) /* create, mkdir, symlink, rename to */
 				goto permission_denied;
-		} else {
-			BUG();
 		}
 	} else {
 		if (!(access & AFS_ACE_LOOKUP))

From db73501ebc3ad56b94aa5adb7365a9a4d5313523 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 27 Apr 2018 10:45:31 +0200
Subject: [PATCH 47/62] netfilter: ebtables: handle string from userspace with
 care

[ Upstream commit 94c752f99954797da583a84c4907ff19e92550a4 ]

strlcpy() can't be safely used on a user-space provided string,
as it can try to read beyond the buffer's end, if the latter is
not NULL terminated.

Leveraging the above, syzbot has been able to trigger the following
splat:

BUG: KASAN: stack-out-of-bounds in strlcpy include/linux/string.h:300
[inline]
BUG: KASAN: stack-out-of-bounds in compat_mtw_from_user
net/bridge/netfilter/ebtables.c:1957 [inline]
BUG: KASAN: stack-out-of-bounds in ebt_size_mwt
net/bridge/netfilter/ebtables.c:2059 [inline]
BUG: KASAN: stack-out-of-bounds in size_entry_mwt
net/bridge/netfilter/ebtables.c:2155 [inline]
BUG: KASAN: stack-out-of-bounds in compat_copy_entries+0x96c/0x14a0
net/bridge/netfilter/ebtables.c:2194
Write of size 33 at addr ffff8801b0abf888 by task syz-executor0/4504

CPU: 0 PID: 4504 Comm: syz-executor0 Not tainted 4.17.0-rc2+ #40
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
Call Trace:
  __dump_stack lib/dump_stack.c:77 [inline]
  dump_stack+0x1b9/0x294 lib/dump_stack.c:113
  print_address_description+0x6c/0x20b mm/kasan/report.c:256
  kasan_report_error mm/kasan/report.c:354 [inline]
  kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
  check_memory_region_inline mm/kasan/kasan.c:260 [inline]
  check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
  memcpy+0x37/0x50 mm/kasan/kasan.c:303
  strlcpy include/linux/string.h:300 [inline]
  compat_mtw_from_user net/bridge/netfilter/ebtables.c:1957 [inline]
  ebt_size_mwt net/bridge/netfilter/ebtables.c:2059 [inline]
  size_entry_mwt net/bridge/netfilter/ebtables.c:2155 [inline]
  compat_copy_entries+0x96c/0x14a0 net/bridge/netfilter/ebtables.c:2194
  compat_do_replace+0x483/0x900 net/bridge/netfilter/ebtables.c:2285
  compat_do_ebt_set_ctl+0x2ac/0x324 net/bridge/netfilter/ebtables.c:2367
  compat_nf_sockopt net/netfilter/nf_sockopt.c:144 [inline]
  compat_nf_setsockopt+0x9b/0x140 net/netfilter/nf_sockopt.c:156
  compat_ip_setsockopt+0xff/0x140 net/ipv4/ip_sockglue.c:1279
  inet_csk_compat_setsockopt+0x97/0x120 net/ipv4/inet_connection_sock.c:1041
  compat_tcp_setsockopt+0x49/0x80 net/ipv4/tcp.c:2901
  compat_sock_common_setsockopt+0xb4/0x150 net/core/sock.c:3050
  __compat_sys_setsockopt+0x1ab/0x7c0 net/compat.c:403
  __do_compat_sys_setsockopt net/compat.c:416 [inline]
  __se_compat_sys_setsockopt net/compat.c:413 [inline]
  __ia32_compat_sys_setsockopt+0xbd/0x150 net/compat.c:413
  do_syscall_32_irqs_on arch/x86/entry/common.c:323 [inline]
  do_fast_syscall_32+0x345/0xf9b arch/x86/entry/common.c:394
  entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139
RIP: 0023:0xf7fb3cb9
RSP: 002b:00000000fff0c26c EFLAGS: 00000282 ORIG_RAX: 000000000000016e
RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 0000000000000000
RDX: 0000000000000080 RSI: 0000000020000300 RDI: 00000000000005f4
RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000

The buggy address belongs to the page:
page:ffffea0006c2afc0 count:0 mapcount:0 mapping:0000000000000000 index:0x0
flags: 0x2fffc0000000000()
raw: 02fffc0000000000 0000000000000000 0000000000000000 00000000ffffffff
raw: 0000000000000000 ffffea0006c20101 0000000000000000 0000000000000000
page dumped because: kasan: bad access detected

Fix the issue replacing the unsafe function with strscpy() and
taking care of possible errors.

Fixes: 81e675c227ec ("netfilter: ebtables: add CONFIG_COMPAT support")
Reported-and-tested-by: syzbot+4e42a04e0bc33cb6c087@syzkaller.appspotmail.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/bridge/netfilter/ebtables.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 5b8cd359c4c0..e27fb6e97d18 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1950,7 +1950,8 @@ static int compat_mtw_from_user(struct compat_ebt_entry_mwt *mwt,
 	int off, pad = 0;
 	unsigned int size_kern, match_size = mwt->match_size;
 
-	strlcpy(name, mwt->u.name, sizeof(name));
+	if (strscpy(name, mwt->u.name, sizeof(name)) < 0)
+		return -EINVAL;
 
 	if (state->buf_kern_start)
 		dst = state->buf_kern_start + state->buf_kern_offset;

From 510e1e8020a8a1b7cf53a84f18a7d03757daba55 Mon Sep 17 00:00:00 2001
From: Sebastian Ott <sebott@linux.ibm.com>
Date: Tue, 15 May 2018 14:05:13 +0200
Subject: [PATCH 48/62] s390/dasd: use blk_mq_rq_from_pdu for per request data

[ Upstream commit f0f59a2fab8e52b9d582b39da39f22230ca80aee ]

Dasd uses completion_data from struct request to store per request
private data - this is problematic since this member is part of a
union which is also used by IO schedulers.
Let the block layer maintain space for per request data behind each
struct request.

Fixes crashes on block layer timeouts like this one:

Unable to handle kernel pointer dereference in virtual kernel address space
Failing address: 0000000000000000 TEID: 0000000000000483
Fault in home space mode while using kernel ASCE.
AS:0000000001308007 R3:00000000fffc8007 S:00000000fffcc000 P:000000000000013d
Oops: 0004 ilc:2 [#1] PREEMPT SMP
Modules linked in: [...]
CPU: 0 PID: 1480 Comm: kworker/0:2H Not tainted 4.17.0-rc4-00046-gaa3bcd43b5af #203
Hardware name: IBM 3906 M02 702 (LPAR)
Workqueue: kblockd blk_mq_timeout_work
Krnl PSW : 0000000067ac406b 00000000b6960308 (do_raw_spin_trylock+0x30/0x78)
           R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
Krnl GPRS: 0000000000000c00 0000000000000000 0000000000000000 0000000000000001
           0000000000b9d3c8 0000000000000000 0000000000000001 00000000cf9639d8
           0000000000000000 0700000000000000 0000000000000000 000000000099f09e
           0000000000000000 000000000076e9d0 000000006247bb08 000000006247bae0
Krnl Code: 00000000001c159c: b90400c2           lgr     %r12,%r2
           00000000001c15a0: a7180000           lhi     %r1,0
          #00000000001c15a4: 583003a4           l       %r3,932
          >00000000001c15a8: ba132000           cs      %r1,%r3,0(%r2)
           00000000001c15ac: a7180001           lhi     %r1,1
           00000000001c15b0: a784000b           brc     8,1c15c6
           00000000001c15b4: c0e5004e72aa       brasl   %r14,b8fb08
           00000000001c15ba: 1812               lr      %r1,%r2
Call Trace:
([<0700000000000000>] 0x700000000000000)
 [<0000000000b9d3d2>] _raw_spin_lock_irqsave+0x7a/0xb8
 [<000000000099f09e>] dasd_times_out+0x46/0x278
 [<000000000076ea6e>] blk_mq_terminate_expired+0x9e/0x108
 [<000000000077497a>] bt_for_each+0x102/0x130
 [<0000000000774e54>] blk_mq_queue_tag_busy_iter+0x74/0xd8
 [<000000000076fea0>] blk_mq_timeout_work+0x260/0x320
 [<0000000000169dd4>] process_one_work+0x3bc/0x708
 [<000000000016a382>] worker_thread+0x262/0x408
 [<00000000001723a8>] kthread+0x160/0x178
 [<0000000000b9e73a>] kernel_thread_starter+0x6/0xc
 [<0000000000b9e734>] kernel_thread_starter+0x0/0xc
INFO: lockdep is turned off.
Last Breaking-Event-Address:
 [<0000000000b9d3cc>] _raw_spin_lock_irqsave+0x74/0xb8

Kernel panic - not syncing: Fatal exception: panic_on_oops

Signed-off-by: Sebastian Ott <sebott@linux.ibm.com>
Reviewed-by: Stefan Haberland <sth@linux.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/block/dasd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index e67c1d8a193d..d072f84a8535 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -3049,7 +3049,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx,
 	cqr->callback_data = req;
 	cqr->status = DASD_CQR_FILLED;
 	cqr->dq = dq;
-	req->completion_data = cqr;
+	*((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr;
+
 	blk_mq_start_request(req);
 	spin_lock(&block->queue_lock);
 	list_add_tail(&cqr->blocklist, &block->ccw_queue);
@@ -3073,12 +3074,13 @@ out:
  */
 enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
 {
-	struct dasd_ccw_req *cqr = req->completion_data;
 	struct dasd_block *block = req->q->queuedata;
 	struct dasd_device *device;
+	struct dasd_ccw_req *cqr;
 	unsigned long flags;
 	int rc = 0;
 
+	cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req));
 	if (!cqr)
 		return BLK_EH_NOT_HANDLED;
 
@@ -3184,6 +3186,7 @@ static int dasd_alloc_queue(struct dasd_block *block)
 	int rc;
 
 	block->tag_set.ops = &dasd_mq_ops;
+	block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *);
 	block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES;
 	block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV;
 	block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;

From 27aa533f24e9951aff62b3b6a14c1feed8cbf624 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 16 May 2018 22:58:33 +0200
Subject: [PATCH 49/62] netfilter: nft_limit: fix packet ratelimiting

[ Upstream commit 3e0f64b7dd3149f75e8652ff1df56cffeedc8fc1 ]

Credit calculations for the packet ratelimiting are not correct, as per
the applied ratelimit of 25/second and burst 8, a total of 33 packets
should have been accepted.  This is true in iptables(33) but not in
nftables (~65). For packet ratelimiting, use:

	div_u64(limit->nsecs, limit->rate) * limit->burst;

to calculate credit, just like in iptables' xt_limit does.

Moreover, use default burst in iptables, users are expecting similar
behaviour.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/nft_limit.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index a9fc298ef4c3..72f13a1144dd 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -51,10 +51,13 @@ static inline bool nft_limit_eval(struct nft_limit *limit, u64 cost)
 	return !limit->invert;
 }
 
+/* Use same default as in iptables. */
+#define NFT_LIMIT_PKT_BURST_DEFAULT	5
+
 static int nft_limit_init(struct nft_limit *limit,
-			  const struct nlattr * const tb[])
+			  const struct nlattr * const tb[], bool pkts)
 {
-	u64 unit;
+	u64 unit, tokens;
 
 	if (tb[NFTA_LIMIT_RATE] == NULL ||
 	    tb[NFTA_LIMIT_UNIT] == NULL)
@@ -68,18 +71,25 @@ static int nft_limit_init(struct nft_limit *limit,
 
 	if (tb[NFTA_LIMIT_BURST])
 		limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
-	else
-		limit->burst = 0;
+
+	if (pkts && limit->burst == 0)
+		limit->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
 
 	if (limit->rate + limit->burst < limit->rate)
 		return -EOVERFLOW;
 
-	/* The token bucket size limits the number of tokens can be
-	 * accumulated. tokens_max specifies the bucket size.
-	 * tokens_max = unit * (rate + burst) / rate.
-	 */
-	limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
-				limit->rate);
+	if (pkts) {
+		tokens = div_u64(limit->nsecs, limit->rate) * limit->burst;
+	} else {
+		/* The token bucket size limits the number of tokens can be
+		 * accumulated. tokens_max specifies the bucket size.
+		 * tokens_max = unit * (rate + burst) / rate.
+		 */
+		tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+				 limit->rate);
+	}
+
+	limit->tokens = tokens;
 	limit->tokens_max = limit->tokens;
 
 	if (tb[NFTA_LIMIT_FLAGS]) {
@@ -144,7 +154,7 @@ static int nft_limit_pkts_init(const struct nft_ctx *ctx,
 	struct nft_limit_pkts *priv = nft_expr_priv(expr);
 	int err;
 
-	err = nft_limit_init(&priv->limit, tb);
+	err = nft_limit_init(&priv->limit, tb, true);
 	if (err < 0)
 		return err;
 
@@ -185,7 +195,7 @@ static int nft_limit_bytes_init(const struct nft_ctx *ctx,
 {
 	struct nft_limit *priv = nft_expr_priv(expr);
 
-	return nft_limit_init(priv, tb);
+	return nft_limit_init(priv, tb, false);
 }
 
 static int nft_limit_bytes_dump(struct sk_buff *skb,
@@ -246,7 +256,7 @@ static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx,
 	struct nft_limit_pkts *priv = nft_obj_data(obj);
 	int err;
 
-	err = nft_limit_init(&priv->limit, tb);
+	err = nft_limit_init(&priv->limit, tb, true);
 	if (err < 0)
 		return err;
 
@@ -289,7 +299,7 @@ static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx,
 {
 	struct nft_limit *priv = nft_obj_data(obj);
 
-	return nft_limit_init(priv, tb);
+	return nft_limit_init(priv, tb, false);
 }
 
 static int nft_limit_obj_bytes_dump(struct sk_buff *skb,

From 4abab5dca7237e89f62069587f64feebc56dc105 Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Sat, 19 May 2018 18:22:35 +0300
Subject: [PATCH 50/62] ipvs: fix buffer overflow with sync daemon and service

[ Upstream commit 52f96757905bbf0edef47f3ee6c7c784e7f8ff8a ]

syzkaller reports for buffer overflow for interface name
when starting sync daemons [1]

What we do is that we copy user structure into larger stack
buffer but later we search NUL past the stack buffer.
The same happens for sched_name when adding/editing virtual server.

We are restricted by IP_VS_SCHEDNAME_MAXLEN and IP_VS_IFNAME_MAXLEN
being used as size in include/uapi/linux/ip_vs.h, so they
include the space for NUL.

As using strlcpy is wrong for unsafe source, replace it with
strscpy and add checks to return EINVAL if source string is not
NUL-terminated. The incomplete strlcpy fix comes from 2.6.13.

For the netlink interface reduce the len parameter for
IPVS_DAEMON_ATTR_MCAST_IFN and IPVS_SVC_ATTR_SCHED_NAME,
so that we get proper EINVAL.

[1]
kernel BUG at lib/string.c:1052!
invalid opcode: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
    (ftrace buffer empty)
Modules linked in:
CPU: 1 PID: 373 Comm: syz-executor936 Not tainted 4.17.0-rc4+ #45
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
Google 01/01/2011
RIP: 0010:fortify_panic+0x13/0x20 lib/string.c:1051
RSP: 0018:ffff8801c976f800 EFLAGS: 00010282
RAX: 0000000000000022 RBX: 0000000000000040 RCX: 0000000000000000
RDX: 0000000000000022 RSI: ffffffff8160f6f1 RDI: ffffed00392edef6
RBP: ffff8801c976f800 R08: ffff8801cf4c62c0 R09: ffffed003b5e4fb0
R10: ffffed003b5e4fb0 R11: ffff8801daf27d87 R12: ffff8801c976fa20
R13: ffff8801c976fae4 R14: ffff8801c976fae0 R15: 000000000000048b
FS:  00007fd99f75e700(0000) GS:ffff8801daf00000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00000000200001c0 CR3: 00000001d6843000 CR4: 00000000001406e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
  strlen include/linux/string.h:270 [inline]
  strlcpy include/linux/string.h:293 [inline]
  do_ip_vs_set_ctl+0x31c/0x1d00 net/netfilter/ipvs/ip_vs_ctl.c:2388
  nf_sockopt net/netfilter/nf_sockopt.c:106 [inline]
  nf_setsockopt+0x7d/0xd0 net/netfilter/nf_sockopt.c:115
  ip_setsockopt+0xd8/0xf0 net/ipv4/ip_sockglue.c:1253
  udp_setsockopt+0x62/0xa0 net/ipv4/udp.c:2487
  ipv6_setsockopt+0x149/0x170 net/ipv6/ipv6_sockglue.c:917
  tcp_setsockopt+0x93/0xe0 net/ipv4/tcp.c:3057
  sock_common_setsockopt+0x9a/0xe0 net/core/sock.c:3046
  __sys_setsockopt+0x1bd/0x390 net/socket.c:1903
  __do_sys_setsockopt net/socket.c:1914 [inline]
  __se_sys_setsockopt net/socket.c:1911 [inline]
  __x64_sys_setsockopt+0xbe/0x150 net/socket.c:1911
  do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
  entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x447369
RSP: 002b:00007fd99f75dda8 EFLAGS: 00000246 ORIG_RAX: 0000000000000036
RAX: ffffffffffffffda RBX: 00000000006e39e4 RCX: 0000000000447369
RDX: 000000000000048b RSI: 0000000000000000 RDI: 0000000000000003
RBP: 0000000000000000 R08: 0000000000000018 R09: 0000000000000000
R10: 00000000200001c0 R11: 0000000000000246 R12: 00000000006e39e0
R13: 75a1ff93f0896195 R14: 6f745f3168746576 R15: 0000000000000001
Code: 08 5b 41 5c 41 5d 41 5e 41 5f 5d c3 0f 0b 48 89 df e8 d2 8f 48 fa eb
de 55 48 89 fe 48 c7 c7 60 65 64 88 48 89 e5 e8 91 dd f3 f9 <0f> 0b 90 90
90 90 90 90 90 90 90 90 90 55 48 89 e5 41 57 41 56
RIP: fortify_panic+0x13/0x20 lib/string.c:1051 RSP: ffff8801c976f800

Reported-and-tested-by: syzbot+aac887f77319868646df@syzkaller.appspotmail.com
Fixes: e4ff67513096 ("ipvs: add sync_maxlen parameter for the sync daemon")
Fixes: 4da62fc70d7c ("[IPVS]: Fix for overflows")
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Acked-by: Simon Horman <horms+renesas@verge.net.au>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e8f1556fa446..327ebe786eeb 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2384,8 +2384,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 			struct ipvs_sync_daemon_cfg cfg;
 
 			memset(&cfg, 0, sizeof(cfg));
-			strlcpy(cfg.mcast_ifn, dm->mcast_ifn,
-				sizeof(cfg.mcast_ifn));
+			ret = -EINVAL;
+			if (strscpy(cfg.mcast_ifn, dm->mcast_ifn,
+				    sizeof(cfg.mcast_ifn)) <= 0)
+				goto out_dec;
 			cfg.syncid = dm->syncid;
 			ret = start_sync_thread(ipvs, &cfg, dm->state);
 		} else {
@@ -2423,12 +2425,19 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
 		}
 	}
 
+	if ((cmd == IP_VS_SO_SET_ADD || cmd == IP_VS_SO_SET_EDIT) &&
+	    strnlen(usvc.sched_name, IP_VS_SCHEDNAME_MAXLEN) ==
+	    IP_VS_SCHEDNAME_MAXLEN) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
 	/* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
 	if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
 	    usvc.protocol != IPPROTO_SCTP) {
-		pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
+		pr_err("set_ctl: invalid protocol: %d %pI4:%d\n",
 		       usvc.protocol, &usvc.addr.ip,
-		       ntohs(usvc.port), usvc.sched_name);
+		       ntohs(usvc.port));
 		ret = -EFAULT;
 		goto out_unlock;
 	}
@@ -2850,7 +2859,7 @@ static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
 	[IPVS_DAEMON_ATTR_STATE]	= { .type = NLA_U32 },
 	[IPVS_DAEMON_ATTR_MCAST_IFN]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_IFNAME_MAXLEN },
+					    .len = IP_VS_IFNAME_MAXLEN - 1 },
 	[IPVS_DAEMON_ATTR_SYNC_ID]	= { .type = NLA_U32 },
 	[IPVS_DAEMON_ATTR_SYNC_MAXLEN]	= { .type = NLA_U16 },
 	[IPVS_DAEMON_ATTR_MCAST_GROUP]	= { .type = NLA_U32 },
@@ -2868,7 +2877,7 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
 	[IPVS_SVC_ATTR_PORT]		= { .type = NLA_U16 },
 	[IPVS_SVC_ATTR_FWMARK]		= { .type = NLA_U32 },
 	[IPVS_SVC_ATTR_SCHED_NAME]	= { .type = NLA_NUL_STRING,
-					    .len = IP_VS_SCHEDNAME_MAXLEN },
+					    .len = IP_VS_SCHEDNAME_MAXLEN - 1 },
 	[IPVS_SVC_ATTR_PE_NAME]		= { .type = NLA_NUL_STRING,
 					    .len = IP_VS_PENAME_MAXLEN },
 	[IPVS_SVC_ATTR_FLAGS]		= { .type = NLA_BINARY,

From 3ee6bd9411a60f90b5adeea5440cbac302c3e9d0 Mon Sep 17 00:00:00 2001
From: Hao Wei Tee <angelsl@in04.sg>
Date: Tue, 29 May 2018 10:25:17 +0300
Subject: [PATCH 51/62] iwlwifi: pcie: compare with number of IRQs requested
 for, not number of CPUs

[ Upstream commit ab1068d6866e28bf6427ceaea681a381e5870a4a ]

When there are 16 or more logical CPUs, we request for
`IWL_MAX_RX_HW_QUEUES` (16) IRQs only as we limit to that number of
IRQs, but later on we compare the number of IRQs returned to
nr_online_cpus+2 instead of max_irqs, the latter being what we
actually asked for. This ends up setting num_rx_queues to 17 which
causes lots of out-of-bounds array accesses later on.

Compare to max_irqs instead, and also add an assertion in case
num_rx_queues > IWM_MAX_RX_HW_QUEUES.

This fixes https://bugzilla.kernel.org/show_bug.cgi?id=199551

Fixes: 2e5d4a8f61dc ("iwlwifi: pcie: Add new configuration to enable MSIX")
Signed-off-by: Hao Wei Tee <angelsl@in04.sg>
Tested-by: Sara Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 12a9b86d71ea..dffa697d71e0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1499,14 +1499,13 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 					struct iwl_trans *trans)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	int max_irqs, num_irqs, i, ret, nr_online_cpus;
+	int max_irqs, num_irqs, i, ret;
 	u16 pci_cmd;
 
 	if (!trans->cfg->mq_rx_supported)
 		goto enable_msi;
 
-	nr_online_cpus = num_online_cpus();
-	max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES);
+	max_irqs = min_t(u32, num_online_cpus() + 2, IWL_MAX_RX_HW_QUEUES);
 	for (i = 0; i < max_irqs; i++)
 		trans_pcie->msix_entries[i].entry = i;
 
@@ -1532,16 +1531,17 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev,
 	 * Two interrupts less: non rx causes shared with FBQ and RSS.
 	 * More than two interrupts: we will use fewer RSS queues.
 	 */
-	if (num_irqs <= nr_online_cpus) {
+	if (num_irqs <= max_irqs - 2) {
 		trans_pcie->trans->num_rx_queues = num_irqs + 1;
 		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX |
 			IWL_SHARED_IRQ_FIRST_RSS;
-	} else if (num_irqs == nr_online_cpus + 1) {
+	} else if (num_irqs == max_irqs - 1) {
 		trans_pcie->trans->num_rx_queues = num_irqs;
 		trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX;
 	} else {
 		trans_pcie->trans->num_rx_queues = num_irqs - 1;
 	}
+	WARN_ON(trans_pcie->trans->num_rx_queues > IWL_MAX_RX_HW_QUEUES);
 
 	trans_pcie->alloc_vecs = num_irqs;
 	trans_pcie->msix_enabled = true;

From d20dcd2f11357f2b0f52a243e4737c129240475d Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <brnkv.i1@gmail.com>
Date: Fri, 25 May 2018 20:49:52 +0300
Subject: [PATCH 52/62] atm: zatm: fix memcmp casting

[ Upstream commit f9c6442a8f0b1dde9e755eb4ff6fa22bcce4eabc ]

memcmp() returns int, but eprom_try_esi() cast it to unsigned char. One
can lose significant bits and get 0 from non-0 value returned by the
memcmp().

Signed-off-by: Ivan Bornyakov <brnkv.i1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/atm/zatm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c
index 9c9a22958717..a8d2eb0ceb8d 100644
--- a/drivers/atm/zatm.c
+++ b/drivers/atm/zatm.c
@@ -1151,8 +1151,8 @@ static void eprom_get_byte(struct zatm_dev *zatm_dev, unsigned char *byte,
 }
 
 
-static unsigned char eprom_try_esi(struct atm_dev *dev, unsigned short cmd,
-				   int offset, int swap)
+static int eprom_try_esi(struct atm_dev *dev, unsigned short cmd, int offset,
+			 int swap)
 {
 	unsigned char buf[ZEPROM_SIZE];
 	struct zatm_dev *zatm_dev;

From d689ad5c91af0843aceef7975d7ca9356c84a335 Mon Sep 17 00:00:00 2001
From: Josh Hill <josh@joshuajhill.com>
Date: Sun, 27 May 2018 20:10:41 -0400
Subject: [PATCH 53/62] net: qmi_wwan: Add Netgear Aircard 779S
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 2415f3bd059fe050eb98aedf93664d000ceb4e92 ]

Add support for Netgear Aircard 779S

Signed-off-by: Josh Hill <josh@joshuajhill.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 8e06f308ce44..b23ee948e7c9 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1103,6 +1103,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
 	{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)},	/* YUGA CLM920-NC5 */
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
+	{QMI_FIXED_INTF(0x0846, 0x68d3, 8)},	/* Netgear Aircard 779S */
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
 	{QMI_FIXED_INTF(0x1435, 0xd181, 3)},	/* Wistron NeWeb D18Q1 */

From be5af6bec31a5c22f61de1be848c3fd0e6e3a2f8 Mon Sep 17 00:00:00 2001
From: Thomas Richter <tmricht@linux.ibm.com>
Date: Mon, 28 May 2018 09:36:57 +0200
Subject: [PATCH 54/62] perf test: "Session topology" dumps core on s390

[ Upstream commit d121109100bda84bbbb199dab97f9d56432ab235 ]

The "perf test Session topology" entry fails with core dump on s390. The root
cause is a NULL pointer dereference in function check_cpu_topology() line 76
(or line 82 without -v).

The session->header.env.cpu variable is NULL because on s390 function
process_cpu_topology() returns with error:

    socket_id number is too big.
    You may need to upgrade the perf tool.

and releases the env.cpu variable via zfree() and sets it to NULL.

Here is the gdb output:
(gdb) n
76                      pr_debug("CPU %d, core %d, socket %d\n", i,
(gdb) n

Program received signal SIGSEGV, Segmentation fault.
0x00000000010f4d9e in check_cpu_topology (path=0x3ffffffd6c8
	"/tmp/perf-test-J6CHMa", map=0x14a1740) at tests/topology.c:76
76  pr_debug("CPU %d, core %d, socket %d\n", i,
(gdb)

Make sure the env.cpu variable is not used when its NULL.
Test for NULL pointer and return TEST_SKIP if so.

Output before:

  [root@p23lp27 perf]# ./perf test -F 39
  39: Session topology  :Segmentation fault (core dumped)
  [root@p23lp27 perf]#

Output after:

  [root@p23lp27 perf]# ./perf test -vF 39
  39: Session topology                                      :
  --- start ---
  templ file: /tmp/perf-test-Ajx59D
  socket_id number is too big.You may need to upgrade the perf tool.
  ---- end ----
  Session topology: Skip
  [root@p23lp27 perf]#

Signed-off-by: Thomas Richter <tmricht@linux.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Link: http://lkml.kernel.org/r/20180528073657.11743-1-tmricht@linux.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/tests/topology.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index a59db7c45a65..81ede20f49d7 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -66,6 +66,27 @@ static int check_cpu_topology(char *path, struct cpu_map *map)
 	session = perf_session__new(&file, false, NULL);
 	TEST_ASSERT_VAL("can't get session", session);
 
+	/* On platforms with large numbers of CPUs process_cpu_topology()
+	 * might issue an error while reading the perf.data file section
+	 * HEADER_CPU_TOPOLOGY and the cpu_topology_map pointed to by member
+	 * cpu is a NULL pointer.
+	 * Example: On s390
+	 *   CPU 0 is on core_id 0 and physical_package_id 6
+	 *   CPU 1 is on core_id 1 and physical_package_id 3
+	 *
+	 *   Core_id and physical_package_id are platform and architecture
+	 *   dependend and might have higher numbers than the CPU id.
+	 *   This actually depends on the configuration.
+	 *
+	 *  In this case process_cpu_topology() prints error message:
+	 *  "socket_id number is too big. You may need to upgrade the
+	 *  perf tool."
+	 *
+	 *  This is the reason why this test might be skipped.
+	 */
+	if (!session->header.env.cpu)
+		return TEST_SKIP;
+
 	for (i = 0; i < session->header.env.nr_cpus_avail; i++) {
 		if (!cpu_map__has(map, i))
 			continue;
@@ -91,7 +112,7 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
 {
 	char path[PATH_MAX];
 	struct cpu_map *map;
-	int ret = -1;
+	int ret = TEST_FAIL;
 
 	TEST_ASSERT_VAL("can't get templ file", !get_temp(path));
 
@@ -106,12 +127,9 @@ int test__session_topology(struct test *test __maybe_unused, int subtest __maybe
 		goto free_path;
 	}
 
-	if (check_cpu_topology(path, map))
-		goto free_map;
-	ret = 0;
-
-free_map:
+	ret = check_cpu_topology(path, map);
 	cpu_map__put(map);
+
 free_path:
 	unlink(path);
 	return ret;

From ae14c044587eceb1775c03e0be9e4e547d262aa2 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 11 May 2018 19:21:42 +0800
Subject: [PATCH 55/62] perf bpf: Fix NULL return handling in
 bpf__prepare_load()

[ Upstream commit ab4e32ff5aa797eaea551dbb67946e2fcb56cc7e ]

bpf_object__open()/bpf_object__open_buffer can return error pointer or
NULL, check the return values with IS_ERR_OR_NULL() in bpf__prepare_load
and bpf__prepare_load_buffer

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: netdev@vger.kernel.org
Link: https://lkml.kernel.org/n/tip-psf4xwc09n62al2cb9s33v9h@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 tools/perf/util/bpf-loader.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 72c107fcbc5a..c02d2cfd3aea 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -66,7 +66,7 @@ bpf__prepare_load_buffer(void *obj_buf, size_t obj_buf_sz, const char *name)
 	}
 
 	obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, name);
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load buffer\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -102,14 +102,14 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source)
 			pr_debug("bpf: successfull builtin compilation\n");
 		obj = bpf_object__open_buffer(obj_buf, obj_buf_sz, filename);
 
-		if (!IS_ERR(obj) && llvm_param.dump_obj)
+		if (!IS_ERR_OR_NULL(obj) && llvm_param.dump_obj)
 			llvm__dump_obj(filename, obj_buf, obj_buf_sz);
 
 		free(obj_buf);
 	} else
 		obj = bpf_object__open(filename);
 
-	if (IS_ERR(obj)) {
+	if (IS_ERR_OR_NULL(obj)) {
 		pr_debug("bpf: failed to load %s\n", filename);
 		return obj;
 	}

From 93b84462eadf0ebbf72abddfcf75d46ac9b59730 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <darrick.wong@oracle.com>
Date: Wed, 30 May 2018 19:43:53 -0700
Subject: [PATCH 56/62] fs: clear writeback errors in inode_init_always

[ Upstream commit 829bc787c1a0403e4d886296dd4d90c5f9c1744a ]

In inode_init_always(), we clear the inode mapping flags, which clears
any retained error (AS_EIO, AS_ENOSPC) bits.  Unfortunately, we do not
also clear wb_err, which means that old mapping errors can leak through
to new inodes.

This is crucial for the XFS inode allocation path because we recycle old
in-core inodes and we do not want error state from an old file to leak
into the new file.  This bug was discovered by running generic/036 and
generic/047 in a loop and noticing that the EIOs generated by the
collision of direct and buffered writes in generic/036 would survive the
remount between 036 and 047, and get reported to the fsyncs (on
different files!) in generic/047.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Brian Foster <bfoster@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/inode.c b/fs/inode.c
index d1e35b53bb23..e07b3e1f5970 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -177,6 +177,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	mapping->a_ops = &empty_aops;
 	mapping->host = inode;
 	mapping->flags = 0;
+	mapping->wb_err = 0;
 	atomic_set(&mapping->i_mmap_writable, 0);
 	mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
 	mapping->private_data = NULL;

From e4c55e0e6a754d21ea3d2e528e384b546192b9a1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Jul 2017 18:58:21 +0200
Subject: [PATCH 57/62] sched/core: Fix rules for running on online && !active
 CPUs

[ Upstream commit 175f0e25abeaa2218d431141ce19cf1de70fa82d ]

As already enforced by the WARN() in __set_cpus_allowed_ptr(), the rules
for running on an online && !active CPU are stricter than just being a
kthread, you need to be a per-cpu kthread.

If you're not strictly per-CPU, you have better CPUs to run on and
don't need the partially booted one to get your work done.

The exception is to allow smpboot threads to bootstrap the CPU itself
and get kernel 'services' initialized before we allow userspace on it.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 955dbdf4ce87 ("sched: Allow migrating kthreads into online but inactive CPUs")
Link: http://lkml.kernel.org/r/20170725165821.cejhb7v2s3kecems@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/core.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f287dcbe8cb2..002b56d2c9eb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -894,6 +894,33 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 }
 
 #ifdef CONFIG_SMP
+
+static inline bool is_per_cpu_kthread(struct task_struct *p)
+{
+	if (!(p->flags & PF_KTHREAD))
+		return false;
+
+	if (p->nr_cpus_allowed != 1)
+		return false;
+
+	return true;
+}
+
+/*
+ * Per-CPU kthreads are allowed to run on !actie && online CPUs, see
+ * __set_cpus_allowed_ptr() and select_fallback_rq().
+ */
+static inline bool is_cpu_allowed(struct task_struct *p, int cpu)
+{
+	if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
+		return false;
+
+	if (is_per_cpu_kthread(p))
+		return cpu_online(cpu);
+
+	return cpu_active(cpu);
+}
+
 /*
  * This is how migration works:
  *
@@ -951,16 +978,8 @@ struct migration_arg {
 static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
 				 struct task_struct *p, int dest_cpu)
 {
-	if (p->flags & PF_KTHREAD) {
-		if (unlikely(!cpu_online(dest_cpu)))
-			return rq;
-	} else {
-		if (unlikely(!cpu_active(dest_cpu)))
-			return rq;
-	}
-
 	/* Affinity changed (again). */
-	if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+	if (!is_cpu_allowed(p, dest_cpu))
 		return rq;
 
 	update_rq_clock(rq);
@@ -1489,10 +1508,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 	for (;;) {
 		/* Any allowed, online CPU? */
 		for_each_cpu(dest_cpu, &p->cpus_allowed) {
-			if (!(p->flags & PF_KTHREAD) && !cpu_active(dest_cpu))
-				continue;
-			if (!cpu_online(dest_cpu))
+			if (!is_cpu_allowed(p, dest_cpu))
 				continue;
+
 			goto out;
 		}
 

From 0d5e04e239ad5b18c4099ef942843bf510af1122 Mon Sep 17 00:00:00 2001
From: Paul Burton <paul.burton@mips.com>
Date: Sat, 26 May 2018 08:46:47 -0700
Subject: [PATCH 58/62] sched/core: Require cpu_active() in select_task_rq(),
 for user tasks

[ Upstream commit 7af443ee1697607541c6346c87385adab2214743 ]

select_task_rq() is used in a few paths to select the CPU upon which a
thread should be run - for example it is used by try_to_wake_up() & by
fork or exec balancing. As-is it allows use of any online CPU that is
present in the task's cpus_allowed mask.

This presents a problem because there is a period whilst CPUs are
brought online where a CPU is marked online, but is not yet fully
initialized - ie. the period where CPUHP_AP_ONLINE_IDLE <= state <
CPUHP_ONLINE. Usually we don't run any user tasks during this window,
but there are corner cases where this can happen. An example observed
is:

  - Some user task A, running on CPU X, forks to create task B.

  - sched_fork() calls __set_task_cpu() with cpu=X, setting task B's
    task_struct::cpu field to X.

  - CPU X is offlined.

  - Task A, currently somewhere between the __set_task_cpu() in
    copy_process() and the call to wake_up_new_task(), is migrated to
    CPU Y by migrate_tasks() when CPU X is offlined.

  - CPU X is onlined, but still in the CPUHP_AP_ONLINE_IDLE state. The
    scheduler is now active on CPU X, but there are no user tasks on
    the runqueue.

  - Task A runs on CPU Y & reaches wake_up_new_task(). This calls
    select_task_rq() with cpu=X, taken from task B's task_struct,
    and select_task_rq() allows CPU X to be returned.

  - Task A enqueues task B on CPU X's runqueue, via activate_task() &
    enqueue_task().

  - CPU X now has a user task on its runqueue before it has reached the
    CPUHP_ONLINE state.

In most cases, the user tasks that schedule on the newly onlined CPU
have no idea that anything went wrong, but one case observed to be
problematic is if the task goes on to invoke the sched_setaffinity
syscall. The newly onlined CPU reaches the CPUHP_AP_ONLINE_IDLE state
before the CPU that brought it online calls stop_machine_unpark(). This
means that for a portion of the window of time between
CPUHP_AP_ONLINE_IDLE & CPUHP_ONLINE the newly onlined CPU's struct
cpu_stopper has its enabled field set to false. If a user thread is
executed on the CPU during this window and it invokes sched_setaffinity
with a CPU mask that does not include the CPU it's running on, then when
__set_cpus_allowed_ptr() calls stop_one_cpu() intending to invoke
migration_cpu_stop() and perform the actual migration away from the CPU
it will simply return -ENOENT rather than calling migration_cpu_stop().
We then return from the sched_setaffinity syscall back to the user task
that is now running on a CPU which it just asked not to run on, and
which is not present in its cpus_allowed mask.

This patch resolves the problem by having select_task_rq() enforce that
user tasks run on CPUs that are active - the same requirement that
select_fallback_rq() already enforces. This should ensure that newly
onlined CPUs reach the CPUHP_AP_ACTIVE state before being able to
schedule user tasks, and also implies that bringup_wait_for_ap() will
have called stop_machine_unpark() which resolves the sched_setaffinity
issue above.

I haven't yet investigated them, but it may be of interest to review
whether any of the actions performed by hotplug states between
CPUHP_AP_ONLINE_IDLE & CPUHP_AP_ACTIVE could have similar unintended
effects on user tasks that might schedule before they are reached, which
might widen the scope of the problem from just affecting the behaviour
of sched_setaffinity.

Signed-off-by: Paul Burton <paul.burton@mips.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20180526154648.11635-2-paul.burton@mips.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/sched/core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 002b56d2c9eb..31615d1ae44c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1573,8 +1573,7 @@ int select_task_rq(struct task_struct *p, int cpu, int sd_flags, int wake_flags)
 	 * [ this allows ->select_task() to simply return task_cpu(p) and
 	 *   not worry about this generic constraint ]
 	 */
-	if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
-		     !cpu_online(cpu)))
+	if (unlikely(!is_cpu_allowed(p, cpu)))
 		cpu = select_fallback_rq(task_cpu(p), p);
 
 	return cpu;

From 4888ced6b749aa51355b4614af25e2eec0818f62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Paulo=20Rechi=20Vita?= <jprvita@endlessm.com>
Date: Tue, 22 May 2018 14:30:15 -0700
Subject: [PATCH 59/62] platform/x86: asus-wmi: Fix NULL pointer dereference
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 32ffd6e8d1f6cef94bedca15dfcdebdeb590499d ]

Do not perform the rfkill cleanup routine when
(asus->driver->wlan_ctrl_by_user && ashs_present()) is true, since
nothing is registered with the rfkill subsystem in that case. Doing so
leads to the following kernel NULL pointer dereference:

  BUG: unable to handle kernel NULL pointer dereference at           (null)
  IP: [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
  PGD 1a3aa8067
  PUD 1a3b3d067
  PMD 0

  Oops: 0002 [#1] PREEMPT SMP
  Modules linked in: bnep ccm binfmt_misc uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core hid_a4tech videodev x86_pkg_temp_thermal intel_powerclamp coretemp ath3k btusb btrtl btintel bluetooth kvm_intel snd_hda_codec_hdmi kvm snd_hda_codec_realtek snd_hda_codec_generic irqbypass crc32c_intel arc4 i915 snd_hda_intel snd_hda_codec ath9k ath9k_common ath9k_hw ath i2c_algo_bit snd_hwdep mac80211 ghash_clmulni_intel snd_hda_core snd_pcm snd_timer cfg80211 ehci_pci xhci_pci drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops drm xhci_hcd ehci_hcd asus_nb_wmi(-) asus_wmi sparse_keymap r8169 rfkill mxm_wmi serio_raw snd mii mei_me lpc_ich i2c_i801 video soundcore mei i2c_smbus wmi i2c_core mfd_core
  CPU: 3 PID: 3275 Comm: modprobe Not tainted 4.9.34-gentoo #34
  Hardware name: ASUSTeK COMPUTER INC. K56CM/K56CM, BIOS K56CM.206 08/21/2012
  task: ffff8801a639ba00 task.stack: ffffc900014cc000
  RIP: 0010:[<ffffffff816c7348>]  [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
  RSP: 0018:ffffc900014cfce0  EFLAGS: 00010282
  RAX: 0000000000000000 RBX: ffff8801a54315b0 RCX: 00000000c0000100
  RDX: 0000000000000001 RSI: 0000000000000000 RDI: ffff8801a54315b4
  RBP: ffffc900014cfd30 R08: 0000000000000000 R09: 0000000000000002
  R10: 0000000000000000 R11: 0000000000000000 R12: ffff8801a54315b4
  R13: ffff8801a639ba00 R14: 00000000ffffffff R15: ffff8801a54315b8
  FS:  00007faa254fb700(0000) GS:ffff8801aef80000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 0000000000000000 CR3: 00000001a3b1b000 CR4: 00000000001406e0
  Stack:
   ffff8801a54315b8 0000000000000000 ffffffff814733ae ffffc900014cfd28
   ffffffff8146a28c ffff8801a54315b0 0000000000000000 ffff8801a54315b0
   ffff8801a66f3820 0000000000000000 ffffc900014cfd48 ffffffff816c73e7
  Call Trace:
   [<ffffffff814733ae>] ? acpi_ut_release_mutex+0x5d/0x61
   [<ffffffff8146a28c>] ? acpi_ns_get_node+0x49/0x52
   [<ffffffff816c73e7>] mutex_lock+0x17/0x30
   [<ffffffffa00a3bb4>] asus_rfkill_hotplug+0x24/0x1a0 [asus_wmi]
   [<ffffffffa00a4421>] asus_wmi_rfkill_exit+0x61/0x150 [asus_wmi]
   [<ffffffffa00a49f1>] asus_wmi_remove+0x61/0xb0 [asus_wmi]
   [<ffffffff814a5128>] platform_drv_remove+0x28/0x40
   [<ffffffff814a2901>] __device_release_driver+0xa1/0x160
   [<ffffffff814a29e3>] device_release_driver+0x23/0x30
   [<ffffffff814a1ffd>] bus_remove_device+0xfd/0x170
   [<ffffffff8149e5a9>] device_del+0x139/0x270
   [<ffffffff814a5028>] platform_device_del+0x28/0x90
   [<ffffffff814a50a2>] platform_device_unregister+0x12/0x30
   [<ffffffffa00a4209>] asus_wmi_unregister_driver+0x19/0x30 [asus_wmi]
   [<ffffffffa00da0ea>] asus_nb_wmi_exit+0x10/0xf26 [asus_nb_wmi]
   [<ffffffff8110c692>] SyS_delete_module+0x192/0x270
   [<ffffffff810022b2>] ? exit_to_usermode_loop+0x92/0xa0
   [<ffffffff816ca560>] entry_SYSCALL_64_fastpath+0x13/0x94
  Code: e8 5e 30 00 00 8b 03 83 f8 01 0f 84 93 00 00 00 48 8b 43 10 4c 8d 7b 08 48 89 63 10 41 be ff ff ff ff 4c 89 3c 24 48 89 44 24 08 <48> 89 20 4c 89 6c 24 10 eb 1d 4c 89 e7 49 c7 45 08 02 00 00 00
  RIP  [<ffffffff816c7348>] __mutex_lock_slowpath+0x98/0x120
   RSP <ffffc900014cfce0>
  CR2: 0000000000000000
  ---[ end trace 8d484233fa7cb512 ]---
  note: modprobe[3275] exited with preempt_count 2

https://bugzilla.kernel.org/show_bug.cgi?id=196467

Reported-by: red.f0xyz@gmail.com
Signed-off-by: João Paulo Rechi Vita <jprvita@endlessm.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/x86/asus-wmi.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 48e1541dc8d4..7440f650e81a 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c
@@ -161,6 +161,16 @@ MODULE_LICENSE("GPL");
 
 static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL };
 
+static bool ashs_present(void)
+{
+	int i = 0;
+	while (ashs_ids[i]) {
+		if (acpi_dev_found(ashs_ids[i++]))
+			return true;
+	}
+	return false;
+}
+
 struct bios_args {
 	u32 arg0;
 	u32 arg1;
@@ -962,6 +972,9 @@ static int asus_new_rfkill(struct asus_wmi *asus,
 
 static void asus_wmi_rfkill_exit(struct asus_wmi *asus)
 {
+	if (asus->driver->wlan_ctrl_by_user && ashs_present())
+		return;
+
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P5");
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P6");
 	asus_unregister_rfkill_notifier(asus, "\\_SB.PCI0.P0P7");
@@ -2058,16 +2071,6 @@ static int asus_wmi_fan_init(struct asus_wmi *asus)
 	return 0;
 }
 
-static bool ashs_present(void)
-{
-	int i = 0;
-	while (ashs_ids[i]) {
-		if (acpi_dev_found(ashs_ids[i++]))
-			return true;
-	}
-	return false;
-}
-
 /*
  * WMI Driver
  */

From 28b64cc7a8462cdfffc171ca583179f6c1c36b65 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@telegraphics.com.au>
Date: Wed, 30 May 2018 13:03:51 +1000
Subject: [PATCH 60/62] net/sonic: Use dma_mapping_error()

[ Upstream commit 26de0b76d9ba3200f09c6cb9d9618bda338be5f7 ]

With CONFIG_DMA_API_DEBUG=y, calling sonic_open() produces the
message, "DMA-API: device driver failed to check map error".
Add the missing dma_mapping_error() call.

Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: Finn Thain <fthain@telegraphics.com.au>
Acked-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/net/ethernet/natsemi/sonic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/natsemi/sonic.c b/drivers/net/ethernet/natsemi/sonic.c
index 612c7a44b26c..23821540ab07 100644
--- a/drivers/net/ethernet/natsemi/sonic.c
+++ b/drivers/net/ethernet/natsemi/sonic.c
@@ -71,7 +71,7 @@ static int sonic_open(struct net_device *dev)
 	for (i = 0; i < SONIC_NUM_RRS; i++) {
 		dma_addr_t laddr = dma_map_single(lp->device, skb_put(lp->rx_skb[i], SONIC_RBSIZE),
 		                                  SONIC_RBSIZE, DMA_FROM_DEVICE);
-		if (!laddr) {
+		if (dma_mapping_error(lp->device, laddr)) {
 			while(i > 0) { /* free any that were mapped successfully */
 				i--;
 				dma_unmap_single(lp->device, lp->rx_laddr[i], SONIC_RBSIZE, DMA_FROM_DEVICE);

From 88b01cac4add369bf3f0401a1da7a9bd6bf19305 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Damien=20Th=C3=A9bault?= <damien.thebault@vitec.com>
Date: Thu, 31 May 2018 07:04:01 +0000
Subject: [PATCH 61/62] net: dsa: b53: Add BCM5389 support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit a95691bc54af1ac4b12c354f91e9cabf1cb068df ]

This patch adds support for the BCM5389 switch connected through MDIO.

Signed-off-by: Damien Thébault <damien.thebault@vitec.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/net/dsa/b53.txt |  1 +
 drivers/net/dsa/b53/b53_common.c                  | 13 +++++++++++++
 drivers/net/dsa/b53/b53_mdio.c                    |  5 ++++-
 drivers/net/dsa/b53/b53_priv.h                    |  1 +
 4 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/dsa/b53.txt b/Documentation/devicetree/bindings/net/dsa/b53.txt
index 8acf51a4dfa8..47a6a7fe0b86 100644
--- a/Documentation/devicetree/bindings/net/dsa/b53.txt
+++ b/Documentation/devicetree/bindings/net/dsa/b53.txt
@@ -10,6 +10,7 @@ Required properties:
       "brcm,bcm53128"
       "brcm,bcm5365"
       "brcm,bcm5395"
+      "brcm,bcm5389"
       "brcm,bcm5397"
       "brcm,bcm5398"
 
diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 274f3679f33d..acf64d4cd94c 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1549,6 +1549,18 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
+	{
+		.chip_id = BCM5389_DEVICE_ID,
+		.dev_name = "BCM5389",
+		.vlans = 4096,
+		.enabled_ports = 0x1f,
+		.arl_entries = 4,
+		.cpu_port = B53_CPU_PORT,
+		.vta_regs = B53_VTA_REGS,
+		.duplex_reg = B53_DUPLEX_STAT_GE,
+		.jumbo_pm_reg = B53_JUMBO_PORT_MASK,
+		.jumbo_size_reg = B53_JUMBO_MAX_SIZE,
+	},
 	{
 		.chip_id = BCM5395_DEVICE_ID,
 		.dev_name = "BCM5395",
@@ -1872,6 +1884,7 @@ int b53_switch_detect(struct b53_device *dev)
 		else
 			dev->chip_id = BCM5365_DEVICE_ID;
 		break;
+	case BCM5389_DEVICE_ID:
 	case BCM5395_DEVICE_ID:
 	case BCM5397_DEVICE_ID:
 	case BCM5398_DEVICE_ID:
diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c
index fa7556f5d4fb..a533a90e3904 100644
--- a/drivers/net/dsa/b53/b53_mdio.c
+++ b/drivers/net/dsa/b53/b53_mdio.c
@@ -285,6 +285,7 @@ static const struct b53_io_ops b53_mdio_ops = {
 #define B53_BRCM_OUI_1	0x0143bc00
 #define B53_BRCM_OUI_2	0x03625c00
 #define B53_BRCM_OUI_3	0x00406000
+#define B53_BRCM_OUI_4	0x01410c00
 
 static int b53_mdio_probe(struct mdio_device *mdiodev)
 {
@@ -311,7 +312,8 @@ static int b53_mdio_probe(struct mdio_device *mdiodev)
 	 */
 	if ((phy_id & 0xfffffc00) != B53_BRCM_OUI_1 &&
 	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_2 &&
-	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_3) {
+	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_3 &&
+	    (phy_id & 0xfffffc00) != B53_BRCM_OUI_4) {
 		dev_err(&mdiodev->dev, "Unsupported device: 0x%08x\n", phy_id);
 		return -ENODEV;
 	}
@@ -360,6 +362,7 @@ static const struct of_device_id b53_of_match[] = {
 	{ .compatible = "brcm,bcm53125" },
 	{ .compatible = "brcm,bcm53128" },
 	{ .compatible = "brcm,bcm5365" },
+	{ .compatible = "brcm,bcm5389" },
 	{ .compatible = "brcm,bcm5395" },
 	{ .compatible = "brcm,bcm5397" },
 	{ .compatible = "brcm,bcm5398" },
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 01bd8cbe9a3f..6b9e39ddaec1 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -48,6 +48,7 @@ struct b53_io_ops {
 enum {
 	BCM5325_DEVICE_ID = 0x25,
 	BCM5365_DEVICE_ID = 0x65,
+	BCM5389_DEVICE_ID = 0x89,
 	BCM5395_DEVICE_ID = 0x95,
 	BCM5397_DEVICE_ID = 0x97,
 	BCM5398_DEVICE_ID = 0x98,

From 5893f4c3fb784f48c020d2637c129a45da7be39e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 8 Jul 2018 15:30:53 +0200
Subject: [PATCH 62/62] Linux 4.14.54

---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index fb66998408f4..de0955d8dfa3 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 4
 PATCHLEVEL = 14
-SUBLEVEL = 53
+SUBLEVEL = 54
 EXTRAVERSION =
 NAME = Petit Gorille