From d34475b964b01067ed25187c4f52d8bdf2c0e113 Mon Sep 17 00:00:00 2001 From: Oren Givon Date: Thu, 24 Mar 2016 10:20:28 +0200 Subject: [PATCH 01/47] iwlwifi: add device IDs for the 8265 device Add new 8265 series PCI IDs. Signed-off-by: Oren Givon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 05b968506836..79d7cd7d461e 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -479,8 +479,18 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24F3, 0x0930, iwl8260_2ac_cfg)}, {IWL_PCI_DEVICE(0x24F3, 0x0000, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0110, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x1110, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x9010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x8110, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x8050, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x8010, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0810, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x9110, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x8130, iwl8265_2ac_cfg)}, /* 9000 Series */ {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)}, From cd49727e1a2bccc4ff008dde24c2f8430dd9e368 Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 3 Feb 2016 15:36:52 +0200 Subject: [PATCH 02/47] iwlwifi: mvm: avoid to WARN about gscan capabilities Gscan capabilities were updated with new capabilities supported by the device. Update GSCAN capabilities TLV and avoid to WARN if the firmware does not have the new capabilities. Signed-off-by: Ayala Beker Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index f899666acb41..33d0d51e32e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -1060,11 +1060,18 @@ static int iwl_parse_tlv_firmware(struct iwl_drv *drv, return -EINVAL; } - if (WARN(fw_has_capa(capa, IWL_UCODE_TLV_CAPA_GSCAN_SUPPORT) && - !gscan_capa, - "GSCAN is supported but capabilities TLV is unavailable\n")) + /* + * If ucode advertises that it supports GSCAN but GSCAN + * capabilities TLV is not present, or if it has an old format, + * warn and continue without GSCAN. + */ + if (fw_has_capa(capa, IWL_UCODE_TLV_CAPA_GSCAN_SUPPORT) && + !gscan_capa) { + IWL_DEBUG_INFO(drv, + "GSCAN is supported but capabilities TLV is unavailable\n"); __clear_bit((__force long)IWL_UCODE_TLV_CAPA_GSCAN_SUPPORT, capa->_capa); + } return 0; From e1ba684f762b9a8064de2c916bb60b1694dd8a17 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 11 Apr 2016 15:01:46 +0300 Subject: [PATCH 03/47] iwlwifi: 8000: fix MODULE_FIRMWARE input The firwmare name for 8000 is iwlwifi-8000C. The C is appended based on a value read from a register. This allows to load different firwmare versions based on the hardware step during development. Now that the hardware development is completed, we can hard code the 'C' and along the way, fix the input to MODULE_FIRMWARE. This fixes: https://bugzilla.kernel.org/show_bug.cgi?id=116041 Signed-off-by: Sara Sharon Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/intel/iwlwifi/iwl-8000.c | 2 +- drivers/net/wireless/intel/iwlwifi/iwl-drv.c | 13 ------------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 97be104d1203..b5c57eebf995 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -93,7 +93,7 @@ #define IWL8260_SMEM_OFFSET 0x400000 #define IWL8260_SMEM_LEN 0x68000 -#define IWL8000_FW_PRE "iwlwifi-8000" +#define IWL8000_FW_PRE "iwlwifi-8000C-" #define IWL8000_MODULE_FIRMWARE(api) \ IWL8000_FW_PRE "-" __stringify(api) ".ucode" diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c index 33d0d51e32e9..9e45bf9c6071 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c @@ -238,19 +238,6 @@ static int iwl_request_firmware(struct iwl_drv *drv, bool first) snprintf(drv->firmware_name, sizeof(drv->firmware_name), "%s%s.ucode", name_pre, tag); - /* - * Starting 8000B - FW name format has changed. This overwrites the - * previous name and uses the new format. - */ - if (drv->trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) { - char rev_step = 'A' + CSR_HW_REV_STEP(drv->trans->hw_rev); - - if (rev_step != 'A') - snprintf(drv->firmware_name, - sizeof(drv->firmware_name), "%s%c-%s.ucode", - name_pre, rev_step, tag); - } - IWL_DEBUG_INFO(drv, "attempting to load firmware %s'%s'\n", (drv->fw_index == UCODE_EXPERIMENTAL_INDEX) ? "EXPERIMENTAL " : "", From f742aaf36edf0390c54d0614bc4d20fd4cd3762a Mon Sep 17 00:00:00 2001 From: Matti Gottlieb Date: Sun, 10 Apr 2016 10:53:57 +0300 Subject: [PATCH 04/47] iwlwifi: mvm: fix accessing Null pointer during fw dump collection The firwmare file can come with data that is relevant for paging. This data is availablet to the firmware upon request, but it stored in the host's memory. During the firmware init flow, the driver configures the firmware so that the firwmare knows where is the data. When paging is used, the variable paging_mem_size is the number of bytes that are available through paging. This variable is not zeror-ed if the driver fails to configure the paging in the firmware, but the memory is freed which is inconsistent. This inconsistency led to a NULL pointer dereference in the code that collects the debug data. Fix this by zero-ing the paging_mem_size variable and NULLify the relevant pointers, so that the code that collects the debug data will know that the paging data is not available. Signed-off-by: Matti Gottlieb Signed-off-by: Emmanuel Grumbach --- drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c | 6 ++++-- drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 4856eac120f6..6938cd37be57 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -526,7 +526,8 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) file_len += sizeof(*dump_data) + sizeof(*dump_mem) + sram2_len; /* Make room for fw's virtual image pages, if it exists */ - if (mvm->fw->img[mvm->cur_ucode].paging_mem_size) + if (mvm->fw->img[mvm->cur_ucode].paging_mem_size && + mvm->fw_paging_db[0].fw_paging_block) file_len += mvm->num_of_paging_blk * (sizeof(*dump_data) + sizeof(struct iwl_fw_error_dump_paging) + @@ -643,7 +644,8 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) } /* Dump fw's virtual image */ - if (mvm->fw->img[mvm->cur_ucode].paging_mem_size) { + if (mvm->fw->img[mvm->cur_ucode].paging_mem_size && + mvm->fw_paging_db[0].fw_paging_block) { for (i = 1; i < mvm->num_of_paging_blk + 1; i++) { struct iwl_fw_error_dump_paging *paging; struct page *pages = diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 594cd0dc7df9..09d895fafaf2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -144,9 +144,11 @@ void iwl_free_fw_paging(struct iwl_mvm *mvm) __free_pages(mvm->fw_paging_db[i].fw_paging_block, get_order(mvm->fw_paging_db[i].fw_paging_size)); + mvm->fw_paging_db[i].fw_paging_block = NULL; } kfree(mvm->trans->paging_download_buf); mvm->trans->paging_download_buf = NULL; + mvm->trans->paging_db = NULL; memset(mvm->fw_paging_db, 0, sizeof(mvm->fw_paging_db)); } From de478a61389cacafe94dc8b035081b681b878f9d Mon Sep 17 00:00:00 2001 From: Oleksij Rempel Date: Tue, 12 Apr 2016 19:37:44 +0200 Subject: [PATCH 05/47] ath9k: ar5008_hw_cmn_spur_mitigate: add missing mask_m & mask_p initialisation by moving common code to ar5008_hw_cmn_spur_mitigate i forgot to move mask_m & mask_p initialisation. This coused a performance regression on ar9281. Fixes: f911085ffa88 ("ath9k: split ar5008_hw_spur_mitigate and reuse common code in ar9002_hw_spur_mitigate.") Reported-by: Gustav Frederiksen Tested-by: Gustav Frederiksen Cc: # 4.2+ Signed-off-by: Oleksij Rempel Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/ar5008_phy.c | 8 +++----- drivers/net/wireless/ath/ath9k/ar9002_phy.c | 5 ----- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 8f8793004b9f..1b271b99c49e 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -274,6 +274,9 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, }; static const int inc[4] = { 0, 100, 0, 0 }; + memset(&mask_m, 0, sizeof(int8_t) * 123); + memset(&mask_p, 0, sizeof(int8_t) * 123); + cur_bin = -6000; upper = bin + 100; lower = bin - 100; @@ -424,14 +427,9 @@ static void ar5008_hw_spur_mitigate(struct ath_hw *ah, int tmp, new; int i; - int8_t mask_m[123]; - int8_t mask_p[123]; int cur_bb_spur; bool is2GHz = IS_CHAN_2GHZ(chan); - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - for (i = 0; i < AR_EEPROM_MODAL_SPURS; i++) { cur_bb_spur = ah->eep_ops->get_spur_channel(ah, i, is2GHz); if (AR_NO_SPUR == cur_bb_spur) diff --git a/drivers/net/wireless/ath/ath9k/ar9002_phy.c b/drivers/net/wireless/ath/ath9k/ar9002_phy.c index db6624527d99..53d7445a5d12 100644 --- a/drivers/net/wireless/ath/ath9k/ar9002_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar9002_phy.c @@ -178,14 +178,9 @@ static void ar9002_hw_spur_mitigate(struct ath_hw *ah, int i; struct chan_centers centers; - int8_t mask_m[123]; - int8_t mask_p[123]; int cur_bb_spur; bool is2GHz = IS_CHAN_2GHZ(chan); - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - ath9k_hw_get_channel_centers(ah, chan, ¢ers); freq = centers.synth_center; From c78296665c3d81f040117432ab9e1cb125521b0c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Fri, 26 Feb 2016 17:56:13 +0100 Subject: [PATCH 06/47] batman-adv: Check skb size before using encapsulated ETH+VLAN header The encapsulated ethernet and VLAN header may be outside the received ethernet frame. Thus the skb buffer size has to be checked before it can be parsed to find out if it encapsulates another batman-adv packet. Fixes: 420193573f11 ("batman-adv: softif bridge loop avoidance") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/soft-interface.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 0710379491bf..8a136b6a1ff0 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -408,11 +408,17 @@ void batadv_interface_rx(struct net_device *soft_iface, */ nf_reset(skb); + if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) + goto dropped; + vid = batadv_get_vid(skb, 0); ethhdr = eth_hdr(skb); switch (ntohs(ethhdr->h_proto)) { case ETH_P_8021Q: + if (!pskb_may_pull(skb, VLAN_ETH_HLEN)) + goto dropped; + vhdr = (struct vlan_ethhdr *)skb->data; if (vhdr->h_vlan_encapsulated_proto != ethertype) @@ -424,8 +430,6 @@ void batadv_interface_rx(struct net_device *soft_iface, } /* skb->dev & skb->pkt_type are set here */ - if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) - goto dropped; skb->protocol = eth_type_trans(skb, soft_iface); /* should not be necessary anymore as we use skb_pull_rcsum() From e48474ed8a217b7f80f2a42bc05352406a06cb67 Mon Sep 17 00:00:00 2001 From: Marek Lindner Date: Fri, 11 Mar 2016 16:01:09 +0100 Subject: [PATCH 07/47] batman-adv: init neigh node last seen field Signed-off-by: Marek Lindner [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Antonio Quartulli --- net/batman-adv/originator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index e4cbb0753e37..d52f67a0c057 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -663,6 +663,7 @@ batadv_neigh_node_new(struct batadv_orig_node *orig_node, ether_addr_copy(neigh_node->addr, neigh_addr); neigh_node->if_incoming = hard_iface; neigh_node->orig_node = orig_node; + neigh_node->last_seen = jiffies; /* extra reference for return */ kref_init(&neigh_node->refcount); From f2d23861b818d08bcd15cc1612ae94aa33b3931c Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sat, 19 Mar 2016 13:55:21 +0100 Subject: [PATCH 08/47] batman-adv: Deactivate TO_BE_ACTIVATED hardif on shutdown The shutdown of an batman-adv interface can happen with one of its slave interfaces still being in the BATADV_IF_TO_BE_ACTIVATED state. A possible reason for it is that the routing algorithm BATMAN_V was selected and batadv_schedule_bat_ogm was not yet called for this interface. This slave interface still has to be set to BATADV_IF_INACTIVE or the batman-adv interface will never reduce its usage counter and thus never gets shutdown. This problem can be simulated via: $ modprobe dummy $ modprobe batman-adv routing_algo=BATMAN_V $ ip link add bat0 type batadv $ ip link set dummy0 master bat0 $ ip link set dummy0 up $ ip link del bat0 unregister_netdevice: waiting for bat0 to become free. Usage count = 3 Reported-by: Matthias Schiffer Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/hard-interface.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b22b2775a0a5..c61d5b0b24d2 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -572,8 +572,7 @@ void batadv_hardif_disable_interface(struct batadv_hard_iface *hard_iface, struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hard_iface *primary_if = NULL; - if (hard_iface->if_status == BATADV_IF_ACTIVE) - batadv_hardif_deactivate_interface(hard_iface); + batadv_hardif_deactivate_interface(hard_iface); if (hard_iface->if_status != BATADV_IF_INACTIVE) goto out; From d1a65f1741bfd9c69f9e4e2ad447a89b6810427d Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 20 Mar 2016 12:27:53 +0100 Subject: [PATCH 09/47] batman-adv: Reduce refcnt of removed router when updating route _batadv_update_route rcu_derefences orig_ifinfo->router outside of a spinlock protected region to print some information messages to the debug log. But this pointer is not checked again when the new pointer is assigned in the spinlock protected region. Thus is can happen that the value of orig_ifinfo->router changed in the meantime and thus the reference counter of the wrong router gets reduced after the spinlock protected region. Just rcu_dereferencing the value of orig_ifinfo->router inside the spinlock protected region (which also set the new pointer) is enough to get the correct old router object. Fixes: e1a5382f978b ("batman-adv: Make orig_node->router an rcu protected pointer") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/routing.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 4dd646a52f1a..b781bf753250 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -105,6 +105,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, neigh_node = NULL; spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + rcu_assign_pointer(orig_ifinfo->router, neigh_node); spin_unlock_bh(&orig_node->neigh_list_lock); batadv_orig_ifinfo_put(orig_ifinfo); From c4fdb6cff2aa0ae740c5f19b6f745cbbe786d42f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Linus=20L=C3=BCssing?= Date: Fri, 11 Mar 2016 14:04:49 +0100 Subject: [PATCH 10/47] batman-adv: Fix broadcast/ogm queue limit on a removed interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When removing a single interface while a broadcast or ogm packet is still pending then we will free the forward packet without releasing the queue slots again. This patch is supposed to fix this issue. Fixes: 6d5808d4ae1b ("batman-adv: Add missing hardif_free_ref in forw_packet_free") Signed-off-by: Linus Lüssing [sven@narfation.org: fix conflicts with current version] Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: Antonio Quartulli --- net/batman-adv/send.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 3ce06e0a91b1..76417850d3fc 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -675,6 +675,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->bcast_queue_left); + batadv_forw_packet_free(forw_packet); } } @@ -702,6 +705,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); + if (!forw_packet->own) + atomic_inc(&bat_priv->batman_queue_left); + batadv_forw_packet_free(forw_packet); } } From e6436be21e77e3659b4ff7e357ab5a8342d132d2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Apr 2016 13:47:08 +0200 Subject: [PATCH 11/47] mac80211: fix statistics leak if dev_alloc_name() fails In the case that dev_alloc_name() fails, e.g. because the name was given by the user and already exists, we need to clean up properly and free the per-CPU statistics. Fix that. Cc: stable@vger.kernel.org Fixes: 5a490510ba5f ("mac80211: use per-CPU TX/RX statistics") Signed-off-by: Johannes Berg --- net/mac80211/iface.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 453b4e741780..e1cb22c16530 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1761,7 +1761,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = dev_alloc_name(ndev, ndev->name); if (ret < 0) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } @@ -1847,7 +1847,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ret = register_netdevice(ndev); if (ret) { - free_netdev(ndev); + ieee80211_if_free(ndev); return ret; } } From 1fa72e29e14d97fbda15437c648d7cc4eb00bff8 Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Apr 2016 02:30:49 -0400 Subject: [PATCH 12/47] bnxt_en: Don't fallback to INTA on VF. Only MSI-X can be used on a VF. The driver should fail initialization if it cannot successfully enable MSI-X. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 12a009d720cd..e787debab851 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -4309,7 +4309,7 @@ static int bnxt_setup_int_mode(struct bnxt *bp) if (bp->flags & BNXT_FLAG_MSIX_CAP) rc = bnxt_setup_msix(bp); - if (!(bp->flags & BNXT_FLAG_USING_MSIX)) { + if (!(bp->flags & BNXT_FLAG_USING_MSIX) && BNXT_PF(bp)) { /* fallback to INTA */ rc = bnxt_setup_inta(bp); } From 2839f28bd5bf8fd2ab4a1ea3a5589c8f94364cbb Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Apr 2016 02:30:50 -0400 Subject: [PATCH 13/47] bnxt_en: Limit RX BD pages to be no bigger than 32K. The RX BD length field of this device is 16-bit, so the largest buffer size is 65535. For LRO and GRO, we allocate native CPU pages for the aggregation ring buffers. It won't work if the native CPU page size is 64K or bigger. We fix this by defining BNXT_RX_PAGE_SIZE to be native CPU page size up to 32K. Replace PAGE_SIZE with BNXT_RX_PAGE_SIZE in all appropriate places related to the rx aggregation ring logic. The next patch will add additional logic to divide the page into 32K chunks for aggrgation ring buffers if PAGE_SIZE is bigger than BNXT_RX_PAGE_SIZE. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 18 +++++++++--------- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 9 +++++++++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index e787debab851..28480f6c6a08 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -586,7 +586,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, if (!page) return -ENOMEM; - mapping = dma_map_page(&pdev->dev, page, 0, PAGE_SIZE, + mapping = dma_map_page(&pdev->dev, page, 0, BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE); if (dma_mapping_error(&pdev->dev, mapping)) { __free_page(page); @@ -740,7 +740,7 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_napi *bnapi, return NULL; } - dma_unmap_page(&pdev->dev, mapping, PAGE_SIZE, + dma_unmap_page(&pdev->dev, mapping, BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE); skb->data_len += frag_len; @@ -1584,7 +1584,7 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) dma_unmap_page(&pdev->dev, dma_unmap_addr(rx_agg_buf, mapping), - PAGE_SIZE, PCI_DMA_FROMDEVICE); + BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE); rx_agg_buf->page = NULL; __clear_bit(j, rxr->rx_agg_bmap); @@ -1973,7 +1973,7 @@ static int bnxt_init_one_rx_ring(struct bnxt *bp, int ring_nr) if (!(bp->flags & BNXT_FLAG_AGG_RINGS)) return 0; - type = ((u32)PAGE_SIZE << RX_BD_LEN_SHIFT) | + type = ((u32)BNXT_RX_PAGE_SIZE << RX_BD_LEN_SHIFT) | RX_BD_TYPE_RX_AGG_BD | RX_BD_FLAGS_SOP; bnxt_init_rxbd_pages(ring, type); @@ -2164,7 +2164,7 @@ void bnxt_set_ring_params(struct bnxt *bp) bp->rx_agg_nr_pages = 0; if (bp->flags & BNXT_FLAG_TPA) - agg_factor = 4; + agg_factor = min_t(u32, 4, 65536 / BNXT_RX_PAGE_SIZE); bp->flags &= ~BNXT_FLAG_JUMBO; if (rx_space > PAGE_SIZE) { @@ -3020,12 +3020,12 @@ static int bnxt_hwrm_vnic_set_tpa(struct bnxt *bp, u16 vnic_id, u32 tpa_flags) /* Number of segs are log2 units, and first packet is not * included as part of this units. */ - if (mss <= PAGE_SIZE) { - n = PAGE_SIZE / mss; + if (mss <= BNXT_RX_PAGE_SIZE) { + n = BNXT_RX_PAGE_SIZE / mss; nsegs = (MAX_SKB_FRAGS - 1) * n; } else { - n = mss / PAGE_SIZE; - if (mss & (PAGE_SIZE - 1)) + n = mss / BNXT_RX_PAGE_SIZE; + if (mss & (BNXT_RX_PAGE_SIZE - 1)) n++; nsegs = (MAX_SKB_FRAGS - n) / n; } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 709b95b8fcba..0cf1e2a73179 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -407,6 +407,15 @@ struct rx_tpa_end_cmp_ext { #define BNXT_PAGE_SIZE (1 << BNXT_PAGE_SHIFT) +/* The RXBD length is 16-bit so we can only support page sizes < 64K */ +#if (PAGE_SHIFT > 15) +#define BNXT_RX_PAGE_SHIFT 15 +#else +#define BNXT_RX_PAGE_SHIFT PAGE_SHIFT +#endif + +#define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) + #define BNXT_MIN_PKT_SIZE 45 #define BNXT_NUM_TESTS(bp) 0 From 89d0a06c516339c0a2b3d02677f5d6310b3319fb Mon Sep 17 00:00:00 2001 From: Michael Chan Date: Mon, 25 Apr 2016 02:30:51 -0400 Subject: [PATCH 14/47] bnxt_en: Divide a page into 32K buffers for the aggregation ring if necessary. If PAGE_SIZE is bigger than BNXT_RX_PAGE_SIZE, that means the native CPU page is bigger than the maximum length of the RX BD. Divide the page into multiple 32K buffers for the aggregation ring. Add an offset field in the bnxt_sw_rx_agg_bd struct to keep track of the page offset of each buffer. Since each page can be referenced by multiple buffer entries, call get_page() as needed to get the proper reference count. Signed-off-by: Michael Chan Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 35 +++++++++++++++++++---- drivers/net/ethernet/broadcom/bnxt/bnxt.h | 4 +++ 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 28480f6c6a08..72eb29ed0359 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -581,12 +581,30 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, struct page *page; dma_addr_t mapping; u16 sw_prod = rxr->rx_sw_agg_prod; + unsigned int offset = 0; - page = alloc_page(gfp); - if (!page) - return -ENOMEM; + if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) { + page = rxr->rx_page; + if (!page) { + page = alloc_page(gfp); + if (!page) + return -ENOMEM; + rxr->rx_page = page; + rxr->rx_page_offset = 0; + } + offset = rxr->rx_page_offset; + rxr->rx_page_offset += BNXT_RX_PAGE_SIZE; + if (rxr->rx_page_offset == PAGE_SIZE) + rxr->rx_page = NULL; + else + get_page(page); + } else { + page = alloc_page(gfp); + if (!page) + return -ENOMEM; + } - mapping = dma_map_page(&pdev->dev, page, 0, BNXT_RX_PAGE_SIZE, + mapping = dma_map_page(&pdev->dev, page, offset, BNXT_RX_PAGE_SIZE, PCI_DMA_FROMDEVICE); if (dma_mapping_error(&pdev->dev, mapping)) { __free_page(page); @@ -601,6 +619,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp, rxr->rx_sw_agg_prod = NEXT_RX_AGG(sw_prod); rx_agg_buf->page = page; + rx_agg_buf->offset = offset; rx_agg_buf->mapping = mapping; rxbd->rx_bd_haddr = cpu_to_le64(mapping); rxbd->rx_bd_opaque = sw_prod; @@ -642,6 +661,7 @@ static void bnxt_reuse_rx_agg_bufs(struct bnxt_napi *bnapi, u16 cp_cons, page = cons_rx_buf->page; cons_rx_buf->page = NULL; prod_rx_buf->page = page; + prod_rx_buf->offset = cons_rx_buf->offset; prod_rx_buf->mapping = cons_rx_buf->mapping; @@ -709,7 +729,8 @@ static struct sk_buff *bnxt_rx_pages(struct bnxt *bp, struct bnxt_napi *bnapi, RX_AGG_CMP_LEN) >> RX_AGG_CMP_LEN_SHIFT; cons_rx_buf = &rxr->rx_agg_ring[cons]; - skb_fill_page_desc(skb, i, cons_rx_buf->page, 0, frag_len); + skb_fill_page_desc(skb, i, cons_rx_buf->page, + cons_rx_buf->offset, frag_len); __clear_bit(cons, rxr->rx_agg_bmap); /* It is possible for bnxt_alloc_rx_page() to allocate @@ -1591,6 +1612,10 @@ static void bnxt_free_rx_skbs(struct bnxt *bp) __free_page(page); } + if (rxr->rx_page) { + __free_page(rxr->rx_page); + rxr->rx_page = NULL; + } } } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 0cf1e2a73179..8b823ff558ff 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -515,6 +515,7 @@ struct bnxt_sw_rx_bd { struct bnxt_sw_rx_agg_bd { struct page *page; + unsigned int offset; dma_addr_t mapping; }; @@ -595,6 +596,9 @@ struct bnxt_rx_ring_info { unsigned long *rx_agg_bmap; u16 rx_agg_bmap_size; + struct page *rx_page; + unsigned int rx_page_offset; + dma_addr_t rx_desc_mapping[MAX_RX_PAGES]; dma_addr_t rx_agg_desc_mapping[MAX_RX_AGG_PAGES]; From edb9a1b8942b32762b1179039debf2172ad9cc32 Mon Sep 17 00:00:00 2001 From: Eric Engestrom Date: Mon, 25 Apr 2016 07:36:56 +0100 Subject: [PATCH 15/47] Documentation: networking: fix spelling mistakes Signed-off-by: Eric Engestrom Signed-off-by: David S. Miller --- Documentation/networking/altera_tse.txt | 6 +++--- Documentation/networking/ipvlan.txt | 6 +++--- Documentation/networking/pktgen.txt | 6 +++--- Documentation/networking/vrf.txt | 2 +- Documentation/networking/xfrm_sync.txt | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Documentation/networking/altera_tse.txt b/Documentation/networking/altera_tse.txt index 3f24df8c6e65..50b8589d12fd 100644 --- a/Documentation/networking/altera_tse.txt +++ b/Documentation/networking/altera_tse.txt @@ -6,7 +6,7 @@ This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers using the SGDMA and MSGDMA soft DMA IP components. The driver uses the platform bus to obtain component resources. The designs used to test this driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board, -and tested with ARM and NIOS processor hosts seperately. The anticipated use +and tested with ARM and NIOS processor hosts separately. The anticipated use cases are simple communications between an embedded system and an external peer for status and simple configuration of the embedded system. @@ -65,14 +65,14 @@ Driver parameters can be also passed in command line by using: 4.1) Transmit process When the driver's transmit routine is called by the kernel, it sets up a transmit descriptor by calling the underlying DMA transmit routine (SGDMA or -MSGDMA), and initites a transmit operation. Once the transmit is complete, an +MSGDMA), and initiates a transmit operation. Once the transmit is complete, an interrupt is driven by the transmit DMA logic. The driver handles the transmit completion in the context of the interrupt handling chain by recycling resource required to send and track the requested transmit operation. 4.2) Receive process The driver will post receive buffers to the receive DMA logic during driver -intialization. Receive buffers may or may not be queued depending upon the +initialization. Receive buffers may or may not be queued depending upon the underlying DMA logic (MSGDMA is able queue receive buffers, SGDMA is not able to queue receive buffers to the SGDMA receive logic). When a packet is received, the DMA logic generates an interrupt. The driver handles a receive diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index cf996394e466..14422f8fcdc4 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -8,7 +8,7 @@ Initial Release: This is conceptually very similar to the macvlan driver with one major exception of using L3 for mux-ing /demux-ing among slaves. This property makes the master device share the L2 with it's slave devices. I have developed this -driver in conjuntion with network namespaces and not sure if there is use case +driver in conjunction with network namespaces and not sure if there is use case outside of it. @@ -42,7 +42,7 @@ out. In this mode the slaves will RX/TX multicast and broadcast (if applicable) as well. 4.2 L3 mode: - In this mode TX processing upto L3 happens on the stack instance attached + In this mode TX processing up to L3 happens on the stack instance attached to the slave device and packets are switched to the stack instance of the master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves @@ -56,7 +56,7 @@ situations defines your use case then you can choose to use ipvlan - (a) The Linux host that is connected to the external switch / router has policy configured that allows only one mac per port. (b) No of virtual devices created on a master exceed the mac capacity and -puts the NIC in promiscous mode and degraded performance is a concern. +puts the NIC in promiscuous mode and degraded performance is a concern. (c) If the slave device is to be put into the hostile / untrusted network namespace where L2 on the slave could be changed / misused. diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt index f4be85e96005..2c4e3354e128 100644 --- a/Documentation/networking/pktgen.txt +++ b/Documentation/networking/pktgen.txt @@ -67,12 +67,12 @@ The two basic thread commands are: * add_device DEVICE@NAME -- adds a single device * rem_device_all -- remove all associated devices -When adding a device to a thread, a corrosponding procfile is created +When adding a device to a thread, a corresponding procfile is created which is used for configuring this device. Thus, device names need to be unique. To support adding the same device to multiple threads, which is useful -with multi queue NICs, a the device naming scheme is extended with "@": +with multi queue NICs, the device naming scheme is extended with "@": device@something The part after "@" can be anything, but it is custom to use the thread @@ -221,7 +221,7 @@ Sample scripts A collection of tutorial scripts and helpers for pktgen is in the samples/pktgen directory. The helper parameters.sh file support easy -and consistant parameter parsing across the sample scripts. +and consistent parameter parsing across the sample scripts. Usage example and help: ./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2 diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt index d52aa10cfe91..5da679c573d2 100644 --- a/Documentation/networking/vrf.txt +++ b/Documentation/networking/vrf.txt @@ -41,7 +41,7 @@ using an rx_handler which gives the impression that packets flow through the VRF device. Similarly on egress routing rules are used to send packets to the VRF device driver before getting sent out the actual interface. This allows tcpdump on a VRF device to capture all packets into and out of the -VRF as a whole.[1] Similiarly, netfilter [2] and tc rules can be applied +VRF as a whole.[1] Similarly, netfilter [2] and tc rules can be applied using the VRF device to specify rules that apply to the VRF domain as a whole. [1] Packets in the forwarded state do not flow through the device, so those diff --git a/Documentation/networking/xfrm_sync.txt b/Documentation/networking/xfrm_sync.txt index d7aac9dedeb4..8d88e0f2ec49 100644 --- a/Documentation/networking/xfrm_sync.txt +++ b/Documentation/networking/xfrm_sync.txt @@ -4,7 +4,7 @@ Krisztian and others and additional patches from Jamal . The end goal for syncing is to be able to insert attributes + generate -events so that the an SA can be safely moved from one machine to another +events so that the SA can be safely moved from one machine to another for HA purposes. The idea is to synchronize the SA so that the takeover machine can do the processing of the SA as accurate as possible if it has access to it. @@ -13,7 +13,7 @@ We already have the ability to generate SA add/del/upd events. These patches add ability to sync and have accurate lifetime byte (to ensure proper decay of SAs) and replay counters to avoid replay attacks with as minimal loss at failover time. -This way a backup stays as closely uptodate as an active member. +This way a backup stays as closely up-to-date as an active member. Because the above items change for every packet the SA receives, it is possible for a lot of the events to be generated. @@ -163,7 +163,7 @@ If you have an SA that is getting hit by traffic in bursts such that there is a period where the timer threshold expires with no packets seen, then an odd behavior is seen as follows: The first packet arrival after a timer expiry will trigger a timeout -aevent; i.e we dont wait for a timeout period or a packet threshold +event; i.e we don't wait for a timeout period or a packet threshold to be reached. This is done for simplicity and efficiency reasons. -JHS From 4be2b49e2820eeaf6436bf7c6d0099c86b6cd237 Mon Sep 17 00:00:00 2001 From: Stanislaw Gruszka Date: Mon, 25 Apr 2016 10:59:19 +0200 Subject: [PATCH 16/47] myri10ge: fix sleeping with bh disabled napi_disable() can not be called with bh disabled, move locking just around myri10ge_ss_lock_napi() . Patches fixes following bug: [ 114.278378] BUG: sleeping function called from invalid context at net/core/dev.c:4383 [ 114.313712] Call Trace: [ 114.314943] [] dump_stack+0x19/0x1b [ 114.317673] [] __might_sleep+0x173/0x230 [ 114.320566] [] napi_disable+0x27/0x90 [ 114.323254] [] myri10ge_close+0xbf/0x3f0 [myri10ge] Signed-off-by: Stanislaw Gruszka Acked-by: Hyong-Youb Kim Signed-off-by: David S. Miller --- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 270c9eeb7ab6..6d1a956e3f77 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -2668,9 +2668,9 @@ static int myri10ge_close(struct net_device *dev) del_timer_sync(&mgp->watchdog_timer); mgp->running = MYRI10GE_ETH_STOPPING; - local_bh_disable(); /* myri10ge_ss_lock_napi needs bh disabled */ for (i = 0; i < mgp->num_slices; i++) { napi_disable(&mgp->ss[i].napi); + local_bh_disable(); /* myri10ge_ss_lock_napi needs this */ /* Lock the slice to prevent the busy_poll handler from * accessing it. Later when we bring the NIC up, myri10ge_open * resets the slice including this lock. @@ -2679,8 +2679,8 @@ static int myri10ge_close(struct net_device *dev) pr_info("Slice %d locked\n", i); mdelay(1); } + local_bh_enable(); } - local_bh_enable(); netif_carrier_off(dev); netif_tx_stop_all_queues(dev); From dcb4123cbec0294eee1815cfaa1e9b4a8fddb06a Mon Sep 17 00:00:00 2001 From: Jon Cooper Date: Mon, 25 Apr 2016 16:51:00 +0100 Subject: [PATCH 17/47] sfc: disable RSS when unsupported When certain firmware variants are selected (via the sfboot utility) the SFC7000 and SFC8000 series NICs don't support RSS. The driver still tries (and fails) to insert filters with the RSS flag, and the NIC fails to pass traffic. When the firmware reports RSS_LIMITED suppress allocating a default RSS context. The absence of an RSS context is picked up in filter insertion and RSS flags are discarded. Signed-off-by: Bert Kenward Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 98d33d462c6c..1681084cc96f 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1920,6 +1920,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context, return 0; } + if (nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN) + return -EOPNOTSUPP; + MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID, nic_data->vport_id); MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type); @@ -2923,9 +2927,16 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx, bool replacing) { struct efx_ef10_nic_data *nic_data = efx->nic_data; + u32 flags = spec->flags; memset(inbuf, 0, MC_CMD_FILTER_OP_IN_LEN); + /* Remove RSS flag if we don't have an RSS context. */ + if (flags & EFX_FILTER_FLAG_RX_RSS && + spec->rss_context == EFX_FILTER_RSS_CONTEXT_DEFAULT && + nic_data->rx_rss_context == EFX_EF10_RSS_CONTEXT_INVALID) + flags &= ~EFX_FILTER_FLAG_RX_RSS; + if (replacing) { MCDI_SET_DWORD(inbuf, FILTER_OP_IN_OP, MC_CMD_FILTER_OP_IN_OP_REPLACE); @@ -2985,10 +2996,10 @@ static void efx_ef10_filter_push_prep(struct efx_nic *efx, spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ? 0 : spec->dmaq_id); MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_MODE, - (spec->flags & EFX_FILTER_FLAG_RX_RSS) ? + (flags & EFX_FILTER_FLAG_RX_RSS) ? MC_CMD_FILTER_OP_IN_RX_MODE_RSS : MC_CMD_FILTER_OP_IN_RX_MODE_SIMPLE); - if (spec->flags & EFX_FILTER_FLAG_RX_RSS) + if (flags & EFX_FILTER_FLAG_RX_RSS) MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_CONTEXT, spec->rss_context != EFX_FILTER_RSS_CONTEXT_DEFAULT ? From e00f80173bacba88a80ec531f78f04c5ae92d7d1 Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Mon, 25 Apr 2016 17:42:12 +0100 Subject: [PATCH 18/47] MAINTAINERS: net: update sfc maintainers Add myself and Edward Cree as maintainers. Remove Shradha Shah, who is on extended leave. Cc: David S. Miller Cc: Edward Cree Cc: Shradha Shah Signed-off-by: Bert Kenward Acked-by: Edward Cree Signed-off-by: David S. Miller --- MAINTAINERS | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 849133608da7..17ad61503184 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10014,7 +10014,8 @@ F: drivers/infiniband/hw/ocrdma/ SFC NETWORK DRIVER M: Solarflare linux maintainers -M: Shradha Shah +M: Edward Cree +M: Bert Kenward L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/sfc/ From 62522ef3c399996f6c8120bfd14b94280bc9f490 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Mon, 25 Apr 2016 19:41:38 +0200 Subject: [PATCH 19/47] net: ethernet: davinci_emac: Fix devioctl while in fixed link When configured in fixed link, the DaVinci emac driver sets the priv->phydev to NULL and further ioctl calls to the phy_mii_ioctl() causes the kernel to crash. Cc: Brian Hutchinson Fixes: 1bb6aa56bb38 ("net: davinci_emac: Add support for fixed-link PHY") Signed-off-by: Neil Armstrong Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/davinci_emac.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 58d58f002559..f56d66e6ec15 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -1512,7 +1512,10 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd) /* TODO: Add phy read and write and private statistics get feature */ - return phy_mii_ioctl(priv->phydev, ifrq, cmd); + if (priv->phydev) + return phy_mii_ioctl(priv->phydev, ifrq, cmd); + else + return -EOPNOTSUPP; } static int match_first_device(struct device *dev, void *data) From 7b7483409f09c15f30ac43242ead1ab3061e1f59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 25 Apr 2016 15:13:17 -0300 Subject: [PATCH 20/47] net: fix net_gso_ok for new GSO types. Fix casting in net_gso_ok. Otherwise the shift on gso_type << NETIF_F_GSO_SHIFT may hit the 32th bit and make it look like a INT_MIN, which is then promoted from signed to uint64 which is 0xffffffff80000000, resulting in wrong behavior when it is and'ed with the feature itself, as in: This test app: #include #include int main(int argc, char **argv) { uint64_t feature1; uint64_t feature2; int gso_type = 1 << 15; feature1 = gso_type << 16; feature2 = (uint64_t)gso_type << 16; printf("%lx %lx\n", feature1, feature2); return 0; } Gives: ffffffff80000000 80000000 So that this: return (features & feature) == feature; Actually works on more bits than expected and invalid ones. Fix is to promote it earlier. Issue noted while rebasing SCTP GSO patch but posting separetely as someone else may experience this meanwhile. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8395308a2445..b3c46b019ac1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4004,7 +4004,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); From 1d9619d5337df6cf56eb66b6c8213d1317583513 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 25 Apr 2016 23:11:22 +0100 Subject: [PATCH 21/47] net: dsa: mv88e6xxx: fix uninitialized error return The error return err is not initialized and there is a possibility that err is not assigned causing mv88e6xxx_port_bridge_join to return a garbage error return status. Fix this by initializing err to 0. Signed-off-by: Colin Ian King Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- drivers/net/dsa/mv88e6xxx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index a2904029cccc..5e572b3510b9 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -2181,7 +2181,7 @@ int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { struct mv88e6xxx_priv_state *ps = ds_to_priv(ds); - int i, err; + int i, err = 0; mutex_lock(&ps->smi_mutex); From 74d79a2e30c476ccc7d45ecf978f38d815287f81 Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Mon, 25 Apr 2016 22:22:32 +0000 Subject: [PATCH 22/47] lan78xx: fix statistics counter error Fix rx_bytes, tx_bytes and tx_frames error in netdev.stats. - rx_bytes counted bytes excluding size of struct ethhdr. - tx_packets didn't count multiple packets in a single urb - tx_bytes included 8 bytes of extra commands. Signed-off-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index f20890ee03f3..0460b81a8784 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -269,6 +269,7 @@ struct skb_data { /* skb->cb is one of these */ struct lan78xx_net *dev; enum skb_state state; size_t length; + int num_of_packet; }; struct usb_context { @@ -2464,7 +2465,7 @@ static void tx_complete(struct urb *urb) struct lan78xx_net *dev = entry->dev; if (urb->status == 0) { - dev->net->stats.tx_packets++; + dev->net->stats.tx_packets += entry->num_of_packet; dev->net->stats.tx_bytes += entry->length; } else { dev->net->stats.tx_errors++; @@ -2681,10 +2682,11 @@ void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) return; } - skb->protocol = eth_type_trans(skb, dev->net); dev->net->stats.rx_packets++; dev->net->stats.rx_bytes += skb->len; + skb->protocol = eth_type_trans(skb, dev->net); + netif_dbg(dev, rx_status, dev->net, "< rx, len %zu, type 0x%x\n", skb->len + sizeof(struct ethhdr), skb->protocol); memset(skb->cb, 0, sizeof(struct skb_data)); @@ -2934,13 +2936,16 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) skb_totallen = 0; pkt_cnt = 0; + count = 0; + length = 0; for (skb = tqp->next; pkt_cnt < tqp->qlen; skb = skb->next) { if (skb_is_gso(skb)) { if (pkt_cnt) { /* handle previous packets first */ break; } - length = skb->len; + count = 1; + length = skb->len - TX_OVERHEAD; skb2 = skb_dequeue(tqp); goto gso_skb; } @@ -2961,14 +2966,13 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) for (count = pos = 0; count < pkt_cnt; count++) { skb2 = skb_dequeue(tqp); if (skb2) { + length += (skb2->len - TX_OVERHEAD); memcpy(skb->data + pos, skb2->data, skb2->len); pos += roundup(skb2->len, sizeof(u32)); dev_kfree_skb(skb2); } } - length = skb_totallen; - gso_skb: urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { @@ -2980,6 +2984,7 @@ gso_skb: entry->urb = urb; entry->dev = dev; entry->length = length; + entry->num_of_packet = count; spin_lock_irqsave(&dev->txq.lock, flags); ret = usb_autopm_get_interface_async(dev->intf); From 14437e3fa284f465dbbc8611fd4331ca8d60e986 Mon Sep 17 00:00:00 2001 From: Woojung Huh Date: Mon, 25 Apr 2016 22:22:36 +0000 Subject: [PATCH 23/47] lan78xx: workaround of forced 100 Full/Half duplex mode error At forced 100 Full & Half duplex mode, chip may fail to set mode correctly when cable is switched between long(~50+m) and short one. As workaround, set to 10 before setting to 100 at forced 100 F/H mode. Signed-off-by: Woojung Huh Signed-off-by: David S. Miller --- drivers/net/usb/lan78xx.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 0460b81a8784..f64778ad9753 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1804,7 +1804,34 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) static void lan78xx_link_status_change(struct net_device *net) { - /* nothing to do */ + struct phy_device *phydev = net->phydev; + int ret, temp; + + /* At forced 100 F/H mode, chip may fail to set mode correctly + * when cable is switched between long(~50+m) and short one. + * As workaround, set to 10 before setting to 100 + * at forced 100 F/H mode. + */ + if (!phydev->autoneg && (phydev->speed == 100)) { + /* disable phy interrupt */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; + ret = phy_write(phydev, LAN88XX_INT_MASK, temp); + + temp = phy_read(phydev, MII_BMCR); + temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); + phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ + temp |= BMCR_SPEED100; + phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ + + /* clear pending interrupt generated while workaround */ + temp = phy_read(phydev, LAN88XX_INT_STS); + + /* enable phy interrupt back */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; + ret = phy_write(phydev, LAN88XX_INT_MASK, temp); + } } static int lan78xx_phy_init(struct lan78xx_net *dev) From eb63efb4f263f1f4c2375731d3a9e2040030bc6a Mon Sep 17 00:00:00 2001 From: Christophe Jaillet Date: Tue, 26 Apr 2016 04:33:43 +0200 Subject: [PATCH 24/47] ps3_gelic: fix memcpy parameter The size allocated for target->hwinfo and the number of bytes copied in it should be consistent. Signed-off-by: Christophe JAILLET Signed-off-by: David S. Miller --- drivers/net/ethernet/toshiba/ps3_gelic_wireless.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c index 13214a6492ac..743b18266a7c 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c @@ -1622,7 +1622,7 @@ static void gelic_wl_scan_complete_event(struct gelic_wl_info *wl) continue; /* copy hw scan info */ - memcpy(target->hwinfo, scan_info, scan_info->size); + memcpy(target->hwinfo, scan_info, be16_to_cpu(scan_info->size)); target->essid_len = strnlen(scan_info->essid, sizeof(scan_info->essid)); target->rate_len = 0; From a05d7dfc51417f6437ff35e4682fe547fb665286 Mon Sep 17 00:00:00 2001 From: Timur Tabi Date: Tue, 26 Apr 2016 12:44:18 -0500 Subject: [PATCH 25/47] net: phy: at803x: only the AT8030 needs a hardware reset on link change Commit 13a56b44 ("at803x: Add support for hardware reset") added a work-around for a hardware bug on the AT8030. However, the work-around was being called for all 803x PHYs, even those that don't need it. Function at803x_link_change_notify() checks to make sure that it only resets the PHY on the 8030, but it makes more sense to not call that function at all if it isn't needed. Signed-off-by: Timur Tabi Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/at803x.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index b3ffaee30858..f279a897a5c7 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -359,27 +359,25 @@ static void at803x_link_change_notify(struct phy_device *phydev) * in the FIFO. In such cases, the FIFO enters an error mode it * cannot recover from by software. */ - if (phydev->drv->phy_id == ATH8030_PHY_ID) { - if (phydev->state == PHY_NOLINK) { - if (priv->gpiod_reset && !priv->phy_reset) { - struct at803x_context context; + if (phydev->state == PHY_NOLINK) { + if (priv->gpiod_reset && !priv->phy_reset) { + struct at803x_context context; - at803x_context_save(phydev, &context); + at803x_context_save(phydev, &context); - gpiod_set_value(priv->gpiod_reset, 1); - msleep(1); - gpiod_set_value(priv->gpiod_reset, 0); - msleep(1); + gpiod_set_value(priv->gpiod_reset, 1); + msleep(1); + gpiod_set_value(priv->gpiod_reset, 0); + msleep(1); - at803x_context_restore(phydev, &context); + at803x_context_restore(phydev, &context); - phydev_dbg(phydev, "%s(): phy was reset\n", - __func__); - priv->phy_reset = true; - } - } else { - priv->phy_reset = false; + phydev_dbg(phydev, "%s(): phy was reset\n", + __func__); + priv->phy_reset = true; } + } else { + priv->phy_reset = false; } } @@ -391,7 +389,6 @@ static struct phy_driver at803x_driver[] = { .phy_id_mask = 0xffffffef, .probe = at803x_probe, .config_init = at803x_config_init, - .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, .suspend = at803x_suspend, @@ -427,7 +424,6 @@ static struct phy_driver at803x_driver[] = { .phy_id_mask = 0xffffffef, .probe = at803x_probe, .config_init = at803x_config_init, - .link_change_notify = at803x_link_change_notify, .set_wol = at803x_set_wol, .get_wol = at803x_get_wol, .suspend = at803x_suspend, From a64b04d86d14c81f50f68e102f79ef301e3d0a0e Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 11:29:06 +0200 Subject: [PATCH 26/47] gre: do not assign header_ops in collect metadata mode In ipgre mode (i.e. not gretap) with collect metadata flag set, the tunnel is incorrectly assumed to be mGRE in NBMA mode (see commit 6a5f44d7a048c). This is not the case, we're controlling the encapsulation addresses by lwtunnel metadata. And anyway, assigning dev->header_ops in collect metadata mode does not make sense. Although it would be more user firendly to reject requests that specify both the collect metadata flag and a remote/local IP address, this would break current users of gretap or introduce ugly code and differences in handling ipgre and gretap configuration. Keep the current behavior of remote/local IP address being ignored in such case. v3: Back to v1, added explanation paragraph. v2: Reject configuration specifying both remote/local address and collect metadata flag. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index af5d1f38217f..d0abde4236af 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -893,7 +893,7 @@ static int ipgre_tunnel_init(struct net_device *dev) netif_keep_dst(dev); dev->addr_len = 4; - if (iph->daddr) { + if (iph->daddr && !tunnel->collect_md) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { if (!iph->saddr) @@ -902,8 +902,9 @@ static int ipgre_tunnel_init(struct net_device *dev) dev->header_ops = &ipgre_header_ops; } #endif - } else + } else if (!tunnel->collect_md) { dev->header_ops = &ipgre_header_ops; + } return ip_tunnel_init(dev); } From 2090714e1d6e80979dd6926be22b0de9ca432273 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 11:29:07 +0200 Subject: [PATCH 27/47] gre: build header correctly for collect metadata tunnels In ipgre (i.e. not gretap) + collect metadata mode, the skb was assumed to contain Ethernet header and was encapsulated as ETH_P_TEB. This is not the case, the interface is ARPHRD_IPGRE and the protocol to be used for encapsulation is skb->protocol. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Acked-by: Pravin B Shelar Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index d0abde4236af..f973e0a58993 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -523,7 +523,8 @@ static struct rtable *gre_get_rt(struct sk_buff *skb, return ip_route_output_key(net, fl); } -static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; @@ -575,7 +576,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev) } flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); - build_header(skb, tunnel_hlen, flags, htons(ETH_P_TEB), + build_header(skb, tunnel_hlen, flags, proto, tunnel_id_to_key(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; @@ -616,7 +617,7 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, const struct iphdr *tnl_params; if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, skb->protocol); return NETDEV_TX_OK; } @@ -660,7 +661,7 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct ip_tunnel *tunnel = netdev_priv(dev); if (tunnel->collect_md) { - gre_fb_xmit(skb, dev); + gre_fb_xmit(skb, dev, htons(ETH_P_TEB)); return NETDEV_TX_OK; } From b7302ca05871e50208bc328cbc8199a21f5d876e Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Wed, 27 Apr 2016 14:24:49 +0300 Subject: [PATCH 28/47] pegasus: fixes URB buffer allocation size; usb_fill_bulk_urb() receives buffer length parameter 8 bytes larger than what's allocated by alloc_skb(); This seems to be a problem with older (pegasus usb-1.1) devices, which may silently return more data than the maximal packet length. Reported-by: Lincoln Ramsay Signed-off-by: Petko Manolov Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f84080215915..f919e20360c1 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -528,7 +528,7 @@ static void read_bulk_callback(struct urb *urb) goon: usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, usb_rcvbulkpipe(pegasus->usb, 1), - pegasus->rx_skb->data, PEGASUS_MTU + 8, + pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); rx_status = usb_submit_urb(pegasus->rx_urb, GFP_ATOMIC); if (rx_status == -ENODEV) @@ -569,7 +569,7 @@ static void rx_fixup(unsigned long data) } usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, usb_rcvbulkpipe(pegasus->usb, 1), - pegasus->rx_skb->data, PEGASUS_MTU + 8, + pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); try_again: status = usb_submit_urb(pegasus->rx_urb, GFP_ATOMIC); @@ -823,7 +823,7 @@ static int pegasus_open(struct net_device *net) usb_fill_bulk_urb(pegasus->rx_urb, pegasus->usb, usb_rcvbulkpipe(pegasus->usb, 1), - pegasus->rx_skb->data, PEGASUS_MTU + 8, + pegasus->rx_skb->data, PEGASUS_MTU, read_bulk_callback, pegasus); if ((res = usb_submit_urb(pegasus->rx_urb, GFP_KERNEL))) { if (res == -ENODEV) From 1a8deec09d12c1a2946f131aa171d5e0479333de Mon Sep 17 00:00:00 2001 From: Petko Manolov Date: Wed, 27 Apr 2016 14:24:50 +0300 Subject: [PATCH 29/47] pegasus: fixes reported packet length The default Pegasus setup was to append the status and CRC at the end of each received packet. The status bits are used to update various stats, but CRC has been ignored. The new default is to not append CRC at the end of RX packets. Signed-off-by: Petko Manolov Signed-off-by: David S. Miller --- drivers/net/usb/pegasus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index f919e20360c1..82129eef7774 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -411,7 +411,7 @@ static int enable_net_traffic(struct net_device *dev, struct usb_device *usb) int ret; read_mii_word(pegasus, pegasus->phy, MII_LPA, &linkpart); - data[0] = 0xc9; + data[0] = 0xc8; /* TX & RX enable, append status, no CRC */ data[1] = 0; if (linkpart & (ADVERTISE_100FULL | ADVERTISE_10FULL)) data[1] |= 0x20; /* set full duplex */ @@ -497,7 +497,7 @@ static void read_bulk_callback(struct urb *urb) pkt_len = buf[count - 3] << 8; pkt_len += buf[count - 4]; pkt_len &= 0xfff; - pkt_len -= 8; + pkt_len -= 4; } /* From 946b636f1730c64e05ff7fe8cf7136422fa8ea70 Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Wed, 27 Apr 2016 14:08:01 +0200 Subject: [PATCH 30/47] gre: reject GUE and FOU in collect metadata mode The collect metadata mode does not support GUE nor FOU. This might be implemented later; until then, we should reject such config. I think this is okay to be changed. It's unlikely anyone has such configuration (as it doesn't work anyway) and we may need a way to distinguish whether it's supported or not by the kernel later. For backwards compatibility with iproute2, it's not possible to just check the attribute presence (iproute2 always includes the attribute), the actual value has to be checked, too. Fixes: 2e15ea390e6f4 ("ip_gre: Add support to collect tunnel metadata.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f973e0a58993..f502d34bcb40 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -948,6 +948,11 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) if (flags & (GRE_VERSION|GRE_ROUTING)) return -EINVAL; + if (data[IFLA_GRE_COLLECT_METADATA] && + data[IFLA_GRE_ENCAP_TYPE] && + nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE) + return -EINVAL; + return 0; } From bbdd09ebd7ce87d2122fcc7d97f35a4f8931bc55 Mon Sep 17 00:00:00 2001 From: Andreas Larsson Date: Wed, 27 Apr 2016 16:46:10 +0200 Subject: [PATCH 31/47] MAINTAINERS: net: Change maintainer for GRETH 10/100/1G Ethernet MAC device driver Signed-off-by: Andreas Larsson Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 17ad61503184..d82a21c86dde 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4903,7 +4903,7 @@ F: net/ipv4/gre_offload.c F: include/net/gre.h GRETH 10/100/1G Ethernet MAC device driver -M: Kristoffer Glembo +M: Andreas Larsson L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/aeroflex/ From 552165bcf7060b998b4a9b5b86110b6a5e04dfd9 Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 27 Apr 2016 21:25:25 -0400 Subject: [PATCH 32/47] drivers: net: cpsw: fix parsing of phy-handle DT property in dual_emac config Commit 9e42f715264ff158478fa30eaed847f6e131366b ("drivers: net: cpsw: add phy-handle parsing") saved the "phy-handle" phandle into a new cpsw_priv field. However, phy connections are per-slave, so the phy_node field should be in cpsw_slave_data rather than cpsw_priv. This would go unnoticed in a single emac configuration. But in dual_emac mode, the last "phy-handle" property parsed for either slave would be used by both of them, causing them both to refer to the same phy_device. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 13 ++++++------- drivers/net/ethernet/ti/cpsw.h | 1 + 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index bbb77cd8ad67..ce0b0caee35b 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -367,7 +367,6 @@ struct cpsw_priv { spinlock_t lock; struct platform_device *pdev; struct net_device *ndev; - struct device_node *phy_node; struct napi_struct napi_rx; struct napi_struct napi_tx; struct device *dev; @@ -1148,8 +1147,8 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); - if (priv->phy_node) - slave->phy = of_phy_connect(priv->ndev, priv->phy_node, + if (slave->data->phy_node) + slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); else slave->phy = phy_connect(priv->ndev, slave->data->phy_id, @@ -1940,12 +1939,11 @@ static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, slave->port_vlan = data->dual_emac_res_vlan; } -static int cpsw_probe_dt(struct cpsw_priv *priv, +static int cpsw_probe_dt(struct cpsw_platform_data *data, struct platform_device *pdev) { struct device_node *node = pdev->dev.of_node; struct device_node *slave_node; - struct cpsw_platform_data *data = &priv->data; int i = 0, ret; u32 prop; @@ -2033,7 +2031,8 @@ static int cpsw_probe_dt(struct cpsw_priv *priv, if (strcmp(slave_node->name, "slave")) continue; - priv->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); + slave_data->phy_node = of_parse_phandle(slave_node, + "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); if (of_phy_is_fixed_link(slave_node)) { struct device_node *phy_node; @@ -2275,7 +2274,7 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); - if (cpsw_probe_dt(priv, pdev)) { + if (cpsw_probe_dt(&priv->data, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; goto clean_runtime_disable_ret; diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h index 442a7038e660..e50afd1b2eda 100644 --- a/drivers/net/ethernet/ti/cpsw.h +++ b/drivers/net/ethernet/ti/cpsw.h @@ -18,6 +18,7 @@ #include struct cpsw_slave_data { + struct device_node *phy_node; char phy_id[MII_BUS_ID_SIZE]; int phy_if; u8 mac_addr[ETH_ALEN]; From d733f7542ad47cf73e033c90cf55158587e1d060 Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 27 Apr 2016 21:32:31 -0400 Subject: [PATCH 33/47] drivers: net: cpsw: fix segfault in case of bad phy-handle If an emac node has a phy-handle property that points to something which is not a phy, then a segmentation fault will occur when the interface is brought up. This is because while phy_connect() will return ERR_PTR() on failure, of_phy_connect() will return NULL. The common error check uses IS_ERR(), and so missed when of_phy_connect() fails. The NULL pointer is then dereferenced. Also, the common error message referenced slave->data->phy_id, which would be empty in the case of phy-handle. Instead, use the name of the device_node as a useful identifier. And in the phy_id case add the error code for completeness. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 37 +++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index ce0b0caee35b..5903448e8ee9 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -1147,25 +1147,34 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); - if (slave->data->phy_node) + if (slave->data->phy_node) { slave->phy = of_phy_connect(priv->ndev, slave->data->phy_node, &cpsw_adjust_link, 0, slave->data->phy_if); - else + if (!slave->phy) { + dev_err(priv->dev, "phy \"%s\" not found on slave %d\n", + slave->data->phy_node->full_name, + slave->slave_num); + return; + } + } else { slave->phy = phy_connect(priv->ndev, slave->data->phy_id, &cpsw_adjust_link, slave->data->phy_if); - if (IS_ERR(slave->phy)) { - dev_err(priv->dev, "phy %s not found on slave %d\n", - slave->data->phy_id, slave->slave_num); - slave->phy = NULL; - } else { - phy_attached_info(slave->phy); - - phy_start(slave->phy); - - /* Configure GMII_SEL register */ - cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, - slave->slave_num); + if (IS_ERR(slave->phy)) { + dev_err(priv->dev, + "phy \"%s\" not found on slave %d, err %ld\n", + slave->data->phy_id, slave->slave_num, + PTR_ERR(slave->phy)); + slave->phy = NULL; + return; + } } + + phy_attached_info(slave->phy); + + phy_start(slave->phy); + + /* Configure GMII_SEL register */ + cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num); } static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) From ae092b5bded24d5dc7dae0e0aef4669c169ce874 Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 27 Apr 2016 21:38:26 -0400 Subject: [PATCH 34/47] drivers: net: cpsw: don't ignore phy-mode if phy-handle is used The phy-mode emac property was only being processed in the phy_id or fixed-link cases. However if phy-handle was specified instead, an error message would complain about the lack of phy_id or fixed-link, and then jump past the of_get_phy_mode(). This would result in the PHY mode defaulting to MII, regardless of what the devicetree specified. Fixes: 9e42f715264f ("drivers: net: cpsw: add phy-handle parsing") Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 5903448e8ee9..712bc6d75f21 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2043,7 +2043,11 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, slave_data->phy_node = of_parse_phandle(slave_node, "phy-handle", 0); parp = of_get_property(slave_node, "phy_id", &lenp); - if (of_phy_is_fixed_link(slave_node)) { + if (slave_data->phy_node) { + dev_dbg(&pdev->dev, + "slave[%d] using phy-handle=\"%s\"\n", + i, slave_data->phy_node->full_name); + } else if (of_phy_is_fixed_link(slave_node)) { struct device_node *phy_node; struct phy_device *phy_dev; @@ -2080,7 +2084,9 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), PHY_ID_FMT, mdio->name, phyid); } else { - dev_err(&pdev->dev, "No slave[%d] phy_id or fixed-link property\n", i); + dev_err(&pdev->dev, + "No slave[%d] phy_id, phy-handle, or fixed-link property\n", + i); goto no_phy_slave; } slave_data->phy_if = of_get_phy_mode(slave_node); From a5d2cb3b27441fe7432852d4198eadda1d9d19be Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 27 Apr 2016 21:42:47 -0400 Subject: [PATCH 35/47] dt: cpsw: phy-handle, phy_id, and fixed-link are mutually exclusive The phy-handle, phy_id, and fixed-link properties are mutually exclusive, and only one need be specified. Make this clear in the binding doc. Also mark the phy_id property as deprecated, as phy-handle should be used instead. Signed-off-by: David Rivshin Reviewed-by: Mugunthan V N Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/cpsw.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt index 28a4781ab6d7..0ae06491b430 100644 --- a/Documentation/devicetree/bindings/net/cpsw.txt +++ b/Documentation/devicetree/bindings/net/cpsw.txt @@ -45,13 +45,13 @@ Required properties: Optional properties: - dual_emac_res_vlan : Specifies VID to be used to segregate the ports - mac-address : See ethernet.txt file in the same directory -- phy_id : Specifies slave phy id +- phy_id : Specifies slave phy id (deprecated, use phy-handle) - phy-handle : See ethernet.txt file in the same directory Slave sub-nodes: - fixed-link : See fixed-link.txt file in the same directory - Either the property phy_id, or the sub-node - fixed-link can be specified + +Note: Exactly one of phy_id, phy-handle, or fixed-link must be specified. Note: "ti,hwmods" field is used to fetch the base address and irq resources from TI, omap hwmod data base during device registration. From 06cd6d6eda4bedbb826ed36e4c89734ea364ec4b Mon Sep 17 00:00:00 2001 From: David Rivshin Date: Wed, 27 Apr 2016 21:45:45 -0400 Subject: [PATCH 36/47] drivers: net: cpsw: use of_phy_connect() in fixed-link case If a fixed-link DT subnode is used, the phy_device was looked up so that a PHY ID string could be constructed and passed to phy_connect(). This is not necessary, as the device_node can be passed directly to of_phy_connect() instead. This reuses the same codepath as if the phy-handle DT property was used. Signed-off-by: David Rivshin Tested-by: Nicolas Chauvet Tested-by: Andrew Goodbody Reviewed-by: Mugunthan V N Reviewed-by: Grygorii Strashko Signed-off-by: David S. Miller --- drivers/net/ethernet/ti/cpsw.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index 712bc6d75f21..e2fcdf1eec44 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2048,22 +2048,13 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data, "slave[%d] using phy-handle=\"%s\"\n", i, slave_data->phy_node->full_name); } else if (of_phy_is_fixed_link(slave_node)) { - struct device_node *phy_node; - struct phy_device *phy_dev; - /* In the case of a fixed PHY, the DT node associated * to the PHY is the Ethernet MAC DT node. */ ret = of_phy_register_fixed_link(slave_node); if (ret) return ret; - phy_node = of_node_get(slave_node); - phy_dev = of_phy_find_device(phy_node); - if (!phy_dev) - return -ENODEV; - snprintf(slave_data->phy_id, sizeof(slave_data->phy_id), - PHY_ID_FMT, phy_dev->mdio.bus->id, - phy_dev->mdio.addr); + slave_data->phy_node = of_node_get(slave_node); } else if (parp) { u32 phyid; struct device_node *mdio_node; From 92117d8443bc5afacc8d5ba82e541946310f106e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:20 -0700 Subject: [PATCH 37/47] bpf: fix refcnt overflow On a system with >32Gbyte of phyiscal memory and infinite RLIMIT_MEMLOCK, the malicious application may overflow 32-bit bpf program refcnt. It's also possible to overflow map refcnt on 1Tb system. Impose 32k hard limit which means that the same bpf program or map cannot be shared by more than 32k processes. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 ++- kernel/bpf/inode.c | 7 ++++--- kernel/bpf/syscall.c | 24 ++++++++++++++++++++---- kernel/bpf/verifier.c | 11 +++++++---- 4 files changed, 33 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b92e8a..f1d5c5acc8dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -void bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index f2ece3c174a5..8f94ca1860cf 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -31,10 +31,10 @@ static void *bpf_any_get(void *raw, enum bpf_type type) { switch (type) { case BPF_TYPE_PROG: - atomic_inc(&((struct bpf_prog *)raw)->aux->refcnt); + raw = bpf_prog_inc(raw); break; case BPF_TYPE_MAP: - bpf_map_inc(raw, true); + raw = bpf_map_inc(raw, true); break; default: WARN_ON_ONCE(1); @@ -297,7 +297,8 @@ static void *bpf_obj_do_get(const struct filename *pathname, goto out; raw = bpf_any_get(inode->i_private, *type); - touch_atime(&path); + if (!IS_ERR(raw)) + touch_atime(&path); path_put(&path); return raw; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index adc5e4bd74f8..cf5e9f7ad13a 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -218,11 +218,18 @@ struct bpf_map *__bpf_map_get(struct fd f) return f.file->private_data; } -void bpf_map_inc(struct bpf_map *map, bool uref) +/* prog's and map's refcnt limit */ +#define BPF_MAX_REFCNT 32768 + +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) { - atomic_inc(&map->refcnt); + if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { + atomic_dec(&map->refcnt); + return ERR_PTR(-EBUSY); + } if (uref) atomic_inc(&map->usercnt); + return map; } struct bpf_map *bpf_map_get_with_uref(u32 ufd) @@ -234,7 +241,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd) if (IS_ERR(map)) return map; - bpf_map_inc(map, true); + map = bpf_map_inc(map, true); fdput(f); return map; @@ -658,6 +665,15 @@ static struct bpf_prog *__bpf_prog_get(struct fd f) return f.file->private_data; } +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) { + atomic_dec(&prog->aux->refcnt); + return ERR_PTR(-EBUSY); + } + return prog; +} + /* called by sockets/tracing/seccomp before attaching program to an event * pairs with bpf_prog_put() */ @@ -670,7 +686,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd) if (IS_ERR(prog)) return prog; - atomic_inc(&prog->aux->refcnt); + prog = bpf_prog_inc(prog); fdput(f); return prog; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index db2574e7b8b0..89bcaa0966da 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2049,15 +2049,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return -E2BIG; } - /* remember this map */ - env->used_maps[env->used_map_cnt++] = map; - /* hold the map. If the program is rejected by verifier, * the map will be released by release_maps() or it * will be used by the valid program until it's unloaded * and all maps are released in free_bpf_prog_info() */ - bpf_map_inc(map, false); + map = bpf_map_inc(map, false); + if (IS_ERR(map)) { + fdput(f); + return PTR_ERR(map); + } + env->used_maps[env->used_map_cnt++] = map; + fdput(f); next_insn: insn++; From 6aff67c85c9e5a4bc99e5211c1bac547936626ca Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:21 -0700 Subject: [PATCH 38/47] bpf: fix check_map_func_compatibility logic The commit 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") introduced clever way to check bpf_helper<->map_type compatibility. Later on commit a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") adjusted the logic and inadvertently broke it. Get rid of the clever bool compare and go back to two-way check from map and from helper perspective. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 65 ++++++++++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 89bcaa0966da..c5c17a62f509 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -239,16 +239,6 @@ static const char * const reg_type_str[] = { [CONST_IMM] = "imm", }; -static const struct { - int map_type; - int func_id; -} func_limit[] = { - {BPF_MAP_TYPE_PROG_ARRAY, BPF_FUNC_tail_call}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_read}, - {BPF_MAP_TYPE_PERF_EVENT_ARRAY, BPF_FUNC_perf_event_output}, - {BPF_MAP_TYPE_STACK_TRACE, BPF_FUNC_get_stackid}, -}; - static void print_verifier_state(struct verifier_env *env) { enum bpf_reg_type t; @@ -921,27 +911,52 @@ static int check_func_arg(struct verifier_env *env, u32 regno, static int check_map_func_compatibility(struct bpf_map *map, int func_id) { - bool bool_map, bool_func; - int i; - if (!map) return 0; - for (i = 0; i < ARRAY_SIZE(func_limit); i++) { - bool_map = (map->map_type == func_limit[i].map_type); - bool_func = (func_id == func_limit[i].func_id); - /* only when map & func pair match it can continue. - * don't allow any other map type to be passed into - * the special func; - */ - if (bool_func && bool_map != bool_func) { - verbose("cannot pass map_type %d into func %d\n", - map->map_type, func_id); - return -EINVAL; - } + /* We need a two way check, first is from map perspective ... */ + switch (map->map_type) { + case BPF_MAP_TYPE_PROG_ARRAY: + if (func_id != BPF_FUNC_tail_call) + goto error; + break; + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + if (func_id != BPF_FUNC_perf_event_read && + func_id != BPF_FUNC_perf_event_output) + goto error; + break; + case BPF_MAP_TYPE_STACK_TRACE: + if (func_id != BPF_FUNC_get_stackid) + goto error; + break; + default: + break; + } + + /* ... and second from the function itself. */ + switch (func_id) { + case BPF_FUNC_tail_call: + if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) + goto error; + break; + case BPF_FUNC_perf_event_read: + case BPF_FUNC_perf_event_output: + if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) + goto error; + break; + case BPF_FUNC_get_stackid: + if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) + goto error; + break; + default: + break; } return 0; +error: + verbose("cannot pass map_type %d into func %d\n", + map->map_type, func_id); + return -EINVAL; } static int check_call(struct verifier_env *env, int func_id) From 569cc39d39385a74b23145496bca2df5ac8b2fb8 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:22 -0700 Subject: [PATCH 39/47] samples/bpf: fix trace_output example llvm cannot always recognize memset as builtin function and optimize it away, so just delete it. It was a leftover from testing of bpf_perf_event_output() with large data structures. Fixes: 39111695b1b8 ("samples: bpf: add bpf_perf_event_output example") Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- samples/bpf/trace_output_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/samples/bpf/trace_output_kern.c b/samples/bpf/trace_output_kern.c index 8d8d1ec429eb..9b96f4fb8cea 100644 --- a/samples/bpf/trace_output_kern.c +++ b/samples/bpf/trace_output_kern.c @@ -18,7 +18,6 @@ int bpf_prog1(struct pt_regs *ctx) u64 cookie; } data; - memset(&data, 0, sizeof(data)); data.pid = bpf_get_current_pid_tgid(); data.cookie = 0x12345678; From f27337e16f2d0e52a8d05ea599ed13cd266ac291 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 28 Apr 2016 11:04:51 +0200 Subject: [PATCH 40/47] ip_tunnel: fix preempt warning in ip tunnel creation/updating After the commit e09acddf873b ("ip_tunnel: replace dst_cache with generic implementation"), a preemption debug warning is triggered on ip4 tunnels updating; the dst cache helper needs to be invoked in unpreemptible context. We don't need to load the cache on tunnel update, so this commit fixes the warning replacing the load with a dst cache reset, which is preempt safe. Fixes: e09acddf873b ("ip_tunnel: replace dst_cache with generic implementation") Reported-by: Eric Dumazet Signed-off-by: Paolo Abeni Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_tunnel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 6aad0192443d..a69ed94bda1b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -326,12 +326,12 @@ static int ip_tunnel_bind_dev(struct net_device *dev) if (!IS_ERR(rt)) { tdev = rt->dst.dev; - dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst, - fl4.saddr); ip_rt_put(rt); } if (dev->type != ARPHRD_ETHER) dev->flags |= IFF_POINTOPOINT; + + dst_cache_reset(&tunnel->dst_cache); } if (!tdev && tunnel->parms.link) From 018f8258582381bcce484312f0e9ec2970d0383e Mon Sep 17 00:00:00 2001 From: Wang Shanker Date: Fri, 29 Apr 2016 01:29:43 +0800 Subject: [PATCH 41/47] net: l2tp: fix reversed udp6 checksum flags This patch fixes a bug which causes the behavior of whether to ignore udp6 checksum of udp6 encapsulated l2tp tunnel contrary to what userspace program requests. When the flag `L2TP_ATTR_UDP_ZERO_CSUM6_RX` is set by userspace, it is expected that udp6 checksums of received packets of the l2tp tunnel to create should be ignored. In `l2tp_netlink.c`: `l2tp_nl_cmd_tunnel_create()`, `cfg.udp6_zero_rx_checksums` is set according to the flag, and then passed to `l2tp_core.c`: `l2tp_tunnel_create()` and then `l2tp_tunnel_sock_create()`. In `l2tp_tunnel_sock_create()`, `udp_conf.use_udp6_rx_checksums` is set the same to `cfg.udp6_zero_rx_checksums`. However, if we want the checksum to be ignored, `udp_conf.use_udp6_rx_checksums` should be set to `false`, i.e. be set to the contrary. Similarly, the same should be done to `udp_conf.use_udp6_tx_checksums`. Signed-off-by: Miao Wang Acked-by: James Chapman Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index afca2eb4dfa7..6edfa9980314 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1376,9 +1376,9 @@ static int l2tp_tunnel_sock_create(struct net *net, memcpy(&udp_conf.peer_ip6, cfg->peer_ip6, sizeof(udp_conf.peer_ip6)); udp_conf.use_udp6_tx_checksums = - cfg->udp6_zero_tx_checksums; + ! cfg->udp6_zero_tx_checksums; udp_conf.use_udp6_rx_checksums = - cfg->udp6_zero_rx_checksums; + ! cfg->udp6_zero_rx_checksums; } else #endif { From 90e5d0db2b221f0cbbb91e9e61fdb7dbb9e1afc2 Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Thu, 28 Apr 2016 19:24:32 -0400 Subject: [PATCH 42/47] soreuseport: Fix TCP listener hash collision I forgot to include a check for listener port equality when deciding if two sockets should belong to the same reuseport group. This was not caught previously because it's only necessary when two listening sockets for the same user happen to hash to the same listener bucket. The same error does not exist in the UDP path. Fixes: c125e80b8868("soreuseport: fast reuseport TCP socket selection") Signed-off-by: Craig Gallek Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/inet_hashtables.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bc68eced0105..0d9e9d7bb029 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -470,6 +470,7 @@ static int inet_reuseport_add_sock(struct sock *sk, const struct sock *sk2, bool match_wildcard)) { + struct inet_bind_bucket *tb = inet_csk(sk)->icsk_bind_hash; struct sock *sk2; struct hlist_nulls_node *node; kuid_t uid = sock_i_uid(sk); @@ -479,6 +480,7 @@ static int inet_reuseport_add_sock(struct sock *sk, sk2->sk_family == sk->sk_family && ipv6_only_sock(sk2) == ipv6_only_sock(sk) && sk2->sk_bound_dev_if == sk->sk_bound_dev_if && + inet_csk(sk2)->icsk_bind_hash == tb && sk2->sk_reuseport && uid_eq(uid, sock_i_uid(sk2)) && saddr_same(sk, sk2, false)) return reuseport_add_sock(sk, sk2); From c489565b536ff5382460273fd9513f0adebec024 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 29 Apr 2016 09:05:59 +0200 Subject: [PATCH 43/47] net/smscx5xx: use the device tree for mac address This takes the MAC address for smsc75xx/smsc95xx USB network devices from a the device tree. This is required to get a usable persistent address on the popular beagleboard, whose hardware designers accidentally forgot that an ethernet device really requires an a MAC address to be functional. The Raspberry Pi also ships smsc9514 without a serial EEPROM, stores the MAC address in ROM accessible via VC4 firmware. The smsc75xx and smsc95xx drivers are just two copies of the same code, so better fix both. [lkundrak@v3.sk: updated to use of_get_property() as per suggestion from Arnd, reworded the message and comments a bit] Tested-by: Lubomir Rintel Signed-off-by: Arnd Bergmann Signed-off-by: Lubomir Rintel Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 12 +++++++++++- drivers/net/usb/smsc95xx.c | 12 +++++++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index 30033dbe6662..c369db99c005 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "smsc75xx.h" #define SMSC_CHIPNAME "smsc75xx" @@ -761,6 +762,15 @@ static int smsc75xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc75xx_init_mac_address(struct usbnet *dev) { + const u8 *mac_addr; + + /* maybe the boot loader passed the MAC address in devicetree */ + mac_addr = of_get_mac_address(dev->udev->dev.of_node); + if (mac_addr) { + memcpy(dev->net->dev_addr, mac_addr, ETH_ALEN); + return; + } + /* try reading mac address from EEPROM */ if (smsc75xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, dev->net->dev_addr) == 0) { @@ -772,7 +782,7 @@ static void smsc75xx_init_mac_address(struct usbnet *dev) } } - /* no eeprom, or eeprom values are invalid. generate random MAC */ + /* no useful static MAC address found. generate a random one */ eth_hw_addr_random(dev->net); netif_dbg(dev, ifup, dev->net, "MAC address set to eth_random_addr\n"); } diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 66b3ab9f614e..2edc2bc6d1b9 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" @@ -765,6 +766,15 @@ static int smsc95xx_ioctl(struct net_device *netdev, struct ifreq *rq, int cmd) static void smsc95xx_init_mac_address(struct usbnet *dev) { + const u8 *mac_addr; + + /* maybe the boot loader passed the MAC address in devicetree */ + mac_addr = of_get_mac_address(dev->udev->dev.of_node); + if (mac_addr) { + memcpy(dev->net->dev_addr, mac_addr, ETH_ALEN); + return; + } + /* try reading mac address from EEPROM */ if (smsc95xx_read_eeprom(dev, EEPROM_MAC_OFFSET, ETH_ALEN, dev->net->dev_addr) == 0) { @@ -775,7 +785,7 @@ static void smsc95xx_init_mac_address(struct usbnet *dev) } } - /* no eeprom, or eeprom values are invalid. generate random MAC */ + /* no useful static MAC address found. generate a random one */ eth_hw_addr_random(dev->net); netif_dbg(dev, ifup, dev->net, "MAC address set to eth_random_addr\n"); } From 0b86a2a1e5807326f8eb44e9919d0baadeda3a69 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Fri, 29 Apr 2016 11:06:50 +0200 Subject: [PATCH 44/47] cxgb3: fix out of bounds read An out of bounds read of 2 bytes was discovered in cxgb3 with KASAN. t3_config_rss() expects both arrays it gets as parameters to have terminators. setup_rss(), the caller, forgets to add a terminator to one of the arrays. Thankfully the iteration in t3_config_rss() stops anyway, but in the last iteration the check for the terminator is an out of bounds read. Add the missing terminator to rspq_map[]. Reported-by: Jan Stancek Signed-off-by: Michal Schmidt Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c index 60908eab3b3a..43da891fab97 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c +++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c @@ -576,7 +576,7 @@ static void setup_rss(struct adapter *adap) unsigned int nq0 = adap2pinfo(adap, 0)->nqsets; unsigned int nq1 = adap->port[1] ? adap2pinfo(adap, 1)->nqsets : 1; u8 cpus[SGE_QSETS + 1]; - u16 rspq_map[RSS_TABLE_SIZE]; + u16 rspq_map[RSS_TABLE_SIZE + 1]; for (i = 0; i < SGE_QSETS; ++i) cpus[i] = i; @@ -586,6 +586,7 @@ static void setup_rss(struct adapter *adap) rspq_map[i] = i % nq0; rspq_map[i + RSS_TABLE_SIZE / 2] = (i % nq1) + nq0; } + rspq_map[RSS_TABLE_SIZE] = 0xffff; /* terminator */ t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN | F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | From efe790502be85c60daa65c8aa51f05c333186e12 Mon Sep 17 00:00:00 2001 From: Hamish Martin Date: Fri, 29 Apr 2016 10:40:24 -0400 Subject: [PATCH 45/47] tipc: only process unicast on intended node We have observed complete lock up of broadcast-link transmission due to unacknowledged packets never being removed from the 'transmq' queue. This is traced to nodes having their ack field set beyond the sequence number of packets that have actually been transmitted to them. Consider an example where node 1 has sent 10 packets to node 2 on a link and node 3 has sent 20 packets to node 2 on another link. We see examples of an ack from node 2 destined for node 3 being treated as an ack from node 2 at node 1. This leads to the ack on the node 1 to node 2 link being increased to 20 even though we have only sent 10 packets. When node 1 does get around to sending further packets, none of the packets with sequence numbers less than 21 are actually removed from the transmq. To resolve this we reinstate some code lost in commit d999297c3dbb ("tipc: reduce locking scope during packet reception") which ensures that only messages destined for the receiving node are processed by that node. This prevents the sequence numbers from getting out of sync and resolves the packet leakage, thereby resolving the broadcast-link transmission lock-ups we observed. While we are aware that this change only patches over a root problem that we still haven't identified, this is a sanity test that it is always legitimate to do. It will remain in the code even after we identify and fix the real problem. Reviewed-by: Chris Packham Reviewed-by: John Thompson Signed-off-by: Hamish Martin Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/node.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index ace178fd3850..9aaa1bc566ae 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1444,6 +1444,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) int bearer_id = b->identity; struct tipc_link_entry *le; u16 bc_ack = msg_bcast_ack(hdr); + u32 self = tipc_own_addr(net); int rc = 0; __skb_queue_head_init(&xmitq); @@ -1460,6 +1461,10 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) return tipc_node_bc_rcv(net, skb, bearer_id); } + /* Discard unicast link messages destined for another node */ + if (unlikely(!msg_short(hdr) && (msg_destnode(hdr) != self))) + goto discard; + /* Locate neighboring node that sent packet */ n = tipc_node_find(net, msg_prevnode(hdr)); if (unlikely(!n)) From 2c94b53738549d81dc7464a32117d1f5112c64d3 Mon Sep 17 00:00:00 2001 From: Tim Bingham Date: Fri, 29 Apr 2016 13:30:23 -0400 Subject: [PATCH 46/47] net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the implementation of net_dbg_ratelimited() was buggy for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases. The bug was that net_ratelimit() was being called and, despite returning true, nothing was being printed to the console. This resulted in messages like the following - "net_ratelimit: %d callbacks suppressed" with no other output nearby. After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the bug is fixed for the DEBUG case. However, there's no output at all for CONFIG_DYNAMIC_DEBUG case. This patch restores debug output (if enabled) for the CONFIG_DYNAMIC_DEBUG case. Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG case. The implementation takes care to check that dynamic debugging is enabled before calling net_ratelimit(). Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") Signed-off-by: Tim Bingham Signed-off-by: David S. Miller --- include/linux/net.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index 49175e4ced11..f840d77c6c31 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -246,7 +246,15 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) -#if defined(DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) +#define net_dbg_ratelimited(fmt, ...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + net_ratelimit()) \ + __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ +} while (0) +#elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else From b7f8fe251e4609e2a437bd2c2dea01e61db6849c Mon Sep 17 00:00:00 2001 From: Jiri Benc Date: Fri, 29 Apr 2016 23:31:32 +0200 Subject: [PATCH 47/47] gre: do not pull header in ICMP error processing iptunnel_pull_header expects that IP header was already pulled; with this expectation, it pulls the tunnel header. This is not true in gre_err. Furthermore, ipv4_update_pmtu and ipv4_redirect expect that skb->data points to the IP header. We cannot pull the tunnel header in this path. It's just a matter of not calling iptunnel_pull_header - we don't need any of its effects. Fixes: bda7bb463436 ("gre: Allow multiple protocol listener for gre protocol.") Signed-off-by: Jiri Benc Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index f502d34bcb40..205a2b8a5a84 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -179,6 +179,7 @@ static __be16 tnl_flags_to_gre_flags(__be16 tflags) return flags; } +/* Fills in tpi and returns header length to be pulled. */ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, bool *csum_err) { @@ -238,7 +239,7 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, return -EINVAL; } } - return iptunnel_pull_header(skb, hdr_len, tpi->proto, false); + return hdr_len; } static void ipgre_err(struct sk_buff *skb, u32 info, @@ -341,7 +342,7 @@ static void gre_err(struct sk_buff *skb, u32 info) struct tnl_ptk_info tpi; bool csum_err = false; - if (parse_gre_header(skb, &tpi, &csum_err)) { + if (parse_gre_header(skb, &tpi, &csum_err) < 0) { if (!csum_err) /* ignore csum errors. */ return; } @@ -419,6 +420,7 @@ static int gre_rcv(struct sk_buff *skb) { struct tnl_ptk_info tpi; bool csum_err = false; + int hdr_len; #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(ip_hdr(skb)->daddr)) { @@ -428,7 +430,10 @@ static int gre_rcv(struct sk_buff *skb) } #endif - if (parse_gre_header(skb, &tpi, &csum_err) < 0) + hdr_len = parse_gre_header(skb, &tpi, &csum_err); + if (hdr_len < 0) + goto drop; + if (iptunnel_pull_header(skb, hdr_len, tpi.proto, false) < 0) goto drop; if (ipgre_rcv(skb, &tpi) == PACKET_RCVD)