From 1769192a3c50778e03352a3d95faec830d47ba55 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 11 May 2011 18:22:36 +0000 Subject: [PATCH 01/19] l2tp: fix potential rcu race While trying to remove useless synchronize_rcu() calls, I found l2tp is indeed incorrectly using two of such calls, but also bumps tunnel refcount after list insertion. tunnel refcount must be incremented before being made publically visible by rcu readers. This fix can be applied to 2.6.35+ and might need a backport for older kernels, since things were shuffled in commit fd558d186df2c (l2tp: Split pppol2tp patch into separate l2tp and ppp parts) Signed-off-by: Eric Dumazet CC: Paul E. McKenney CC: James Chapman Reviewed-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 9be095e00450..ed8a2335442f 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1435,16 +1435,15 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - spin_lock_bh(&pn->l2tp_tunnel_list_lock); - list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); - spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - synchronize_rcu(); atomic_inc(&l2tp_tunnel_count); /* Bump the reference count. The tunnel context is deleted - * only when this drops to zero. + * only when this drops to zero. Must be done before list insertion */ l2tp_tunnel_inc_refcount(tunnel); + spin_lock_bh(&pn->l2tp_tunnel_list_lock); + list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list); + spin_unlock_bh(&pn->l2tp_tunnel_list_lock); err = 0; err: @@ -1636,7 +1635,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn hlist_add_head_rcu(&session->global_hlist, l2tp_session_id_hash_2(pn, session_id)); spin_unlock_bh(&pn->l2tp_session_hlist_lock); - synchronize_rcu(); } /* Ignore management session in session count value */ From 517aa0bcda9b092a4c3fab7bf93f0cebe372ece0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 12 May 2011 11:27:20 +0000 Subject: [PATCH 02/19] sctp: sctp_sendmsg: Don't initialize default_sinfo This variable only needs initialization when cmsgs.info is NULL. Use memset to ensure padding is also zeroed so kernel doesn't leak any data. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sctp/socket.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33d9ee629b4e..d4b8db177e27 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1496,7 +1496,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct sctp_chunk *chunk; union sctp_addr to; struct sockaddr *msg_name = NULL; - struct sctp_sndrcvinfo default_sinfo = { 0 }; + struct sctp_sndrcvinfo default_sinfo; struct sctp_sndrcvinfo *sinfo; struct sctp_initmsg *sinit; sctp_assoc_t associd = 0; @@ -1760,6 +1760,7 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, /* If the user didn't specify SNDRCVINFO, make up one with * some defaults. */ + memset(&default_sinfo, 0, sizeof(default_sinfo)); default_sinfo.sinfo_stream = asoc->default_stream; default_sinfo.sinfo_flags = asoc->default_flags; default_sinfo.sinfo_ppid = asoc->default_ppid; From afd7614c00e364f8f1327e73ad291b02f6d4d1a6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 12 May 2011 09:19:10 +0000 Subject: [PATCH 03/19] sctp: sctp_sendmsg: Don't test known non-null sinfo It's already known non-null above. Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- net/sctp/socket.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d4b8db177e27..6766913a53e6 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1791,12 +1791,10 @@ SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, goto out_free; } - if (sinfo) { - /* Check for invalid stream. */ - if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { - err = -EINVAL; - goto out_free; - } + /* Check for invalid stream. */ + if (sinfo->sinfo_stream >= asoc->c.sinit_num_ostreams) { + err = -EINVAL; + goto out_free; } timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); From e154b639bbe53dc91d1873cd37d162bb2fe87aab Mon Sep 17 00:00:00 2001 From: Shreyas Bhatewara Date: Tue, 10 May 2011 06:13:56 +0000 Subject: [PATCH 04/19] vmxnet3: Use single tx queue when CONFIG_PCI_MSI not defined Resending this patch with few changes. Avoid multiple queues when MSI or MSI-X not available Limit number of Tx queues to 1 if MSI/MSI-X support is not configured in the kernel. This will make number of tx and rx queues equal when MSI/X is not configured thus providing better performance. Signed-off-by: Bhavesh Davda Signed-off-by: Shreyas N Bhatewara Signed-off-by: David S. Miller --- drivers/net/vmxnet3/vmxnet3_drv.c | 3 +++ drivers/net/vmxnet3/vmxnet3_int.h | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index c0da23096160..fa6e2ac7475a 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -2884,6 +2884,9 @@ vmxnet3_probe_device(struct pci_dev *pdev, int num_tx_queues; int num_rx_queues; + if (!pci_msi_enabled()) + enable_mq = 0; + #ifdef VMXNET3_RSS if (enable_mq) num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES, diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index 8ba7b5f67de2..f50d36fdf405 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -68,10 +68,10 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.0.25.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.1.9.0-k" /* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01001900 +#define VMXNET3_DRIVER_VERSION_NUM 0x01010900 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ From f607a158004d75c9005423ce1ba70dc6ec3dd9c2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 12 May 2011 17:46:56 -0400 Subject: [PATCH 05/19] garp: remove last synchronize_rcu() call When removing last vlan from a device, garp_uninit_applicant() calls synchronize_rcu() to make sure no user can still manipulate struct garp_applicant before we free it. Use call_rcu() instead, as a step to further net_device dismantle optimizations. Add the temporary garp_cleanup_module() function to make sure no pending call_rcu() are left at module unload time [ this will be removed when kfree_rcu() is available ] Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/garp.h | 1 + net/802/garp.c | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/net/garp.h b/include/net/garp.h index 8cabbf087169..834d8add9e5f 100644 --- a/include/net/garp.h +++ b/include/net/garp.h @@ -104,6 +104,7 @@ struct garp_applicant { struct sk_buff_head queue; struct sk_buff *pdu; struct rb_root gid; + struct rcu_head rcu; }; struct garp_port { diff --git a/net/802/garp.c b/net/802/garp.c index 5dbe8967bbd5..f8300a8b5fbc 100644 --- a/net/802/garp.c +++ b/net/802/garp.c @@ -603,6 +603,11 @@ err1: } EXPORT_SYMBOL_GPL(garp_init_applicant); +static void garp_app_kfree_rcu(struct rcu_head *head) +{ + kfree(container_of(head, struct garp_applicant, rcu)); +} + void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl) { struct garp_port *port = rtnl_dereference(dev->garp_port); @@ -611,7 +616,6 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl ASSERT_RTNL(); rcu_assign_pointer(port->applicants[appl->type], NULL); - synchronize_rcu(); /* Delete timer and generate a final TRANSMIT_PDU event to flush out * all pending messages before the applicant is gone. */ @@ -621,7 +625,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl garp_queue_xmit(app); dev_mc_del(dev, appl->proto.group_address); - kfree(app); + call_rcu(&app->rcu, garp_app_kfree_rcu); garp_release_port(dev); } EXPORT_SYMBOL_GPL(garp_uninit_applicant); @@ -639,3 +643,9 @@ void garp_unregister_application(struct garp_application *appl) stp_proto_unregister(&appl->proto); } EXPORT_SYMBOL_GPL(garp_unregister_application); + +static void __exit garp_cleanup_module(void) +{ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ +} +module_exit(garp_cleanup_module); From 864b5418ebc16893cfd27ff3cda254eff0f56d6f Mon Sep 17 00:00:00 2001 From: Franco Fichtner Date: Thu, 12 May 2011 06:42:04 +0000 Subject: [PATCH 06/19] ethtool: bring back missing comma in netdev features strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The issue was introduced in commit eed2a12f1ed9aabf. Signed-off-by: Franco Fichtner Acked-by: Michał Mirosław Acked-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index b6f405888538..b8c2b10f397a 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -361,7 +361,7 @@ static const char netdev_features_strings[ETHTOOL_DEV_FEATURE_WORDS * 32][ETH_GS /* NETIF_F_NTUPLE */ "rx-ntuple-filter", /* NETIF_F_RXHASH */ "rx-hashing", /* NETIF_F_RXCSUM */ "rx-checksum", - /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy" + /* NETIF_F_NOCACHE_COPY */ "tx-nocache-copy", /* NETIF_F_LOOPBACK */ "loopback", }; From f0a619ccfb8a2ec8ff083a02299cfd076c362b27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Fri, 6 May 2011 07:56:29 +0000 Subject: [PATCH 07/19] net: Fix vlan_features propagation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix VLAN features propagation for devices which change vlan_features. For this to work, driver needs to make sure netdev_features_changed() gets called after the change (it is e.g. after ndo_set_features()). Side effect is that a user might request features that will never be enabled on a VLAN device. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- net/8021q/vlan_dev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 58f8010e1aef..f247f5bff88d 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -528,7 +528,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_DORMANT))) | (1<<__LINK_STATE_PRESENT); - dev->hw_features = real_dev->vlan_features & NETIF_F_ALL_TX_OFFLOADS; + dev->hw_features = NETIF_F_ALL_TX_OFFLOADS; dev->features |= real_dev->vlan_features | NETIF_F_LLTX; dev->gso_max_size = real_dev->gso_max_size; @@ -587,9 +587,11 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features) { struct net_device *real_dev = vlan_dev_info(dev)->real_dev; - features &= (real_dev->features | NETIF_F_LLTX); + features &= real_dev->features; + features &= real_dev->vlan_features; if (dev_ethtool_get_rx_csum(real_dev)) features |= NETIF_F_RXCSUM; + features |= NETIF_F_LLTX; return features; } From 7f267051bd7a280265b1b5ead58e9c6e4e1ac3a4 Mon Sep 17 00:00:00 2001 From: Yi Zou Date: Mon, 9 May 2011 11:53:27 +0000 Subject: [PATCH 08/19] net: group FCoE related feature flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Michał Mirosław's patch (http://patchwork.ozlabs.org/patch/94421/) fixes the issue (http://patchwork.ozlabs.org/patch/94188/) about not populating FCoE related flags correctly on vlan devices. However, only NETIF_F_FCOE_CRC is part of the NETIF_F_ALL_TX_OFFLOADS right now, where weed NETIF_F_FCOE_MTU and NETIF_F_FSO as well. Therefore, add NETIF_F_ALL_FCOE to indicate feature flags used by FCoE TX offloads. These include NETIF_F_FCOE_CRC, NETIF_F_FCOE_MTU, and NETIF_F_FSO and add them to be part of NETIF_F_ALL_TX_OFFLOADS. This would eventually make sure all FCoE needed flags are populated properly to vlan devices. Signed-off-by: Yi Zou Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e7244ed1f9a8..00d650c74800 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1097,10 +1097,14 @@ struct net_device { #define NETIF_F_ALL_TSO (NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN) +#define NETIF_F_ALL_FCOE (NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \ + NETIF_F_FSO) + #define NETIF_F_ALL_TX_OFFLOADS (NETIF_F_ALL_CSUM | NETIF_F_SG | \ NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \ NETIF_F_HIGHDMA | \ - NETIF_F_SCTP_CSUM | NETIF_F_FCOE_CRC) + NETIF_F_SCTP_CSUM | \ + NETIF_F_ALL_FCOE) /* * If one device supports one of these features, then enable them From cbfd1526f6076ece92b4e2dcc0a2f1e89041b3bd Mon Sep 17 00:00:00 2001 From: Amit Virdi Date: Thu, 12 May 2011 01:04:40 +0000 Subject: [PATCH 09/19] net/irda/ircomm_tty.c: Use flip buffers to deliver data use tty_insert_flip_string and tty_flip_buffer_push to deliver incoming data packets from the IrDA device instead of delivering the packets directly to the line discipline. Following later approach resulted in warning "Sleeping function called from invalid context". Signed-off-by: Amit Virdi Acked-by: Alan Cox Signed-off-by: David S. Miller --- net/irda/ircomm/ircomm_tty.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index a39cca8331df..b3cc8b3989a9 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include /* for MODULE_ALIAS_CHARDEV_MAJOR */ @@ -1132,7 +1133,6 @@ static int ircomm_tty_data_indication(void *instance, void *sap, struct sk_buff *skb) { struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - struct tty_ldisc *ld; IRDA_DEBUG(2, "%s()\n", __func__ ); @@ -1161,15 +1161,11 @@ static int ircomm_tty_data_indication(void *instance, void *sap, } /* - * Just give it over to the line discipline. There is no need to - * involve the flip buffers, since we are not running in an interrupt - * handler + * Use flip buffer functions since the code may be called from interrupt + * context */ - - ld = tty_ldisc_ref(self->tty); - if (ld) - ld->ops->receive_buf(self->tty, skb->data, NULL, skb->len); - tty_ldisc_deref(ld); + tty_insert_flip_string(self->tty, skb->data, skb->len); + tty_flip_buffer_push(self->tty); /* No need to kfree_skb - see ircomm_ttp_data_indication() */ From 06c03c02ea528af0cbce50ce45ddd6a361864550 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Sun, 8 May 2011 06:51:48 +0000 Subject: [PATCH 10/19] tg3: Allow ethtool to enable/disable loopback. This patch adds tg3_set_features() to handle loopback mode. Currently the capability is added for the devices which support internal MAC loopback mode. So when enabled, it enables internal-MAC loopback. Signed-off-by: Mahesh Bandewar Signed-off-by: David S. Miller --- drivers/net/tg3.c | 69 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 67 insertions(+), 2 deletions(-) diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index ec1953043102..d5a1f9e3794c 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3373,8 +3373,8 @@ relink: tg3_phy_copper_begin(tp); tg3_readphy(tp, MII_BMSR, &bmsr); - if (!tg3_readphy(tp, MII_BMSR, &bmsr) && - (bmsr & BMSR_LSTATUS)) + if ((!tg3_readphy(tp, MII_BMSR, &bmsr) && (bmsr & BMSR_LSTATUS)) || + (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)) current_link_up = 1; } @@ -6309,6 +6309,42 @@ dma_error: return NETDEV_TX_OK; } +static void tg3_set_loopback(struct net_device *dev, u32 features) +{ + struct tg3 *tp = netdev_priv(dev); + + if (features & NETIF_F_LOOPBACK) { + if (tp->mac_mode & MAC_MODE_PORT_INT_LPBACK) + return; + + /* + * Clear MAC_MODE_HALF_DUPLEX or you won't get packets back in + * loopback mode if Half-Duplex mode was negotiated earlier. + */ + tp->mac_mode &= ~MAC_MODE_HALF_DUPLEX; + + /* Enable internal MAC loopback mode */ + tp->mac_mode |= MAC_MODE_PORT_INT_LPBACK; + spin_lock_bh(&tp->lock); + tw32(MAC_MODE, tp->mac_mode); + netif_carrier_on(tp->dev); + spin_unlock_bh(&tp->lock); + netdev_info(dev, "Internal MAC loopback mode enabled.\n"); + } else { + if (!(tp->mac_mode & MAC_MODE_PORT_INT_LPBACK)) + return; + + /* Disable internal MAC loopback mode */ + tp->mac_mode &= ~MAC_MODE_PORT_INT_LPBACK; + spin_lock_bh(&tp->lock); + tw32(MAC_MODE, tp->mac_mode); + /* Force link status check */ + tg3_setup_phy(tp, 1); + spin_unlock_bh(&tp->lock); + netdev_info(dev, "Internal MAC loopback mode disabled.\n"); + } +} + static u32 tg3_fix_features(struct net_device *dev, u32 features) { struct tg3 *tp = netdev_priv(dev); @@ -6319,6 +6355,16 @@ static u32 tg3_fix_features(struct net_device *dev, u32 features) return features; } +static int tg3_set_features(struct net_device *dev, u32 features) +{ + u32 changed = dev->features ^ features; + + if ((changed & NETIF_F_LOOPBACK) && netif_running(dev)) + tg3_set_loopback(dev, features); + + return 0; +} + static inline void tg3_set_mtu(struct net_device *dev, struct tg3 *tp, int new_mtu) { @@ -9485,6 +9531,13 @@ static int tg3_open(struct net_device *dev) netif_tx_start_all_queues(dev); + /* + * Reset loopback feature if it was turned on while the device was down + * make sure that it's installed properly now. + */ + if (dev->features & NETIF_F_LOOPBACK) + tg3_set_loopback(dev, dev->features); + return 0; err_out3: @@ -15033,6 +15086,7 @@ static const struct net_device_ops tg3_netdev_ops = { .ndo_tx_timeout = tg3_tx_timeout, .ndo_change_mtu = tg3_change_mtu, .ndo_fix_features = tg3_fix_features, + .ndo_set_features = tg3_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tg3_poll_controller, #endif @@ -15049,6 +15103,7 @@ static const struct net_device_ops tg3_netdev_ops_dma_bug = { .ndo_do_ioctl = tg3_ioctl, .ndo_tx_timeout = tg3_tx_timeout, .ndo_change_mtu = tg3_change_mtu, + .ndo_set_features = tg3_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tg3_poll_controller, #endif @@ -15246,6 +15301,16 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, dev->features |= hw_features; dev->vlan_features |= hw_features; + /* + * Add loopback capability only for a subset of devices that support + * MAC-LOOPBACK. Eventually this need to be enhanced to allow INT-PHY + * loopback for the remaining devices. + */ + if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5780 && + !tg3_flag(tp, CPMU_PRESENT)) + /* Add the loopback capability */ + dev->hw_features |= NETIF_F_LOOPBACK; + if (tp->pci_chip_rev_id == CHIPREV_ID_5705_A1 && !tg3_flag(tp, TSO_CAPABLE) && !(tr32(TG3PCI_PCISTATE) & PCISTATE_BUS_SPEED_HIGH)) { From e58b34425bfcb08c6bc8c520b82c37ffcec87072 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 18:22:34 -0400 Subject: [PATCH 11/19] ipvs: Use IP_VS_RT_MODE_* instead of magic constants. [ Add some cases I missed, from Julian Anastasov ] Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_xmit.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 6132b213eddc..5d393c5a802d 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -229,8 +229,6 @@ out_err: /* * Get route to destination or remote server - * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, - * &4=Allow redirect from remote daddr to local */ static struct rt6_info * __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, @@ -274,13 +272,14 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, } local = __ip_vs_is_local_route6(rt); - if (!((local ? 1 : 2) & rt_mode)) { + if (!((local ? IP_VS_RT_MODE_LOCAL : IP_VS_RT_MODE_NON_LOCAL) & + rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", local ? "local":"non-local", daddr); dst_release(&rt->dst); return NULL; } - if (local && !(rt_mode & 4) && + if (local && !(rt_mode & IP_VS_RT_MODE_RDR) && !((ort = (struct rt6_info *) skb_dst(skb)) && __ip_vs_is_local_route6(ort))) { IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " @@ -440,7 +439,8 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2))) + if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, + IP_VS_RT_MODE_NON_LOCAL))) goto tx_error_icmp; /* MTU checking */ @@ -632,7 +632,9 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, 1|2|4))) + 0, (IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR)))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); /* @@ -875,7 +877,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, - &saddr, 1, 1|2))) + &saddr, 1, (IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL)))) goto tx_error_icmp; if (__ip_vs_is_local_route6(rt)) { dst_release(&rt->dst); @@ -1050,7 +1053,8 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, 1|2))) + 0, (IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL)))) goto tx_error_icmp; if (__ip_vs_is_local_route6(rt)) { dst_release(&rt->dst); @@ -1254,7 +1258,9 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, */ if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, 1|2|4))) + 0, (IP_VS_RT_MODE_LOCAL | + IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR)))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); From 44e3125ccd521585e73e6dd228b283ab26993c68 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 9 May 2011 14:38:06 -0700 Subject: [PATCH 12/19] ipvs: Eliminate rt->rt_dst usage in __ip_vs_get_out_rt(). We can simply track what destination address is used based upon which code block is taken at the top of the function. Signed-off-by: David S. Miller --- net/netfilter/ipvs/ip_vs_xmit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 5d393c5a802d..3f3e0f4c38a5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -110,6 +110,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, &dest->addr.ip, atomic_read(&rt->dst.__refcnt), rtos); } + daddr = dest->addr.ip; spin_unlock(&dest->dst_lock); } else { rt = ip_route_output(net, daddr, 0, rtos, 0); @@ -125,7 +126,7 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, rt_mode)) { IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", (rt->rt_flags & RTCF_LOCAL) ? - "local":"non-local", &rt->rt_dst); + "local":"non-local", &daddr); ip_rt_put(rt); return NULL; } @@ -133,14 +134,14 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, !((ort = skb_rtable(skb)) && ort->rt_flags & RTCF_LOCAL)) { IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " "requires NAT method, dest: %pI4\n", - &ip_hdr(skb)->daddr, &rt->rt_dst); + &ip_hdr(skb)->daddr, &daddr); ip_rt_put(rt); return NULL; } if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " "to non-local address, dest: %pI4\n", - &ip_hdr(skb)->saddr, &rt->rt_dst); + &ip_hdr(skb)->saddr, &daddr); ip_rt_put(rt); return NULL; } From c92f5ca2e5120796c56455e0a4b7cc0dfd6ceb49 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 10 May 2011 12:46:05 +0000 Subject: [PATCH 13/19] ipvs: Remove all remaining references to rt->rt_{src,dst} Remove all remaining references to rt->rt_{src,dst} by using dest->dst_saddr to cache saddr (used for TUN mode). For ICMP in FORWARD hook just restrict the rt_mode for NAT to disable LOCALNODE. All other modes do not allow IP_VS_RT_MODE_RDR, so we should be safe with the ICMP forwarding. Using cp->daddr as replacement for rt_dst is safe for all modes except BYPASS, even when cp->dest is NULL because it is cp->daddr that is used to assign cp->dest for sync-ed connections. Signed-off-by: Julian Anastasov Signed-off-by: David S. Miller --- include/net/ip_vs.h | 9 ++--- net/netfilter/ipvs/ip_vs_core.c | 24 +---------- net/netfilter/ipvs/ip_vs_xmit.c | 72 ++++++++++++++++++++++----------- 3 files changed, 54 insertions(+), 51 deletions(-) diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d1f510ab6d0..4fff432aeade 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -665,9 +665,7 @@ struct ip_vs_dest { struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ u32 dst_cookie; -#ifdef CONFIG_IP_VS_IPV6 - struct in6_addr dst_saddr; -#endif + union nf_inet_addr dst_saddr; /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ @@ -1253,7 +1251,8 @@ extern int ip_vs_tunnel_xmit extern int ip_vs_dr_xmit (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); extern int ip_vs_icmp_xmit -(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, int offset); +(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, + int offset, unsigned int hooknum); extern void ip_vs_dst_reset(struct ip_vs_dest *dest); #ifdef CONFIG_IP_VS_IPV6 @@ -1267,7 +1266,7 @@ extern int ip_vs_dr_xmit_v6 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); extern int ip_vs_icmp_xmit_v6 (struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp, - int offset); + int offset, unsigned int hooknum); #endif #ifdef CONFIG_SYSCTL diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a74dae6c5dbc..bfa808f4da13 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1382,15 +1382,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit(skb, cp, pp, offset); - /* LOCALNODE from FORWARD hook is not supported */ - if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && - skb_rtable(skb)->rt_flags & RTCF_LOCAL) { - IP_VS_DBG(1, "%s(): " - "local delivery to %pI4 but in FORWARD\n", - __func__, &skb_rtable(skb)->rt_dst); - verdict = NF_DROP; - } + verdict = ip_vs_icmp_xmit(skb, cp, pp, offset, hooknum); out: __ip_vs_conn_put(cp); @@ -1412,7 +1404,6 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) struct ip_vs_protocol *pp; struct ip_vs_proto_data *pd; unsigned int offset, verdict; - struct rt6_info *rt; *related = 1; @@ -1474,23 +1465,12 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (!cp) return NF_ACCEPT; - verdict = NF_DROP; - /* do the statistics and put it back */ ip_vs_in_stats(cp, skb); if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr || IPPROTO_SCTP == cih->nexthdr) offset += 2 * sizeof(__u16); - verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); - /* LOCALNODE from FORWARD hook is not supported */ - if (verdict == NF_ACCEPT && hooknum == NF_INET_FORWARD && - (rt = (struct rt6_info *) skb_dst(skb)) && - rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK) { - IP_VS_DBG(1, "%s(): " - "local delivery to %pI6 but in FORWARD\n", - __func__, &rt->rt6i_dst); - verdict = NF_DROP; - } + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset, hooknum); __ip_vs_conn_put(cp); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3f3e0f4c38a5..ee319a4338b0 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -87,7 +87,7 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) /* Get route to destination or remote server */ static struct rtable * __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, - __be32 daddr, u32 rtos, int rt_mode) + __be32 daddr, u32 rtos, int rt_mode, __be32 *ret_saddr) { struct net *net = dev_net(skb_dst(skb)->dev); struct rtable *rt; /* Route to the other host */ @@ -98,7 +98,12 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) __ip_vs_dst_check(dest, rtos))) { - rt = ip_route_output(net, dest->addr.ip, 0, rtos, 0); + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = dest->addr.ip; + fl4.flowi4_tos = rtos; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", @@ -106,19 +111,30 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); - IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", - &dest->addr.ip, + dest->dst_saddr.ip = fl4.saddr; + IP_VS_DBG(10, "new dst %pI4, src %pI4, refcnt=%d, " + "rtos=%X\n", + &dest->addr.ip, &dest->dst_saddr.ip, atomic_read(&rt->dst.__refcnt), rtos); } daddr = dest->addr.ip; + if (ret_saddr) + *ret_saddr = dest->dst_saddr.ip; spin_unlock(&dest->dst_lock); } else { - rt = ip_route_output(net, daddr, 0, rtos, 0); + struct flowi4 fl4; + + memset(&fl4, 0, sizeof(fl4)); + fl4.daddr = daddr; + fl4.flowi4_tos = rtos; + rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr); return NULL; } + if (ret_saddr) + *ret_saddr = fl4.saddr; } local = rt->rt_flags & RTCF_LOCAL; @@ -249,7 +265,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, u32 cookie; dst = __ip_vs_route_output_v6(net, &dest->addr.in6, - &dest->dst_saddr, + &dest->dst_saddr.in6, do_xfrm); if (!dst) { spin_unlock(&dest->dst_lock); @@ -259,11 +275,11 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", - &dest->addr.in6, &dest->dst_saddr, + &dest->addr.in6, &dest->dst_saddr.in6, atomic_read(&rt->dst.__refcnt)); } if (ret_saddr) - ipv6_addr_copy(ret_saddr, &dest->dst_saddr); + ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6); spin_unlock(&dest->dst_lock); } else { dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); @@ -386,7 +402,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, RT_TOS(iph->tos), - IP_VS_RT_MODE_NON_LOCAL))) + IP_VS_RT_MODE_NON_LOCAL, NULL))) goto tx_error_icmp; /* MTU checking */ @@ -518,7 +534,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR))) + IP_VS_RT_MODE_RDR, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; /* @@ -540,7 +556,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(rt->rt_dst) && + if (local && ipv4_is_loopback(cp->daddr.ip) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); @@ -751,6 +767,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rtable *rt; /* Route to the other host */ + __be32 saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; @@ -764,7 +781,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL))) + IP_VS_RT_MODE_NON_LOCAL, + &saddr))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -832,8 +850,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->frag_off = df; iph->protocol = IPPROTO_IPIP; iph->tos = tos; - iph->daddr = rt->rt_dst; - iph->saddr = rt->rt_src; + iph->daddr = cp->daddr.ip; + iph->saddr = saddr; iph->ttl = old_iph->ttl; ip_select_ident(iph, &rt->dst, NULL); @@ -996,7 +1014,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(iph->tos), IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL))) + IP_VS_RT_MODE_NON_LOCAL, NULL))) goto tx_error_icmp; if (rt->rt_flags & RTCF_LOCAL) { ip_rt_put(rt); @@ -1114,12 +1132,13 @@ tx_error: */ int ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum) { struct rtable *rt; /* Route to the other host */ int mtu; int rc; int local; + int rt_mode; EnterFunction(10); @@ -1140,11 +1159,13 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ + /* LOCALNODE from FORWARD hook is not supported */ + rt_mode = (hooknum != NF_INET_FORWARD) ? + IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, RT_TOS(ip_hdr(skb)->tos), - IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR))) + rt_mode, NULL))) goto tx_error_icmp; local = rt->rt_flags & RTCF_LOCAL; @@ -1167,7 +1188,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #endif /* From world but DNAT to loopback address? */ - if (local && ipv4_is_loopback(rt->rt_dst) && + if (local && ipv4_is_loopback(cp->daddr.ip) && rt_is_input_route(skb_rtable(skb))) { IP_VS_DBG(1, "%s(): " "stopping DNAT to loopback %pI4\n", @@ -1232,12 +1253,13 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, #ifdef CONFIG_IP_VS_IPV6 int ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct ip_vs_protocol *pp, int offset) + struct ip_vs_protocol *pp, int offset, unsigned int hooknum) { struct rt6_info *rt; /* Route to the other host */ int mtu; int rc; int local; + int rt_mode; EnterFunction(10); @@ -1258,10 +1280,12 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ + /* LOCALNODE from FORWARD hook is not supported */ + rt_mode = (hooknum != NF_INET_FORWARD) ? + IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | + IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, - 0, (IP_VS_RT_MODE_LOCAL | - IP_VS_RT_MODE_NON_LOCAL | - IP_VS_RT_MODE_RDR)))) + 0, rt_mode))) goto tx_error_icmp; local = __ip_vs_is_local_route6(rt); From 6c60408e33aba6d1d7241bc9be3b8d1b39509291 Mon Sep 17 00:00:00 2001 From: Alexey Orishko Date: Fri, 6 May 2011 03:01:30 +0000 Subject: [PATCH 14/19] CDC NCM: Add mising short packet in cdc_ncm driver Changes: - while making NTB, driver shall check if device dwNtbOutMaxSize is higher than host value and shall add a short packet if this is the case - previous temporary patch for this issue is replaced by this one Signed-off-by: Alexey Orishko Signed-off-by: David S. Miller --- drivers/net/usb/cdc_ncm.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/usb/cdc_ncm.c b/drivers/net/usb/cdc_ncm.c index 1033ef6476a4..4ab557d0287d 100644 --- a/drivers/net/usb/cdc_ncm.c +++ b/drivers/net/usb/cdc_ncm.c @@ -54,13 +54,13 @@ #include #include -#define DRIVER_VERSION "23-Apr-2011" +#define DRIVER_VERSION "06-May-2011" /* CDC NCM subclass 3.2.1 */ #define USB_CDC_NCM_NDP16_LENGTH_MIN 0x10 /* Maximum NTB length */ -#define CDC_NCM_NTB_MAX_SIZE_TX (16384 + 4) /* bytes, must be short terminated */ +#define CDC_NCM_NTB_MAX_SIZE_TX 16384 /* bytes */ #define CDC_NCM_NTB_MAX_SIZE_RX 16384 /* bytes */ /* Minimum value for MaxDatagramSize, ch. 6.2.9 */ @@ -722,7 +722,7 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) } else { /* reset variables */ - skb_out = alloc_skb(ctx->tx_max, GFP_ATOMIC); + skb_out = alloc_skb((ctx->tx_max + 1), GFP_ATOMIC); if (skb_out == NULL) { if (skb != NULL) { dev_kfree_skb_any(skb); @@ -861,8 +861,11 @@ cdc_ncm_fill_tx_frame(struct cdc_ncm_ctx *ctx, struct sk_buff *skb) /* store last offset */ last_offset = offset; - if ((last_offset < ctx->tx_max) && ((last_offset % - le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) { + if (((last_offset < ctx->tx_max) && ((last_offset % + le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) || + (((last_offset == ctx->tx_max) && ((ctx->tx_max % + le16_to_cpu(ctx->out_ep->desc.wMaxPacketSize)) == 0)) && + (ctx->tx_max < le32_to_cpu(ctx->ncm_parm.dwNtbOutMaxSize)))) { /* force short packet */ *(((u8 *)skb_out->data) + last_offset) = 0; last_offset++; From afe12cc86b0ba545a01ad8716539ab07ab6e9e89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 7 May 2011 03:22:17 +0000 Subject: [PATCH 15/19] net: introduce netdev_change_features() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will be needed by bonding and other drivers changing vlan_features after ndo_init callback. As a bonus, this includes kernel-doc for netdev_update_features(). Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 00d650c74800..1d9696a9ee4d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2565,6 +2565,7 @@ u32 netdev_increment_features(u32 all, u32 one, u32 mask); u32 netdev_fix_features(struct net_device *dev, u32 features); int __netdev_update_features(struct net_device *dev); void netdev_update_features(struct net_device *dev); +void netdev_change_features(struct net_device *dev); void netif_stacked_transfer_operstate(const struct net_device *rootdev, struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 75898a32c038..ea23353e6251 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5289,6 +5289,14 @@ int __netdev_update_features(struct net_device *dev) return 1; } +/** + * netdev_update_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications if it + * has changed. Should be called after driver or hardware dependent + * conditions might have changed that influence the features. + */ void netdev_update_features(struct net_device *dev) { if (__netdev_update_features(dev)) @@ -5296,6 +5304,23 @@ void netdev_update_features(struct net_device *dev) } EXPORT_SYMBOL(netdev_update_features); +/** + * netdev_change_features - recalculate device features + * @dev: the device to check + * + * Recalculate dev->features set and send notifications even + * if they have not changed. Should be called instead of + * netdev_update_features() if also dev->vlan_features might + * have changed to allow the changes to be propagated to stacked + * VLAN devices. + */ +void netdev_change_features(struct net_device *dev) +{ + __netdev_update_features(dev); + netdev_features_change(dev); +} +EXPORT_SYMBOL(netdev_change_features); + /** * netif_stacked_transfer_operstate - transfer operstate * @rootdev: the root or lower level device to transfer state from From b2a103e6d0afa432dff66b36473c5a55b6b0376c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Miros=C5=82aw?= Date: Sat, 7 May 2011 03:22:17 +0000 Subject: [PATCH 16/19] bonding: convert to ndo_fix_features MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should also fix updating of vlan_features and propagating changes to VLAN devices on the bond. Side effect: it allows user to force-disable some offloads on the bond interface. Note: NETIF_F_VLAN_CHALLENGED is managed by bond_fix_features() now. Signed-off-by: Michał Mirosław Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 157 ++++++++++++++------------------ 1 file changed, 66 insertions(+), 91 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 6312db1f7838..088fd845ffdf 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -343,32 +343,6 @@ out: return res; } -/** - * bond_has_challenged_slaves - * @bond: the bond we're working on - * - * Searches the slave list. Returns 1 if a vlan challenged slave - * was found, 0 otherwise. - * - * Assumes bond->lock is held. - */ -static int bond_has_challenged_slaves(struct bonding *bond) -{ - struct slave *slave; - int i; - - bond_for_each_slave(bond, slave, i) { - if (slave->dev->features & NETIF_F_VLAN_CHALLENGED) { - pr_debug("found VLAN challenged slave - %s\n", - slave->dev->name); - return 1; - } - } - - pr_debug("no VLAN challenged slaves found\n"); - return 0; -} - /** * bond_next_vlan - safely skip to the next item in the vlans list. * @bond: the bond we're working on @@ -1406,52 +1380,68 @@ static int bond_sethwaddr(struct net_device *bond_dev, return 0; } -#define BOND_VLAN_FEATURES \ - (NETIF_F_VLAN_CHALLENGED | NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX | \ - NETIF_F_HW_VLAN_FILTER) +static u32 bond_fix_features(struct net_device *dev, u32 features) +{ + struct slave *slave; + struct bonding *bond = netdev_priv(dev); + u32 mask; + int i; -/* - * Compute the common dev->feature set available to all slaves. Some - * feature bits are managed elsewhere, so preserve those feature bits - * on the master device. - */ -static int bond_compute_features(struct bonding *bond) + read_lock(&bond->lock); + + if (!bond->first_slave) { + /* Disable adding VLANs to empty bond. But why? --mq */ + features |= NETIF_F_VLAN_CHALLENGED; + goto out; + } + + mask = features; + features &= ~NETIF_F_ONE_FOR_ALL; + features |= NETIF_F_ALL_FOR_ALL; + + bond_for_each_slave(bond, slave, i) { + features = netdev_increment_features(features, + slave->dev->features, + mask); + } + +out: + read_unlock(&bond->lock); + return features; +} + +#define BOND_VLAN_FEATURES (NETIF_F_ALL_TX_OFFLOADS | \ + NETIF_F_SOFT_FEATURES | \ + NETIF_F_LRO) + +static void bond_compute_features(struct bonding *bond) { struct slave *slave; struct net_device *bond_dev = bond->dev; - u32 features = bond_dev->features; - u32 vlan_features = 0; - unsigned short max_hard_header_len = max((u16)ETH_HLEN, - bond_dev->hard_header_len); + u32 vlan_features = BOND_VLAN_FEATURES; + unsigned short max_hard_header_len = ETH_HLEN; int i; - features &= ~(NETIF_F_ALL_CSUM | BOND_VLAN_FEATURES); - features |= NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_NOCACHE_COPY; + read_lock(&bond->lock); if (!bond->first_slave) goto done; - features &= ~NETIF_F_ONE_FOR_ALL; - - vlan_features = bond->first_slave->dev->vlan_features; bond_for_each_slave(bond, slave, i) { - features = netdev_increment_features(features, - slave->dev->features, - NETIF_F_ONE_FOR_ALL); vlan_features = netdev_increment_features(vlan_features, - slave->dev->vlan_features, - NETIF_F_ONE_FOR_ALL); + slave->dev->vlan_features, BOND_VLAN_FEATURES); + if (slave->dev->hard_header_len > max_hard_header_len) max_hard_header_len = slave->dev->hard_header_len; } done: - features |= (bond_dev->features & BOND_VLAN_FEATURES); - bond_dev->features = netdev_fix_features(bond_dev, features); - bond_dev->vlan_features = netdev_fix_features(bond_dev, vlan_features); + bond_dev->vlan_features = vlan_features; bond_dev->hard_header_len = max_hard_header_len; - return 0; + read_unlock(&bond->lock); + + netdev_change_features(bond_dev); } static void bond_setup_by_slave(struct net_device *bond_dev, @@ -1544,7 +1534,6 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) struct netdev_hw_addr *ha; struct sockaddr addr; int link_reporting; - int old_features = bond_dev->features; int res = 0; if (!bond->params.use_carrier && slave_dev->ethtool_ops == NULL && @@ -1577,16 +1566,9 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) pr_warning("%s: Warning: enslaved VLAN challenged slave %s. Adding VLANs will be blocked as long as %s is part of bond %s\n", bond_dev->name, slave_dev->name, slave_dev->name, bond_dev->name); - bond_dev->features |= NETIF_F_VLAN_CHALLENGED; } } else { pr_debug("%s: ! NETIF_F_VLAN_CHALLENGED\n", slave_dev->name); - if (bond->slave_cnt == 0) { - /* First slave, and it is not VLAN challenged, - * so remove the block of adding VLANs over the bond. - */ - bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; - } } /* @@ -1775,10 +1757,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->delay = 0; new_slave->link_failure_count = 0; - bond_compute_features(bond); - write_unlock_bh(&bond->lock); + bond_compute_features(bond); + read_lock(&bond->lock); new_slave->last_arp_rx = jiffies; @@ -1958,7 +1940,7 @@ err_free: kfree(new_slave); err_undo_flags: - bond_dev->features = old_features; + bond_compute_features(bond); return res; } @@ -1979,6 +1961,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) struct bonding *bond = netdev_priv(bond_dev); struct slave *slave, *oldcurrent; struct sockaddr addr; + u32 old_features = bond_dev->features; /* slave is not a slave or master is not master of this slave */ if (!(slave_dev->flags & IFF_SLAVE) || @@ -2039,8 +2022,6 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) /* release the slave from its bond */ bond_detach_slave(bond, slave); - bond_compute_features(bond); - if (bond->primary_slave == slave) bond->primary_slave = NULL; @@ -2084,24 +2065,23 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev) */ memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - if (!bond->vlgrp) { - bond_dev->features |= NETIF_F_VLAN_CHALLENGED; - } else { + if (bond->vlgrp) { pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", bond_dev->name, bond_dev->name); pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", bond_dev->name); } - } else if ((bond_dev->features & NETIF_F_VLAN_CHALLENGED) && - !bond_has_challenged_slaves(bond)) { - pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", - bond_dev->name, slave_dev->name, bond_dev->name); - bond_dev->features &= ~NETIF_F_VLAN_CHALLENGED; } write_unlock_bh(&bond->lock); unblock_netpoll_tx(); + bond_compute_features(bond); + if (!(bond_dev->features & NETIF_F_VLAN_CHALLENGED) && + (old_features & NETIF_F_VLAN_CHALLENGED)) + pr_info("%s: last VLAN challenged slave %s left bond %s. VLAN blocking is removed\n", + bond_dev->name, slave_dev->name, bond_dev->name); + /* must do this from outside any spinlocks */ bond_destroy_slave_symlinks(bond_dev, slave_dev); @@ -2219,8 +2199,6 @@ static int bond_release_all(struct net_device *bond_dev) bond_alb_deinit_slave(bond, slave); } - bond_compute_features(bond); - bond_destroy_slave_symlinks(bond_dev, slave_dev); bond_del_vlans_from_slave(bond, slave_dev); @@ -2269,9 +2247,7 @@ static int bond_release_all(struct net_device *bond_dev) */ memset(bond_dev->dev_addr, 0, bond_dev->addr_len); - if (!bond->vlgrp) { - bond_dev->features |= NETIF_F_VLAN_CHALLENGED; - } else { + if (bond->vlgrp) { pr_warning("%s: Warning: clearing HW address of %s while it still has VLANs.\n", bond_dev->name, bond_dev->name); pr_warning("%s: When re-adding slaves, make sure the bond's HW address matches its VLANs'.\n", @@ -2282,6 +2258,9 @@ static int bond_release_all(struct net_device *bond_dev) out: write_unlock_bh(&bond->lock); + + bond_compute_features(bond); + return 0; } @@ -4337,11 +4316,6 @@ static void bond_ethtool_get_drvinfo(struct net_device *bond_dev, static const struct ethtool_ops bond_ethtool_ops = { .get_drvinfo = bond_ethtool_get_drvinfo, .get_link = ethtool_op_get_link, - .get_tx_csum = ethtool_op_get_tx_csum, - .get_sg = ethtool_op_get_sg, - .get_tso = ethtool_op_get_tso, - .get_ufo = ethtool_op_get_ufo, - .get_flags = ethtool_op_get_flags, }; static const struct net_device_ops bond_netdev_ops = { @@ -4367,6 +4341,7 @@ static const struct net_device_ops bond_netdev_ops = { #endif .ndo_add_slave = bond_enslave, .ndo_del_slave = bond_release, + .ndo_fix_features = bond_fix_features, }; static void bond_destructor(struct net_device *bond_dev) @@ -4422,14 +4397,14 @@ static void bond_setup(struct net_device *bond_dev) * when there are slaves that are not hw accel * capable */ - bond_dev->features |= (NETIF_F_HW_VLAN_TX | - NETIF_F_HW_VLAN_RX | - NETIF_F_HW_VLAN_FILTER); - /* By default, we enable GRO on bonding devices. - * Actual support requires lowlevel drivers are GRO ready. - */ - bond_dev->features |= NETIF_F_GRO; + bond_dev->hw_features = BOND_VLAN_FEATURES | + NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_FILTER; + + bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM); + bond_dev->features |= bond_dev->hw_features; } static void bond_work_cancel_all(struct bonding *bond) From 10949550bd1e50cc91c0f5085f7080a44b0871fe Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:26:57 -0400 Subject: [PATCH 17/19] ipv4: Kill spurious opt->srr check in ip_options_rcv_srr(). All call sites conditionalize the call to ip_options_rcv_srr() with a check of opt->srr, so no need to check it again there. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 01fc40965848..e82c304806bb 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -601,7 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) unsigned long orefdst; int err; - if (!opt->srr || !rt) + if (!rt) return 0; if (skb->pkt_type != PACKET_HOST) From c30883bdff0b3544900a5c4aba18b8985436878f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:30:58 -0400 Subject: [PATCH 18/19] ipv4: Simplify iph->daddr overwrite in ip_options_rcv_srr(). We already copy the 4-byte nexthop from the options block into local variable "nexthop" for the route lookup. Re-use that variable instead of memcpy()'ing again when assigning to iph->daddr after the route lookup succeeds. Signed-off-by: David S. Miller --- net/ipv4/ip_options.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e82c304806bb..c5c26192b057 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -635,7 +635,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) if (rt2->rt_type != RTN_LOCAL) break; /* Superfast 8) loopback forward */ - memcpy(&iph->daddr, &optptr[srrptr-1], 4); + iph->daddr = nexthop; opt->is_changed = 1; } if (srrptr <= srrspace) { From def57687e9579b7a797681990dff763c411f5347 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 12 May 2011 19:34:30 -0400 Subject: [PATCH 19/19] ipv4: Elide use of rt->rt_dst in ip_forward() No matter what kind of header mangling occurs due to IP options processing, rt->rt_dst will always equal iph->daddr in the packet. So we can safely use iph->daddr instead of rt->rt_dst here. Signed-off-by: David S. Miller --- net/ipv4/ip_forward.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 99461f09320f..fcbc0c8f1261 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -84,7 +84,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway) + if (opt->is_strictroute && iph->daddr != rt->rt_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&