From 59b93b41e7fa71138734a911b11b044340dd16bd Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Wed, 5 Nov 2014 15:27:48 -0800 Subject: [PATCH 01/14] net: Remove MPLS GSO feature. Device can export MPLS GSO support in dev->mpls_features same way it export vlan features in dev->vlan_features. So it is safe to remove NETIF_F_GSO_MPLS redundant flag. Signed-off-by: Pravin B Shelar --- include/linux/netdev_features.h | 5 +---- include/linux/netdevice.h | 1 - include/linux/skbuff.h | 4 +--- net/core/ethtool.c | 1 - net/ipv4/af_inet.c | 1 - net/ipv4/tcp_offload.c | 1 - net/ipv4/udp_offload.c | 3 +-- net/ipv6/ip6_offload.c | 1 - net/ipv6/udp_offload.c | 3 +-- net/mpls/mpls_gso.c | 3 +-- 10 files changed, 5 insertions(+), 18 deletions(-) diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 8c94b07e654a..8e30685affeb 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -47,7 +47,6 @@ enum { NETIF_F_GSO_SIT_BIT, /* ... SIT tunnel with TSO */ NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */ NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */ - NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, @@ -119,7 +118,6 @@ enum { #define NETIF_F_GSO_SIT __NETIF_F(GSO_SIT) #define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL) #define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM) -#define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) @@ -183,7 +181,6 @@ enum { NETIF_F_GSO_IPIP | \ NETIF_F_GSO_SIT | \ NETIF_F_GSO_UDP_TUNNEL | \ - NETIF_F_GSO_UDP_TUNNEL_CSUM | \ - NETIF_F_GSO_MPLS) + NETIF_F_GSO_UDP_TUNNEL_CSUM) #endif /* _LINUX_NETDEV_FEATURES_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4767f546d7c0..90ac95900a11 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3583,7 +3583,6 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_SIT != (NETIF_F_GSO_SIT >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT)); - BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 39ec7530ae27..53f4f6c93356 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -372,9 +372,7 @@ enum { SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11, - SKB_GSO_MPLS = 1 << 12, - - SKB_GSO_TUNNEL_REMCSUM = 1 << 13, + SKB_GSO_TUNNEL_REMCSUM = 1 << 12, }; #if BITS_PER_LONG > 32 diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 06dfb293e5aa..b0f84f5ddda8 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -84,7 +84,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_GSO_IPIP_BIT] = "tx-ipip-segmentation", [NETIF_F_GSO_SIT_BIT] = "tx-sit-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", - [NETIF_F_GSO_MPLS_BIT] = "tx-mpls-segmentation", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CSUM_BIT] = "tx-checksum-sctp", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ed2c672c5b01..3a096bb2d596 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1223,7 +1223,6 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_MPLS | 0))) goto out; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a1b2a5624f91..9d7930ba8e0f 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -94,7 +94,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | - SKB_GSO_MPLS | SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0a5a70d0e84c..d3e537ef6b7f 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -207,8 +207,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | SKB_GSO_IPIP | - SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_MPLS) || + SKB_GSO_GRE | SKB_GSO_GRE_CSUM) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index e9767079a360..fd76ce938c32 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -79,7 +79,6 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_TUNNEL_REMCSUM | - SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 637ba2e438b7..b6aa8ed18257 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -46,8 +46,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | - SKB_GSO_SIT | - SKB_GSO_MPLS) || + SKB_GSO_SIT) || !(type & (SKB_GSO_UDP)))) goto out; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index e3545f21a099..ca27837974fe 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -34,8 +34,7 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, SKB_GSO_TCP_ECN | SKB_GSO_GRE | SKB_GSO_GRE_CSUM | - SKB_GSO_IPIP | - SKB_GSO_MPLS))) + SKB_GSO_IPIP))) goto out; /* Setup inner SKB. */ From 25cd9ba0abc0749e5cb78e6493c6f6b3311ec6c5 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 6 Oct 2014 05:05:13 -0700 Subject: [PATCH 02/14] openvswitch: Add basic MPLS support to kernel Allow datapath to recognize and extract MPLS labels into flow keys and execute actions which push, pop, and set labels on packets. Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe Stringer. Cc: Ravi K Cc: Leo Alterman Cc: Isaku Yamahata Cc: Joe Stringer Signed-off-by: Simon Horman Signed-off-by: Jesse Gross Signed-off-by: Pravin B Shelar --- include/net/mpls.h | 39 +++++++++ include/uapi/linux/openvswitch.h | 32 +++++++ net/core/dev.c | 3 +- net/openvswitch/Kconfig | 1 + net/openvswitch/actions.c | 106 ++++++++++++++++++++++- net/openvswitch/datapath.c | 6 +- net/openvswitch/flow.c | 30 +++++++ net/openvswitch/flow.h | 17 ++-- net/openvswitch/flow_netlink.c | 139 +++++++++++++++++++++++++++---- net/openvswitch/flow_netlink.h | 2 +- 10 files changed, 345 insertions(+), 30 deletions(-) create mode 100644 include/net/mpls.h diff --git a/include/net/mpls.h b/include/net/mpls.h new file mode 100644 index 000000000000..5b3b5addfb08 --- /dev/null +++ b/include/net/mpls.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2014 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _NET_MPLS_H +#define _NET_MPLS_H 1 + +#include +#include + +#define MPLS_HLEN 4 + +static inline bool eth_p_mpls(__be16 eth_type) +{ + return eth_type == htons(ETH_P_MPLS_UC) || + eth_type == htons(ETH_P_MPLS_MC); +} + +/* + * For non-MPLS skbs this will correspond to the network header. + * For MPLS skbs it will be before the network_header as the MPLS + * label stack lies between the end of the mac header and the network + * header. That is, for MPLS skbs the end of the mac header + * is the top of the MPLS label stack. + */ +static inline unsigned char *skb_mpls_header(struct sk_buff *skb) +{ + return skb_mac_header(skb) + skb->mac_len; +} +#endif diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 435eabc5ffaa..631056b66f80 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -293,6 +293,9 @@ enum ovs_key_attr { OVS_KEY_ATTR_DP_HASH, /* u32 hash value. Value 0 indicates the hash is not computed by the datapath. */ OVS_KEY_ATTR_RECIRC_ID, /* u32 recirc id */ + OVS_KEY_ATTR_MPLS, /* array of struct ovs_key_mpls. + * The implementation may restrict + * the accepted length of the array. */ #ifdef __KERNEL__ OVS_KEY_ATTR_TUNNEL_INFO, /* struct ovs_tunnel_info */ @@ -340,6 +343,10 @@ struct ovs_key_ethernet { __u8 eth_dst[ETH_ALEN]; }; +struct ovs_key_mpls { + __be32 mpls_lse; +}; + struct ovs_key_ipv4 { __be32 ipv4_src; __be32 ipv4_dst; @@ -483,6 +490,19 @@ enum ovs_userspace_attr { #define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1) +/** + * struct ovs_action_push_mpls - %OVS_ACTION_ATTR_PUSH_MPLS action argument. + * @mpls_lse: MPLS label stack entry to push. + * @mpls_ethertype: Ethertype to set in the encapsulating ethernet frame. + * + * The only values @mpls_ethertype should ever be given are %ETH_P_MPLS_UC and + * %ETH_P_MPLS_MC, indicating MPLS unicast or multicast. Other are rejected. + */ +struct ovs_action_push_mpls { + __be32 mpls_lse; + __be16 mpls_ethertype; /* Either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC */ +}; + /** * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument. * @vlan_tpid: Tag protocol identifier (TPID) to push. @@ -534,6 +554,15 @@ struct ovs_action_hash { * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. + * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the + * top of the packets MPLS label stack. Set the ethertype of the + * encapsulating frame to either %ETH_P_MPLS_UC or %ETH_P_MPLS_MC to + * indicate the new packet contents. + * @OVS_ACTION_ATTR_POP_MPLS: Pop an MPLS label stack entry off of the + * packet's MPLS label stack. Set the encapsulating frame's ethertype to + * indicate the new packet contents. This could potentially still be + * %ETH_P_MPLS if the resulting MPLS label stack is not empty. If there + * is no MPLS label stack, as determined by ethertype, no action is taken. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -550,6 +579,9 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_RECIRC, /* u32 recirc_id. */ OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ + OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ + OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + __OVS_ACTION_ATTR_MAX }; diff --git a/net/core/dev.c b/net/core/dev.c index 40be481268de..70bb609c283d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -118,6 +118,7 @@ #include #include #include +#include #include #include #include @@ -2530,7 +2531,7 @@ static netdev_features_t net_mpls_features(struct sk_buff *skb, netdev_features_t features, __be16 type) { - if (type == htons(ETH_P_MPLS_UC) || type == htons(ETH_P_MPLS_MC)) + if (eth_p_mpls(type)) features &= skb->dev->mpls_features; return features; diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 2a9673e39ca1..454ce12efbbf 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -30,6 +30,7 @@ config OPENVSWITCH config OPENVSWITCH_GRE tristate "Open vSwitch GRE tunneling support" + select NET_MPLS_GSO depends on INET depends on OPENVSWITCH depends on NET_IPGRE_DEMUX diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 922c133b1933..930b1b6e4cef 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -28,10 +28,12 @@ #include #include #include + #include #include #include #include +#include #include #include "datapath.h" @@ -118,6 +120,92 @@ static int make_writable(struct sk_buff *skb, int write_len) return pskb_expand_head(skb, 0, 0, GFP_ATOMIC); } +static int push_mpls(struct sk_buff *skb, + const struct ovs_action_push_mpls *mpls) +{ + __be32 *new_mpls_lse; + struct ethhdr *hdr; + + /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ + if (skb->encapsulation) + return -ENOTSUPP; + + if (skb_cow_head(skb, MPLS_HLEN) < 0) + return -ENOMEM; + + skb_push(skb, MPLS_HLEN); + memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + skb_reset_mac_header(skb); + + new_mpls_lse = (__be32 *)skb_mpls_header(skb); + *new_mpls_lse = mpls->mpls_lse; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_add(skb->csum, csum_partial(new_mpls_lse, + MPLS_HLEN, 0)); + + hdr = eth_hdr(skb); + hdr->h_proto = mpls->mpls_ethertype; + + skb_set_inner_protocol(skb, skb->protocol); + skb->protocol = mpls->mpls_ethertype; + + return 0; +} + +static int pop_mpls(struct sk_buff *skb, const __be16 ethertype) +{ + struct ethhdr *hdr; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->csum = csum_sub(skb->csum, + csum_partial(skb_mpls_header(skb), + MPLS_HLEN, 0)); + + memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), + skb->mac_len); + + __skb_pull(skb, MPLS_HLEN); + skb_reset_mac_header(skb); + + /* skb_mpls_header() is used to locate the ethertype + * field correctly in the presence of VLAN tags. + */ + hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr->h_proto = ethertype; + if (eth_p_mpls(skb->protocol)) + skb->protocol = ethertype; + return 0; +} + +static int set_mpls(struct sk_buff *skb, const __be32 *mpls_lse) +{ + __be32 *stack; + int err; + + err = make_writable(skb, skb->mac_len + MPLS_HLEN); + if (unlikely(err)) + return err; + + stack = (__be32 *)skb_mpls_header(skb); + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __be32 diff[] = { ~(*stack), *mpls_lse }; + + skb->csum = ~csum_partial((char *)diff, sizeof(diff), + ~skb->csum); + } + + *stack = *mpls_lse; + + return 0; +} + /* remove VLAN header from packet and update csum accordingly. */ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) { @@ -140,10 +228,12 @@ static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci) vlan_set_encap_proto(skb, vhdr); skb->mac_header += VLAN_HLEN; + if (skb_network_offset(skb) < ETH_HLEN) skb_set_network_header(skb, ETH_HLEN); - skb_reset_mac_len(skb); + /* Update mac_len for subsequent MPLS actions */ + skb_reset_mac_len(skb); return 0; } @@ -186,6 +276,8 @@ static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vla if (!__vlan_put_tag(skb, skb->vlan_proto, current_tag)) return -ENOMEM; + /* Update mac_len for subsequent MPLS actions */ + skb->mac_len += VLAN_HLEN; if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_add(skb->csum, csum_partial(skb->data @@ -612,6 +704,10 @@ static int execute_set_action(struct sk_buff *skb, case OVS_KEY_ATTR_SCTP: err = set_sctp(skb, nla_data(nested_attr)); break; + + case OVS_KEY_ATTR_MPLS: + err = set_mpls(skb, nla_data(nested_attr)); + break; } return err; @@ -690,6 +786,14 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, execute_hash(skb, key, a); break; + case OVS_ACTION_ATTR_PUSH_MPLS: + err = push_mpls(skb, nla_data(a)); + break; + + case OVS_ACTION_ATTR_POP_MPLS: + err = pop_mpls(skb, nla_get_be16(a)); + break; + case OVS_ACTION_ATTR_PUSH_VLAN: err = push_vlan(skb, nla_data(a)); if (unlikely(err)) /* skb already freed. */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f18302f32049..688cb9bc0ef1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -560,7 +560,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_flow_free; err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], - &flow->key, 0, &acts); + &flow->key, &acts); if (err) goto err_flow_free; @@ -846,7 +846,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, - 0, &acts); + &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); goto err_kfree_acts; @@ -953,7 +953,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, return acts; ovs_flow_mask_key(&masked_key, key, mask); - error = ovs_nla_copy_actions(a, &masked_key, 0, &acts); + error = ovs_nla_copy_actions(a, &masked_key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); kfree(acts); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 2b78789ea7c5..90a21010fc8f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include "datapath.h" @@ -480,6 +482,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) return -ENOMEM; skb_reset_network_header(skb); + skb_reset_mac_len(skb); __skb_push(skb, skb->data - skb_mac_header(skb)); /* Network layer. */ @@ -584,6 +587,33 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->ip, 0, sizeof(key->ip)); memset(&key->ipv4, 0, sizeof(key->ipv4)); } + } else if (eth_p_mpls(key->eth.type)) { + size_t stack_len = MPLS_HLEN; + + /* In the presence of an MPLS label stack the end of the L2 + * header and the beginning of the L3 header differ. + * + * Advance network_header to the beginning of the L3 + * header. mac_len corresponds to the end of the L2 header. + */ + while (1) { + __be32 lse; + + error = check_header(skb, skb->mac_len + stack_len); + if (unlikely(error)) + return 0; + + memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + + if (stack_len == MPLS_HLEN) + memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); + + skb_set_network_header(skb, skb->mac_len + stack_len); + if (lse & htonl(MPLS_LS_S_MASK)) + break; + + stack_len += MPLS_HLEN; + } } else if (key->eth.type == htons(ETH_P_IPV6)) { int nh_len; /* IPv6 Header + Extensions */ diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 71813318c8c7..4962bee81a11 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -102,12 +102,17 @@ struct sw_flow_key { __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ __be16 type; /* Ethernet frame type. */ } eth; - struct { - u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ - u8 tos; /* IP ToS. */ - u8 ttl; /* IP TTL/hop limit. */ - u8 frag; /* One of OVS_FRAG_TYPE_*. */ - } ip; + union { + struct { + __be32 top_lse; /* top label stack entry */ + } mpls; + struct { + u8 proto; /* IP protocol or lower 8 bits of ARP opcode. */ + u8 tos; /* IP ToS. */ + u8 ttl; /* IP TTL/hop limit. */ + u8 frag; /* One of OVS_FRAG_TYPE_*. */ + } ip; + }; struct { __be16 src; /* TCP/UDP/SCTP source port. */ __be16 dst; /* TCP/UDP/SCTP destination port. */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 939bcb32100f..569309c49cc0 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "flow_netlink.h" @@ -134,7 +135,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMP) | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) - | (1 << OVS_KEY_ATTR_ND)); + | (1 << OVS_KEY_ATTR_ND) + | (1 << OVS_KEY_ATTR_MPLS)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -149,6 +151,12 @@ static bool match_validate(const struct sw_flow_match *match, mask_allowed |= 1 << OVS_KEY_ATTR_ARP; } + if (eth_p_mpls(match->key->eth.type)) { + key_expected |= 1 << OVS_KEY_ATTR_MPLS; + if (match->mask && (match->mask->key.eth.type == htons(0xffff))) + mask_allowed |= 1 << OVS_KEY_ATTR_MPLS; + } + if (match->key->eth.type == htons(ETH_P_IP)) { key_expected |= 1 << OVS_KEY_ATTR_IPV4; if (match->mask && (match->mask->key.eth.type == htons(0xffff))) @@ -266,6 +274,7 @@ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32), [OVS_KEY_ATTR_DP_HASH] = sizeof(u32), [OVS_KEY_ATTR_TUNNEL] = -1, + [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls), }; static bool is_all_zero(const u8 *fp, size_t size) @@ -735,6 +744,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { + const struct ovs_key_mpls *mpls_key; + + mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]); + SW_FLOW_KEY_PUT(match, mpls.top_lse, + mpls_key->mpls_lse, is_mask); + + attrs &= ~(1 << OVS_KEY_ATTR_MPLS); + } + if (attrs & (1 << OVS_KEY_ATTR_TCP)) { const struct ovs_key_tcp *tcp_key; @@ -1140,6 +1159,14 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey, arp_key->arp_op = htons(output->ip.proto); ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha); ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha); + } else if (eth_p_mpls(swkey->eth.type)) { + struct ovs_key_mpls *mpls_key; + + nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key)); + if (!nla) + goto nla_put_failure; + mpls_key = nla_data(nla); + mpls_key->mpls_lse = output->mpls.top_lse; } if ((swkey->eth.type == htons(ETH_P_IP) || @@ -1336,9 +1363,15 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci); + static int validate_and_copy_sample(const struct nlattr *attr, const struct sw_flow_key *key, int depth, - struct sw_flow_actions **sfa) + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1]; const struct nlattr *probability, *actions; @@ -1375,7 +1408,8 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = ovs_nla_copy_actions(actions, key, depth + 1, sfa); + err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa, + eth_type, vlan_tci); if (err) return err; @@ -1385,10 +1419,10 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key) +static int validate_tp_port(const struct sw_flow_key *flow_key, + __be16 eth_type) { - if ((flow_key->eth.type == htons(ETH_P_IP) || - flow_key->eth.type == htons(ETH_P_IPV6)) && + if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && (flow_key->tp.src || flow_key->tp.dst)) return 0; @@ -1483,7 +1517,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun) + bool *set_tun, __be16 eth_type) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); @@ -1508,6 +1542,9 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_TUNNEL: + if (eth_p_mpls(eth_type)) + return -EINVAL; + *set_tun = true; err = validate_and_copy_set_tun(a, sfa); if (err) @@ -1515,7 +1552,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV4: - if (flow_key->eth.type != htons(ETH_P_IP)) + if (eth_type != htons(ETH_P_IP)) return -EINVAL; if (!flow_key->ip.proto) @@ -1531,7 +1568,7 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_IPV6: - if (flow_key->eth.type != htons(ETH_P_IPV6)) + if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; if (!flow_key->ip.proto) @@ -1553,19 +1590,24 @@ static int validate_set(const struct nlattr *a, if (flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); case OVS_KEY_ATTR_UDP: if (flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); + + case OVS_KEY_ATTR_MPLS: + if (!eth_p_mpls(eth_type)) + return -EINVAL; + break; case OVS_KEY_ATTR_SCTP: if (flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key); + return validate_tp_port(flow_key, eth_type); default: return -EINVAL; @@ -1609,12 +1651,13 @@ static int copy_action(const struct nlattr *from, return 0; } -int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, - int depth, - struct sw_flow_actions **sfa) +static int ovs_nla_copy_actions__(const struct nlattr *attr, + const struct sw_flow_key *key, + int depth, struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci) { const struct nlattr *a; + bool out_tnl_port = false; int rem, err; if (depth >= SAMPLE_ACTION_DEPTH) @@ -1626,6 +1669,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), [OVS_ACTION_ATTR_RECIRC] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, + [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls), + [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16), [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, @@ -1655,6 +1700,8 @@ int ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_OUTPUT: if (nla_get_u32(a) >= DP_MAX_PORTS) return -EINVAL; + out_tnl_port = false; + break; case OVS_ACTION_ATTR_HASH: { @@ -1671,6 +1718,7 @@ int ovs_nla_copy_actions(const struct nlattr *attr, } case OVS_ACTION_ATTR_POP_VLAN: + vlan_tci = htons(0); break; case OVS_ACTION_ATTR_PUSH_VLAN: @@ -1679,19 +1727,66 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; + vlan_tci = vlan->vlan_tci; break; case OVS_ACTION_ATTR_RECIRC: break; + case OVS_ACTION_ATTR_PUSH_MPLS: { + const struct ovs_action_push_mpls *mpls = nla_data(a); + + /* Networking stack do not allow simultaneous Tunnel + * and MPLS GSO. + */ + if (out_tnl_port) + return -EINVAL; + + if (!eth_p_mpls(mpls->mpls_ethertype)) + return -EINVAL; + /* Prohibit push MPLS other than to a white list + * for packets that have a known tag order. + */ + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + (eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6) && + eth_type != htons(ETH_P_ARP) && + eth_type != htons(ETH_P_RARP) && + !eth_p_mpls(eth_type))) + return -EINVAL; + eth_type = mpls->mpls_ethertype; + break; + } + + case OVS_ACTION_ATTR_POP_MPLS: + if (vlan_tci & htons(VLAN_TAG_PRESENT) || + !eth_p_mpls(eth_type)) + return -EINVAL; + + /* Disallow subsequent L2.5+ set and mpls_pop actions + * as there is no check here to ensure that the new + * eth_type is valid and thus set actions could + * write off the end of the packet or otherwise + * corrupt it. + * + * Support for these actions is planned using packet + * recirculation. + */ + eth_type = htons(0); + break; + case OVS_ACTION_ATTR_SET: - err = validate_set(a, key, sfa, &skip_copy); + err = validate_set(a, key, sfa, + &out_tnl_port, eth_type); if (err) return err; + + skip_copy = out_tnl_port; break; case OVS_ACTION_ATTR_SAMPLE: - err = validate_and_copy_sample(a, key, depth, sfa); + err = validate_and_copy_sample(a, key, depth, sfa, + eth_type, vlan_tci); if (err) return err; skip_copy = true; @@ -1713,6 +1808,14 @@ int ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +int ovs_nla_copy_actions(const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa) +{ + return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type, + key->eth.tci); +} + static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) { const struct nlattr *a; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 206e45add888..6355b1d01329 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -49,7 +49,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, const struct nlattr *); int ovs_nla_copy_actions(const struct nlattr *attr, - const struct sw_flow_key *key, int depth, + const struct sw_flow_key *key, struct sw_flow_actions **sfa); int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); From 9b996e544a6bc7d201060fdcbdb5d4a9b734aa1b Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Tue, 6 May 2014 18:41:20 -0700 Subject: [PATCH 03/14] openvswitch: Move table destroy to dp-rcu callback. Ths simplifies flow-table-destroy API. No need to pass explicit parameter about context. Signed-off-by: Pravin B Shelar Acked-by: Thomas Graf --- net/openvswitch/datapath.c | 5 ++--- net/openvswitch/flow_table.c | 11 +++++++---- net/openvswitch/flow_table.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 688cb9bc0ef1..a532a9c46d20 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -187,6 +187,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) { struct datapath *dp = container_of(rcu, struct datapath, rcu); + ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); release_net(ovs_dp_get_net(dp)); kfree(dp->ports); @@ -1444,7 +1445,7 @@ err_destroy_ports_array: err_destroy_percpu: free_percpu(dp->stats_percpu); err_destroy_table: - ovs_flow_tbl_destroy(&dp->table, false); + ovs_flow_tbl_destroy(&dp->table); err_free_dp: release_net(ovs_dp_get_net(dp)); kfree(dp); @@ -1476,8 +1477,6 @@ static void __dp_destroy(struct datapath *dp) ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL)); /* RCU destroy the flow table */ - ovs_flow_tbl_destroy(&dp->table, true); - call_rcu(&dp->rcu, destroy_dp_rcu); } diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index cf2d853646f0..90f8b40a350b 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2007-2013 Nicira, Inc. + * Copyright (c) 2007-2014 Nicira, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -250,11 +250,14 @@ skip_flows: __table_instance_destroy(ti); } -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred) +/* No need for locking this function is called from RCU callback or + * error path. + */ +void ovs_flow_tbl_destroy(struct flow_table *table) { - struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ti = rcu_dereference_raw(table->ti); - table_instance_destroy(ti, deferred); + table_instance_destroy(ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 5918bff7f3f6..f682c8c07f44 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -62,7 +62,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred); int ovs_flow_tbl_init(struct flow_table *); int ovs_flow_tbl_count(struct flow_table *table); -void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred); +void ovs_flow_tbl_destroy(struct flow_table *table); int ovs_flow_tbl_flush(struct flow_table *flow_table); int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, From 1b760fb9a8a8c0babc15f886ee5740cb33744168 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sun, 7 Sep 2014 22:11:08 -0700 Subject: [PATCH 04/14] openvswitch: Remove redundant tcp_flags code. These two cases used to be treated differently for IPv4/IPv6, but they are now identical. Signed-off-by: Joe Stringer Acked-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- net/openvswitch/flow_netlink.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 569309c49cc0..5a91d792505d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -611,7 +611,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask) { int err; - u64 orig_attrs = attrs; err = metadata_from_nlattrs(match, &attrs, a, is_mask); if (err) @@ -764,15 +763,9 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, } if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) { - if (orig_attrs & (1 << OVS_KEY_ATTR_IPV4)) { - SW_FLOW_KEY_PUT(match, tp.flags, - nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), - is_mask); - } else { - SW_FLOW_KEY_PUT(match, tp.flags, - nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), - is_mask); - } + SW_FLOW_KEY_PUT(match, tp.flags, + nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]), + is_mask); attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS); } From 426cda5cc177301f9c196f3a9b6a1287051ba599 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Mon, 6 Oct 2014 05:08:38 -0700 Subject: [PATCH 05/14] openvswitch: Additional logging for -EINVAL on flow setups. There are many possible ways that a flow can be invalid so we've added logging for most of them. This adds logs for the remaining possible cases so there isn't any ambiguity while debugging. CC: Federico Iezzi Signed-off-by: Jesse Gross Acked-by: Thomas Graf Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 12 +++++++++--- net/openvswitch/flow_netlink.c | 17 +++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index a532a9c46d20..04a26ae4a4f6 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -817,10 +817,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) /* Must have key and actions. */ error = -EINVAL; - if (!a[OVS_FLOW_ATTR_KEY]) + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR("Flow key attribute not present in new flow.\n"); goto error; - if (!a[OVS_FLOW_ATTR_ACTIONS]) + } + if (!a[OVS_FLOW_ATTR_ACTIONS]) { + OVS_NLERR("Flow actions attribute not present in new flow.\n"); goto error; + } /* Most of the time we need to allocate a new flow, do it before * locking. @@ -979,8 +983,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) /* Extract key. */ error = -EINVAL; - if (!a[OVS_FLOW_ATTR_KEY]) + if (!a[OVS_FLOW_ATTR_KEY]) { + OVS_NLERR("Flow key attribute not present in set flow.\n"); goto error; + } ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 5a91d792505d..1b29ea706912 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -581,10 +581,13 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) { u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]); - if (is_mask) + if (is_mask) { in_port = 0xffffffff; /* Always exact match in_port. */ - else if (in_port >= DP_MAX_PORTS) + } else if (in_port >= DP_MAX_PORTS) { + OVS_NLERR("Port (%d) exceeds maximum allowable (%d).\n", + in_port, DP_MAX_PORTS); return -EINVAL; + } SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask); *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT); @@ -824,8 +827,11 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, attrs &= ~(1 << OVS_KEY_ATTR_ND); } - if (attrs != 0) + if (attrs != 0) { + OVS_NLERR("Unknown key attributes (%llx).\n", + (unsigned long long)attrs); return -EINVAL; + } return 0; } @@ -1250,8 +1256,10 @@ struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; - if (size > MAX_ACTIONS_BUFSIZE) + if (size > MAX_ACTIONS_BUFSIZE) { + OVS_NLERR("Flow action size (%u bytes) exceeds maximum", size); return ERR_PTR(-EINVAL); + } sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL); if (!sfa) @@ -1786,6 +1794,7 @@ static int ovs_nla_copy_actions__(const struct nlattr *attr, break; default: + OVS_NLERR("Unknown tunnel attribute (%d).\n", type); return -EINVAL; } if (!skip_copy) { From 738967b8bf57e582db1a23ce773c36fefd4b7d37 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 8 Sep 2014 00:35:02 -0700 Subject: [PATCH 06/14] openvswitch: refactor do_output() to move NULL check out of fast path skb_clone() NULL check is implemented in do_output(), as past of the common (fast) path. Refactoring so that NULL check is done in the slow path, immediately after skb_clone() is called. Besides optimization, this change also improves code readability by making the skb_clone() NULL check consistent within OVS datapath module. Signed-off-by: Andy Zhou Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 930b1b6e4cef..9fd33c0bf173 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -551,21 +551,14 @@ static int set_sctp(struct sk_buff *skb, return 0; } -static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port) +static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port) { - struct vport *vport; + struct vport *vport = ovs_vport_rcu(dp, out_port); - if (unlikely(!skb)) - return -ENOMEM; - - vport = ovs_vport_rcu(dp, out_port); - if (unlikely(!vport)) { + if (likely(vport)) + ovs_vport_send(vport, skb); + else kfree_skb(skb); - return -ENODEV; - } - - ovs_vport_send(vport, skb); - return 0; } static int output_userspace(struct datapath *dp, struct sk_buff *skb, @@ -768,8 +761,12 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, a = nla_next(a, &rem)) { int err = 0; - if (prev_port != -1) { - do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port); + if (unlikely(prev_port != -1)) { + struct sk_buff *out_skb = skb_clone(skb, GFP_ATOMIC); + + if (out_skb) + do_output(dp, out_skb, prev_port); + prev_port = -1; } From ca7105f278b3f7bd2c6f2b336c928f679054de4d Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Mon, 8 Sep 2014 13:09:37 -0700 Subject: [PATCH 07/14] openvswitch: Refactor ovs_flow_cmd_fill_info(). Split up ovs_flow_cmd_fill_info() to make it easier to cache parts of a dump reply. This will be used to streamline flow_dump in a future patch. Signed-off-by: Joe Stringer Acked-by: Thomas Graf Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 93 +++++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 27 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 04a26ae4a4f6..bbb920bf48da 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -674,58 +674,67 @@ static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) } /* Called with ovs_mutex or RCU read lock. */ -static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, - struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) +static int ovs_flow_cmd_fill_match(const struct sw_flow *flow, + struct sk_buff *skb) { - const int skb_orig_len = skb->len; - struct nlattr *start; - struct ovs_flow_stats stats; - __be16 tcp_flags; - unsigned long used; - struct ovs_header *ovs_header; struct nlattr *nla; int err; - ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); - if (!ovs_header) - return -EMSGSIZE; - - ovs_header->dp_ifindex = dp_ifindex; - /* Fill flow key. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY); if (!nla) - goto nla_put_failure; + return -EMSGSIZE; err = ovs_nla_put_flow(&flow->unmasked_key, &flow->unmasked_key, skb); if (err) - goto error; + return err; + nla_nest_end(skb, nla); + /* Fill flow mask. */ nla = nla_nest_start(skb, OVS_FLOW_ATTR_MASK); if (!nla) - goto nla_put_failure; + return -EMSGSIZE; err = ovs_nla_put_flow(&flow->key, &flow->mask->key, skb); if (err) - goto error; + return err; nla_nest_end(skb, nla); + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow, + struct sk_buff *skb) +{ + struct ovs_flow_stats stats; + __be16 tcp_flags; + unsigned long used; ovs_flow_stats_get(flow, &stats, &used, &tcp_flags); if (used && nla_put_u64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used))) - goto nla_put_failure; + return -EMSGSIZE; if (stats.n_packets && nla_put(skb, OVS_FLOW_ATTR_STATS, sizeof(struct ovs_flow_stats), &stats)) - goto nla_put_failure; + return -EMSGSIZE; if ((u8)ntohs(tcp_flags) && nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags))) - goto nla_put_failure; + return -EMSGSIZE; + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, + struct sk_buff *skb, int skb_orig_len) +{ + struct nlattr *start; + int err; /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if * this is the first flow to be dumped into 'skb'. This is unusual for @@ -749,17 +758,47 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, nla_nest_end(skb, start); else { if (skb_orig_len) - goto error; + return err; nla_nest_cancel(skb, start); } - } else if (skb_orig_len) - goto nla_put_failure; + } else if (skb_orig_len) { + return -EMSGSIZE; + } + + return 0; +} + +/* Called with ovs_mutex or RCU read lock. */ +static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, + struct sk_buff *skb, u32 portid, + u32 seq, u32 flags, u8 cmd) +{ + const int skb_orig_len = skb->len; + struct ovs_header *ovs_header; + int err; + + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, + flags, cmd); + if (!ovs_header) + return -EMSGSIZE; + + ovs_header->dp_ifindex = dp_ifindex; + + err = ovs_flow_cmd_fill_match(flow, skb); + if (err) + goto error; + + err = ovs_flow_cmd_fill_stats(flow, skb); + if (err) + goto error; + + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; return genlmsg_end(skb, ovs_header); -nla_put_failure: - err = -EMSGSIZE; error: genlmsg_cancel(skb, ovs_header); return err; From cc3a5ae6f23336ddc1d136a66c0a6a60276e99a3 Mon Sep 17 00:00:00 2001 From: Andy Zhou Date: Mon, 8 Sep 2014 13:14:22 -0700 Subject: [PATCH 08/14] openvswitch: Refactor get_dp() function into multiple access APIs. Avoid recursive read_rcu_lock() by using the lighter weight get_dp_rcu() API. Add proper locking assertions to get_dp(). Signed-off-by: Andy Zhou Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index bbb920bf48da..cdbc44c18714 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -140,19 +140,30 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *, static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *); -/* Must be called with rcu_read_lock or ovs_mutex. */ -static struct datapath *get_dp(struct net *net, int dp_ifindex) +/* Must be called with rcu_read_lock. */ +static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) { - struct datapath *dp = NULL; - struct net_device *dev; + struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, dp_ifindex); if (dev) { struct vport *vport = ovs_internal_dev_get_vport(dev); if (vport) - dp = vport->dp; + return vport->dp; } + + return NULL; +} + +/* The caller must hold either ovs_mutex or rcu_read_lock to keep the + * returned dp pointer valid. + */ +static inline struct datapath *get_dp(struct net *net, int dp_ifindex) +{ + struct datapath *dp; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + rcu_read_lock(); + dp = get_dp_rcu(net, dp_ifindex); rcu_read_unlock(); return dp; @@ -573,7 +584,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) packet->mark = flow->key.phy.skb_mark; rcu_read_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); err = -ENODEV; if (!dp) goto err_unlock; @@ -1227,7 +1238,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) struct datapath *dp; rcu_read_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { rcu_read_unlock(); return -ENODEV; @@ -1989,7 +2000,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) int i, j = 0; rcu_read_lock(); - dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); if (!dp) { rcu_read_unlock(); return -ENODEV; From e1f9c356d2946ec717d035ccac5532b1c5709c52 Mon Sep 17 00:00:00 2001 From: Chunhe Li Date: Mon, 8 Sep 2014 13:17:21 -0700 Subject: [PATCH 09/14] openvswitch: Drop packets when interdev is not up If the internal device is not up, it should drop received packets. Sometimes it receive the broadcast or multicast packets, and the ip protocol stack will casue more cpu usage wasted. Signed-off-by: Chunhe Li Signed-off-by: Pravin B Shelar --- net/openvswitch/vport-internal_dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 10dc07e1678b..6a55f7105505 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -224,6 +224,11 @@ static int internal_dev_recv(struct vport *vport, struct sk_buff *skb) struct net_device *netdev = netdev_vport_priv(vport)->dev; int len; + if (unlikely(!(netdev->flags & IFF_UP))) { + kfree_skb(skb); + return 0; + } + len = skb->len; skb_dst_drop(skb); From 1a4e96a0e989200f7180264e27b22e9a85f3fcc8 Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Tue, 30 Sep 2014 10:52:32 -0700 Subject: [PATCH 10/14] openvswitch: Fix the type of struct ovs_key_nd nd_target field. Should be the same as other IPv6 address fields. Current master produces sparse warnings without this change. Signed-off-by: Jarno Rajahalme Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 631056b66f80..26c36c4cf7e2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -400,9 +400,9 @@ struct ovs_key_arp { }; struct ovs_key_nd { - __u32 nd_target[4]; - __u8 nd_sll[ETH_ALEN]; - __u8 nd_tll[ETH_ALEN]; + __be32 nd_target[4]; + __u8 nd_sll[ETH_ALEN]; + __u8 nd_tll[ETH_ALEN]; }; /** From d98612b8c1150cb73ecd45e94c62de053f89441c Mon Sep 17 00:00:00 2001 From: Lorand Jakab Date: Mon, 6 Oct 2014 05:45:32 -0700 Subject: [PATCH 11/14] openvswitch: Remove flow member from struct ovs_skb_cb The 'flow' memeber was chosen for removal because it's only used in ovs_execute_actions() we can pass it as argument to this function. Signed-off-by: Lorand Jakab Signed-off-by: Pravin B Shelar --- net/openvswitch/actions.c | 5 +---- net/openvswitch/datapath.c | 12 +++++++----- net/openvswitch/datapath.h | 4 +--- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 9fd33c0bf173..f7e589159e4a 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -865,14 +865,11 @@ static void process_deferred_actions(struct datapath *dp) /* Execute a list of actions against 'skb'. */ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *key) + struct sw_flow_actions *acts, struct sw_flow_key *key) { int level = this_cpu_read(exec_actions_level); - struct sw_flow_actions *acts; int err; - acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts); - this_cpu_inc(exec_actions_level); OVS_CB(skb)->egress_tun_info = NULL; err = do_execute_actions(dp, skb, key, diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index cdbc44c18714..4fd8a45e5d56 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -257,6 +257,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; + struct sw_flow_actions *sf_acts; struct dp_stats_percpu *stats; u64 *stats_counter; u32 n_mask_hit; @@ -282,10 +283,10 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) goto out; } - OVS_CB(skb)->flow = flow; + ovs_flow_stats_update(flow, key->tp.flags, skb); + sf_acts = rcu_dereference(flow->sf_acts); + ovs_execute_actions(dp, skb, sf_acts, key); - ovs_flow_stats_update(OVS_CB(skb)->flow, key->tp.flags, skb); - ovs_execute_actions(dp, skb, key); stats_counter = &stats->n_hit; out: @@ -524,6 +525,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) struct sw_flow_actions *acts; struct sk_buff *packet; struct sw_flow *flow; + struct sw_flow_actions *sf_acts; struct datapath *dp; struct ethhdr *eth; struct vport *input_vport; @@ -579,7 +581,6 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) rcu_assign_pointer(flow->sf_acts, acts); OVS_CB(packet)->egress_tun_info = NULL; - OVS_CB(packet)->flow = flow; packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; @@ -597,9 +598,10 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) goto err_unlock; OVS_CB(packet)->input_vport = input_vport; + sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); - err = ovs_execute_actions(dp, packet, &flow->key); + err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); local_bh_enable(); rcu_read_unlock(); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 974135439c5c..1c56a80d6677 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -94,14 +94,12 @@ struct datapath { /** * struct ovs_skb_cb - OVS data in skb CB - * @flow: The flow associated with this packet. May be %NULL if no flow. * @egress_tun_key: Tunnel information about this packet on egress path. * NULL if the packet is not being tunneled. * @input_vport: The original vport packet came in on. This value is cached * when a packet is received by OVS. */ struct ovs_skb_cb { - struct sw_flow *flow; struct ovs_tunnel_info *egress_tun_info; struct vport *input_vport; }; @@ -194,7 +192,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, - struct sw_flow_key *); + struct sw_flow_actions *acts, struct sw_flow_key *); void ovs_dp_notify_wq(struct work_struct *work); From 41af73e9c17d5fb549fced2be97faeb4b8606fb8 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Sat, 18 Oct 2014 16:14:14 -0700 Subject: [PATCH 12/14] openvswitch: Move key_attr_size() to flow_netlink.h. flow-netlink has netlink related code. Signed-off-by: Joe Stringer Signed-off-by: Pravin B Shelar --- net/openvswitch/datapath.c | 31 +++---------------------------- net/openvswitch/flow_netlink.c | 32 ++++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.h | 2 ++ 3 files changed, 37 insertions(+), 28 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4fd8a45e5d56..51017805b40b 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -375,37 +375,12 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, return err; } -static size_t key_attr_size(void) -{ - return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ - + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ - + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ - + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ - + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ - + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ - + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ - + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ - + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ - + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ - + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ - + nla_total_size(4) /* OVS_KEY_ATTR_8021Q */ - + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ - + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ - + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ - + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ - + nla_total_size(28); /* OVS_KEY_ATTR_ND */ -} - static size_t upcall_msg_size(const struct nlattr *userdata, unsigned int hdrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ - + nla_total_size(key_attr_size()); /* OVS_PACKET_ATTR_KEY */ + + nla_total_size(ovs_key_attr_size()); /* OVS_PACKET_ATTR_KEY */ /* OVS_PACKET_ATTR_USERDATA */ if (userdata) @@ -678,8 +653,8 @@ static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats, static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) { return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ + + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1b29ea706912..1050b2882b9e 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -252,6 +252,38 @@ static bool match_validate(const struct sw_flow_match *match, return true; } +size_t ovs_key_attr_size(void) +{ + /* Whenever adding new OVS_KEY_ FIELDS, we should consider + * updating this function. + */ + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 22); + + return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ + + nla_total_size(8) /* OVS_TUNNEL_KEY_ATTR_ID */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_SRC */ + + nla_total_size(4) /* OVS_TUNNEL_KEY_ATTR_IPV4_DST */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TOS */ + + nla_total_size(1) /* OVS_TUNNEL_KEY_ATTR_TTL */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + + nla_total_size(4) /* OVS_KEY_ATTR_IN_PORT */ + + nla_total_size(4) /* OVS_KEY_ATTR_SKB_MARK */ + + nla_total_size(4) /* OVS_KEY_ATTR_DP_HASH */ + + nla_total_size(4) /* OVS_KEY_ATTR_RECIRC_ID */ + + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ + + nla_total_size(0) /* OVS_KEY_ATTR_ENCAP */ + + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + + nla_total_size(40) /* OVS_KEY_ATTR_IPV6 */ + + nla_total_size(2) /* OVS_KEY_ATTR_ICMPV6 */ + + nla_total_size(28); /* OVS_KEY_ATTR_ND */ +} + /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { [OVS_KEY_ATTR_ENCAP] = -1, diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 6355b1d01329..4f0370646bc6 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -37,6 +37,8 @@ #include "flow.h" +size_t ovs_key_attr_size(void); + void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask); From 2fdb957d634a906ae8939bff23d45968307acbf7 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 19 Oct 2014 11:19:51 -0700 Subject: [PATCH 13/14] openvswitch: Refactor action alloc and copy api. There are two separate API to allocate and copy actions list. Anytime OVS needs to copy action list, it needs to call both functions. Following patch moves action allocation to copy function to avoid code duplication. Signed-off-by: Pravin B Shelar Acked-by: Jarno Rajahalme --- net/openvswitch/datapath.c | 25 ++++--------------------- net/openvswitch/flow_netlink.c | 24 +++++++++++++++++------- net/openvswitch/flow_netlink.h | 1 - 3 files changed, 21 insertions(+), 29 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 51017805b40b..014485ec4b0d 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -543,18 +543,12 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) if (err) goto err_flow_free; - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_PACKET_ATTR_ACTIONS])); - err = PTR_ERR(acts); - if (IS_ERR(acts)) - goto err_flow_free; - err = ovs_nla_copy_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, &acts); if (err) goto err_flow_free; rcu_assign_pointer(flow->sf_acts, acts); - OVS_CB(packet)->egress_tun_info = NULL; packet->priority = flow->key.phy.priority; packet->mark = flow->key.phy.skb_mark; @@ -872,16 +866,11 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); /* Validate actions. */ - acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS])); - error = PTR_ERR(acts); - if (IS_ERR(acts)) - goto err_kfree_flow; - error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts); if (error) { OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - goto err_kfree_acts; + goto err_kfree_flow; } reply = ovs_flow_cmd_alloc_info(acts, info, false); @@ -972,6 +961,7 @@ error: return error; } +/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, const struct sw_flow_key *key, const struct sw_flow_mask *mask) @@ -980,15 +970,10 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a, struct sw_flow_key masked_key; int error; - acts = ovs_nla_alloc_flow_actions(nla_len(a)); - if (IS_ERR(acts)) - return acts; - ovs_flow_mask_key(&masked_key, key, mask); error = ovs_nla_copy_actions(a, &masked_key, &acts); if (error) { - OVS_NLERR("Flow actions may not be safe on all matching packets.\n"); - kfree(acts); + OVS_NLERR("Actions may not be safe on all matching packets.\n"); return ERR_PTR(error); } @@ -1028,10 +1013,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) error = PTR_ERR(acts); goto error; } - } - /* Can allocate before locking if have acts. */ - if (acts) { + /* Can allocate before locking if have acts. */ reply = ovs_flow_cmd_alloc_info(acts, info, false); if (IS_ERR(reply)) { error = PTR_ERR(reply); diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 1050b2882b9e..482a0cbb22e8 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1284,7 +1284,7 @@ nla_put_failure: #define MAX_ACTIONS_BUFSIZE (32 * 1024) -struct sw_flow_actions *ovs_nla_alloc_flow_actions(int size) +static struct sw_flow_actions *nla_alloc_flow_actions(int size) { struct sw_flow_actions *sfa; @@ -1329,7 +1329,7 @@ static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa, new_acts_size = MAX_ACTIONS_BUFSIZE; } - acts = ovs_nla_alloc_flow_actions(new_acts_size); + acts = nla_alloc_flow_actions(new_acts_size); if (IS_ERR(acts)) return (void *)acts; @@ -1396,7 +1396,7 @@ static inline void add_nested_action_end(struct sw_flow_actions *sfa, a->nla_len = sfa->actions_len - st_offset; } -static int ovs_nla_copy_actions__(const struct nlattr *attr, +static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci); @@ -1441,7 +1441,7 @@ static int validate_and_copy_sample(const struct nlattr *attr, if (st_acts < 0) return st_acts; - err = ovs_nla_copy_actions__(actions, key, depth + 1, sfa, + err = __ovs_nla_copy_actions(actions, key, depth + 1, sfa, eth_type, vlan_tci); if (err) return err; @@ -1684,7 +1684,7 @@ static int copy_action(const struct nlattr *from, return 0; } -static int ovs_nla_copy_actions__(const struct nlattr *attr, +static int __ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, int depth, struct sw_flow_actions **sfa, __be16 eth_type, __be16 vlan_tci) @@ -1846,8 +1846,18 @@ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa) { - return ovs_nla_copy_actions__(attr, key, 0, sfa, key->eth.type, - key->eth.tci); + int err; + + *sfa = nla_alloc_flow_actions(nla_len(attr)); + if (IS_ERR(*sfa)) + return PTR_ERR(*sfa); + + err = __ovs_nla_copy_actions(attr, key, 0, sfa, key->eth.type, + key->eth.tci); + if (err) + kfree(*sfa); + + return err; } static int sample_action_to_attr(const struct nlattr *attr, struct sk_buff *skb) diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 4f0370646bc6..eb0b177300ad 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -56,7 +56,6 @@ int ovs_nla_copy_actions(const struct nlattr *attr, int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb); -struct sw_flow_actions *ovs_nla_alloc_flow_actions(int actions_len); void ovs_nla_free_flow_actions(struct sw_flow_actions *); #endif /* flow_netlink.h */ From a85311bf1f9f8185682990cafdd4e0572c0ed373 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Sun, 19 Oct 2014 12:03:40 -0700 Subject: [PATCH 14/14] openvswitch: Avoid NULL mask check while building mask OVS does mask validation even if it does not need to convert netlink mask attributes to mask structure. ovs_nla_get_match() caller can pass NULL mask structure pointer if the caller does not need mask. Therefore NULL check is required in SW_FLOW_KEY* macros. Following patch does not convert mask netlink attributes if mask pointer is NULL, so we do not need these checks in SW_FLOW_KEY* macro. Signed-off-by: Pravin B Shelar Acked-by: Daniele Di Proietto Acked-by: Andy Zhou --- net/openvswitch/flow_netlink.c | 107 ++++++++++++++++----------------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 482a0cbb22e8..ed3109761827 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -50,21 +50,18 @@ #include "flow_netlink.h" -static void update_range__(struct sw_flow_match *match, - size_t offset, size_t size, bool is_mask) +static void update_range(struct sw_flow_match *match, + size_t offset, size_t size, bool is_mask) { - struct sw_flow_key_range *range = NULL; + struct sw_flow_key_range *range; size_t start = rounddown(offset, sizeof(long)); size_t end = roundup(offset + size, sizeof(long)); if (!is_mask) range = &match->range; - else if (match->mask) + else range = &match->mask->range; - if (!range) - return; - if (range->start == range->end) { range->start = start; range->end = end; @@ -80,22 +77,20 @@ static void update_range__(struct sw_flow_match *match, #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \ do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - sizeof((match)->key->field), is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - (match)->mask->key.field = value; \ - } else { \ + update_range(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) \ + (match)->mask->key.field = value; \ + else \ (match)->key->field = value; \ - } \ } while (0) #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask) \ do { \ - update_range__(match, offset, len, is_mask); \ + update_range(match, offset, len, is_mask); \ if (is_mask) \ memcpy((u8 *)&(match)->mask->key + offset, value_p, \ - len); \ + len); \ else \ memcpy((u8 *)(match)->key + offset, value_p, len); \ } while (0) @@ -104,18 +99,16 @@ static void update_range__(struct sw_flow_match *match, SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \ value_p, len, is_mask) -#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ - do { \ - update_range__(match, offsetof(struct sw_flow_key, field), \ - sizeof((match)->key->field), is_mask); \ - if (is_mask) { \ - if ((match)->mask) \ - memset((u8 *)&(match)->mask->key.field, value,\ - sizeof((match)->mask->key.field)); \ - } else { \ +#define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask) \ + do { \ + update_range(match, offsetof(struct sw_flow_key, field), \ + sizeof((match)->key->field), is_mask); \ + if (is_mask) \ + memset((u8 *)&(match)->mask->key.field, value, \ + sizeof((match)->mask->key.field)); \ + else \ memset((u8 *)&(match)->key->field, value, \ sizeof((match)->key->field)); \ - } \ } while (0) static bool match_validate(const struct sw_flow_match *match, @@ -677,8 +670,7 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs, SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); attrs &= ~(1 << OVS_KEY_ATTR_VLAN); - } else if (!is_mask) - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { __be16 eth_type; @@ -903,8 +895,8 @@ static void mask_set_nlattr(struct nlattr *attr, u8 val) * attribute specifies the mask field of the wildcarded flow. */ int ovs_nla_get_match(struct sw_flow_match *match, - const struct nlattr *key, - const struct nlattr *mask) + const struct nlattr *nla_key, + const struct nlattr *nla_mask) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; @@ -914,7 +906,7 @@ int ovs_nla_get_match(struct sw_flow_match *match, bool encap_valid = false; int err; - err = parse_flow_nlattrs(key, a, &key_attrs); + err = parse_flow_nlattrs(nla_key, a, &key_attrs); if (err) return err; @@ -955,36 +947,43 @@ int ovs_nla_get_match(struct sw_flow_match *match, if (err) return err; - if (match->mask && !mask) { - /* Create an exact match mask. We need to set to 0xff all the - * 'match->mask' fields that have been touched in 'match->key'. - * We cannot simply memset 'match->mask', because padding bytes - * and fields not specified in 'match->key' should be left to 0. - * Instead, we use a stream of netlink attributes, copied from - * 'key' and set to 0xff: ovs_key_from_nlattrs() will take care - * of filling 'match->mask' appropriately. - */ - newmask = kmemdup(key, nla_total_size(nla_len(key)), - GFP_KERNEL); - if (!newmask) - return -ENOMEM; + if (match->mask) { + if (!nla_mask) { + /* Create an exact match mask. We need to set to 0xff + * all the 'match->mask' fields that have been touched + * in 'match->key'. We cannot simply memset + * 'match->mask', because padding bytes and fields not + * specified in 'match->key' should be left to 0. + * Instead, we use a stream of netlink attributes, + * copied from 'key' and set to 0xff. + * ovs_key_from_nlattrs() will take care of filling + * 'match->mask' appropriately. + */ + newmask = kmemdup(nla_key, + nla_total_size(nla_len(nla_key)), + GFP_KERNEL); + if (!newmask) + return -ENOMEM; - mask_set_nlattr(newmask, 0xff); + mask_set_nlattr(newmask, 0xff); - /* The userspace does not send tunnel attributes that are 0, - * but we should not wildcard them nonetheless. - */ - if (match->key->tun_key.ipv4_dst) - SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, 0xff, true); + /* The userspace does not send tunnel attributes that + * are 0, but we should not wildcard them nonetheless. + */ + if (match->key->tun_key.ipv4_dst) + SW_FLOW_KEY_MEMSET_FIELD(match, tun_key, + 0xff, true); - mask = newmask; - } + nla_mask = newmask; + } - if (mask) { - err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); + err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs); if (err) goto free_newmask; + /* Always match on tci. */ + SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { __be16 eth_type = 0; __be16 tci = 0;