From ece0cc17939287730f7b7337b51338b9cf0331cb Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 17 Nov 2016 10:51:30 +0000 Subject: [PATCH 1/5] sfc: Update MCDI protocol definitions Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mcdi_pcol.h | 483 +++++++++++++++++++++++++-- 1 file changed, 462 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index ccceafc15896..35cc3d4fa5f6 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -276,6 +276,9 @@ /* The clock whose frequency you've attempted to set set * doesn't exist on this NIC */ #define MC_CMD_ERR_NO_CLOCK 0x1015 +/* Returned by MC_CMD_TESTASSERT if the action that should + * have caused an assertion failed to do so. */ +#define MC_CMD_ERR_UNREACHABLE 0x1016 #define MC_CMD_ERR_CODE_OFST 0 @@ -933,6 +936,8 @@ #define MC_CMD_COPYCODE_IN_BOOT_MAGIC_SKIP_BOOT_ICORE_SYNC_WIDTH 1 #define MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_LBN 5 #define MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_WIDTH 1 +#define MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_LBN 6 +#define MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_WIDTH 1 /* Destination address */ #define MC_CMD_COPYCODE_IN_DEST_ADDR_OFST 4 #define MC_CMD_COPYCODE_IN_NUMWORDS_OFST 8 @@ -1659,6 +1664,8 @@ #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST 8 #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_LBN 0 #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_WIDTH 1 +#define MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_LBN 1 +#define MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_WIDTH 1 #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED0_OFST 12 #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED1_OFST 16 #define MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED2_OFST 20 @@ -2211,6 +2218,10 @@ #define MC_CMD_FW_HIGH_TX_RATE 0x3 /* enum: Reserved value */ #define MC_CMD_FW_PACKED_STREAM_HASH_MODE_1 0x4 +/* enum: Prefer to use firmware with additional "rules engine" filtering + * support + */ +#define MC_CMD_FW_RULES_ENGINE 0x5 /* enum: Only this option is allowed for non-admin functions */ #define MC_CMD_FW_DONT_CARE 0xffffffff @@ -3654,12 +3665,27 @@ #define MC_CMD_0x38_PRIVILEGE_CTG SRIOV_CTG_ADMIN -/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest */ +/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest: Legacy NVRAM_UPDATE_START request. + * Use NVRAM_UPDATE_START_V2_IN in new code + */ #define MC_CMD_NVRAM_UPDATE_START_IN_LEN 4 #define MC_CMD_NVRAM_UPDATE_START_IN_TYPE_OFST 0 /* Enum values, see field(s): */ /* MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */ +/* MC_CMD_NVRAM_UPDATE_START_V2_IN msgrequest: Extended NVRAM_UPDATE_START + * request with additional flags indicating version of command in use. See + * MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended functionality. Use + * paired up with NVRAM_UPDATE_FINISH_V2_IN. + */ +#define MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN 8 +#define MC_CMD_NVRAM_UPDATE_START_V2_IN_TYPE_OFST 0 +/* Enum values, see field(s): */ +/* MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */ +#define MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAGS_OFST 4 +#define MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0 +#define MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1 + /* MC_CMD_NVRAM_UPDATE_START_OUT msgresponse */ #define MC_CMD_NVRAM_UPDATE_START_OUT_LEN 0 @@ -3784,16 +3810,81 @@ #define MC_CMD_0x3c_PRIVILEGE_CTG SRIOV_CTG_ADMIN -/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest */ +/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest: Legacy NVRAM_UPDATE_FINISH + * request. Use NVRAM_UPDATE_FINISH_V2_IN in new code + */ #define MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN 8 #define MC_CMD_NVRAM_UPDATE_FINISH_IN_TYPE_OFST 0 /* Enum values, see field(s): */ /* MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */ #define MC_CMD_NVRAM_UPDATE_FINISH_IN_REBOOT_OFST 4 -/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse */ +/* MC_CMD_NVRAM_UPDATE_FINISH_V2_IN msgrequest: Extended NVRAM_UPDATE_FINISH + * request with additional flags indicating version of NVRAM_UPDATE commands in + * use. See MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended + * functionality. Use paired up with NVRAM_UPDATE_START_V2_IN. + */ +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN 12 +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_TYPE_OFST 0 +/* Enum values, see field(s): */ +/* MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */ +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_REBOOT_OFST 4 +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAGS_OFST 8 +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0 +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1 + +/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse: Legacy NVRAM_UPDATE_FINISH + * response. Use NVRAM_UPDATE_FINISH_V2_OUT in new code + */ #define MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN 0 +/* MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT msgresponse: + * + * Extended NVRAM_UPDATE_FINISH response that communicates the result of secure + * firmware validation where applicable back to the host. + * + * Medford only: For signed firmware images, such as those for medford, the MC + * firmware verifies the signature before marking the firmware image as valid. + * This process takes a few seconds to complete. So is likely to take more than + * the MCDI timeout. Hence signature verification is initiated when + * MC_CMD_NVRAM_UPDATE_FINISH_V2_IN is received by the firmware, however, the + * MCDI command returns immediately with error code EAGAIN. Subsequent + * NVRAM_UPDATE_FINISH_V2_IN requests also return EAGAIN if the verification is + * in progress. Once the verification has completed, this response payload + * includes the results of the signature verification. Note that the nvram lock + * in firmware is only released after the verification has completed and the + * host has read back the result code from firmware. + */ +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN 4 +/* Result of nvram update completion processing */ +#define MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE_OFST 0 +/* enum: Verify succeeded without any errors. */ +#define MC_CMD_NVRAM_VERIFY_RC_SUCCESS 0x1 +/* enum: CMS format verification failed due to an internal error. */ +#define MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED 0x2 +/* enum: Invalid CMS format in image metadata. */ +#define MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT 0x3 +/* enum: Message digest verification failed due to an internal error. */ +#define MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED 0x4 +/* enum: Error in message digest calculated over the reflash-header, payload + * and reflash-trailer. + */ +#define MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST 0x5 +/* enum: Signature verification failed due to an internal error. */ +#define MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED 0x6 +/* enum: There are no valid signatures in the image. */ +#define MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES 0x7 +/* enum: Trusted approvers verification failed due to an internal error. */ +#define MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED 0x8 +/* enum: The Trusted approver's list is empty. */ +#define MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS 0x9 +/* enum: Signature chain verification failed due to an internal error. */ +#define MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED 0xa +/* enum: The signers of the signatures in the image are not listed in the + * Trusted approver's list. + */ +#define MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH 0xb + /***********************************/ /* MC_CMD_REBOOT @@ -4356,6 +4447,28 @@ /* MC_CMD_TESTASSERT_OUT msgresponse */ #define MC_CMD_TESTASSERT_OUT_LEN 0 +/* MC_CMD_TESTASSERT_V2_IN msgrequest */ +#define MC_CMD_TESTASSERT_V2_IN_LEN 4 +/* How to provoke the assertion */ +#define MC_CMD_TESTASSERT_V2_IN_TYPE_OFST 0 +/* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless + * you're testing firmware, this is what you want. + */ +#define MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES 0x0 +/* enum: Assert using assert(0); */ +#define MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE 0x1 +/* enum: Deliberately trigger a watchdog */ +#define MC_CMD_TESTASSERT_V2_IN_WATCHDOG 0x2 +/* enum: Deliberately trigger a trap by loading from an invalid address */ +#define MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP 0x3 +/* enum: Deliberately trigger a trap by storing to an invalid address */ +#define MC_CMD_TESTASSERT_V2_IN_STORE_TRAP 0x4 +/* enum: Jump to an invalid address */ +#define MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP 0x5 + +/* MC_CMD_TESTASSERT_V2_OUT msgresponse */ +#define MC_CMD_TESTASSERT_V2_OUT_LEN 0 + /***********************************/ /* MC_CMD_WORKAROUND @@ -4421,6 +4534,7 @@ * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1 * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80. + * Anything else: currently undefined. Locks required: None. Return code: 0. */ #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b @@ -5362,12 +5476,14 @@ #define NVRAM_PARTITION_TYPE_EXPANSION_UEFI 0xd00 /* enum: Spare partition 0 */ #define NVRAM_PARTITION_TYPE_SPARE_0 0x1000 -/* enum: Spare partition 1 */ -#define NVRAM_PARTITION_TYPE_SPARE_1 0x1100 +/* enum: Used for XIP code of shmbooted images */ +#define NVRAM_PARTITION_TYPE_XIP_SCRATCH 0x1100 /* enum: Spare partition 2 */ #define NVRAM_PARTITION_TYPE_SPARE_2 0x1200 -/* enum: Spare partition 3 */ -#define NVRAM_PARTITION_TYPE_SPARE_3 0x1300 +/* enum: Manufacturing partition. Used during manufacture to pass information + * between XJTAG and Manftest. + */ +#define NVRAM_PARTITION_TYPE_MANUFACTURING 0x1300 /* enum: Spare partition 4 */ #define NVRAM_PARTITION_TYPE_SPARE_4 0x1400 /* enum: Spare partition 5 */ @@ -5402,6 +5518,14 @@ #define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM 0x40 /* enum: Network Access Control */ #define LICENSED_APP_ID_NETWORK_ACCESS_CONTROL 0x80 +/* enum: TCP Direct */ +#define LICENSED_APP_ID_TCP_DIRECT 0x100 +/* enum: Low Latency */ +#define LICENSED_APP_ID_LOW_LATENCY 0x200 +/* enum: SolarCapture Tap */ +#define LICENSED_APP_ID_SOLARCAPTURE_TAP 0x400 +/* enum: Capture SolarSystem 40G */ +#define LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800 #define LICENSED_APP_ID_ID_LBN 0 #define LICENSED_APP_ID_ID_WIDTH 32 @@ -5458,6 +5582,14 @@ #define LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_WIDTH 1 #define LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_LBN 7 #define LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_WIDTH 1 +#define LICENSED_V3_APPS_TCP_DIRECT_LBN 8 +#define LICENSED_V3_APPS_TCP_DIRECT_WIDTH 1 +#define LICENSED_V3_APPS_LOW_LATENCY_LBN 9 +#define LICENSED_V3_APPS_LOW_LATENCY_WIDTH 1 +#define LICENSED_V3_APPS_SOLARCAPTURE_TAP_LBN 10 +#define LICENSED_V3_APPS_SOLARCAPTURE_TAP_WIDTH 1 +#define LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_LBN 11 +#define LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_WIDTH 1 #define LICENSED_V3_APPS_MASK_LBN 0 #define LICENSED_V3_APPS_MASK_WIDTH 64 @@ -5988,6 +6120,8 @@ #define MC_CMD_INIT_TXQ_EXT_IN_FLAG_INNER_TCP_CSUM_EN_WIDTH 1 #define MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_LBN 12 #define MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_WIDTH 1 +#define MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_LBN 13 +#define MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_WIDTH 1 /* Owner ID to use if in buffer mode (zero if physical) */ #define MC_CMD_INIT_TXQ_EXT_IN_OWNER_ID_OFST 20 /* The port ID associated with the v-adaptor which should contain this DMAQ. */ @@ -7728,6 +7862,8 @@ * tests (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe /* enum: RX PD firmware parsing but not filtering network overlay tunnel @@ -7763,6 +7899,8 @@ * tests (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe /* Hardware capabilities of NIC */ @@ -7913,6 +8051,8 @@ * tests (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe /* enum: RX PD firmware parsing but not filtering network overlay tunnel @@ -7948,6 +8088,8 @@ * tests (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ #define MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe /* Hardware capabilities of NIC */ @@ -7980,6 +8122,8 @@ #define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11 #define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present * on older firmware (check the length). */ @@ -8247,6 +8391,8 @@ #define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11 #define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present * on older firmware (check the length). */ @@ -8304,7 +8450,7 @@ #define MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2 /* On chips later than Medford the amount of address space assigned to each VI * is configurable. This is a global setting that the driver must query to - * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available + * discover the VI to address mapping. Cut-through PIO (CTPIO) is not available * with 8k VI windows. */ #define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72 @@ -10283,6 +10429,8 @@ * more data is returned. */ #define MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT 0x6 +/* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */ +#define MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE 0x7 /* Align the arguments to 32 bits */ #define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1 #define MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3 @@ -10468,6 +10616,12 @@ #define MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MINNUM 0 #define MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MAXNUM 126 +/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN msgrequest */ +#define MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN_LEN 0 + +/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT msgrequest */ +#define MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT_LEN 0 + /***********************************/ /* MC_CMD_LICENSING @@ -10783,29 +10937,45 @@ #define MC_CMD_0xd4_PRIVILEGE_CTG SRIOV_CTG_GENERAL /* MC_CMD_LICENSED_V3_VALIDATE_APP_IN msgrequest */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 72 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 56 +/* challenge for validation (384 bits) */ +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 0 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 48 /* application ID expressed as a single bit mask */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 0 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 48 #define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LEN 8 -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 0 -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 4 -/* challenge for validation */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 8 -#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 64 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 48 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 52 /* MC_CMD_LICENSED_V3_VALIDATE_APP_OUT msgresponse */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 72 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 116 +/* validation response to challenge in the form of ECDSA signature consisting + * of two 384-bit integers, r and s, in big-endian order. The signature signs a + * SHA-384 digest of a message constructed from the concatenation of the input + * message and the remaining fields of this output message, e.g. challenge[48 + * bytes] ... expiry_time[4 bytes] ... + */ +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 0 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 96 /* application expiry time */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 0 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 96 /* application expiry units */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 4 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100 /* enum: expiry units are accounting units */ #define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC 0x0 /* enum: expiry units are calendar days */ #define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS 0x1 -/* validation response to challenge */ -#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 8 -#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 64 +/* base MAC address of the NIC stored in NVRAM (note that this is a constant + * value for a given NIC regardless which function is calling, effectively this + * is PF0 base MAC address) + */ +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_OFST 104 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_LEN 6 +/* MAC address of v-adaptor associated with the client. If no such v-adapator + * exists, then the field is filled with 0xFF. + */ +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_OFST 110 +#define MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_LEN 6 /***********************************/ @@ -10834,6 +11004,70 @@ #define MC_CMD_LICENSED_V3_MASK_FEATURES_OUT_LEN 0 +/***********************************/ +/* MC_CMD_LICENSING_V3_TEMPORARY + * Perform operations to support installation of a single temporary license in + * the adapter, in addition to those found in the licensing partition. See + * SF-116124-SW for an overview of how this could be used. The license is + * stored in MC persistent data and so will survive a MC reboot, but will be + * erased when the adapter is power cycled + */ +#define MC_CMD_LICENSING_V3_TEMPORARY 0xd6 + +#define MC_CMD_0xd6_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_LICENSING_V3_TEMPORARY_IN msgrequest */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_LEN 4 +/* operation code */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_OP_OFST 0 +/* enum: install a new license, overwriting any existing temporary license. + * This is an asynchronous operation owing to the time taken to validate an + * ECDSA license + */ +#define MC_CMD_LICENSING_V3_TEMPORARY_SET 0x0 +/* enum: clear the license immediately rather than waiting for the next power + * cycle + */ +#define MC_CMD_LICENSING_V3_TEMPORARY_CLEAR 0x1 +/* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET + * operation + */ +#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS 0x2 + +/* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164 +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_OP_OFST 0 +/* ECDSA license and signature */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_OFST 4 +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_LEN 160 + +/* MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR msgrequest */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_LEN 4 +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_OP_OFST 0 + +/* MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS msgrequest */ +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_LEN 4 +#define MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_OP_OFST 0 + +/* MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS msgresponse */ +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LEN 12 +/* status code */ +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0 +/* enum: finished validating and installing license */ +#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK 0x0 +/* enum: license validation and installation in progress */ +#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS 0x1 +/* enum: licensing error. More specific error messages are not provided to + * avoid exposing details of the licensing system to the client + */ +#define MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR 0x2 +/* bitmask of licensed features */ +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4 +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8 +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LO_OFST 4 +#define MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_HI_OFST 8 + + /***********************************/ /* MC_CMD_SET_PORT_SNIFF_CONFIG * Configure RX port sniffing for the physical port associated with the calling @@ -11705,6 +11939,66 @@ /* MC_CMD_RX_BALANCING_OUT msgresponse */ #define MC_CMD_RX_BALANCING_OUT_LEN 0 + +/***********************************/ +/* MC_CMD_NVRAM_PRIVATE_APPEND + * Append a single TLV to the MC_USAGE_TLV partition. Returns MC_CMD_ERR_EEXIST + * if the tag is already present. + */ +#define MC_CMD_NVRAM_PRIVATE_APPEND 0x11c + +#define MC_CMD_0x11c_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_NVRAM_PRIVATE_APPEND_IN msgrequest */ +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMIN 9 +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMAX 252 +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_LEN(num) (8+1*(num)) +/* The tag to be appended */ +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_TAG_OFST 0 +/* The length of the data */ +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENGTH_OFST 4 +/* The data to be contained in the TLV structure */ +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_OFST 8 +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_LEN 1 +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MINNUM 1 +#define MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MAXNUM 244 + +/* MC_CMD_NVRAM_PRIVATE_APPEND_OUT msgresponse */ +#define MC_CMD_NVRAM_PRIVATE_APPEND_OUT_LEN 0 + + +/***********************************/ +/* MC_CMD_XPM_VERIFY_CONTENTS + * Verify that the contents of the XPM memory is correct (Medford only). This + * is used during manufacture to check that the XPM memory has been programmed + * correctly at ATE. + */ +#define MC_CMD_XPM_VERIFY_CONTENTS 0x11b + +#define MC_CMD_0x11b_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_XPM_VERIFY_CONTENTS_IN msgrequest */ +#define MC_CMD_XPM_VERIFY_CONTENTS_IN_LEN 4 +/* Data type to be checked */ +#define MC_CMD_XPM_VERIFY_CONTENTS_IN_DATA_TYPE_OFST 0 + +/* MC_CMD_XPM_VERIFY_CONTENTS_OUT msgresponse */ +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMIN 12 +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMAX 252 +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_LEN(num) (12+1*(num)) +/* Number of sectors found (test builds only) */ +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_SECTORS_OFST 0 +/* Number of bytes found (test builds only) */ +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_BYTES_OFST 4 +/* Length of signature */ +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIG_LENGTH_OFST 8 +/* Signature */ +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_OFST 12 +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_LEN 1 +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MINNUM 0 +#define MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MAXNUM 240 + + /***********************************/ /* MC_CMD_SET_EVQ_TMR * Update the timer load, timer reload and timer mode values for a given EVQ. @@ -11798,4 +12092,151 @@ */ #define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32 + +/***********************************/ +/* MC_CMD_ALLOCATE_TX_VFIFO_CP + * When we use the TX_vFIFO_ULL mode, we can allocate common pools using the + * non used switch buffers. + */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d + +#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20 +/* Desired instance. Must be set to a specific instance, which is a function + * local queue index. + */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INSTANCE_OFST 0 +/* Will the common pool be used as TX_vFIFO_ULL (1) */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4 +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED 0x1 /* enum */ +/* enum: Using this interface without TX_vFIFO_ULL is not supported for now */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED 0x0 +/* Number of buffers to reserve for the common pool */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8 +/* TX datapath to which the Common Pool is connected to. */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12 +/* enum: Extracts information from function */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 +/* Network port or RX Engine to which the common pool connects. */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16 +/* enum: Extracts information from function */ +/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE -0x1 */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0 0x0 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1 0x1 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2 0x2 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3 0x3 /* enum */ +/* enum: To enable Switch loopback with Rx engine 0 */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0 0x4 +/* enum: To enable Switch loopback with Rx engine 1 */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1 0x5 + +/* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4 +/* ID of the common pool allocated */ +#define MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_CP_ID_OFST 0 + + +/***********************************/ +/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO + * When we use the TX_vFIFO_ULL mode, we can allocate vFIFOs using the + * previously allocated common pools. + */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e + +#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20 +/* Common pool previously allocated to which the new vFIFO will be associated + */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_CP_OFST 0 +/* Port or RX engine to associate the vFIFO egress */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4 +/* enum: Extracts information from common pool */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE -0x1 +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0 0x0 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1 0x1 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2 0x2 /* enum */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3 0x3 /* enum */ +/* enum: To enable Switch loopback with Rx engine 0 */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0 0x4 +/* enum: To enable Switch loopback with Rx engine 1 */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1 0x5 +/* Minimum number of buffers that the pool must have */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8 +/* enum: Do not check the space available */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM 0x0 +/* Will the vFIFO be used as TX_vFIFO_ULL */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12 +/* Network priority of the vFIFO,if applicable */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16 +/* enum: Search for the lowest unused priority */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE -0x1 + +/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8 +/* Short vFIFO ID */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_VID_OFST 0 +/* Network priority of the vFIFO */ +#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_PRIORITY_OFST 4 + + +/***********************************/ +/* MC_CMD_TEARDOWN_TX_VFIFO_VF + * This interface clears the configuration of the given vFIFO and leaves it + * ready to be re-used. + */ +#define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f + +#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */ +#define MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4 +/* Short vFIFO ID */ +#define MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_VFIFO_OFST 0 + +/* MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT msgresponse */ +#define MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT_LEN 0 + + +/***********************************/ +/* MC_CMD_DEALLOCATE_TX_VFIFO_CP + * This interface clears the configuration of the given common pool and leaves + * it ready to be re-used. + */ +#define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121 + +#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */ +#define MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4 +/* Common pool ID given when pool allocated */ +#define MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_POOL_ID_OFST 0 + +/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT msgresponse */ +#define MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT_LEN 0 + + +/***********************************/ +/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS + * This interface allows the host to find out how many common pool buffers are + * not yet assigned. + */ +#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124 + +#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN + +/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */ +#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0 + +/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT msgresponse */ +#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_LEN 8 +/* Available buffers for the ENG to NET vFIFOs. */ +#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_NET_OFST 0 +/* Available buffers for the ENG to ENG and NET to ENG vFIFOs. */ +#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_ENG_OFST 4 + + #endif /* MCDI_PCOL_H */ From e17705c43b4273d2587297d5f8168ee432d9c94a Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 17 Nov 2016 10:51:39 +0000 Subject: [PATCH 2/5] sfc: Update EF10 register definitions Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10_regs.h | 103 +++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 15 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10_regs.h b/drivers/net/ethernet/sfc/ef10_regs.h index 62a55dde61d5..2c4bf9476c37 100644 --- a/drivers/net/ethernet/sfc/ef10_regs.h +++ b/drivers/net/ethernet/sfc/ef10_regs.h @@ -1,6 +1,6 @@ /**************************************************************************** * Driver for Solarflare network controllers and boards - * Copyright 2012-2013 Solarflare Communications Inc. + * Copyright 2012-2015 Solarflare Communications Inc. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 as published @@ -147,8 +147,14 @@ #define ESF_DZ_RX_OVERRIDE_HOLDOFF_WIDTH 1 #define ESF_DZ_RX_DROP_EVENT_LBN 58 #define ESF_DZ_RX_DROP_EVENT_WIDTH 1 -#define ESF_DZ_RX_EV_RSVD2_LBN 54 -#define ESF_DZ_RX_EV_RSVD2_WIDTH 4 +#define ESF_DD_RX_EV_RSVD2_LBN 54 +#define ESF_DD_RX_EV_RSVD2_WIDTH 4 +#define ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_LBN 57 +#define ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1 +#define ESF_EZ_RX_IP_INNER_CHKSUM_ERR_LBN 56 +#define ESF_EZ_RX_IP_INNER_CHKSUM_ERR_WIDTH 1 +#define ESF_EZ_RX_EV_RSVD2_LBN 54 +#define ESF_EZ_RX_EV_RSVD2_WIDTH 2 #define ESF_DZ_RX_EV_SOFT2_LBN 52 #define ESF_DZ_RX_EV_SOFT2_WIDTH 2 #define ESF_DZ_RX_DSC_PTR_LBITS_LBN 48 @@ -192,12 +198,21 @@ #define ESF_DZ_RX_MAC_CLASS_WIDTH 1 #define ESE_DZ_MAC_CLASS_MCAST 1 #define ESE_DZ_MAC_CLASS_UCAST 0 -#define ESF_DZ_RX_EV_SOFT1_LBN 32 -#define ESF_DZ_RX_EV_SOFT1_WIDTH 3 -#define ESF_DZ_RX_EV_RSVD1_LBN 31 -#define ESF_DZ_RX_EV_RSVD1_WIDTH 1 -#define ESF_DZ_RX_ABORT_LBN 30 -#define ESF_DZ_RX_ABORT_WIDTH 1 +#define ESF_DD_RX_EV_SOFT1_LBN 32 +#define ESF_DD_RX_EV_SOFT1_WIDTH 3 +#define ESF_EZ_RX_EV_SOFT1_LBN 34 +#define ESF_EZ_RX_EV_SOFT1_WIDTH 1 +#define ESF_EZ_RX_ENCAP_HDR_LBN 32 +#define ESF_EZ_RX_ENCAP_HDR_WIDTH 2 +#define ESE_EZ_ENCAP_HDR_GRE 2 +#define ESE_EZ_ENCAP_HDR_VXLAN 1 +#define ESE_EZ_ENCAP_HDR_NONE 0 +#define ESF_DD_RX_EV_RSVD1_LBN 30 +#define ESF_DD_RX_EV_RSVD1_WIDTH 2 +#define ESF_EZ_RX_EV_RSVD1_LBN 31 +#define ESF_EZ_RX_EV_RSVD1_WIDTH 1 +#define ESF_EZ_RX_ABORT_LBN 30 +#define ESF_EZ_RX_ABORT_WIDTH 1 #define ESF_DZ_RX_ECC_ERR_LBN 29 #define ESF_DZ_RX_ECC_ERR_WIDTH 1 #define ESF_DZ_RX_CRC1_ERR_LBN 28 @@ -235,6 +250,12 @@ #define ESE_DZ_TX_OPTION_DESC_TSO 7 #define ESE_DZ_TX_OPTION_DESC_VLAN 6 #define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_OPTION_TS_AT_TXDP_LBN 8 +#define ESF_DZ_TX_OPTION_TS_AT_TXDP_WIDTH 1 +#define ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_LBN 7 +#define ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_WIDTH 1 +#define ESF_DZ_TX_OPTION_INNER_IP_CSUM_LBN 6 +#define ESF_DZ_TX_OPTION_INNER_IP_CSUM_WIDTH 1 #define ESF_DZ_TX_TIMESTAMP_LBN 5 #define ESF_DZ_TX_TIMESTAMP_WIDTH 1 #define ESF_DZ_TX_OPTION_CRC_MODE_LBN 2 @@ -257,14 +278,22 @@ #define ESF_DZ_TX_OVERRIDE_HOLDOFF_WIDTH 1 #define ESF_DZ_TX_DROP_EVENT_LBN 58 #define ESF_DZ_TX_DROP_EVENT_WIDTH 1 -#define ESF_DZ_TX_EV_RSVD_LBN 48 -#define ESF_DZ_TX_EV_RSVD_WIDTH 10 +#define ESF_DD_TX_EV_RSVD_LBN 48 +#define ESF_DD_TX_EV_RSVD_WIDTH 10 +#define ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_LBN 57 +#define ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1 +#define ESF_EZ_IP_INNER_CHKSUM_ERR_LBN 56 +#define ESF_EZ_IP_INNER_CHKSUM_ERR_WIDTH 1 +#define ESF_EZ_TX_EV_RSVD_LBN 48 +#define ESF_EZ_TX_EV_RSVD_WIDTH 8 #define ESF_DZ_TX_SOFT2_LBN 32 #define ESF_DZ_TX_SOFT2_WIDTH 16 -#define ESF_DZ_TX_CAN_MERGE_LBN 31 -#define ESF_DZ_TX_CAN_MERGE_WIDTH 1 -#define ESF_DZ_TX_SOFT1_LBN 24 -#define ESF_DZ_TX_SOFT1_WIDTH 7 +#define ESF_DD_TX_SOFT1_LBN 24 +#define ESF_DD_TX_SOFT1_WIDTH 8 +#define ESF_EZ_TX_CAN_MERGE_LBN 31 +#define ESF_EZ_TX_CAN_MERGE_WIDTH 1 +#define ESF_EZ_TX_SOFT1_LBN 24 +#define ESF_EZ_TX_SOFT1_WIDTH 7 #define ESF_DZ_TX_QLABEL_LBN 16 #define ESF_DZ_TX_QLABEL_WIDTH 5 #define ESF_DZ_TX_DESCR_INDX_LBN 0 @@ -301,6 +330,10 @@ #define ESE_DZ_TX_OPTION_DESC_TSO 7 #define ESE_DZ_TX_OPTION_DESC_VLAN 6 #define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56 +#define ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4 +#define ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1 +#define ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0 #define ESF_DZ_TX_TSO_TCP_FLAGS_LBN 48 #define ESF_DZ_TX_TSO_TCP_FLAGS_WIDTH 8 #define ESF_DZ_TX_TSO_IP_ID_LBN 32 @@ -308,6 +341,46 @@ #define ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0 #define ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32 +/* TX_TSO_FATSO2A_DESC */ +#define ESF_DZ_TX_DESC_IS_OPT_LBN 63 +#define ESF_DZ_TX_DESC_IS_OPT_WIDTH 1 +#define ESF_DZ_TX_OPTION_TYPE_LBN 60 +#define ESF_DZ_TX_OPTION_TYPE_WIDTH 3 +#define ESE_DZ_TX_OPTION_DESC_TSO 7 +#define ESE_DZ_TX_OPTION_DESC_VLAN 6 +#define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56 +#define ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4 +#define ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3 +#define ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2 +#define ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1 +#define ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0 +#define ESF_DZ_TX_TSO_IP_ID_LBN 32 +#define ESF_DZ_TX_TSO_IP_ID_WIDTH 16 +#define ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0 +#define ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32 + + +/* TX_TSO_FATSO2B_DESC */ +#define ESF_DZ_TX_DESC_IS_OPT_LBN 63 +#define ESF_DZ_TX_DESC_IS_OPT_WIDTH 1 +#define ESF_DZ_TX_OPTION_TYPE_LBN 60 +#define ESF_DZ_TX_OPTION_TYPE_WIDTH 3 +#define ESE_DZ_TX_OPTION_DESC_TSO 7 +#define ESE_DZ_TX_OPTION_DESC_VLAN 6 +#define ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0 +#define ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56 +#define ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4 +#define ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3 +#define ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2 +#define ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1 +#define ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0 +#define ESF_DZ_TX_TSO_OUTER_IP_ID_LBN 0 +#define ESF_DZ_TX_TSO_OUTER_IP_ID_WIDTH 16 +#define ESF_DZ_TX_TSO_TCP_MSS_LBN 32 +#define ESF_DZ_TX_TSO_TCP_MSS_WIDTH 16 + + /*************************************************************************/ /* TX_DESC_UPD_REG: Transmit descriptor update register. From e9117e5099ea29592c2a6180f368951948837a8b Mon Sep 17 00:00:00 2001 From: Bert Kenward Date: Thu, 17 Nov 2016 10:51:54 +0000 Subject: [PATCH 3/5] sfc: Firmware-Assisted TSO version 2 Add support for FATSOv2 to the driver. FATSOv2 offloads far more of the task of TCP segmentation to the firmware, such that we now just pass a single super-packet to the NIC. This means TSO has a great deal in common with a normal DMA transmit, apart from adding a couple of option descriptors. NIC-specific checks have been moved off the fast path and in to initialisation where possible. This also moves FATSOv1/SWTSO to a new file (tx_tso.c). The end of transmit and some error handling is now outside TSO, since it is common with other code. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/Makefile | 2 +- drivers/net/ethernet/sfc/ef10.c | 123 ++- drivers/net/ethernet/sfc/ethtool.c | 1 + drivers/net/ethernet/sfc/falcon.c | 2 + drivers/net/ethernet/sfc/farch.c | 15 + drivers/net/ethernet/sfc/net_driver.h | 47 +- drivers/net/ethernet/sfc/nic.h | 2 + drivers/net/ethernet/sfc/siena.c | 1 + drivers/net/ethernet/sfc/tx.c | 1019 ++++++------------------- drivers/net/ethernet/sfc/tx.h | 27 + drivers/net/ethernet/sfc/tx_tso.c | 570 ++++++++++++++ 11 files changed, 1038 insertions(+), 771 deletions(-) create mode 100644 drivers/net/ethernet/sfc/tx.h create mode 100644 drivers/net/ethernet/sfc/tx_tso.c diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile index ce8470fe79d5..b3b620f3d91f 100644 --- a/drivers/net/ethernet/sfc/Makefile +++ b/drivers/net/ethernet/sfc/Makefile @@ -1,7 +1,7 @@ sfc-y += efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \ rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \ tenxpress.o txc43128_phy.o falcon_boards.o \ - mcdi.o mcdi_port.o mcdi_mon.o ptp.o + mcdi.o mcdi_port.o mcdi_mon.o ptp.o tx_tso.o sfc-$(CONFIG_SFC_MTD) += mtd.o sfc-$(CONFIG_SFC_SRIOV) += sriov.o siena_sriov.o ef10_sriov.o diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e61807e6d47b..4b69ed9e6336 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2086,6 +2086,78 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue, ER_DZ_TX_DESC_UPD, tx_queue->queue); } +/* Add Firmware-Assisted TSO v2 option descriptors to a queue. + */ +static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, + struct sk_buff *skb, + bool *data_mapped) +{ + struct efx_tx_buffer *buffer; + struct tcphdr *tcp; + struct iphdr *ip; + + u16 ipv4_id; + u32 seqnum; + u32 mss; + + EFX_BUG_ON_PARANOID(tx_queue->tso_version != 2); + + mss = skb_shinfo(skb)->gso_size; + + if (unlikely(mss < 4)) { + WARN_ONCE(1, "MSS of %u is too small for TSO v2\n", mss); + return -EINVAL; + } + + ip = ip_hdr(skb); + if (ip->version == 4) { + /* Modify IPv4 header if needed. */ + ip->tot_len = 0; + ip->check = 0; + ipv4_id = ip->id; + } else { + /* Modify IPv6 header if needed. */ + struct ipv6hdr *ipv6 = ipv6_hdr(skb); + + ipv6->payload_len = 0; + ipv4_id = 0; + } + + tcp = tcp_hdr(skb); + seqnum = ntohl(tcp->seq); + + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + + buffer->flags = EFX_TX_BUF_OPTION; + buffer->len = 0; + buffer->unmap_len = 0; + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_OPTION_TYPE, + ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A, + ESF_DZ_TX_TSO_IP_ID, ipv4_id, + ESF_DZ_TX_TSO_TCP_SEQNO, seqnum + ); + ++tx_queue->insert_count; + + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + + buffer->flags = EFX_TX_BUF_OPTION; + buffer->len = 0; + buffer->unmap_len = 0; + EFX_POPULATE_QWORD_4(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_OPTION_TYPE, + ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B, + ESF_DZ_TX_TSO_TCP_MSS, mss + ); + ++tx_queue->insert_count; + + return 0; +} + static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / @@ -2095,6 +2167,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) struct efx_channel *channel = tx_queue->channel; struct efx_nic *efx = tx_queue->efx; struct efx_ef10_nic_data *nic_data = efx->nic_data; + bool tso_v2 = false; size_t inlen; dma_addr_t dma_addr; efx_qword_t *txd; @@ -2102,13 +2175,33 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) int i; BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0); + /* TSOv2 is a limited resource that can only be configured on a limited + * number of queues. TSO without checksum offload is not really a thing, + * so we only enable it for those queues. + * + * TODO: handle failure to allocate this in the case where we've used + * all the queues. + */ + if (csum_offload && (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) { + tso_v2 = true; + netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n", + channel->channel); + } + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); - MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS, + MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS, + /* This flag was removed from mcdi_pcol.h for + * the non-_EXT version of INIT_TXQ. However, + * firmware still honours it. + */ + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); @@ -2146,8 +2239,11 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) ESF_DZ_TX_OPTION_IP_CSUM, csum_offload); tx_queue->write_count = 1; - if (nic_data->datapath_caps & - (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) { + if (tso_v2) { + tx_queue->handle_tso = efx_ef10_tx_tso_desc; + tx_queue->tso_version = 2; + } else if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) { tx_queue->tso_version = 1; } @@ -2202,6 +2298,25 @@ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue) ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue); } +#define EFX_EF10_MAX_TX_DESCRIPTOR_LEN 0x3fff + +static unsigned int efx_ef10_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len) +{ + if (len > EFX_EF10_MAX_TX_DESCRIPTOR_LEN) { + /* If we need to break across multiple descriptors we should + * stop at a page boundary. This assumes the length limit is + * greater than the page size. + */ + dma_addr_t end = dma_addr + EFX_EF10_MAX_TX_DESCRIPTOR_LEN; + + BUILD_BUG_ON(EFX_EF10_MAX_TX_DESCRIPTOR_LEN < EFX_PAGE_SIZE); + len = (end & (~(EFX_PAGE_SIZE - 1))) - dma_addr; + } + + return len; +} + static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue) { unsigned int old_write_count = tx_queue->write_count; @@ -5469,6 +5584,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = { .tx_init = efx_ef10_tx_init, .tx_remove = efx_ef10_tx_remove, .tx_write = efx_ef10_tx_write, + .tx_limit_len = efx_ef10_tx_limit_len, .rx_push_rss_config = efx_ef10_vf_rx_push_rss_config, .rx_probe = efx_ef10_rx_probe, .rx_init = efx_ef10_rx_init, @@ -5575,6 +5691,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { .tx_init = efx_ef10_tx_init, .tx_remove = efx_ef10_tx_remove, .tx_write = efx_ef10_tx_write, + .tx_limit_len = efx_ef10_tx_limit_len, .rx_push_rss_config = efx_ef10_pf_rx_push_rss_config, .rx_probe = efx_ef10_rx_probe, .rx_init = efx_ef10_rx_init, diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index bf126f935ade..bd5edd61bf64 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -71,6 +71,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), EFX_ETHTOOL_UINT_TXQ_STAT(pushes), EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc), EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err), diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index 1a7092602aec..6a1e74b93445 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -2750,6 +2750,7 @@ const struct efx_nic_type falcon_a1_nic_type = { .tx_init = efx_farch_tx_init, .tx_remove = efx_farch_tx_remove, .tx_write = efx_farch_tx_write, + .tx_limit_len = efx_farch_tx_limit_len, .rx_push_rss_config = dummy_rx_push_rss_config, .rx_probe = efx_farch_rx_probe, .rx_init = efx_farch_rx_init, @@ -2849,6 +2850,7 @@ const struct efx_nic_type falcon_b0_nic_type = { .tx_init = efx_farch_tx_init, .tx_remove = efx_farch_tx_remove, .tx_write = efx_farch_tx_write, + .tx_limit_len = efx_farch_tx_limit_len, .rx_push_rss_config = falcon_b0_rx_push_rss_config, .rx_probe = efx_farch_rx_probe, .rx_init = efx_farch_rx_init, diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 4762ec444cb8..3d5b91bc4ce6 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -356,6 +356,21 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue) } } +unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len) +{ + /* Don't cross 4K boundaries with descriptors. */ + unsigned int limit = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1; + + len = min(limit, len); + + if (EFX_WORKAROUND_5391(tx_queue->efx) && (dma_addr & 0xf)) + len = min_t(unsigned int, len, 512 - (dma_addr & 0xf)); + + return len; +} + + /* Allocate hardware resources for a TX queue */ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue) { diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index fec51c4b2607..2da3e8fb6d71 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -189,13 +189,17 @@ struct efx_tx_buffer { * @channel: The associated channel * @core_txq: The networking core TX queue structure * @buffer: The software buffer ring - * @tsoh_page: Array of pages of TSO header buffers + * @cb_page: Array of pages of copy buffers. Carved up according to + * %EFX_TX_CB_ORDER into %EFX_TX_CB_SIZE-sized chunks. * @txd: The hardware descriptor ring * @ptr_mask: The size of the ring minus 1. * @piobuf: PIO buffer region for this TX queue (shared with its partner). * Size of the region is efx_piobuf_size. * @piobuf_offset: Buffer offset to be specified in PIO descriptors * @initialised: Has hardware queue been initialised? + * @tx_min_size: Minimum transmit size for this queue. Depends on HW. + * @handle_tso: TSO xmit preparation handler. Sets up the TSO metadata and + * may also map tx data, depending on the nature of the TSO implementation. * @read_count: Current read pointer. * This is the number of buffers that have been removed from both rings. * @old_write_count: The value of @write_count when last checked. @@ -224,6 +228,7 @@ struct efx_tx_buffer { * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used * @xmit_more_available: Are any packets waiting to be pushed to the NIC + * @cb_packets: Number of times the TX copybreak feature has been used * @empty_read_count: If the completion path has seen the queue as empty * and the transmission path has not yet checked this, the value of * @read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0. @@ -236,12 +241,16 @@ struct efx_tx_queue { struct efx_channel *channel; struct netdev_queue *core_txq; struct efx_tx_buffer *buffer; - struct efx_buffer *tsoh_page; + struct efx_buffer *cb_page; struct efx_special_buffer txd; unsigned int ptr_mask; void __iomem *piobuf; unsigned int piobuf_offset; bool initialised; + unsigned int tx_min_size; + + /* Function pointers used in the fast path. */ + int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *); /* Members used mainly on the completion path */ unsigned int read_count ____cacheline_aligned_in_smp; @@ -260,6 +269,7 @@ struct efx_tx_queue { unsigned int pushes; unsigned int pio_packets; bool xmit_more_available; + unsigned int cb_packets; /* Statistics to supplement MAC stats */ unsigned long tx_packets; @@ -269,6 +279,9 @@ struct efx_tx_queue { atomic_t flush_outstanding; }; +#define EFX_TX_CB_ORDER 7 +#define EFX_TX_CB_SIZE (1 << EFX_TX_CB_ORDER) - NET_IP_ALIGN + /** * struct efx_rx_buffer - An Efx RX data buffer * @dma_addr: DMA base address of the buffer @@ -1288,6 +1301,8 @@ struct efx_nic_type { void (*tx_init)(struct efx_tx_queue *tx_queue); void (*tx_remove)(struct efx_tx_queue *tx_queue); void (*tx_write)(struct efx_tx_queue *tx_queue); + unsigned int (*tx_limit_len)(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); int (*rx_push_rss_config)(struct efx_nic *efx, bool user, const u32 *rx_indir_table); int (*rx_probe)(struct efx_rx_queue *rx_queue); @@ -1545,4 +1560,32 @@ static inline netdev_features_t efx_supported_features(const struct efx_nic *efx return net_dev->features | net_dev->hw_features; } +/* Get the current TX queue insert index. */ +static inline unsigned int +efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue) +{ + return tx_queue->insert_count & tx_queue->ptr_mask; +} + +/* Get a TX buffer. */ +static inline struct efx_tx_buffer * +__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)]; +} + +/* Get a TX buffer, checking it's not currently in use. */ +static inline struct efx_tx_buffer * +efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer = + __efx_tx_queue_get_insert_buffer(tx_queue); + + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); + + return buffer; +} + #endif /* EFX_NET_DRIVER_H */ diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 73bee7ea332a..06dd96e25bb6 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -681,6 +681,8 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue); void efx_farch_tx_fini(struct efx_tx_queue *tx_queue); void efx_farch_tx_remove(struct efx_tx_queue *tx_queue); void efx_farch_tx_write(struct efx_tx_queue *tx_queue); +unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); int efx_farch_rx_probe(struct efx_rx_queue *rx_queue); void efx_farch_rx_init(struct efx_rx_queue *rx_queue); void efx_farch_rx_fini(struct efx_rx_queue *rx_queue); diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 04ed1b4c7cd9..3975cad19d37 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -977,6 +977,7 @@ const struct efx_nic_type siena_a0_nic_type = { .tx_init = efx_farch_tx_init, .tx_remove = efx_farch_tx_remove, .tx_write = efx_farch_tx_write, + .tx_limit_len = efx_farch_tx_limit_len, .rx_push_rss_config = siena_rx_push_rss_config, .rx_probe = efx_farch_rx_probe, .rx_init = efx_farch_rx_init, diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 233778911557..3089e888a08d 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -22,6 +22,7 @@ #include "efx.h" #include "io.h" #include "nic.h" +#include "tx.h" #include "workarounds.h" #include "ef10_regs.h" @@ -33,29 +34,30 @@ unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF; #endif /* EFX_USE_PIO */ -static inline unsigned int -efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue) +static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer) { - return tx_queue->insert_count & tx_queue->ptr_mask; + unsigned int index = efx_tx_queue_get_insert_index(tx_queue); + struct efx_buffer *page_buf = + &tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)]; + unsigned int offset = + ((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1); + + if (unlikely(!page_buf->addr) && + efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE, + GFP_ATOMIC)) + return NULL; + buffer->dma_addr = page_buf->dma_addr + offset; + buffer->unmap_len = 0; + return (u8 *)page_buf->addr + offset; } -static inline struct efx_tx_buffer * -__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) +u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, size_t len) { - return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)]; -} - -static inline struct efx_tx_buffer * -efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue) -{ - struct efx_tx_buffer *buffer = - __efx_tx_queue_get_insert_buffer(tx_queue); - - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->flags); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - - return buffer; + if (len > EFX_TX_CB_SIZE) + return NULL; + return efx_tx_get_copy_buffer(tx_queue, buffer); } static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, @@ -90,27 +92,6 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue, buffer->flags = 0; } -static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, - struct sk_buff *skb); - -static inline unsigned -efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr) -{ - /* Depending on the NIC revision, we can use descriptor - * lengths up to 8K or 8K-1. However, since PCI Express - * devices must split read requests at 4K boundaries, there is - * little benefit from using descriptors that cross those - * boundaries and we keep things simple by not doing so. - */ - unsigned len = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1; - - /* Work around hardware bug for unaligned buffers. */ - if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf)) - len = min_t(unsigned, len, 512 - (dma_addr & 0xf)); - - return len; -} - unsigned int efx_tx_max_skb_descs(struct efx_nic *efx) { /* Header and payload descriptor for each output segment, plus @@ -173,6 +154,39 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1) } } +static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) +{ + unsigned int min_len = tx_queue->tx_min_size; + unsigned int copy_len = skb->len; + struct efx_tx_buffer *buffer; + u8 *copy_buffer; + int rc; + + EFX_BUG_ON_PARANOID(copy_len > EFX_TX_CB_SIZE); + + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + + copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer); + if (unlikely(!copy_buffer)) + return -ENOMEM; + + rc = skb_copy_bits(skb, 0, copy_buffer, copy_len); + EFX_WARN_ON_PARANOID(rc); + if (unlikely(copy_len < min_len)) { + memset(copy_buffer + copy_len, 0, min_len - copy_len); + buffer->len = min_len; + } else { + buffer->len = copy_len; + } + + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + + ++tx_queue->insert_count; + return rc; +} + #ifdef EFX_USE_PIO struct efx_short_copy_buffer { @@ -267,8 +281,8 @@ static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb, EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list); } -static struct efx_tx_buffer * -efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) +static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) { struct efx_tx_buffer *buffer = efx_tx_queue_get_insert_buffer(tx_queue); @@ -292,7 +306,7 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) efx_flush_copy_buffer(tx_queue->efx, piobuf, ©_buf); } else { /* Pad the write to the size of a cache line. - * We can do this because we know the skb_shared_info sruct is + * We can do this because we know the skb_shared_info struct is * after the source, and the destination buffer is big enough. */ BUILD_BUG_ON(L1_CACHE_BYTES > @@ -301,6 +315,9 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) ALIGN(skb->len, L1_CACHE_BYTES) >> 3); } + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION; + EFX_POPULATE_QWORD_5(buffer->option, ESF_DZ_TX_DESC_IS_OPT, 1, ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO, @@ -308,12 +325,133 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) ESF_DZ_TX_PIO_BYTE_CNT, skb->len, ESF_DZ_TX_PIO_BUF_ADDR, tx_queue->piobuf_offset); - ++tx_queue->pio_packets; ++tx_queue->insert_count; - return buffer; + return 0; } #endif /* EFX_USE_PIO */ +static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, + size_t len) +{ + const struct efx_nic_type *nic_type = tx_queue->efx->type; + struct efx_tx_buffer *buffer; + unsigned int dma_len; + + /* Map the fragment taking account of NIC-dependent DMA limits. */ + do { + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len); + + buffer->len = dma_len; + buffer->dma_addr = dma_addr; + buffer->flags = EFX_TX_BUF_CONT; + len -= dma_len; + dma_addr += dma_len; + ++tx_queue->insert_count; + } while (len); + + return buffer; +} + +/* Map all data from an SKB for DMA and create descriptors on the queue. + */ +static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + unsigned int segment_count) +{ + struct efx_nic *efx = tx_queue->efx; + struct device *dma_dev = &efx->pci_dev->dev; + unsigned int frag_index, nr_frags; + dma_addr_t dma_addr, unmap_addr; + unsigned short dma_flags; + size_t len, unmap_len; + + nr_frags = skb_shinfo(skb)->nr_frags; + frag_index = 0; + + /* Map header data. */ + len = skb_headlen(skb); + dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE); + dma_flags = EFX_TX_BUF_MAP_SINGLE; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + + if (segment_count) { + /* For TSO we need to put the header in to a separate + * descriptor. Map this separately if necessary. + */ + size_t header_len = skb_transport_header(skb) - skb->data + + (tcp_hdr(skb)->doff << 2u); + + if (header_len != len) { + tx_queue->tso_long_headers++; + efx_tx_map_chunk(tx_queue, dma_addr, header_len); + len -= header_len; + dma_addr += header_len; + } + } + + /* Add descriptors for each fragment. */ + do { + struct efx_tx_buffer *buffer; + skb_frag_t *fragment; + + buffer = efx_tx_map_chunk(tx_queue, dma_addr, len); + + /* The final descriptor for a fragment is responsible for + * unmapping the whole fragment. + */ + buffer->flags = EFX_TX_BUF_CONT | dma_flags; + buffer->unmap_len = unmap_len; + buffer->dma_offset = buffer->dma_addr - unmap_addr; + + if (frag_index >= nr_frags) { + /* Store SKB details with the final buffer for + * the completion. + */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB | dma_flags; + return 0; + } + + /* Move on to the next fragment. */ + fragment = &skb_shinfo(skb)->frags[frag_index++]; + len = skb_frag_size(fragment); + dma_addr = skb_frag_dma_map(dma_dev, fragment, + 0, len, DMA_TO_DEVICE); + dma_flags = 0; + unmap_len = len; + unmap_addr = dma_addr; + + if (unlikely(dma_mapping_error(dma_dev, dma_addr))) + return -EIO; + } while (1); +} + +/* Remove buffers put into a tx_queue. None of the buffers must have + * an skb attached. + */ +static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) +{ + struct efx_tx_buffer *buffer; + + /* Work backwards until we hit the original insert pointer value */ + while (tx_queue->insert_count != tx_queue->write_count) { + --tx_queue->insert_count; + buffer = __efx_tx_queue_get_insert_buffer(tx_queue); + efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); + } +} + +static int efx_tx_tso_sw(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped) +{ + return efx_enqueue_skb_tso(tx_queue, skb, data_mapped); +} + /* * Add a socket buffer to a TX queue * @@ -332,103 +470,47 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb) */ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) { - struct efx_nic *efx = tx_queue->efx; - struct device *dma_dev = &efx->pci_dev->dev; - struct efx_tx_buffer *buffer; - unsigned int old_insert_count = tx_queue->insert_count; - skb_frag_t *fragment; - unsigned int len, unmap_len = 0; - dma_addr_t dma_addr, unmap_addr = 0; - unsigned int dma_len; - unsigned short dma_flags; - int i = 0; + bool data_mapped = false; + unsigned int segments; + unsigned int skb_len; - if (skb_shinfo(skb)->gso_size) - return efx_enqueue_skb_tso(tx_queue, skb); + skb_len = skb->len; + segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; + if (segments == 1) + segments = 0; /* Don't use TSO for a single segment. */ - /* Get size of the initial fragment */ - len = skb_headlen(skb); - - /* Pad if necessary */ - if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) { - EFX_BUG_ON_PARANOID(skb->data_len); - len = 32 + 1; - if (skb_pad(skb, len - skb->len)) - return NETDEV_TX_OK; - } - - /* Consider using PIO for short packets */ -#ifdef EFX_USE_PIO - if (skb->len <= efx_piobuf_size && !skb->xmit_more && - efx_nic_may_tx_pio(tx_queue)) { - buffer = efx_enqueue_skb_pio(tx_queue, skb); - dma_flags = EFX_TX_BUF_OPTION; - goto finish_packet; - } -#endif - - /* Map for DMA. Use dma_map_single rather than dma_map_page - * since this is more efficient on machines with sparse - * memory. + /* Handle TSO first - it's *possible* (although unlikely) that we might + * be passed a packet to segment that's smaller than the copybreak/PIO + * size limit. */ - dma_flags = EFX_TX_BUF_MAP_SINGLE; - dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE); - - /* Process all fragments */ - while (1) { - if (unlikely(dma_mapping_error(dma_dev, dma_addr))) - goto dma_err; - - /* Store fields for marking in the per-fragment final - * descriptor */ - unmap_len = len; - unmap_addr = dma_addr; - - /* Add to TX queue, splitting across DMA boundaries */ - do { - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - - dma_len = efx_max_tx_len(efx, dma_addr); - if (likely(dma_len >= len)) - dma_len = len; - - /* Fill out per descriptor fields */ - buffer->len = dma_len; - buffer->dma_addr = dma_addr; - buffer->flags = EFX_TX_BUF_CONT; - len -= dma_len; - dma_addr += dma_len; - ++tx_queue->insert_count; - } while (len); - - /* Transfer ownership of the unmapping to the final buffer */ - buffer->flags = EFX_TX_BUF_CONT | dma_flags; - buffer->unmap_len = unmap_len; - buffer->dma_offset = buffer->dma_addr - unmap_addr; - unmap_len = 0; - - /* Get address and size of next fragment */ - if (i >= skb_shinfo(skb)->nr_frags) - break; - fragment = &skb_shinfo(skb)->frags[i]; - len = skb_frag_size(fragment); - i++; - /* Map for DMA */ - dma_flags = 0; - dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len, - DMA_TO_DEVICE); + if (segments) { + EFX_BUG_ON_PARANOID(!tx_queue->handle_tso); + if (tx_queue->handle_tso(tx_queue, skb, &data_mapped)) + goto err; +#ifdef EFX_USE_PIO + } else if (skb_len <= efx_piobuf_size && !skb->xmit_more && + efx_nic_may_tx_pio(tx_queue)) { + /* Use PIO for short packets with an empty queue. */ + if (efx_enqueue_skb_pio(tx_queue, skb)) + goto err; + tx_queue->pio_packets++; + data_mapped = true; +#endif + } else if (skb_len < tx_queue->tx_min_size || + (skb->data_len && skb_len <= EFX_TX_CB_SIZE)) { + /* Pad short packets or coalesce short fragmented packets. */ + if (efx_enqueue_skb_copy(tx_queue, skb)) + goto err; + tx_queue->cb_packets++; + data_mapped = true; } - /* Transfer ownership of the skb to the final buffer */ -#ifdef EFX_USE_PIO -finish_packet: -#endif - buffer->skb = skb; - buffer->flags = EFX_TX_BUF_SKB | dma_flags; + /* Map for DMA and create descriptors if we haven't done so already. */ + if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments))) + goto err; - netdev_tx_sent_queue(tx_queue->core_txq, skb->len); - - efx_tx_maybe_stop_queue(tx_queue); + /* Update BQL */ + netdev_tx_sent_queue(tx_queue->core_txq, skb_len); /* Pass off to hardware */ if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { @@ -446,37 +528,22 @@ finish_packet: tx_queue->xmit_more_available = skb->xmit_more; } - tx_queue->tx_packets++; + if (segments) { + tx_queue->tso_bursts++; + tx_queue->tso_packets += segments; + tx_queue->tx_packets += segments; + } else { + tx_queue->tx_packets++; + } + + efx_tx_maybe_stop_queue(tx_queue); return NETDEV_TX_OK; - dma_err: - netif_err(efx, tx_err, efx->net_dev, - " TX queue %d could not map skb with %d bytes %d " - "fragments for DMA\n", tx_queue->queue, skb->len, - skb_shinfo(skb)->nr_frags + 1); - /* Mark the packet as transmitted, and free the SKB ourselves */ +err: + efx_enqueue_unwind(tx_queue); dev_kfree_skb_any(skb); - - /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != old_insert_count) { - unsigned int pkts_compl = 0, bytes_compl = 0; - --tx_queue->insert_count; - buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl); - } - - /* Free the fragment we were mid-way through pushing */ - if (unmap_len) { - if (dma_flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(dma_dev, unmap_addr, unmap_len, - DMA_TO_DEVICE); - else - dma_unmap_page(dma_dev, unmap_addr, unmap_len, - DMA_TO_DEVICE); - } - return NETDEV_TX_OK; } @@ -667,19 +734,9 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index) } } -/* Size of page-based TSO header buffers. Larger blocks must be - * allocated from the heap. - */ -#define TSOH_STD_SIZE 128 -#define TSOH_PER_PAGE (PAGE_SIZE / TSOH_STD_SIZE) - -/* At most half the descriptors in the queue at any time will refer to - * a TSO header buffer, since they must always be followed by a - * payload descriptor referring to an skb. - */ -static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue) +static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue) { - return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE); + return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER); } int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) @@ -703,14 +760,11 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) if (!tx_queue->buffer) return -ENOMEM; - if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) { - tx_queue->tsoh_page = - kcalloc(efx_tsoh_page_count(tx_queue), - sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL); - if (!tx_queue->tsoh_page) { - rc = -ENOMEM; - goto fail1; - } + tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue), + sizeof(tx_queue->cb_page[0]), GFP_KERNEL); + if (!tx_queue->cb_page) { + rc = -ENOMEM; + goto fail1; } /* Allocate hardware ring */ @@ -721,8 +775,8 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue) return 0; fail2: - kfree(tx_queue->tsoh_page); - tx_queue->tsoh_page = NULL; + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; fail1: kfree(tx_queue->buffer); tx_queue->buffer = NULL; @@ -731,7 +785,9 @@ fail1: void efx_init_tx_queue(struct efx_tx_queue *tx_queue) { - netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev, + struct efx_nic *efx = tx_queue->efx; + + netif_dbg(efx, drv, efx->net_dev, "initialising TX queue %d\n", tx_queue->queue); tx_queue->insert_count = 0; @@ -742,6 +798,14 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID; tx_queue->xmit_more_available = false; + /* Set up default function pointers. These may get replaced by + * efx_nic_init_tx() based off NIC/queue capabilities. + */ + tx_queue->handle_tso = efx_tx_tso_sw; + + /* Some older hardware requires Tx writes larger than 32. */ + tx_queue->tx_min_size = EFX_WORKAROUND_15592(efx) ? 33 : 0; + /* Set up TX descriptor ring */ efx_nic_init_tx(tx_queue); @@ -781,589 +845,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue) "destroying TX queue %d\n", tx_queue->queue); efx_nic_remove_tx(tx_queue); - if (tx_queue->tsoh_page) { - for (i = 0; i < efx_tsoh_page_count(tx_queue); i++) + if (tx_queue->cb_page) { + for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++) efx_nic_free_buffer(tx_queue->efx, - &tx_queue->tsoh_page[i]); - kfree(tx_queue->tsoh_page); - tx_queue->tsoh_page = NULL; + &tx_queue->cb_page[i]); + kfree(tx_queue->cb_page); + tx_queue->cb_page = NULL; } kfree(tx_queue->buffer); tx_queue->buffer = NULL; } - - -/* Efx TCP segmentation acceleration. - * - * Why? Because by doing it here in the driver we can go significantly - * faster than the GSO. - * - * Requires TX checksum offload support. - */ - -#define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) - -/** - * struct tso_state - TSO state for an SKB - * @out_len: Remaining length in current segment - * @seqnum: Current sequence number - * @ipv4_id: Current IPv4 ID, host endian - * @packet_space: Remaining space in current packet - * @dma_addr: DMA address of current position - * @in_len: Remaining length in current SKB fragment - * @unmap_len: Length of SKB fragment - * @unmap_addr: DMA address of SKB fragment - * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 - * @protocol: Network protocol (after any VLAN header) - * @ip_off: Offset of IP header - * @tcp_off: Offset of TCP header - * @header_len: Number of bytes of header - * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload - * @header_dma_addr: Header DMA address, when using option descriptors - * @header_unmap_len: Header DMA mapped length, or 0 if not using option - * descriptors - * - * The state used during segmentation. It is put into this data structure - * just to make it easy to pass into inline functions. - */ -struct tso_state { - /* Output position */ - unsigned out_len; - unsigned seqnum; - u16 ipv4_id; - unsigned packet_space; - - /* Input position */ - dma_addr_t dma_addr; - unsigned in_len; - unsigned unmap_len; - dma_addr_t unmap_addr; - unsigned short dma_flags; - - __be16 protocol; - unsigned int ip_off; - unsigned int tcp_off; - unsigned header_len; - unsigned int ip_base_len; - dma_addr_t header_dma_addr; - unsigned int header_unmap_len; -}; - - -/* - * Verify that our various assumptions about sk_buffs and the conditions - * under which TSO will be attempted hold true. Return the protocol number. - */ -static __be16 efx_tso_check_protocol(struct sk_buff *skb) -{ - __be16 protocol = skb->protocol; - - EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto != - protocol); - if (protocol == htons(ETH_P_8021Q)) { - struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; - protocol = veh->h_vlan_encapsulated_proto; - } - - if (protocol == htons(ETH_P_IP)) { - EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP); - } else { - EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6)); - EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP); - } - EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data) - + (tcp_hdr(skb)->doff << 2u)) > - skb_headlen(skb)); - - return protocol; -} - -static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, unsigned int len) -{ - u8 *result; - - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->flags); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - - if (likely(len <= TSOH_STD_SIZE - NET_IP_ALIGN)) { - unsigned index = - (tx_queue->insert_count & tx_queue->ptr_mask) / 2; - struct efx_buffer *page_buf = - &tx_queue->tsoh_page[index / TSOH_PER_PAGE]; - unsigned offset = - TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + NET_IP_ALIGN; - - if (unlikely(!page_buf->addr) && - efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE, - GFP_ATOMIC)) - return NULL; - - result = (u8 *)page_buf->addr + offset; - buffer->dma_addr = page_buf->dma_addr + offset; - buffer->flags = EFX_TX_BUF_CONT; - } else { - tx_queue->tso_long_headers++; - - buffer->heap_buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC); - if (unlikely(!buffer->heap_buf)) - return NULL; - result = (u8 *)buffer->heap_buf + NET_IP_ALIGN; - buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; - } - - buffer->len = len; - - return result; -} - -/** - * efx_tx_queue_insert - push descriptors onto the TX queue - * @tx_queue: Efx TX queue - * @dma_addr: DMA address of fragment - * @len: Length of fragment - * @final_buffer: The final buffer inserted into the queue - * - * Push descriptors onto the TX queue. - */ -static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, - dma_addr_t dma_addr, unsigned len, - struct efx_tx_buffer **final_buffer) -{ - struct efx_tx_buffer *buffer; - struct efx_nic *efx = tx_queue->efx; - unsigned dma_len; - - EFX_BUG_ON_PARANOID(len <= 0); - - while (1) { - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - ++tx_queue->insert_count; - - EFX_BUG_ON_PARANOID(tx_queue->insert_count - - tx_queue->read_count >= - efx->txq_entries); - - buffer->dma_addr = dma_addr; - - dma_len = efx_max_tx_len(efx, dma_addr); - - /* If there is enough space to send then do so */ - if (dma_len >= len) - break; - - buffer->len = dma_len; - buffer->flags = EFX_TX_BUF_CONT; - dma_addr += dma_len; - len -= dma_len; - } - - EFX_BUG_ON_PARANOID(!len); - buffer->len = len; - *final_buffer = buffer; -} - - -/* - * Put a TSO header into the TX queue. - * - * This is special-cased because we know that it is small enough to fit in - * a single fragment, and we know it doesn't cross a page boundary. It - * also allows us to not worry about end-of-packet etc. - */ -static int efx_tso_put_header(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, u8 *header) -{ - if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { - buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, - header, buffer->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, - buffer->dma_addr))) { - kfree(buffer->heap_buf); - buffer->len = 0; - buffer->flags = 0; - return -ENOMEM; - } - buffer->unmap_len = buffer->len; - buffer->dma_offset = 0; - buffer->flags |= EFX_TX_BUF_MAP_SINGLE; - } - - ++tx_queue->insert_count; - return 0; -} - - -/* Remove buffers put into a tx_queue. None of the buffers must have - * an skb attached. - */ -static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue, - unsigned int insert_count) -{ - struct efx_tx_buffer *buffer; - - /* Work backwards until we hit the original insert pointer value */ - while (tx_queue->insert_count != insert_count) { - --tx_queue->insert_count; - buffer = __efx_tx_queue_get_insert_buffer(tx_queue); - efx_dequeue_buffer(tx_queue, buffer, NULL, NULL); - } -} - - -/* Parse the SKB header and initialise state. */ -static int tso_start(struct tso_state *st, struct efx_nic *efx, - struct efx_tx_queue *tx_queue, - const struct sk_buff *skb) -{ - struct device *dma_dev = &efx->pci_dev->dev; - unsigned int header_len, in_len; - bool use_opt_desc = false; - dma_addr_t dma_addr; - - if (tx_queue->tso_version == 1) - use_opt_desc = true; - - st->ip_off = skb_network_header(skb) - skb->data; - st->tcp_off = skb_transport_header(skb) - skb->data; - header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); - in_len = skb_headlen(skb) - header_len; - st->header_len = header_len; - st->in_len = in_len; - if (st->protocol == htons(ETH_P_IP)) { - st->ip_base_len = st->header_len - st->ip_off; - st->ipv4_id = ntohs(ip_hdr(skb)->id); - } else { - st->ip_base_len = st->header_len - st->tcp_off; - st->ipv4_id = 0; - } - st->seqnum = ntohl(tcp_hdr(skb)->seq); - - EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); - EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn); - EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst); - - st->out_len = skb->len - header_len; - - if (!use_opt_desc) { - st->header_unmap_len = 0; - - if (likely(in_len == 0)) { - st->dma_flags = 0; - st->unmap_len = 0; - return 0; - } - - dma_addr = dma_map_single(dma_dev, skb->data + header_len, - in_len, DMA_TO_DEVICE); - st->dma_flags = EFX_TX_BUF_MAP_SINGLE; - st->dma_addr = dma_addr; - st->unmap_addr = dma_addr; - st->unmap_len = in_len; - } else { - dma_addr = dma_map_single(dma_dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - st->header_dma_addr = dma_addr; - st->header_unmap_len = skb_headlen(skb); - st->dma_flags = 0; - st->dma_addr = dma_addr + header_len; - st->unmap_len = 0; - } - - return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0; -} - -static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, - skb_frag_t *frag) -{ - st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, - skb_frag_size(frag), DMA_TO_DEVICE); - if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->dma_flags = 0; - st->unmap_len = skb_frag_size(frag); - st->in_len = skb_frag_size(frag); - st->dma_addr = st->unmap_addr; - return 0; - } - return -ENOMEM; -} - - -/** - * tso_fill_packet_with_fragment - form descriptors for the current fragment - * @tx_queue: Efx TX queue - * @skb: Socket buffer - * @st: TSO state - * - * Form descriptors for the current fragment, until we reach the end - * of fragment or end-of-packet. - */ -static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, - const struct sk_buff *skb, - struct tso_state *st) -{ - struct efx_tx_buffer *buffer; - int n; - - if (st->in_len == 0) - return; - if (st->packet_space == 0) - return; - - EFX_BUG_ON_PARANOID(st->in_len <= 0); - EFX_BUG_ON_PARANOID(st->packet_space <= 0); - - n = min(st->in_len, st->packet_space); - - st->packet_space -= n; - st->out_len -= n; - st->in_len -= n; - - efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); - - if (st->out_len == 0) { - /* Transfer ownership of the skb */ - buffer->skb = skb; - buffer->flags = EFX_TX_BUF_SKB; - } else if (st->packet_space != 0) { - buffer->flags = EFX_TX_BUF_CONT; - } - - if (st->in_len == 0) { - /* Transfer ownership of the DMA mapping */ - buffer->unmap_len = st->unmap_len; - buffer->dma_offset = buffer->unmap_len - buffer->len; - buffer->flags |= st->dma_flags; - st->unmap_len = 0; - } - - st->dma_addr += n; -} - - -/** - * tso_start_new_packet - generate a new header and prepare for the new packet - * @tx_queue: Efx TX queue - * @skb: Socket buffer - * @st: TSO state - * - * Generate a new header and prepare for the new packet. Return 0 on - * success, or -%ENOMEM if failed to alloc header. - */ -static int tso_start_new_packet(struct efx_tx_queue *tx_queue, - const struct sk_buff *skb, - struct tso_state *st) -{ - struct efx_tx_buffer *buffer = - efx_tx_queue_get_insert_buffer(tx_queue); - bool is_last = st->out_len <= skb_shinfo(skb)->gso_size; - u8 tcp_flags_clear; - - if (!is_last) { - st->packet_space = skb_shinfo(skb)->gso_size; - tcp_flags_clear = 0x09; /* mask out FIN and PSH */ - } else { - st->packet_space = st->out_len; - tcp_flags_clear = 0x00; - } - - if (!st->header_unmap_len) { - /* Allocate and insert a DMA-mapped header buffer. */ - struct tcphdr *tsoh_th; - unsigned ip_length; - u8 *header; - int rc; - - header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); - if (!header) - return -ENOMEM; - - tsoh_th = (struct tcphdr *)(header + st->tcp_off); - - /* Copy and update the headers. */ - memcpy(header, skb->data, st->header_len); - - tsoh_th->seq = htonl(st->seqnum); - ((u8 *)tsoh_th)[13] &= ~tcp_flags_clear; - - ip_length = st->ip_base_len + st->packet_space; - - if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = - (struct iphdr *)(header + st->ip_off); - - tsoh_iph->tot_len = htons(ip_length); - tsoh_iph->id = htons(st->ipv4_id); - } else { - struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + st->ip_off); - - tsoh_iph->payload_len = htons(ip_length); - } - - rc = efx_tso_put_header(tx_queue, buffer, header); - if (unlikely(rc)) - return rc; - } else { - /* Send the original headers with a TSO option descriptor - * in front - */ - u8 tcp_flags = ((u8 *)tcp_hdr(skb))[13] & ~tcp_flags_clear; - - buffer->flags = EFX_TX_BUF_OPTION; - buffer->len = 0; - buffer->unmap_len = 0; - EFX_POPULATE_QWORD_5(buffer->option, - ESF_DZ_TX_DESC_IS_OPT, 1, - ESF_DZ_TX_OPTION_TYPE, - ESE_DZ_TX_OPTION_DESC_TSO, - ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags, - ESF_DZ_TX_TSO_IP_ID, st->ipv4_id, - ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum); - ++tx_queue->insert_count; - - /* We mapped the headers in tso_start(). Unmap them - * when the last segment is completed. - */ - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - buffer->dma_addr = st->header_dma_addr; - buffer->len = st->header_len; - if (is_last) { - buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; - buffer->unmap_len = st->header_unmap_len; - buffer->dma_offset = 0; - /* Ensure we only unmap them once in case of a - * later DMA mapping error and rollback - */ - st->header_unmap_len = 0; - } else { - buffer->flags = EFX_TX_BUF_CONT; - buffer->unmap_len = 0; - } - ++tx_queue->insert_count; - } - - st->seqnum += skb_shinfo(skb)->gso_size; - - /* Linux leaves suitable gaps in the IP ID space for us to fill. */ - ++st->ipv4_id; - - ++tx_queue->tso_packets; - - ++tx_queue->tx_packets; - - return 0; -} - - -/** - * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer - * @tx_queue: Efx TX queue - * @skb: Socket buffer - * - * Context: You must hold netif_tx_lock() to call this function. - * - * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if - * @skb was not enqueued. In all cases @skb is consumed. Return - * %NETDEV_TX_OK. - */ -static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, - struct sk_buff *skb) -{ - struct efx_nic *efx = tx_queue->efx; - unsigned int old_insert_count = tx_queue->insert_count; - int frag_i, rc; - struct tso_state state; - - /* Find the packet protocol and sanity-check it */ - state.protocol = efx_tso_check_protocol(skb); - - rc = tso_start(&state, efx, tx_queue, skb); - if (rc) - goto mem_err; - - if (likely(state.in_len == 0)) { - /* Grab the first payload fragment. */ - EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1); - frag_i = 0; - rc = tso_get_fragment(&state, efx, - skb_shinfo(skb)->frags + frag_i); - if (rc) - goto mem_err; - } else { - /* Payload starts in the header area. */ - frag_i = -1; - } - - if (tso_start_new_packet(tx_queue, skb, &state) < 0) - goto mem_err; - - while (1) { - tso_fill_packet_with_fragment(tx_queue, skb, &state); - - /* Move onto the next fragment? */ - if (state.in_len == 0) { - if (++frag_i >= skb_shinfo(skb)->nr_frags) - /* End of payload reached. */ - break; - rc = tso_get_fragment(&state, efx, - skb_shinfo(skb)->frags + frag_i); - if (rc) - goto mem_err; - } - - /* Start at new packet? */ - if (state.packet_space == 0 && - tso_start_new_packet(tx_queue, skb, &state) < 0) - goto mem_err; - } - - netdev_tx_sent_queue(tx_queue->core_txq, skb->len); - - efx_tx_maybe_stop_queue(tx_queue); - - /* Pass off to hardware */ - if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) { - struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue); - - /* There could be packets left on the partner queue if those - * SKBs had skb->xmit_more set. If we do not push those they - * could be left for a long time and cause a netdev watchdog. - */ - if (txq2->xmit_more_available) - efx_nic_push_buffers(txq2); - - efx_nic_push_buffers(tx_queue); - } else { - tx_queue->xmit_more_available = skb->xmit_more; - } - - tx_queue->tso_bursts++; - return NETDEV_TX_OK; - - mem_err: - netif_err(efx, tx_err, efx->net_dev, - "Out of memory for TSO headers, or DMA mapping error\n"); - dev_kfree_skb_any(skb); - - /* Free the DMA mapping we were in the process of writing out */ - if (state.unmap_len) { - if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, - state.unmap_len, DMA_TO_DEVICE); - else - dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr, - state.unmap_len, DMA_TO_DEVICE); - } - - /* Free the header DMA mapping, if using option descriptors */ - if (state.header_unmap_len) - dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr, - state.header_unmap_len, DMA_TO_DEVICE); - - efx_enqueue_unwind(tx_queue, old_insert_count); - return NETDEV_TX_OK; -} diff --git a/drivers/net/ethernet/sfc/tx.h b/drivers/net/ethernet/sfc/tx.h new file mode 100644 index 000000000000..1cccc97ec676 --- /dev/null +++ b/drivers/net/ethernet/sfc/tx.h @@ -0,0 +1,27 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2006-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#ifndef EFX_TX_H +#define EFX_TX_H + +#include + +/* Driver internal tx-path related declarations. */ + +unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len); + +u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, size_t len); + +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, + bool *data_mapped); + +#endif /* EFX_TX_H */ diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c new file mode 100644 index 000000000000..99936d70ed71 --- /dev/null +++ b/drivers/net/ethernet/sfc/tx_tso.c @@ -0,0 +1,570 @@ +/**************************************************************************** + * Driver for Solarflare network controllers and boards + * Copyright 2005-2006 Fen Systems Ltd. + * Copyright 2005-2015 Solarflare Communications Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "net_driver.h" +#include "efx.h" +#include "io.h" +#include "nic.h" +#include "tx.h" +#include "workarounds.h" +#include "ef10_regs.h" + +/* Efx legacy TCP segmentation acceleration. + * + * Why? Because by doing it here in the driver we can go significantly + * faster than the GSO. + * + * Requires TX checksum offload support. + */ + +#define PTR_DIFF(p1, p2) ((u8 *)(p1) - (u8 *)(p2)) + +/** + * struct tso_state - TSO state for an SKB + * @out_len: Remaining length in current segment + * @seqnum: Current sequence number + * @ipv4_id: Current IPv4 ID, host endian + * @packet_space: Remaining space in current packet + * @dma_addr: DMA address of current position + * @in_len: Remaining length in current SKB fragment + * @unmap_len: Length of SKB fragment + * @unmap_addr: DMA address of SKB fragment + * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 + * @protocol: Network protocol (after any VLAN header) + * @ip_off: Offset of IP header + * @tcp_off: Offset of TCP header + * @header_len: Number of bytes of header + * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload + * @header_dma_addr: Header DMA address, when using option descriptors + * @header_unmap_len: Header DMA mapped length, or 0 if not using option + * descriptors + * + * The state used during segmentation. It is put into this data structure + * just to make it easy to pass into inline functions. + */ +struct tso_state { + /* Output position */ + unsigned int out_len; + unsigned int seqnum; + u16 ipv4_id; + unsigned int packet_space; + + /* Input position */ + dma_addr_t dma_addr; + unsigned int in_len; + unsigned int unmap_len; + dma_addr_t unmap_addr; + unsigned short dma_flags; + + __be16 protocol; + unsigned int ip_off; + unsigned int tcp_off; + unsigned int header_len; + unsigned int ip_base_len; + dma_addr_t header_dma_addr; + unsigned int header_unmap_len; +}; + +static inline void prefetch_ptr(struct efx_tx_queue *tx_queue) +{ + unsigned int insert_ptr = efx_tx_queue_get_insert_index(tx_queue); + char *ptr; + + ptr = (char *) (tx_queue->buffer + insert_ptr); + prefetch(ptr); + prefetch(ptr + 0x80); + + ptr = (char *) (((efx_qword_t *)tx_queue->txd.buf.addr) + insert_ptr); + prefetch(ptr); + prefetch(ptr + 0x80); +} + +/** + * efx_tx_queue_insert - push descriptors onto the TX queue + * @tx_queue: Efx TX queue + * @dma_addr: DMA address of fragment + * @len: Length of fragment + * @final_buffer: The final buffer inserted into the queue + * + * Push descriptors onto the TX queue. + */ +static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue, + dma_addr_t dma_addr, unsigned int len, + struct efx_tx_buffer **final_buffer) +{ + struct efx_tx_buffer *buffer; + unsigned int dma_len; + + EFX_BUG_ON_PARANOID(len <= 0); + + while (1) { + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + ++tx_queue->insert_count; + + EFX_BUG_ON_PARANOID(tx_queue->insert_count - + tx_queue->read_count >= + tx_queue->efx->txq_entries); + + buffer->dma_addr = dma_addr; + + dma_len = tx_queue->efx->type->tx_limit_len(tx_queue, + dma_addr, len); + + /* If there's space for everything this is our last buffer. */ + if (dma_len >= len) + break; + + buffer->len = dma_len; + buffer->flags = EFX_TX_BUF_CONT; + dma_addr += dma_len; + len -= dma_len; + } + + EFX_BUG_ON_PARANOID(!len); + buffer->len = len; + *final_buffer = buffer; +} + +/* + * Verify that our various assumptions about sk_buffs and the conditions + * under which TSO will be attempted hold true. Return the protocol number. + */ +static __be16 efx_tso_check_protocol(struct sk_buff *skb) +{ + __be16 protocol = skb->protocol; + + EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto != + protocol); + if (protocol == htons(ETH_P_8021Q)) { + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; + + protocol = veh->h_vlan_encapsulated_proto; + } + + if (protocol == htons(ETH_P_IP)) { + EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP); + } else { + EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6)); + EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP); + } + EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data) + + (tcp_hdr(skb)->doff << 2u)) > + skb_headlen(skb)); + + return protocol; +} + +static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, unsigned int len) +{ + u8 *result; + + EFX_BUG_ON_PARANOID(buffer->len); + EFX_BUG_ON_PARANOID(buffer->flags); + EFX_BUG_ON_PARANOID(buffer->unmap_len); + + result = efx_tx_get_copy_buffer_limited(tx_queue, buffer, len); + + if (result) { + buffer->flags = EFX_TX_BUF_CONT; + } else { + buffer->heap_buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC); + if (unlikely(!buffer->heap_buf)) + return NULL; + tx_queue->tso_long_headers++; + result = (u8 *)buffer->heap_buf + NET_IP_ALIGN; + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; + } + + buffer->len = len; + + return result; +} + +/* + * Put a TSO header into the TX queue. + * + * This is special-cased because we know that it is small enough to fit in + * a single fragment, and we know it doesn't cross a page boundary. It + * also allows us to not worry about end-of-packet etc. + */ +static int efx_tso_put_header(struct efx_tx_queue *tx_queue, + struct efx_tx_buffer *buffer, u8 *header) +{ + if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { + buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, + header, buffer->len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, + buffer->dma_addr))) { + kfree(buffer->heap_buf); + buffer->len = 0; + buffer->flags = 0; + return -ENOMEM; + } + buffer->unmap_len = buffer->len; + buffer->dma_offset = 0; + buffer->flags |= EFX_TX_BUF_MAP_SINGLE; + } + + ++tx_queue->insert_count; + return 0; +} + + +/* Parse the SKB header and initialise state. */ +static int tso_start(struct tso_state *st, struct efx_nic *efx, + struct efx_tx_queue *tx_queue, + const struct sk_buff *skb) +{ + struct device *dma_dev = &efx->pci_dev->dev; + unsigned int header_len, in_len; + bool use_opt_desc = false; + dma_addr_t dma_addr; + + if (tx_queue->tso_version == 1) + use_opt_desc = true; + + st->ip_off = skb_network_header(skb) - skb->data; + st->tcp_off = skb_transport_header(skb) - skb->data; + header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); + in_len = skb_headlen(skb) - header_len; + st->header_len = header_len; + st->in_len = in_len; + if (st->protocol == htons(ETH_P_IP)) { + st->ip_base_len = st->header_len - st->ip_off; + st->ipv4_id = ntohs(ip_hdr(skb)->id); + } else { + st->ip_base_len = st->header_len - st->tcp_off; + st->ipv4_id = 0; + } + st->seqnum = ntohl(tcp_hdr(skb)->seq); + + EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg); + EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn); + EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst); + + st->out_len = skb->len - header_len; + + if (!use_opt_desc) { + st->header_unmap_len = 0; + + if (likely(in_len == 0)) { + st->dma_flags = 0; + st->unmap_len = 0; + return 0; + } + + dma_addr = dma_map_single(dma_dev, skb->data + header_len, + in_len, DMA_TO_DEVICE); + st->dma_flags = EFX_TX_BUF_MAP_SINGLE; + st->dma_addr = dma_addr; + st->unmap_addr = dma_addr; + st->unmap_len = in_len; + } else { + dma_addr = dma_map_single(dma_dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + st->header_dma_addr = dma_addr; + st->header_unmap_len = skb_headlen(skb); + st->dma_flags = 0; + st->dma_addr = dma_addr + header_len; + st->unmap_len = 0; + } + + return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0; +} + +static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, + skb_frag_t *frag) +{ + st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, + skb_frag_size(frag), DMA_TO_DEVICE); + if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { + st->dma_flags = 0; + st->unmap_len = skb_frag_size(frag); + st->in_len = skb_frag_size(frag); + st->dma_addr = st->unmap_addr; + return 0; + } + return -ENOMEM; +} + + +/** + * tso_fill_packet_with_fragment - form descriptors for the current fragment + * @tx_queue: Efx TX queue + * @skb: Socket buffer + * @st: TSO state + * + * Form descriptors for the current fragment, until we reach the end + * of fragment or end-of-packet. + */ +static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + struct tso_state *st) +{ + struct efx_tx_buffer *buffer; + int n; + + if (st->in_len == 0) + return; + if (st->packet_space == 0) + return; + + EFX_BUG_ON_PARANOID(st->in_len <= 0); + EFX_BUG_ON_PARANOID(st->packet_space <= 0); + + n = min(st->in_len, st->packet_space); + + st->packet_space -= n; + st->out_len -= n; + st->in_len -= n; + + efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer); + + if (st->out_len == 0) { + /* Transfer ownership of the skb */ + buffer->skb = skb; + buffer->flags = EFX_TX_BUF_SKB; + } else if (st->packet_space != 0) { + buffer->flags = EFX_TX_BUF_CONT; + } + + if (st->in_len == 0) { + /* Transfer ownership of the DMA mapping */ + buffer->unmap_len = st->unmap_len; + buffer->dma_offset = buffer->unmap_len - buffer->len; + buffer->flags |= st->dma_flags; + st->unmap_len = 0; + } + + st->dma_addr += n; +} + + +#define TCP_FLAGS_OFFSET 13 + +/** + * tso_start_new_packet - generate a new header and prepare for the new packet + * @tx_queue: Efx TX queue + * @skb: Socket buffer + * @st: TSO state + * + * Generate a new header and prepare for the new packet. Return 0 on + * success, or -%ENOMEM if failed to alloc header. + */ +static int tso_start_new_packet(struct efx_tx_queue *tx_queue, + const struct sk_buff *skb, + struct tso_state *st) +{ + struct efx_tx_buffer *buffer = + efx_tx_queue_get_insert_buffer(tx_queue); + bool is_last = st->out_len <= skb_shinfo(skb)->gso_size; + u8 tcp_flags_mask; + + if (!is_last) { + st->packet_space = skb_shinfo(skb)->gso_size; + tcp_flags_mask = 0x09; /* mask out FIN and PSH */ + } else { + st->packet_space = st->out_len; + tcp_flags_mask = 0x00; + } + + if (!st->header_unmap_len) { + /* Allocate and insert a DMA-mapped header buffer. */ + struct tcphdr *tsoh_th; + unsigned int ip_length; + u8 *header; + int rc; + + header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); + if (!header) + return -ENOMEM; + + tsoh_th = (struct tcphdr *)(header + st->tcp_off); + + /* Copy and update the headers. */ + memcpy(header, skb->data, st->header_len); + + tsoh_th->seq = htonl(st->seqnum); + ((u8 *)tsoh_th)[TCP_FLAGS_OFFSET] &= ~tcp_flags_mask; + + ip_length = st->ip_base_len + st->packet_space; + + if (st->protocol == htons(ETH_P_IP)) { + struct iphdr *tsoh_iph = + (struct iphdr *)(header + st->ip_off); + + tsoh_iph->tot_len = htons(ip_length); + tsoh_iph->id = htons(st->ipv4_id); + } else { + struct ipv6hdr *tsoh_iph = + (struct ipv6hdr *)(header + st->ip_off); + + tsoh_iph->payload_len = htons(ip_length); + } + + rc = efx_tso_put_header(tx_queue, buffer, header); + if (unlikely(rc)) + return rc; + } else { + /* Send the original headers with a TSO option descriptor + * in front + */ + u8 tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & + ~tcp_flags_mask; + + buffer->flags = EFX_TX_BUF_OPTION; + buffer->len = 0; + buffer->unmap_len = 0; + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags, + ESF_DZ_TX_TSO_IP_ID, st->ipv4_id, + ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum); + ++tx_queue->insert_count; + + /* We mapped the headers in tso_start(). Unmap them + * when the last segment is completed. + */ + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + buffer->dma_addr = st->header_dma_addr; + buffer->len = st->header_len; + if (is_last) { + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; + buffer->unmap_len = st->header_unmap_len; + buffer->dma_offset = 0; + /* Ensure we only unmap them once in case of a + * later DMA mapping error and rollback + */ + st->header_unmap_len = 0; + } else { + buffer->flags = EFX_TX_BUF_CONT; + buffer->unmap_len = 0; + } + ++tx_queue->insert_count; + } + + st->seqnum += skb_shinfo(skb)->gso_size; + + /* Linux leaves suitable gaps in the IP ID space for us to fill. */ + ++st->ipv4_id; + + return 0; +} + +/** + * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer + * @tx_queue: Efx TX queue + * @skb: Socket buffer + * @data_mapped: Did we map the data? Always set to true + * by this on success. + * + * Context: You must hold netif_tx_lock() to call this function. + * + * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if + * @skb was not enqueued. In all cases @skb is consumed. Return + * %NETDEV_TX_OK. + */ +int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, + struct sk_buff *skb, + bool *data_mapped) +{ + struct efx_nic *efx = tx_queue->efx; + int frag_i, rc; + struct tso_state state; + + prefetch(skb->data); + + /* Find the packet protocol and sanity-check it */ + state.protocol = efx_tso_check_protocol(skb); + + EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count); + + rc = tso_start(&state, efx, tx_queue, skb); + if (rc) + goto mem_err; + + if (likely(state.in_len == 0)) { + /* Grab the first payload fragment. */ + EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1); + frag_i = 0; + rc = tso_get_fragment(&state, efx, + skb_shinfo(skb)->frags + frag_i); + if (rc) + goto mem_err; + } else { + /* Payload starts in the header area. */ + frag_i = -1; + } + + if (tso_start_new_packet(tx_queue, skb, &state) < 0) + goto mem_err; + + prefetch_ptr(tx_queue); + + while (1) { + tso_fill_packet_with_fragment(tx_queue, skb, &state); + + /* Move onto the next fragment? */ + if (state.in_len == 0) { + if (++frag_i >= skb_shinfo(skb)->nr_frags) + /* End of payload reached. */ + break; + rc = tso_get_fragment(&state, efx, + skb_shinfo(skb)->frags + frag_i); + if (rc) + goto mem_err; + } + + /* Start at new packet? */ + if (state.packet_space == 0 && + tso_start_new_packet(tx_queue, skb, &state) < 0) + goto mem_err; + } + + *data_mapped = true; + + return 0; + + mem_err: + netif_err(efx, tx_err, efx->net_dev, + "Out of memory for TSO headers, or DMA mapping error\n"); + + /* Free the DMA mapping we were in the process of writing out */ + if (state.unmap_len) { + if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) + dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, + state.unmap_len, DMA_TO_DEVICE); + else + dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr, + state.unmap_len, DMA_TO_DEVICE); + } + + /* Free the header DMA mapping, if using option descriptors */ + if (state.header_unmap_len) + dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr, + state.header_unmap_len, DMA_TO_DEVICE); + + return -ENOMEM; +} From e638ee1d0a6aa10a1a32d9161e88758ecb8d1823 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 17 Nov 2016 10:52:07 +0000 Subject: [PATCH 4/5] sfc: handle failure to allocate TSOv2 contexts If we fail to init the TXQ because of insufficient TSOv2 contexts, try again with TSOv2 disabled. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 40 ++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 4b69ed9e6336..9cbe573563d7 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2178,9 +2178,6 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) /* TSOv2 is a limited resource that can only be configured on a limited * number of queues. TSO without checksum offload is not really a thing, * so we only enable it for those queues. - * - * TODO: handle failure to allocate this in the case where we've used - * all the queues. */ if (csum_offload && (nic_data->datapath_caps2 & (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) { @@ -2193,15 +2190,6 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue); - MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS, - /* This flag was removed from mcdi_pcol.h for - * the non-_EXT version of INIT_TXQ. However, - * firmware still honours it. - */ - INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, - INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, - INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); - MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0); MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id); @@ -2217,10 +2205,30 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) inlen = MC_CMD_INIT_TXQ_IN_LEN(entries); - rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen, - NULL, 0, NULL); - if (rc) - goto fail; + do { + MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS, + /* This flag was removed from mcdi_pcol.h for + * the non-_EXT version of INIT_TXQ. However, + * firmware still honours it. + */ + INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2, + INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload, + INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload); + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen, + NULL, 0, NULL); + if (rc == -ENOSPC && tso_v2) { + /* Retry without TSOv2 if we're short on contexts. */ + tso_v2 = false; + netif_warn(efx, probe, efx->net_dev, + "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n"); + } else if (rc) { + efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ, + MC_CMD_INIT_TXQ_EXT_IN_LEN, + NULL, 0, rc); + goto fail; + } + } while (rc); /* A previous user of this TX queue might have set us up the * bomb by writing a descriptor to the TX push collector but From 46d1efd852ccbc94e8c4f8c41cfd84147a103436 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Thu, 17 Nov 2016 10:52:36 +0000 Subject: [PATCH 5/5] sfc: remove Software TSO It gives no advantage over GSO now that xmit_more exists. If we find ourselves unable to handle a TSO skb (because our TXQ doesn't have a TSOv2 context and the NIC doesn't support TSOv1), hand it back to GSO. Also do that if the TSO handler fails with EINVAL for any other reason. As Falcon-architecture NICs don't support any firmware-assisted TSO, they no longer advertise TSO feature flags at all. Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/ef10.c | 15 ++ drivers/net/ethernet/sfc/efx.c | 38 ++-- drivers/net/ethernet/sfc/ethtool.c | 1 + drivers/net/ethernet/sfc/net_driver.h | 5 + drivers/net/ethernet/sfc/tx.c | 46 ++++- drivers/net/ethernet/sfc/tx_tso.c | 258 +++++++------------------- 6 files changed, 153 insertions(+), 210 deletions(-) diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 9cbe573563d7..0f58ea8147d4 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -2158,6 +2158,20 @@ static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue, return 0; } +static u32 efx_ef10_tso_versions(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + u32 tso_versions = 0; + + if (nic_data->datapath_caps & + (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) + tso_versions |= BIT(1); + if (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN)) + tso_versions |= BIT(2); + return tso_versions; +} + static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue) { MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 / @@ -5759,6 +5773,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = { #endif .get_mac_address = efx_ef10_get_mac_address_pf, .set_mac_address = efx_ef10_set_mac_address, + .tso_versions = efx_ef10_tso_versions, .revision = EFX_REV_HUNT_A0, .max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH), diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 52ca0404c491..756a09640550 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3200,23 +3200,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev, efx = netdev_priv(net_dev); efx->type = (const struct efx_nic_type *) entry->driver_data; efx->fixed_features |= NETIF_F_HIGHDMA; - net_dev->features |= (efx->type->offload_features | NETIF_F_SG | - NETIF_F_TSO | NETIF_F_RXCSUM); - if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) - net_dev->features |= NETIF_F_TSO6; - /* Mask for features that also apply to VLAN devices */ - net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | - NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | - NETIF_F_RXCSUM); - - net_dev->hw_features = net_dev->features & ~efx->fixed_features; - - /* Disable VLAN filtering by default. It may be enforced if - * the feature is fixed (i.e. VLAN filters are required to - * receive VLAN tagged packets due to vPort restrictions). - */ - net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; - net_dev->features |= efx->fixed_features; pci_set_drvdata(pci_dev, efx); SET_NETDEV_DEV(net_dev, &pci_dev->dev); @@ -3239,6 +3222,27 @@ static int efx_pci_probe(struct pci_dev *pci_dev, if (rc) goto fail3; + net_dev->features |= (efx->type->offload_features | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_RXCSUM); + if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM)) + net_dev->features |= NETIF_F_TSO6; + /* Check whether device supports TSO */ + if (!efx->type->tso_versions || !efx->type->tso_versions(efx)) + net_dev->features &= ~NETIF_F_ALL_TSO; + /* Mask for features that also apply to VLAN devices */ + net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG | + NETIF_F_HIGHDMA | NETIF_F_ALL_TSO | + NETIF_F_RXCSUM); + + net_dev->hw_features = net_dev->features & ~efx->fixed_features; + + /* Disable VLAN filtering by default. It may be enforced if + * the feature is fixed (i.e. VLAN filters are required to + * receive VLAN tagged packets due to vPort restrictions). + */ + net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER; + net_dev->features |= efx->fixed_features; + rc = efx_register_netdev(efx); if (rc) goto fail4; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index bd5edd61bf64..740cdf08fa19 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -69,6 +69,7 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = { EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts), EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers), EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets), + EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks), EFX_ETHTOOL_UINT_TXQ_STAT(pushes), EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets), EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets), diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 2da3e8fb6d71..f97f828a0898 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -225,6 +225,7 @@ struct efx_tx_buffer { * @tso_long_headers: Number of packets with headers too long for standard * blocks * @tso_packets: Number of packets via the TSO xmit path + * @tso_fallbacks: Number of times TSO fallback used * @pushes: Number of times the TX push feature has been used * @pio_packets: Number of times the TX PIO feature has been used * @xmit_more_available: Are any packets waiting to be pushed to the NIC @@ -266,6 +267,7 @@ struct efx_tx_queue { unsigned int tso_bursts; unsigned int tso_long_headers; unsigned int tso_packets; + unsigned int tso_fallbacks; unsigned int pushes; unsigned int pio_packets; bool xmit_more_available; @@ -1225,6 +1227,8 @@ struct efx_mtd_partition { * and tx_type will already have been validated but this operation * must validate and update rx_filter. * @set_mac_address: Set the MAC address of the device + * @tso_versions: Returns mask of firmware-assisted TSO versions supported. + * If %NULL, then device does not support any TSO version. * @revision: Hardware architecture revision * @txd_ptr_tbl_base: TX descriptor ring base address * @rxd_ptr_tbl_base: RX descriptor ring base address @@ -1381,6 +1385,7 @@ struct efx_nic_type { void (*vswitching_remove)(struct efx_nic *efx); int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr); int (*set_mac_address)(struct efx_nic *efx); + u32 (*tso_versions)(struct efx_nic *efx); int revision; unsigned int txd_ptr_tbl_base; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 3089e888a08d..1aa728cfa8ba 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -446,10 +446,38 @@ static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue) } } -static int efx_tx_tso_sw(struct efx_tx_queue *tx_queue, struct sk_buff *skb, - bool *data_mapped) +/* + * Fallback to software TSO. + * + * This is used if we are unable to send a GSO packet through hardware TSO. + * This should only ever happen due to per-queue restrictions - unsupported + * packets should first be filtered by the feature flags. + * + * Returns 0 on success, error code otherwise. + */ +static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue, + struct sk_buff *skb) { - return efx_enqueue_skb_tso(tx_queue, skb, data_mapped); + struct sk_buff *segments, *next; + + segments = skb_gso_segment(skb, 0); + if (IS_ERR(segments)) + return PTR_ERR(segments); + + dev_kfree_skb_any(skb); + skb = segments; + + while (skb) { + next = skb->next; + skb->next = NULL; + + if (next) + skb->xmit_more = true; + efx_enqueue_skb(tx_queue, skb); + skb = next; + } + + return 0; } /* @@ -473,6 +501,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) bool data_mapped = false; unsigned int segments; unsigned int skb_len; + int rc; skb_len = skb->len; segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0; @@ -485,7 +514,14 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) */ if (segments) { EFX_BUG_ON_PARANOID(!tx_queue->handle_tso); - if (tx_queue->handle_tso(tx_queue, skb, &data_mapped)) + rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped); + if (rc == -EINVAL) { + rc = efx_tx_tso_fallback(tx_queue, skb); + tx_queue->tso_fallbacks++; + if (rc == 0) + return 0; + } + if (rc) goto err; #ifdef EFX_USE_PIO } else if (skb_len <= efx_piobuf_size && !skb->xmit_more && @@ -801,7 +837,7 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue) /* Set up default function pointers. These may get replaced by * efx_nic_init_tx() based off NIC/queue capabilities. */ - tx_queue->handle_tso = efx_tx_tso_sw; + tx_queue->handle_tso = efx_enqueue_skb_tso; /* Some older hardware requires Tx writes larger than 32. */ tx_queue->tx_min_size = EFX_WORKAROUND_15592(efx) ? 33 : 0; diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c index 99936d70ed71..60328870aad7 100644 --- a/drivers/net/ethernet/sfc/tx_tso.c +++ b/drivers/net/ethernet/sfc/tx_tso.c @@ -29,8 +29,7 @@ /* Efx legacy TCP segmentation acceleration. * - * Why? Because by doing it here in the driver we can go significantly - * faster than the GSO. + * Utilises firmware support to go faster than GSO (but not as fast as TSOv2). * * Requires TX checksum offload support. */ @@ -47,15 +46,13 @@ * @in_len: Remaining length in current SKB fragment * @unmap_len: Length of SKB fragment * @unmap_addr: DMA address of SKB fragment - * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0 * @protocol: Network protocol (after any VLAN header) * @ip_off: Offset of IP header * @tcp_off: Offset of TCP header * @header_len: Number of bytes of header * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload - * @header_dma_addr: Header DMA address, when using option descriptors - * @header_unmap_len: Header DMA mapped length, or 0 if not using option - * descriptors + * @header_dma_addr: Header DMA address + * @header_unmap_len: Header DMA mapped length * * The state used during segmentation. It is put into this data structure * just to make it easy to pass into inline functions. @@ -72,7 +69,6 @@ struct tso_state { unsigned int in_len; unsigned int unmap_len; dma_addr_t unmap_addr; - unsigned short dma_flags; __be16 protocol; unsigned int ip_off; @@ -172,63 +168,6 @@ static __be16 efx_tso_check_protocol(struct sk_buff *skb) return protocol; } -static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, unsigned int len) -{ - u8 *result; - - EFX_BUG_ON_PARANOID(buffer->len); - EFX_BUG_ON_PARANOID(buffer->flags); - EFX_BUG_ON_PARANOID(buffer->unmap_len); - - result = efx_tx_get_copy_buffer_limited(tx_queue, buffer, len); - - if (result) { - buffer->flags = EFX_TX_BUF_CONT; - } else { - buffer->heap_buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC); - if (unlikely(!buffer->heap_buf)) - return NULL; - tx_queue->tso_long_headers++; - result = (u8 *)buffer->heap_buf + NET_IP_ALIGN; - buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP; - } - - buffer->len = len; - - return result; -} - -/* - * Put a TSO header into the TX queue. - * - * This is special-cased because we know that it is small enough to fit in - * a single fragment, and we know it doesn't cross a page boundary. It - * also allows us to not worry about end-of-packet etc. - */ -static int efx_tso_put_header(struct efx_tx_queue *tx_queue, - struct efx_tx_buffer *buffer, u8 *header) -{ - if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) { - buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev, - header, buffer->len, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev, - buffer->dma_addr))) { - kfree(buffer->heap_buf); - buffer->len = 0; - buffer->flags = 0; - return -ENOMEM; - } - buffer->unmap_len = buffer->len; - buffer->dma_offset = 0; - buffer->flags |= EFX_TX_BUF_MAP_SINGLE; - } - - ++tx_queue->insert_count; - return 0; -} - /* Parse the SKB header and initialise state. */ static int tso_start(struct tso_state *st, struct efx_nic *efx, @@ -237,12 +176,8 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, { struct device *dma_dev = &efx->pci_dev->dev; unsigned int header_len, in_len; - bool use_opt_desc = false; dma_addr_t dma_addr; - if (tx_queue->tso_version == 1) - use_opt_desc = true; - st->ip_off = skb_network_header(skb) - skb->data; st->tcp_off = skb_transport_header(skb) - skb->data; header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u); @@ -264,30 +199,12 @@ static int tso_start(struct tso_state *st, struct efx_nic *efx, st->out_len = skb->len - header_len; - if (!use_opt_desc) { - st->header_unmap_len = 0; - - if (likely(in_len == 0)) { - st->dma_flags = 0; - st->unmap_len = 0; - return 0; - } - - dma_addr = dma_map_single(dma_dev, skb->data + header_len, - in_len, DMA_TO_DEVICE); - st->dma_flags = EFX_TX_BUF_MAP_SINGLE; - st->dma_addr = dma_addr; - st->unmap_addr = dma_addr; - st->unmap_len = in_len; - } else { - dma_addr = dma_map_single(dma_dev, skb->data, - skb_headlen(skb), DMA_TO_DEVICE); - st->header_dma_addr = dma_addr; - st->header_unmap_len = skb_headlen(skb); - st->dma_flags = 0; - st->dma_addr = dma_addr + header_len; - st->unmap_len = 0; - } + dma_addr = dma_map_single(dma_dev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + st->header_dma_addr = dma_addr; + st->header_unmap_len = skb_headlen(skb); + st->dma_addr = dma_addr + header_len; + st->unmap_len = 0; return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0; } @@ -298,7 +215,6 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { - st->dma_flags = 0; st->unmap_len = skb_frag_size(frag); st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; @@ -352,7 +268,6 @@ static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, /* Transfer ownership of the DMA mapping */ buffer->unmap_len = st->unmap_len; buffer->dma_offset = buffer->unmap_len - buffer->len; - buffer->flags |= st->dma_flags; st->unmap_len = 0; } @@ -369,7 +284,7 @@ static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue, * @st: TSO state * * Generate a new header and prepare for the new packet. Return 0 on - * success, or -%ENOMEM if failed to alloc header. + * success, or -%ENOMEM if failed to alloc header, or other negative error. */ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, const struct sk_buff *skb, @@ -378,7 +293,7 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, struct efx_tx_buffer *buffer = efx_tx_queue_get_insert_buffer(tx_queue); bool is_last = st->out_len <= skb_shinfo(skb)->gso_size; - u8 tcp_flags_mask; + u8 tcp_flags_mask, tcp_flags; if (!is_last) { st->packet_space = skb_shinfo(skb)->gso_size; @@ -388,82 +303,44 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, tcp_flags_mask = 0x00; } - if (!st->header_unmap_len) { - /* Allocate and insert a DMA-mapped header buffer. */ - struct tcphdr *tsoh_th; - unsigned int ip_length; - u8 *header; - int rc; + if (WARN_ON(!st->header_unmap_len)) + return -EINVAL; + /* Send the original headers with a TSO option descriptor + * in front + */ + tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & ~tcp_flags_mask; - header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len); - if (!header) - return -ENOMEM; + buffer->flags = EFX_TX_BUF_OPTION; + buffer->len = 0; + buffer->unmap_len = 0; + EFX_POPULATE_QWORD_5(buffer->option, + ESF_DZ_TX_DESC_IS_OPT, 1, + ESF_DZ_TX_OPTION_TYPE, + ESE_DZ_TX_OPTION_DESC_TSO, + ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags, + ESF_DZ_TX_TSO_IP_ID, st->ipv4_id, + ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum); + ++tx_queue->insert_count; - tsoh_th = (struct tcphdr *)(header + st->tcp_off); - - /* Copy and update the headers. */ - memcpy(header, skb->data, st->header_len); - - tsoh_th->seq = htonl(st->seqnum); - ((u8 *)tsoh_th)[TCP_FLAGS_OFFSET] &= ~tcp_flags_mask; - - ip_length = st->ip_base_len + st->packet_space; - - if (st->protocol == htons(ETH_P_IP)) { - struct iphdr *tsoh_iph = - (struct iphdr *)(header + st->ip_off); - - tsoh_iph->tot_len = htons(ip_length); - tsoh_iph->id = htons(st->ipv4_id); - } else { - struct ipv6hdr *tsoh_iph = - (struct ipv6hdr *)(header + st->ip_off); - - tsoh_iph->payload_len = htons(ip_length); - } - - rc = efx_tso_put_header(tx_queue, buffer, header); - if (unlikely(rc)) - return rc; + /* We mapped the headers in tso_start(). Unmap them + * when the last segment is completed. + */ + buffer = efx_tx_queue_get_insert_buffer(tx_queue); + buffer->dma_addr = st->header_dma_addr; + buffer->len = st->header_len; + if (is_last) { + buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; + buffer->unmap_len = st->header_unmap_len; + buffer->dma_offset = 0; + /* Ensure we only unmap them once in case of a + * later DMA mapping error and rollback + */ + st->header_unmap_len = 0; } else { - /* Send the original headers with a TSO option descriptor - * in front - */ - u8 tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & - ~tcp_flags_mask; - - buffer->flags = EFX_TX_BUF_OPTION; - buffer->len = 0; + buffer->flags = EFX_TX_BUF_CONT; buffer->unmap_len = 0; - EFX_POPULATE_QWORD_5(buffer->option, - ESF_DZ_TX_DESC_IS_OPT, 1, - ESF_DZ_TX_OPTION_TYPE, - ESE_DZ_TX_OPTION_DESC_TSO, - ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags, - ESF_DZ_TX_TSO_IP_ID, st->ipv4_id, - ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum); - ++tx_queue->insert_count; - - /* We mapped the headers in tso_start(). Unmap them - * when the last segment is completed. - */ - buffer = efx_tx_queue_get_insert_buffer(tx_queue); - buffer->dma_addr = st->header_dma_addr; - buffer->len = st->header_len; - if (is_last) { - buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE; - buffer->unmap_len = st->header_unmap_len; - buffer->dma_offset = 0; - /* Ensure we only unmap them once in case of a - * later DMA mapping error and rollback - */ - st->header_unmap_len = 0; - } else { - buffer->flags = EFX_TX_BUF_CONT; - buffer->unmap_len = 0; - } - ++tx_queue->insert_count; } + ++tx_queue->insert_count; st->seqnum += skb_shinfo(skb)->gso_size; @@ -483,8 +360,8 @@ static int tso_start_new_packet(struct efx_tx_queue *tx_queue, * Context: You must hold netif_tx_lock() to call this function. * * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if - * @skb was not enqueued. In all cases @skb is consumed. Return - * %NETDEV_TX_OK. + * @skb was not enqueued. @skb is consumed unless return value is + * %EINVAL. */ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb, @@ -494,6 +371,9 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, int frag_i, rc; struct tso_state state; + if (tx_queue->tso_version != 1) + return -EINVAL; + prefetch(skb->data); /* Find the packet protocol and sanity-check it */ @@ -503,7 +383,7 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, rc = tso_start(&state, efx, tx_queue, skb); if (rc) - goto mem_err; + goto fail; if (likely(state.in_len == 0)) { /* Grab the first payload fragment. */ @@ -512,14 +392,15 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, rc = tso_get_fragment(&state, efx, skb_shinfo(skb)->frags + frag_i); if (rc) - goto mem_err; + goto fail; } else { /* Payload starts in the header area. */ frag_i = -1; } - if (tso_start_new_packet(tx_queue, skb, &state) < 0) - goto mem_err; + rc = tso_start_new_packet(tx_queue, skb, &state); + if (rc) + goto fail; prefetch_ptr(tx_queue); @@ -534,37 +415,38 @@ int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, rc = tso_get_fragment(&state, efx, skb_shinfo(skb)->frags + frag_i); if (rc) - goto mem_err; + goto fail; } /* Start at new packet? */ - if (state.packet_space == 0 && - tso_start_new_packet(tx_queue, skb, &state) < 0) - goto mem_err; + if (state.packet_space == 0) { + rc = tso_start_new_packet(tx_queue, skb, &state); + if (rc) + goto fail; + } } *data_mapped = true; return 0; - mem_err: - netif_err(efx, tx_err, efx->net_dev, - "Out of memory for TSO headers, or DMA mapping error\n"); +fail: + if (rc == -ENOMEM) + netif_err(efx, tx_err, efx->net_dev, + "Out of memory for TSO headers, or DMA mapping error\n"); + else + netif_err(efx, tx_err, efx->net_dev, "TSO failed, rc = %d\n", rc); /* Free the DMA mapping we were in the process of writing out */ if (state.unmap_len) { - if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE) - dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr, - state.unmap_len, DMA_TO_DEVICE); - else - dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr, - state.unmap_len, DMA_TO_DEVICE); + dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr, + state.unmap_len, DMA_TO_DEVICE); } - /* Free the header DMA mapping, if using option descriptors */ + /* Free the header DMA mapping */ if (state.header_unmap_len) dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr, state.header_unmap_len, DMA_TO_DEVICE); - return -ENOMEM; + return rc; }