[SQUASH] treewide: Revert backported crypto algos
* Breaks VoWiFi; besides, I saw little to no improvement in Geekbench AES-XTS scores with these.

Revert "ARM64/configs: surya: Enable backported crypto algos"
This reverts commit 53cf765af430ea1ce8b99cfdebfaf1fbd05ce9e7.
Revert "arm64: crypto: aes-glue: always clear out defines before assuming"
This reverts commit 6bdf40bd12304f3c26c7278be8172dae4d355d23.
Revert "lib: crypto: move out aes generic library into arm64"
This reverts commit 6f53883b2cac7ec3818464e90738182c7da727b4.
Revert "crypto: bring back blkcipher"
This reverts commit 49ab3ad968fdf0f3b15a7014d6958cc46e50c4a3.
Revert "crypto: lib/aes - export sbox and inverse sbox"
This reverts commit 6d7509597228bb2824b229b6f92200621af17039.
Revert "crypto: lib/aes add aes generic"
This reverts commit cd956b7d9c2533f72a41eaa1421d7896c867137c.
Revert "HACK: include: crypto: aes.h: redirect aes function definitions to current ones"
This reverts commit 90035f7567d034bebd0672724d2ee341d9ef789a.
Revert "arm64: assembler: add utility macros to push/pop stack frames"
This reverts commit 5899671c86c750a23f68fce359ac4574f289b252.
Revert "crypto: skcipher - add the ability to abort a skcipher walk"
This reverts commit 6bfa9de12bea39c63115d573b292e6352a85ef69.
Revert "crypto: aes/fixed-time - align key schedule with other implementations"
This reverts commit 6b930b634c0bab896b127835ffba9a3c2d796760.
Revert "crypto: sm4 - export encrypt/decrypt routines to other drivers"
This reverts commit 306ce112895e48e8c67f5531dcc7794b8c9799d2.
Revert "crypto: sm4 - introduce SM4 symmetric cipher algorithm"
This reverts commit 9e677e8a2fe26fd4694dfff61279bd85237aacc8.
Revert "crypto: hash - introduce crypto_shash_tfm_digest()"
This reverts commit 2e972aa2655e7e50b4362d92e13b78723ab32c01.
Revert "crypto: ctr - add helper for performing a CTR encryption walk"
This reverts commit 20d0ffb036d9f88eb7b2c056202a9e6f4cece6bf.
Revert "crypto: sm3 - export crypto_sm3_final function"
This reverts commit d6983c76e3da30db111421795beda5c703955aa9.
Revert "crypto: sm3 - add OSCCA SM3 secure hash"
This reverts commit d6a29406704f117dfd2cc99b78078831d1fe287e.
Revert "crypto: don't optimize keccakf()"
This reverts commit 598d21bacff6319be4aa058025c632b107413f32.
Revert "crypto: sha3-generic - Use __optimize to support old compilers"
This reverts commit 734a8b2ae19a5e6e1ca1ada7ccb333a9655a0673.
Revert "crypto: sha3-generic - deal with oversize stack frames"
This reverts commit 83f16c21f6e058890a258a2a5b765046f6d1230c.
Revert "crypto: sha3-generic - export init/update/final routines"
This reverts commit 591f7afb9d0890c2ee9460360112aa4e448a3924.
Revert "crypto: sha3-generic - simplify code"
This reverts commit 6a9ea8f69fb0baa3d403fbbb82e052d684bc9fec.
Revert "crypto: sha3-generic - rewrite KECCAK transform to help the compiler optimize"
This reverts commit f7f038880d90b321a48788160beea14fa294f109.
Revert "arm64: crypto: fallback to may_use_yield"
This reverts commit 5dc3c1306cd51f5ca293e8b76f7f382ba68da2c2.
Revert "arm64: assembler: add cond_yield macro"
This reverts commit 10ec3894fbd69c75638c44925408b4a618a6afa8.
Revert "crypto: arm64/aes-ce - deal with oversight in new CTR carry code"
This reverts commit 769190a03e8e1373a4e6ebfffa409bd89a123e85.
Revert "crypto: arm64/crc-t10dif - move NEON yield to C code"
This reverts commit ae264b6f57eb729aaeb9f8c0cb64d6da7d2a61ae.
Revert "crypto: arm64/aes-ce-mac - simplify NEON yield"
This reverts commit f497267b2d962c1d1e3227b68ed47450235c0134.
Revert "crypto: arm64/aes-neonbs - remove NEON yield calls"
This reverts commit e3bf48533edde9d66e0de81df758b75e5daf7649.
Revert "crypto: arm64/sha512-ce - simplify NEON yield"
This reverts commit 223c7a38e2597442d1bfa9b8cf1d22d4e5bc975b.
Revert "crypto: arm64/sha3-ce - simplify NEON yield"
This reverts commit c4ab0f1013d7f75e77bbb190d2d0a45931777e06.
Revert "crypto: arm64/sha2-ce - simplify NEON yield"
This reverts commit c194acd3691aa17bb4bfff16124b2a3f476d0f4f.
Revert "crypto: arm64/sha1-ce - simplify NEON yield"
This reverts commit 2c13ddfe897ccda89cdbbb77d44522b7a1b52d71.
Revert "crypto: arm64/sha - add missing module aliases"
This reverts commit 0ccfa37388086d02858f9a829a8424ea6fe91369.
Revert "crypto: arm64/aes-ctr - improve tail handling"
This reverts commit 76050bf6b606b2a634d5d2615e3e37da30de1ccd.
Revert "crypto: arm64/aes-ce - really hide slower algos when faster ones are enabled"
This reverts commit 5648ba13a3c67b2f15544c73d75525f4b47af4f6.
Revert "crypto: arm64/gcm - move authentication tag check to SIMD domain"
This reverts commit 1efe38494e3a102b79f267e85359dad696483790.
Revert "crypto: arm64/chacha - simplify tail block handling"
This reverts commit c5f4710f95bbc3b530f22d6752cd1ff3d2006f67.
Revert "crypto: hash - Use memzero_explicit() for clearing state"
This reverts commit 1ec2a7f09f56c06773a754b1037d9463d44cf5b0.
Revert "crypto: arm64: Use x16 with indirect branch to bti_c"
This reverts commit a8c3cc4987341e820cf33589fb8be1e80d6a1455.
Revert "crypto: arm64/gcm - Fix endianness warnings"
This reverts commit 02adba8b71b749dbd0d4912e233e18b20f5a8a9d.
Revert "crypto: arm64/sha - Add declarations for assembly variables"
This reverts commit 435ec992f88cefba133194f1ee249805d482d8b2.
Revert "crypto: arm64/gcm - use inline helper to suppress indirect calls"
This reverts commit bc92b86fadeb48ae4bfef2b0d3c5b9ba078a4328.
Revert "crypto: arm64/gcm - use variably sized key struct"
This reverts commit 0aa3bc4ef6a0400c29fa82bf222996ed63a221f9.
Revert "crypto: arm64/gcm - disentangle ghash and gcm setkey() routines"
This reverts commit 4ff08ef5dd27b020b66fbe45f61282f20942c7cb.
Revert "crypto: arm64/ghash - drop PMULL based shash"
This reverts commit 313a347394fc9593669d088608fa327c21481d16.
Revert "crypto: arm64/aes-glue - use crypto_shash_tfm_digest()"
This reverts commit fb5c6d2fd18e01024fc8a509945545ec6925cdd7.
Revert "crypto: arm64 - Consistently enable extension"
This reverts commit 6eae8549471bc9b81b8d25859e6683e9be4cc67c.
Revert "crypto: arm/neon - memzero_explicit aes-cbc key"
This reverts commit fda9fcfafd679f7ed2ef75f8859ff91680b9dd6b.
Revert "arm64: crypto: Modernize names for AES function macros"
This reverts commit 2d88fdbd9de0f95ef5b8e135474a40004b0b63c3.
Revert "arm64: crypto: Modernize some extra assembly annotations"
This reverts commit d6b0bf996b79a03a37c407bfe93a8c80adbd7821.
Revert "crypto: arm64/sha-ce - implement export/import"
This reverts commit 62e0842054a83d169791e1fddb0699589b82079d.
Revert "crypto: arm64 - Use modern annotations for assembly functions"
This reverts commit cecb3c804506cae6693fa140435dc47a5ee9154c.
Revert "crypto: arm64/ghash-neon - bump priority to 150"
This reverts commit c1c96a11a6618724bd9f2c094d22118a6244439a.
Revert "crypto: arm64/sha - fix function types"
This reverts commit b2a6b1e16dc2abd35ec6cb0fee1bb5681fc27e95.
Revert "crypto: skcipher - rename the crypto_blkcipher module and kconfig option"
This reverts commit 515e10b92b2b7bf161703c0ffbdbb7273626de1d.
Revert "crypto: arm64/aes-neonbs - add return value of skcipher_walk_done() in __xts_crypt()"
This reverts commit 06cd75589703d1b6bce80320a182b595ed42ab8e.
Revert "crypto: arm64/gcm-ce - implement 4 way interleave"
This reverts commit c14ebb4667dd5e494867ee129a997c6d2198507d.
Revert "crypto: arm64/aes-neonbs - implement ciphertext stealing for XTS"
This reverts commit d5397cf5dc32382901d611cd724eec6c3817656c.
Revert "crypto: arm64/aes - implement support for XTS ciphertext stealing"
This reverts commit 192933ba203e238cae5b16ab599e4cfa6b698d51.
Revert "crypto: arm64/aes-cts-cbc - move request context data to the stack"
This reverts commit f71bf2534f6d610950b010790b8e5675ed5455c7.
Revert "crypto: arm64/aes-cts-cbc-ce - performance tweak"
This reverts commit 7e8f61db4e78db217a3364ee47cfca764cbc5f0d.
Revert "crypto: arm64/aes-neon - limit exposed routines if faster driver is enabled"
This reverts commit ecd648682b2dc6bed3214b2e43a0eca35b63094d.
Revert "crypto: arm64/aes-neonbs - replace tweak mask literal with composition"
This reverts commit 5be595c6ba8edae081cdb63dca0353115d2484d1.
Revert "crypto: arm64/aes - Use PTR_ERR_OR_ZERO rather than its implementation."
This reverts commit aab710faff1ec9001c805568e6b8831103e3ecb6.
Revert "crypto: arm64 - Rename functions to avoid conflict with crypto/sha256.h"
This reverts commit caf246ef54b23c9675585fa555d4bb541bd59c92.
Revert "crypto: arm64/aes - implement accelerated ESSIV/CBC mode"
This reverts commit f8779aee4500c50d6d94bd980a2842308abeb79c.
Revert "crypto: arm64/aes-cts-cbc - factor out CBC en/decryption of a walk"
This reverts commit e0a1f4f64a8d1f4edf497ff327e016fbffadf66f.
Revert "crypto: arm64/aes-cipher - switch to shared AES inverse Sbox"
This reverts commit 186ffd910b0d5b0489d89ba6ca191c0cd376f05c.
Revert "crypto: arm64/aes-neon - switch to shared AES Sboxes"
This reverts commit 59e5b082956da502c60ac5760a915b486d1b8e5a.
Revert "crypto: arm64/aes-ce-cipher - use AES library as fallback"
This reverts commit a700606c4170512da6f9b4c96aba4f6df8ccf8a7.
Revert "crypto: aes - move sync ctr(aes) to AES library and generic helper"
This reverts commit 48b821c53fc518c80c410047748577bea1983e71.
Revert "crypto: arm64/aes-ce - switch to library version of key expansion routine"
This reverts commit cdd168c7cd95008f372dfadc2ba3861fc906ca60.
Revert "crypto: arm64/aes-neonbs - switch to library version of key expansion routine"
This reverts commit e2eb9cb4f948c5541bf39bbe3c5b4298c8a92909.
Revert "crypto: arm64/aes-ccm - switch to AES library"
This reverts commit 9fd25afeced4df2f1da1ecdebab4833b2aa98c3a.
Revert "crypto: arm64/ghash - switch to AES library"
This reverts commit 8790d3e83a3f13e887eadd033719c08e1156ad52.
Revert "crypto: aes - rename local routines to prevent future clashes"
This reverts commit cef928c589372b9fc280695fcaecfb6c21b7ee83.
Revert "crypto: arm64/aes-ce - implement 5 way interleave for ECB, CBC and CTR"
This reverts commit fdce296ea17bff501601164bb25b783798b4a402.
Revert "crypto: arm64/aes-ce - add 5 way interleave routines"
This reverts commit 917c3db52e00f145613a362143cb01fa16e72676.
Revert "crypto: chacha - constify ctx and iv arguments"
This reverts commit c83b6ad8ff05e72d0142471ed848023d984da356.
Revert "arm64: HWCAP: add support for AT_HWCAP2"
This reverts commit 9bb745a4cea3de91bf69fe01af7532f2877f6ac3.
Revert "crypto: arm64/cbcmac - handle empty messages in same way as template"
This reverts commit 4e90675cbb0ff662a9027f73d1f13fd6ec3d694a.
Revert "crypto: arm64 - convert to use crypto_simd_usable()"
This reverts commit 3b5ca4665ab8281d8ccec34709aafa8e43dc2e6d.
Revert "crypto: arm64/gcm-aes-ce - fix no-NEON fallback code"
This reverts commit 924a1110fe35f605fb92804200bf21b4594479d3.
Revert "crypto: arm64/chacha - fix hchacha_block_neon() for big endian"
This reverts commit 6ecb0ec105fdd67f4f60736e166ae33bc96c3821.
Revert "crypto: arm64/chacha - fix chacha_4block_xor_neon() for big endian"
This reverts commit e0c9ed3235e0d9460acc931eff40d02244553204.
Revert "crypto: arm64/aes-blk - update IV after partial final CTR block"
This reverts commit 18b10273b19d1e5e9e29d27948f68fb9a9291a28.
Revert "crypto: arm64/aes-neonbs - fix returning final keystream block"
This reverts commit 9f297390e264ffa3f1a00ff46f21c656dde26df9.
Revert "crypto: arm64/crct10dif-ce - cleanup and optimizations"
This reverts commit 2aae1e3d62019544b8716d1b69ebf90614e8cdab.
Revert "crypto: arm64/crct10dif - register PMULL variants as separate algos"
This reverts commit bb84013e29730f4f68d227d2668a67a23eea8187.
Revert "crypto: arm64/crct10dif - remove dead code"
This reverts commit e9a91b8f781aa9b9ce9b8cba96b84f564aa5c9f4.
Revert "crypto: arm64/ghash - register PMULL variants as separate algos"
This reverts commit 7d747b051d69639da21c96e934909611c9f81549.
Revert "crypto: arm64/aes-ccm - don't use an atomic walk needlessly"
This reverts commit 25b54060800d593a4ffdb34579210fc804b5489a.
Revert "crypto: arm64/aes-ccm - fix logical bug in AAD MAC handling"
This reverts commit 8b995fb58cc5c8c6bf7489dab8fe8142301ac1da.
Revert "crypto: arm64/chacha - use combined SIMD/ALU routine for more speed"
This reverts commit fefef573014532d1deb1bdc428d3a71306d0e558.
Revert "crypto: arm64/chacha - optimize for arbitrary length inputs"
This reverts commit d7764657bd5578c796275ba5dbbac2ad713cfdca.
Revert "crypto: arm64/chacha - add XChaCha12 support"
This reverts commit e6c5e231ae1cd27d51526fafa4a7b081f332b019.
Revert "crypto: arm64/chacha20 - refactor to allow varying number of rounds"
This reverts commit fe2906d03233b94e1ddc91c505a6d2f385cc6679.
Revert "crypto: arm64/chacha20 - add XChaCha20 support"
This reverts commit b4e2ca051adf26b8cafad012b16b39d96cb77d3f.
Revert "crypto: arm64/nhpoly1305 - add NEON-accelerated NHPoly1305"
This reverts commit f5d169f37be30ca6360232e88357d715a578174f.
Revert "crypto: arm64/aes-blk - ensure XTS mask is always loaded"
This reverts commit a21d4e59e8d54c315ccbc196def1aa15ee6538c9.
Revert "crypto: arm64/aes - fix handling sub-block CTS-CBC inputs"
This reverts commit 2211e34cc97c80548ab831166373fefc7dc2f29d.
Revert "crypto: arm64/aes-blk - improve XTS mask handling"
This reverts commit 3fef20ad89773585e81febe97e7337664b488450.
Revert "crypto: arm64/aes-blk - add support for CTS-CBC mode"
This reverts commit 54f8af3c79f88da71c3173be25cbd790c8bd07ec.
Revert "crypto: arm64/aes-blk - revert NEON yield for skciphers"
This reverts commit 4007f6d321639b70146b5f3c1e4612c8f8158fee.
Revert "crypto: arm64/aes-blk - remove pointless (u8 *) casts"
This reverts commit 3e38de9d524ebc2d509f4af77a1ef53d71579f6b.
Revert "crypto: arm64/crct10dif - implement non-Crypto Extensions alternative"
This reverts commit 7546637992dddb0d5ed8ec161f5aa00b5e13d5e6.
Revert "crypto: arm64/crct10dif - preparatory refactor for 8x8 PMULL version"
This reverts commit 4a0e214b37f461ea570b3e23982447bb2cad31a5.
Revert "crypto: arm64/crc32 - remove PMULL based CRC32 driver"
This reverts commit 7362148d02a38e69e11f2c5f9985ff7375d4bd29.
Revert "crypto: arm64/aes-modes - get rid of literal load of addend vector"
This reverts commit ca48e35e57323d4ce03694240616a9d967c8ebfd.
Revert "crypto: arm64/aes-gcm-ce - fix scatterwalk API violation"
This reverts commit 842ecc8548a5ba41706ce1943f3caabed69ac06a.
Revert "crypto: arm64/sm4-ce - check for the right CPU feature bit"
This reverts commit 0d9a3ccbc34f63956c254406faf28ba2ae22fe6f.
Revert "crypto: arm64/ghash-ce - implement 4-way aggregation"
This reverts commit c2b969ace9aade2625ae01f466289f05762192ff.
Revert "crypto: arm64/ghash-ce - replace NEON yield check with block limit"
This reverts commit 811b2c5d4d1ade3c71dfc62da2ed082490acae6a.
Revert "crypto: arm64/aes-ce-gcm - don't reload key schedule if avoidable"
This reverts commit e873ab3d42c7ad1906ec869900a0507fabc0d517.
Revert "crypto: arm64/aes-ce-gcm - implement 2-way aggregation"
This reverts commit 94ccba0f7c486355122d25da43c39097d81a02c7.
Revert "crypto: arm64/aes-ce-gcm - operate on two input blocks at a time"
This reverts commit f2105c4e7e500b80b28d8ebc2f17db32c1d8bfd6.
Revert "crypto: arm64 - revert NEON yield for fast AEAD implementations"
This reverts commit 39e662f0f9d3db97eccef09f36f2fefc9a618bbd.
Revert "crypto/arm64: aes-ce-gcm - add missing kernel_neon_begin/end pair"
This reverts commit 15d003b13075a16712dcbdb177c41b2bddbcbf0e.
Revert "crypto: arm64/sha256 - increase cra_priority of scalar implementations"
This reverts commit 2ef73cb76eb12bb45bf0f89a49b7dc7126fadd47.
Revert "crypto: shash - remove useless setting of type flags"
This reverts commit c1d4b72ca1321bfcbee7b86390b808428c3a27fa.
Revert "crypto: arm64/aes-blk - fix and move skcipher_walk_done out of kernel_neon_begin, _end"
This reverts commit ac65336b8bd522c96fd50f49ef9c4ceee3603487.
Revert "crypto: clarify licensing of OpenSSL asm code"
This reverts commit e989520a26841372c045c493e7e2c8f7bbda4f59.
Revert "crypto: arm64/sha512-ce - yield NEON after every block of input"
This reverts commit 3bdd591e9b4da8c93d9ef7ae6049cf28385098a0.
Revert "crypto: arm64/sha3-ce - yield NEON after every block of input"
This reverts commit e3d3203e0645587a72da93e482f38695f107c3a4.
Revert "crypto: arm64/crct10dif-ce - yield NEON after every block of input"
This reverts commit 859ffc1c5d38d109ca9c57f0703e792f3a9582c2.
Revert "crypto: arm64/crc32-ce - yield NEON after every block of input"
This reverts commit 5f1c71918b39f512e4c1f4daeeb3c9d5706351a8.
Revert "crypto: arm64/aes-ghash - yield NEON after every block of input"
This reverts commit 3125f780eb7ac53a4cdbeed97ae91e8ec5945ea1.
Revert "crypto: arm64/aes-bs - yield NEON after every block of input"
This reverts commit ad2d657e731bd4d462cedbcd6ef5662a265d75a0.
Revert "Revert "crypto: arm64/aes-neonbs - fix returning final keystream block""
This reverts commit 1525d077c18b78b2953cf0d1868c444c37d3c682.
Revert "crypto: arm64/aes-blk - yield NEON after every block of input"
This reverts commit 5accaa34eea96c527aa8ec741383d3f38042ff69.
Revert "crypto: arm64/aes-ccm - yield NEON after every block of input"
This reverts commit c8f4e1b6574b2d4fdc06cd352b6950a8da2bef6c.
Revert "Revert "crypto: arm64/aes-ccm - fix logical bug in AAD MAC handling""
This reverts commit 1629c18dda48bbac7154022dccdf9eb952ba8643.
Revert "crypto: arm64/sha2-ce - yield NEON after every block of input"
This reverts commit 343779e3c70586299ab67f3f9216562ab9bd6001.
Revert "crypto: arm64/sha1-ce - yield NEON after every block of input"
This reverts commit 68106b7893e6e78da5dc39601cba4d7a60071dea.
Revert "crypto: arm64 - add support for SM4 encryption using special instructions"
This reverts commit b9547b0195e0c89e59f41a631eadad77220d8993.
Revert "crypto: arm64/sha256-neon - play nice with CONFIG_PREEMPT kernels"
This reverts commit 33045c52eb89af1dca46aa2f31b9ace6848eb1fd.
Revert "crypto: arm64/aes-blk - add 4 way interleave to CBC-MAC encrypt path"
This reverts commit 2624ccfec2c746fd5c38e52be423d81d772ab785.
Revert "crypto: arm64/aes-blk - add 4 way interleave to CBC encrypt path"
This reverts commit 26acced8924afe42757e2f20420d2b412fd5122d.
Revert "crypto: arm64/aes-blk - remove configurable interleave"
This reverts commit aade51c37fda96baf6708504fae38dd7e8f3b525.
Revert "crypto: arm64/chacha20 - move kernel mode neon en/disable into loop"
This reverts commit 563fdb9d8718c999feff21a56e855d7d368bfe38.
Revert "crypto: arm64/aes-bs - move kernel mode neon en/disable into loop"
This reverts commit 7693fb31e5ec7a3a442abc82ce6dc223e0070c63.
Revert "crypto: arm64/aes-blk - move kernel mode neon en/disable into loop"
This reverts commit f3d47872ae322555f1933b89deaedf15f01a1630.
Revert "crypto: arm64/aes-ce-ccm - move kernel mode neon en/disable into loop"
This reverts commit 208213ffed71f368561cddafd96b8d726a96d628.
Revert "crypto: arm64/speck - add NEON-accelerated implementation of Speck-XTS"
This reverts commit a53ebc68fb69e48a0670b74f2107339937af236b.
Revert "crypto: arm64/sha512 - fix/improve new v8.2 Crypto Extensions code"
This reverts commit 3fe60009d5c1a9619412ff4e8f3a3963b90b03ee.
Revert "crypto: arm64/sm3 - new v8.2 Crypto Extensions implementation"
This reverts commit f12a399dc824600b5df583418a96759d7b76df3d.
Revert "crypto: arm64/sha3 - new v8.2 Crypto Extensions implementation"
This reverts commit 0224378dd15ddc1e5bf6999b26f6a722be125604.
Revert "crypto: arm64/sha1-ce - get rid of literal pool"
This reverts commit 537e00dd0c79e6c31c4b3b2adde9727aa9f2590f.
Revert "crypto: arm64/sha2-ce - move the round constant table to .rodata section"
This reverts commit 11f38eee4f30ae202563dce4742be2819cf5ffe7.
Revert "crypto: arm64/crct10dif - move literal data to .rodata section"
This reverts commit 89710355b705b55f2c410c698ee8cd510c769672.
Revert "crypto: arm64/crc32 - move literal data to .rodata section"
This reverts commit d3fada575171f56e3dbbc402b119db0a7e106b65.
Revert "crypto: arm64/aes-neon - move literal data to .rodata section"
This reverts commit b73eae3af478173d89be32546d3db209c241a2ac.
Revert "crypto: arm64/aes-cipher - move S-box to .rodata section"
This reverts commit fc79c79af1578456030b820c8855887f79460eec.
Revert "crypto: arm64 - implement SHA-512 using special instructions"
This reverts commit f85618f0d28e37abd7fbb83acaa3de5e32849c5e.
Revert "crypto: arm64/aes - do not call crypto_unregister_skcipher twice on error"
This reverts commit 095b634144846d00932b2d68b212373b203d8265.
Revert "[SQUASH] arm64: crypto: Revert old backports"
This reverts commit 62e858fc144999fd13942977e9de8f0ab4b17041.
parent 147f696539
commit c65c2e9bc6
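Most of the churn below is in `.cra_driver_name` / `.cra_priority` fields: the kernel crypto API always instantiates whichever registered driver has the highest priority for a given algorithm name, so reverting the backported drivers simply hands, e.g., "xts(aes)" back to the stock aes-ce/aes-neon implementations. A minimal userspace sketch for sanity-checking that after a build like this — it is not part of this commit and only assumes the standard name/driver/priority fields of /proc/crypto:

/*
 * Sketch (not from this repo): print the driver(s) and priority the
 * kernel registered for one algorithm name by scanning /proc/crypto.
 */
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
	const char *want = argc > 1 ? argv[1] : "xts(aes)";
	char line[256], name[128] = "", driver[128] = "";
	FILE *f = fopen("/proc/crypto", "r");

	if (!f) {
		perror("/proc/crypto");
		return 1;
	}
	while (fgets(line, sizeof(line), f)) {
		/* each entry lists name and driver before priority */
		if (sscanf(line, "name : %127s", name) == 1)
			continue;
		if (sscanf(line, "driver : %127s", driver) == 1)
			continue;
		if (!strncmp(line, "priority", 8) && !strcmp(name, want))
			printf("%-20s%s", driver, strchr(line, ':') + 1);
	}
	fclose(f);
	return 0;
}

Running it as ./proccrypto 'xts(aes)' before and after the revert is a quick way to confirm which implementation actually wins once the backported algos are gone.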
@@ -81,7 +81,7 @@ config CRYPTO_AES_ARM
 config CRYPTO_AES_ARM_BS
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_SIMD
 	select CRYPTO_AES
 	help
@@ -97,7 +97,7 @@ config CRYPTO_AES_ARM_BS
 config CRYPTO_AES_ARM_CE
 	tristate "Accelerated AES using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_SIMD
 	help
 	  Use an implementation of AES in CBC, CTR and XTS modes that uses
@@ -127,7 +127,7 @@ config CRYPTO_CRC32_ARM_CE
 config CRYPTO_CHACHA20_NEON
 	tristate "NEON accelerated ChaCha stream cipher algorithms"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20

 config CRYPTO_NHPOLY1305_NEON

@@ -19,7 +19,7 @@ EXPORT_SYMBOL(__aes_arm_encrypt);
 asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
 EXPORT_SYMBOL(__aes_arm_decrypt);

-static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -27,7 +27,7 @@ static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
 }

-static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -47,8 +47,8 @@ static struct crypto_alg aes_alg = {
 	.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
 	.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
 	.cra_cipher.cia_setkey = crypto_aes_set_key,
-	.cra_cipher.cia_encrypt = aes_arm_encrypt,
-	.cra_cipher.cia_decrypt = aes_arm_decrypt,
+	.cra_cipher.cia_encrypt = aes_encrypt,
+	.cra_cipher.cia_decrypt = aes_decrypt,

 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	.cra_alignmask = 3,

@@ -132,7 +132,6 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 	kernel_neon_begin();
 	aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
 	kernel_neon_end();
-	memzero_explicit(&rk, sizeof(rk));

 	return crypto_cipher_setkey(ctx->enc_tfm, in_key, key_len);
 }

@@ -62,7 +62,7 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
 }

 static int chacha_neon_stream_xor(struct skcipher_request *req,
-				  const struct chacha_ctx *ctx, const u8 *iv)
+				  struct chacha_ctx *ctx, u8 *iv)
 {
 	struct skcipher_walk walk;
 	u32 state[16];

@@ -152,7 +152,7 @@ static struct shash_alg ghash_alg = {
 		.cra_name = "__ghash",
 		.cra_driver_name = "__driver-ghash-ce",
 		.cra_priority = 0,
-		.cra_flags = CRYPTO_ALG_INTERNAL,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH | CRYPTO_ALG_INTERNAL,
 		.cra_blocksize = GHASH_BLOCK_SIZE,
 		.cra_ctxsize = sizeof(struct ghash_key),
 		.cra_module = THIS_MODULE,

@@ -1,14 +1,4 @@
 #define __ARM_ARCH__ __LINUX_ARM_ARCH__
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.
-
 @ ====================================================================
 @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and

@@ -75,6 +75,7 @@ static struct shash_alg alg = {
 		.cra_name = "sha1",
 		.cra_driver_name = "sha1-ce",
 		.cra_priority = 200,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA1_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -67,6 +67,7 @@ static struct shash_alg alg = {
 		.cra_name = "sha1",
 		.cra_driver_name = "sha1-asm",
 		.cra_priority = 150,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA1_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -83,6 +83,7 @@ static struct shash_alg alg = {
 		.cra_name = "sha1",
 		.cra_driver_name = "sha1-neon",
 		.cra_priority = 250,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA1_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -78,6 +78,7 @@ static struct shash_alg algs[] = { {
 		.cra_name = "sha224",
 		.cra_driver_name = "sha224-ce",
 		.cra_priority = 300,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA256_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}
@@ -92,6 +93,7 @@ static struct shash_alg algs[] = { {
 		.cra_name = "sha256",
 		.cra_driver_name = "sha256-ce",
 		.cra_priority = 300,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA256_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -1,19 +1,12 @@
 #!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from the OpenSSL project but the author (Andy Polyakov)
-# has relicensed it under the GPLv2. Therefore this program is free software;
-# you can redistribute it and/or modify it under the terms of the GNU General
-# Public License version 2 as published by the Free Software Foundation.
-#
-# The original headers, including the original license headers, are
-# included below for completeness.

 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 #
 # Permission to use under GPL terms is granted.
 # ====================================================================

 # SHA256 block procedure for ARMv4. May 2007.

@@ -1,18 +1,11 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.

 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
 @ details see http://www.openssl.org/~appro/cryptogams/.
 @
 @ Permission to use under GPL terms is granted.
 @ ====================================================================

 @ SHA256 block procedure for ARMv4. May 2007.

@@ -71,6 +71,7 @@ static struct shash_alg algs[] = { {
 		.cra_name = "sha256",
 		.cra_driver_name = "sha256-asm",
 		.cra_priority = 150,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA256_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}
@@ -85,6 +86,7 @@ static struct shash_alg algs[] = { {
 		.cra_name = "sha224",
 		.cra_driver_name = "sha224-asm",
 		.cra_priority = 150,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA224_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -79,6 +79,7 @@ struct shash_alg sha256_neon_algs[] = { {
 		.cra_name = "sha256",
 		.cra_driver_name = "sha256-neon",
 		.cra_priority = 250,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA256_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}
@@ -93,6 +94,7 @@ struct shash_alg sha256_neon_algs[] = { {
 		.cra_name = "sha224",
 		.cra_driver_name = "sha224-neon",
 		.cra_priority = 250,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA224_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -1,19 +1,12 @@
 #!/usr/bin/env perl
-# SPDX-License-Identifier: GPL-2.0
-
-# This code is taken from the OpenSSL project but the author (Andy Polyakov)
-# has relicensed it under the GPLv2. Therefore this program is free software;
-# you can redistribute it and/or modify it under the terms of the GNU General
-# Public License version 2 as published by the Free Software Foundation.
-#
-# The original headers, including the original license headers, are
-# included below for completeness.

 # ====================================================================
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
 # details see http://www.openssl.org/~appro/cryptogams/.
 #
 # Permission to use under GPL terms is granted.
 # ====================================================================

 # SHA512 block procedure for ARMv4. September 2007.

@@ -1,18 +1,11 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.

 @ ====================================================================
 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 @ project. The module is, however, dual licensed under OpenSSL and
 @ CRYPTOGAMS licenses depending on where you obtain it. For further
 @ details see http://www.openssl.org/~appro/cryptogams/.
 @
 @ Permission to use under GPL terms is granted.
 @ ====================================================================

 @ SHA512 block procedure for ARMv4. September 2007.

@@ -63,6 +63,7 @@ static struct shash_alg sha512_arm_algs[] = { {
 		.cra_name = "sha384",
 		.cra_driver_name = "sha384-arm",
 		.cra_priority = 250,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA512_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}
@@ -77,6 +78,7 @@ static struct shash_alg sha512_arm_algs[] = { {
 		.cra_name = "sha512",
 		.cra_driver_name = "sha512-arm",
 		.cra_priority = 250,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA512_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -75,6 +75,7 @@ struct shash_alg sha512_neon_algs[] = { {
 		.cra_name = "sha384",
 		.cra_driver_name = "sha384-neon",
 		.cra_priority = 300,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA384_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,

@@ -90,6 +91,7 @@ struct shash_alg sha512_neon_algs[] = { {
 		.cra_name = "sha512",
 		.cra_driver_name = "sha512-neon",
 		.cra_priority = 300,
+		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize = SHA512_BLOCK_SIZE,
 		.cra_module = THIS_MODULE,
 	}

@@ -596,6 +596,7 @@ CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_CRCT10DIF_ARM64_CE=m
+CONFIG_CRYPTO_CRC32_ARM64_CE=m
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_CHACHA20_NEON=m

@@ -727,13 +727,7 @@ CONFIG_CRYPTO_DEV_QCOM_ICE=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
-CONFIG_CRYPTO_SHA512_ARM64_CE=y
-CONFIG_CRYPTO_SHA3_ARM64=y
-CONFIG_CRYPTO_SM3_ARM64_CE=y
-CONFIG_CRYPTO_SM4_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
-CONFIG_CRYPTO_CHACHA20_NEON=y
-CONFIG_CRYPTO_NHPOLY1305_NEON=y
-CONFIG_CRYPTO_AES_ARM64_BS=y
+CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y

@@ -29,42 +29,24 @@ config CRYPTO_SHA2_ARM64_CE
 	select CRYPTO_HASH
 	select CRYPTO_SHA256_ARM64

-config CRYPTO_SHA512_ARM64_CE
-	tristate "SHA-384/SHA-512 digest algorithm (ARMv8 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_SHA512_ARM64
-
-config CRYPTO_SHA3_ARM64
-	tristate "SHA3 digest algorithm (ARMv8.2 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_SHA3
-
-config CRYPTO_SM3_ARM64_CE
-	tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_HASH
-	select CRYPTO_SM3
-
-config CRYPTO_SM4_ARM64_CE
-	tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_ALGAPI
-	select CRYPTO_SM4
-
 config CRYPTO_GHASH_ARM64_CE
 	tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_GF128MUL
-	select CRYPTO_LIB_AES
+	select CRYPTO_AES
+	select CRYPTO_AES_ARM64

 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
 	select CRYPTO_HASH

+config CRYPTO_CRC32_ARM64_CE
+	tristate "CRC32 and CRC32C digest algorithms using ARMv8 extensions"
+	depends on CRC32
+	select CRYPTO_HASH
+
 config CRYPTO_AES_ARM64
 	tristate "AES core cipher using scalar instructions"
 	select CRYPTO_AES
@@ -73,20 +55,20 @@ config CRYPTO_AES_ARM64_CE
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
-	select CRYPTO_LIB_AES
+	select CRYPTO_AES_ARM64

 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
+	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD
-	select CRYPTO_LIB_AES

 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_CE
 	select CRYPTO_AES_ARM64
 	select CRYPTO_SIMD
@@ -94,35 +76,23 @@ config CRYPTO_AES_ARM64_CE_BLK
 config CRYPTO_AES_ARM64_NEON_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64
-	select CRYPTO_LIB_AES
+	select CRYPTO_AES
 	select CRYPTO_SIMD

 config CRYPTO_CHACHA20_NEON
-	tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
+	tristate "NEON accelerated ChaCha20 symmetric cipher"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20

-config CRYPTO_NHPOLY1305_NEON
-	tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_NHPOLY1305
-
 config CRYPTO_AES_ARM64_BS
 	tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
 	depends on KERNEL_MODE_NEON
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
 	select CRYPTO_AES_ARM64
-	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD

-config CRYPTO_SPECK_NEON
-	tristate "NEON accelerated Speck cipher algorithms"
-	depends on KERNEL_MODE_NEON
-	select CRYPTO_BLKCIPHER
-	select CRYPTO_SPECK
-
 endif

@@ -8,32 +8,21 @@
 # published by the Free Software Foundation.
 #

-obj-y := aes-lib.o

 obj-$(CONFIG_CRYPTO_SHA1_ARM64_CE) += sha1-ce.o
 sha1-ce-y := sha1-ce-glue.o sha1-ce-core.o

 obj-$(CONFIG_CRYPTO_SHA2_ARM64_CE) += sha2-ce.o
 sha2-ce-y := sha2-ce-glue.o sha2-ce-core.o

-obj-$(CONFIG_CRYPTO_SHA512_ARM64_CE) += sha512-ce.o
-sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
-
-obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
-sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
-
-obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
-sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
-
-obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
-sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
-
 obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
 ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o

 obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM64_CE) += crct10dif-ce.o
 crct10dif-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o

+obj-$(CONFIG_CRYPTO_CRC32_ARM64_CE) += crc32-ce.o
+crc32-ce-y:= crc32-ce-core.o crc32-ce-glue.o
+
 obj-$(CONFIG_CRYPTO_AES_ARM64_CE) += aes-ce-cipher.o
 aes-ce-cipher-y := aes-ce-core.o aes-ce-glue.o

@@ -52,14 +41,8 @@ sha256-arm64-y := sha256-glue.o sha256-core.o
 obj-$(CONFIG_CRYPTO_SHA512_ARM64) += sha512-arm64.o
 sha512-arm64-y := sha512-glue.o sha512-core.o

-obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
-
-obj-$(CONFIG_CRYPTO_SPECK_NEON) += speck-neon.o
-speck-neon-y := speck-neon-core.o speck-neon-glue.o
-
-obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
-nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha20-neon.o
+chacha20-neon-y := chacha20-neon-core.o chacha20-neon-glue.o

 obj-$(CONFIG_CRYPTO_AES_ARM64) += aes-arm64.o
 aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o

@@ -18,7 +18,7 @@
  * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
  *			     u32 *macp, u8 const rk[], u32 rounds);
  */
-SYM_FUNC_START(ce_aes_ccm_auth_data)
+ENTRY(ce_aes_ccm_auth_data)
 	ldr	w8, [x3]			/* leftover from prev round? */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cbz	w8, 1f
@@ -84,13 +84,13 @@ SYM_FUNC_START(ce_aes_ccm_auth_data)
 	st1	{v0.16b}, [x0]
10:	str	w8, [x3]
 	ret
-SYM_FUNC_END(ce_aes_ccm_auth_data)
+ENDPROC(ce_aes_ccm_auth_data)

 /*
  * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
  *			 u32 rounds);
  */
-SYM_FUNC_START(ce_aes_ccm_final)
+ENTRY(ce_aes_ccm_final)
 	ld1	{v3.4s}, [x2], #16		/* load first round key */
 	ld1	{v0.16b}, [x0]			/* load mac */
 	cmp	w3, #12				/* which key size? */
@@ -124,7 +124,7 @@ SYM_FUNC_START(ce_aes_ccm_final)
 	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
 	st1	{v0.16b}, [x0]			/* store result */
 	ret
-SYM_FUNC_END(ce_aes_ccm_final)
+ENDPROC(ce_aes_ccm_final)

 	.macro	aes_ccm_do_crypt,enc
 	ldr	x8, [x6, #8]			/* load lower ctr */
@@ -215,10 +215,10 @@ CPU_LE(	rev	x8, x8			)
  *			   u8 const rk[], u32 rounds, u8 mac[],
  *			   u8 ctr[]);
  */
-SYM_FUNC_START(ce_aes_ccm_encrypt)
+ENTRY(ce_aes_ccm_encrypt)
 	aes_ccm_do_crypt	1
-SYM_FUNC_END(ce_aes_ccm_encrypt)
+ENDPROC(ce_aes_ccm_encrypt)

-SYM_FUNC_START(ce_aes_ccm_decrypt)
+ENTRY(ce_aes_ccm_decrypt)
 	aes_ccm_do_crypt	0
-SYM_FUNC_END(ce_aes_ccm_decrypt)
+ENDPROC(ce_aes_ccm_decrypt)

@@ -14,7 +14,6 @@
 #include <crypto/aes.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/module.h>

@@ -46,6 +45,8 @@ asmlinkage void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);

+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
@@ -106,13 +107,11 @@ static int ccm_init_mac(struct aead_request *req, u8 maciv[], u32 msglen)
 }

 static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
-			   u32 abytes, u32 *macp)
+			   u32 abytes, u32 *macp, bool use_neon)
 {
-	if (may_use_simd()) {
-		kernel_neon_begin();
+	if (likely(use_neon)) {
 		ce_aes_ccm_auth_data(mac, in, abytes, macp, key->key_enc,
 				     num_rounds(key));
-		kernel_neon_end();
 	} else {
 		if (*macp > 0 && *macp < AES_BLOCK_SIZE) {
 			int added = min(abytes, AES_BLOCK_SIZE - *macp);
@@ -125,7 +124,8 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 		}

 		while (abytes >= AES_BLOCK_SIZE) {
-			aes_encrypt(key, mac, mac);
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
 			crypto_xor(mac, in, AES_BLOCK_SIZE);

 			in += AES_BLOCK_SIZE;
@@ -133,14 +133,16 @@ static void ccm_update_mac(struct crypto_aes_ctx *key, u8 mac[], u8 const in[],
 		}

 		if (abytes > 0) {
-			aes_encrypt(key, mac, mac);
+			__aes_arm64_encrypt(key->key_enc, mac, mac,
+					    num_rounds(key));
 			crypto_xor(mac, in, abytes);
 			*macp = abytes;
 		}
 	}
 }

-static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[],
+				   bool use_neon)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_aead_ctx(aead);
@@ -159,7 +161,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 		ltag.len = 6;
 	}

-	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp);
+	ccm_update_mac(ctx, mac, (u8 *)&ltag, ltag.len, &macp, use_neon);
 	scatterwalk_start(&walk, req->src);

 	do {
@@ -171,7 +173,7 @@ static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
 			n = scatterwalk_clamp(&walk, len);
 		}
 		p = scatterwalk_map(&walk);
-		ccm_update_mac(ctx, mac, p, n, &macp);
+		ccm_update_mac(ctx, mac, p, n, &macp, use_neon);
 		len -= n;

 		scatterwalk_unmap(p);
@@ -205,8 +207,10 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
 			bsize = nbytes;

 		crypto_inc(walk->iv, AES_BLOCK_SIZE);
-		aes_encrypt(ctx, buf, walk->iv);
-		aes_encrypt(ctx, mac, mac);
+		__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
+				    num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac,
+				    num_rounds(ctx));
 		if (enc)
 			crypto_xor(mac, src, bsize);
 		crypto_xor_cpy(dst, src, buf, bsize);
@@ -221,8 +225,8 @@ static int ccm_crypt_fallback(struct skcipher_walk *walk, u8 mac[], u8 iv0[],
 	}

 	if (!err) {
-		aes_encrypt(ctx, buf, iv0);
-		aes_encrypt(ctx, mac, mac);
+		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
+		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
 		crypto_xor(mac, buf, AES_BLOCK_SIZE);
 	}
 	return err;
@@ -236,42 +240,43 @@ static int ccm_encrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen;
+	bool use_neon = may_use_simd();
 	int err;

 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;

+	if (likely(use_neon))
+		kernel_neon_begin();
+
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);

 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);

-	err = skcipher_walk_aead_encrypt(&walk, req, false);
+	err = skcipher_walk_aead_encrypt(&walk, req, true);

-	if (may_use_simd()) {
+	if (likely(use_neon)) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;

 			if (walk.nbytes == walk.total)
 				tail = 0;

-			kernel_neon_begin();
 			ce_aes_ccm_encrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
-			kernel_neon_end();

 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err) {
-			kernel_neon_begin();
+		if (!err)
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-			kernel_neon_end();
-		}
+
+		kernel_neon_end();
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, true);
 	}
@@ -294,42 +299,43 @@ static int ccm_decrypt(struct aead_request *req)
 	u8 __aligned(8) mac[AES_BLOCK_SIZE];
 	u8 buf[AES_BLOCK_SIZE];
 	u32 len = req->cryptlen - authsize;
+	bool use_neon = may_use_simd();
 	int err;

 	err = ccm_init_mac(req, mac, len);
 	if (err)
 		return err;

+	if (likely(use_neon))
+		kernel_neon_begin();
+
 	if (req->assoclen)
-		ccm_calculate_auth_mac(req, mac);
+		ccm_calculate_auth_mac(req, mac, use_neon);

 	/* preserve the original iv for the final round */
 	memcpy(buf, req->iv, AES_BLOCK_SIZE);

-	err = skcipher_walk_aead_decrypt(&walk, req, false);
+	err = skcipher_walk_aead_decrypt(&walk, req, true);

-	if (may_use_simd()) {
+	if (likely(use_neon)) {
 		while (walk.nbytes) {
 			u32 tail = walk.nbytes % AES_BLOCK_SIZE;

 			if (walk.nbytes == walk.total)
 				tail = 0;

-			kernel_neon_begin();
 			ce_aes_ccm_decrypt(walk.dst.virt.addr,
 					   walk.src.virt.addr,
 					   walk.nbytes - tail, ctx->key_enc,
 					   num_rounds(ctx), mac, walk.iv);
-			kernel_neon_end();

 			err = skcipher_walk_done(&walk, tail);
 		}
-		if (!err) {
-			kernel_neon_begin();
+		if (!err)
 			ce_aes_ccm_final(mac, buf, ctx->key_enc,
 					 num_rounds(ctx));
-			kernel_neon_end();
-		}
+
+		kernel_neon_end();
 	} else {
 		err = ccm_crypt_fallback(&walk, mac, buf, ctx, false);
 	}
@@ -367,7 +373,7 @@ static struct aead_alg ccm_aes_alg = {

 static int __init aes_mod_init(void)
 {
-	if (!cpu_have_named_feature(AES))
+	if (!(elf_hwcap & HWCAP_AES))
 		return -ENODEV;
 	return crypto_register_aead(&ccm_aes_alg);
 }

@@ -11,7 +11,7 @@

 	.arch		armv8-a+crypto

-SYM_FUNC_START(__aes_ce_encrypt)
+ENTRY(__aes_ce_encrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -37,9 +37,9 @@ SYM_FUNC_START(__aes_ce_encrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-SYM_FUNC_END(__aes_ce_encrypt)
+ENDPROC(__aes_ce_encrypt)

-SYM_FUNC_START(__aes_ce_decrypt)
+ENTRY(__aes_ce_decrypt)
 	sub		w3, w3, #2
 	ld1		{v0.16b}, [x2]
 	ld1		{v1.4s}, [x0], #16
@@ -65,23 +65,23 @@ SYM_FUNC_START(__aes_ce_decrypt)
 	eor		v0.16b, v0.16b, v3.16b
 	st1		{v0.16b}, [x1]
 	ret
-SYM_FUNC_END(__aes_ce_decrypt)
+ENDPROC(__aes_ce_decrypt)

 /*
  * __aes_ce_sub() - use the aese instruction to perform the AES sbox
  * substitution on each byte in 'input'
  */
-SYM_FUNC_START(__aes_ce_sub)
+ENTRY(__aes_ce_sub)
 	dup		v1.4s, w0
 	movi		v0.16b, #0
 	aese		v0.16b, v1.16b
 	umov		w0, v0.s[0]
 	ret
-SYM_FUNC_END(__aes_ce_sub)
+ENDPROC(__aes_ce_sub)

-SYM_FUNC_START(__aes_ce_invert)
+ENTRY(__aes_ce_invert)
 	ld1		{v0.4s}, [x1]
 	aesimc		v1.16b, v0.16b
 	st1		{v1.4s}, [x0]
 	ret
-SYM_FUNC_END(__aes_ce_invert)
+ENDPROC(__aes_ce_invert)

@@ -12,7 +12,6 @@
 #include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
-#include <crypto/internal/simd.h>
 #include <linux/cpufeature.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
@@ -23,6 +22,9 @@ MODULE_DESCRIPTION("Synchronous AES cipher using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");

+asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
+
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -51,7 +53,7 @@ static void aes_cipher_encrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);

 	if (!may_use_simd()) {
-		aes_encrypt(ctx, dst, src);
+		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
 		return;
 	}

@@ -65,7 +67,7 @@ static void aes_cipher_decrypt(struct crypto_tfm *tfm, u8 dst[], u8 const src[])
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);

 	if (!may_use_simd()) {
-		aes_decrypt(ctx, dst, src);
+		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
 		return;
 	}

@ -12,21 +12,11 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
#define AES_FUNC_START(func) SYM_FUNC_START(ce_ ## func)
|
||||
#define AES_FUNC_END(func) SYM_FUNC_END(ce_ ## func)
|
||||
#define AES_ENTRY(func) ENTRY(ce_ ## func)
|
||||
#define AES_ENDPROC(func) ENDPROC(ce_ ## func)
|
||||
|
||||
.arch armv8-a+crypto
|
||||
|
||||
xtsmask .req v16
|
||||
cbciv .req v16
|
||||
vctr .req v16
|
||||
|
||||
.macro xts_reload_mask, tmp
|
||||
.endm
|
||||
|
||||
.macro xts_cts_skip_tw, reg, lbl
|
||||
.endm
|
||||
|
||||
/* preload all round keys */
|
||||
.macro load_round_keys, rounds, rk
|
||||
cmp \rounds, #12
|
||||
@ -40,24 +30,21 @@
|
||||
.endm
|
||||
|
||||
/* prepare for encryption with key in rk[] */
|
||||
.macro enc_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.macro enc_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
/* prepare for encryption (again) but with new key in rk[] */
|
||||
.macro enc_switch_key, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.macro enc_switch_key, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
/* prepare for decryption with key in rk[] */
|
||||
.macro dec_prepare, rounds, rk, temp
|
||||
mov \temp, \rk
|
||||
load_round_keys \rounds, \temp
|
||||
.macro dec_prepare, rounds, rk, ignore
|
||||
load_round_keys \rounds, \rk
|
||||
.endm
|
||||
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
|
||||
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
|
||||
aes\de		\i0\().16b, \k\().16b
aes\mc		\i0\().16b, \i0\().16b
.ifnb		\i1
@@ -68,34 +55,27 @@
aes\mc		\i2\().16b, \i2\().16b
aes\de		\i3\().16b, \k\().16b
aes\mc		\i3\().16b, \i3\().16b
.ifnb		\i4
aes\de		\i4\().16b, \k\().16b
aes\mc		\i4\().16b, \i4\().16b
.endif
.endif
.endif
.endm

/* up to 5 interleaved encryption rounds with the same round key */
.macro		round_Nx, enc, k, i0, i1, i2, i3, i4
/* up to 4 interleaved encryption rounds with the same round key */
.macro		round_Nx, enc, k, i0, i1, i2, i3
.ifc		\enc, e
do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3, \i4
do_enc_Nx	e, mc, \k, \i0, \i1, \i2, \i3
.else
do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3, \i4
do_enc_Nx	d, imc, \k, \i0, \i1, \i2, \i3
.endif
.endm

/* up to 5 interleaved final rounds */
.macro		fin_round_Nx, de, k, k2, i0, i1, i2, i3, i4
/* up to 4 interleaved final rounds */
.macro		fin_round_Nx, de, k, k2, i0, i1, i2, i3
aes\de		\i0\().16b, \k\().16b
.ifnb		\i1
aes\de		\i1\().16b, \k\().16b
.ifnb		\i3
aes\de		\i2\().16b, \k\().16b
aes\de		\i3\().16b, \k\().16b
.ifnb		\i4
aes\de		\i4\().16b, \k\().16b
.endif
.endif
.endif
eor		\i0\().16b, \i0\().16b, \k2\().16b
@@ -104,52 +84,47 @@
.ifnb		\i3
eor		\i2\().16b, \i2\().16b, \k2\().16b
eor		\i3\().16b, \i3\().16b, \k2\().16b
.ifnb		\i4
eor		\i4\().16b, \i4\().16b, \k2\().16b
.endif
.endif
.endif
.endm

/* up to 5 interleaved blocks */
.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
/* up to 4 interleaved blocks */
.macro		do_block_Nx, enc, rounds, i0, i1, i2, i3
cmp		\rounds, #12
blo		2222f		/* 128 bits */
beq		1111f		/* 192 bits */
round_Nx	\enc, v17, \i0, \i1, \i2, \i3, \i4
round_Nx	\enc, v18, \i0, \i1, \i2, \i3, \i4
1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3, \i4
round_Nx	\enc, v20, \i0, \i1, \i2, \i3, \i4
round_Nx	\enc, v17, \i0, \i1, \i2, \i3
round_Nx	\enc, v18, \i0, \i1, \i2, \i3
1111:	round_Nx	\enc, v19, \i0, \i1, \i2, \i3
round_Nx	\enc, v20, \i0, \i1, \i2, \i3
2222:	.irp		key, v21, v22, v23, v24, v25, v26, v27, v28, v29
round_Nx	\enc, \key, \i0, \i1, \i2, \i3, \i4
round_Nx	\enc, \key, \i0, \i1, \i2, \i3
.endr
fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3, \i4
fin_round_Nx	\enc, v30, v31, \i0, \i1, \i2, \i3
.endm

.macro		encrypt_block, in, rounds, t0, t1, t2
do_block_Nx	e, \rounds, \in
.endm

.macro		encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
.macro		encrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx	e, \rounds, \i0, \i1
.endm

.macro		encrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3, \i4
.macro		encrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx	e, \rounds, \i0, \i1, \i2, \i3
.endm

.macro		decrypt_block, in, rounds, t0, t1, t2
do_block_Nx	d, \rounds, \in
.endm

.macro		decrypt_block2x, i0, i1, rounds, t0, t1, t2
do_block_Nx	d, \rounds, \i0, \i1
.endm

.macro		decrypt_block4x, i0, i1, i2, i3, rounds, t0, t1, t2
do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3
.endm

.macro		decrypt_block5x, i0, i1, i2, i3, i4, rounds, t0, t1, t2
do_block_Nx	d, \rounds, \i0, \i1, \i2, \i3, \i4
.endm

#define MAX_STRIDE	5

#include "aes-modes.S"
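The define-then-include above is how the 5-way interleave is selected at build time: this wrapper sets MAX_STRIDE to 5 before pulling in the shared mode templates, while the NEON wrapper includes aes-modes.S directly and picks up the default of 4 (see the #ifndef block in aes-modes.S further down). A minimal user-space sketch of the same default-and-override pattern, offered only as an illustration:

#include <stdio.h>

#ifndef MAX_STRIDE
#define MAX_STRIDE 4	/* default, mirroring aes-modes.S */
#endif

int main(void)
{
	/* build with -DMAX_STRIDE=5 to mimic the Crypto Extensions wrapper */
	printf("mode routines interleave %d blocks per call\n", MAX_STRIDE);
	return 0;
}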

@@ -125,11 +125,48 @@ CPU_BE( rev w7, w7 )
	ret
	.endm

SYM_FUNC_START(__aes_arm64_encrypt)
	.align		L1_CACHE_SHIFT
	.type		__aes_arm64_inverse_sbox, %object
__aes_arm64_inverse_sbox:
	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox

ENTRY(__aes_arm64_encrypt)
	do_crypt	fround, crypto_ft_tab, crypto_ft_tab + 1, 2
SYM_FUNC_END(__aes_arm64_encrypt)
ENDPROC(__aes_arm64_encrypt)

	.align		5
SYM_FUNC_START(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
SYM_FUNC_END(__aes_arm64_decrypt)
ENTRY(__aes_arm64_decrypt)
	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
ENDPROC(__aes_arm64_decrypt)

@@ -13,9 +13,12 @@
#include <linux/module.h>

asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_encrypt);

static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
EXPORT_SYMBOL(__aes_arm64_decrypt);

static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;
@@ -23,7 +26,7 @@ static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
	__aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
}

static void aes_arm64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
	int rounds = 6 + ctx->key_length / 4;
@@ -43,8 +46,8 @@ static struct crypto_alg aes_alg = {
	.cra_cipher.cia_min_keysize = AES_MIN_KEY_SIZE,
	.cra_cipher.cia_max_keysize = AES_MAX_KEY_SIZE,
	.cra_cipher.cia_setkey = crypto_aes_set_key,
	.cra_cipher.cia_encrypt = aes_arm64_encrypt,
	.cra_cipher.cia_decrypt = aes_arm64_decrypt
	.cra_cipher.cia_encrypt = aes_encrypt,
	.cra_cipher.cia_decrypt = aes_decrypt
};

static int __init aes_init(void)

arch/arm64/crypto/aes-ctr-fallback.h (new file, 53 lines)
@@ -0,0 +1,53 @@
/*
 * Fallback for sync aes(ctr) in contexts where kernel mode NEON
 * is not allowed
 *
 * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <crypto/aes.h>
#include <crypto/internal/skcipher.h>

asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);

static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
					   struct skcipher_request *req)
{
	struct skcipher_walk walk;
	u8 buf[AES_BLOCK_SIZE];
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	while (walk.nbytes > 0) {
		u8 *dst = walk.dst.virt.addr;
		u8 *src = walk.src.virt.addr;
		int nbytes = walk.nbytes;
		int tail = 0;

		if (nbytes < walk.total) {
			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
			tail = walk.nbytes % AES_BLOCK_SIZE;
		}

		do {
			int bsize = min(nbytes, AES_BLOCK_SIZE);

			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
					    6 + ctx->key_length / 4);
			crypto_xor_cpy(dst, src, buf, bsize);
			crypto_inc(walk.iv, AES_BLOCK_SIZE);

			dst += AES_BLOCK_SIZE;
			src += AES_BLOCK_SIZE;
			nbytes -= AES_BLOCK_SIZE;
		} while (nbytes > 0);

		err = skcipher_walk_done(&walk, tail);
	}
	return err;
}
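The fallback above is plain CTR mode: encrypt the counter block, XOR the resulting keystream into the data, bump the counter, and report any sub-block tail back to the walk. A freestanding sketch of that loop, with a stub block cipher standing in for __aes_arm64_encrypt (the stub is a placeholder, NOT real AES; it only shows the data flow):

#include <stddef.h>
#include <stdint.h>

#define BLK 16

/* stand-in for __aes_arm64_encrypt(); any 16-byte permutation works here */
static void block_encrypt(uint8_t out[BLK], const uint8_t in[BLK])
{
	for (int i = 0; i < BLK; i++)
		out[i] = in[i] ^ 0xaa;	/* placeholder transform */
}

/* big-endian increment of the counter block, like crypto_inc() */
static void ctr_inc(uint8_t ctr[BLK])
{
	for (int i = BLK - 1; i >= 0 && ++ctr[i] == 0; i--)
		;
}

static void ctr_crypt(uint8_t *dst, const uint8_t *src, size_t len,
		      uint8_t ctr[BLK])
{
	uint8_t ks[BLK];

	while (len) {
		size_t n = len < BLK ? len : BLK;	/* handles the tail */

		block_encrypt(ks, ctr);			/* keystream = E(counter) */
		for (size_t i = 0; i < n; i++)
			dst[i] = src[i] ^ ks[i];	/* like crypto_xor_cpy() */
		ctr_inc(ctr);				/* like crypto_inc() */
		dst += n;
		src += n;
		len -= n;
	}
}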

@@ -12,45 +12,25 @@
#include <asm/hwcap.h>
#include <asm/simd.h>
#include <crypto/aes.h>
#include <crypto/ctr.h>
#include <crypto/sha.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <crypto/xts.h>

#include "aes-ce-setkey.h"

#undef aes_expandkey
#undef aes_ecb_encrypt
#undef aes_ecb_decrypt
#undef aes_cbc_encrypt
#undef aes_cbc_decrypt
#undef aes_cbc_cts_encrypt
#undef aes_cbc_cts_decrypt
#undef aes_essiv_cbc_encrypt
#undef aes_essiv_cbc_decrypt
#undef aes_ctr_encrypt
#undef aes_xts_encrypt
#undef aes_xts_decrypt
#undef aes_mac_update
#include "aes-ctr-fallback.h"

#ifdef USE_V8_CRYPTO_EXTENSIONS
#define MODE "ce"
#define PRIO 300
#define STRIDE 5
#define aes_setkey ce_aes_setkey
#define aes_expandkey ce_aes_expandkey
#define aes_ecb_encrypt ce_aes_ecb_encrypt
#define aes_ecb_decrypt ce_aes_ecb_decrypt
#define aes_cbc_encrypt ce_aes_cbc_encrypt
#define aes_cbc_decrypt ce_aes_cbc_decrypt
#define aes_cbc_cts_encrypt ce_aes_cbc_cts_encrypt
#define aes_cbc_cts_decrypt ce_aes_cbc_cts_decrypt
#define aes_essiv_cbc_encrypt ce_aes_essiv_cbc_encrypt
#define aes_essiv_cbc_decrypt ce_aes_essiv_cbc_decrypt
#define aes_ctr_encrypt ce_aes_ctr_encrypt
#define aes_xts_encrypt ce_aes_xts_encrypt
#define aes_xts_decrypt ce_aes_xts_decrypt
@@ -59,84 +39,59 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions");
#else
#define MODE "neon"
#define PRIO 200
#define STRIDE 4
#define aes_setkey crypto_aes_set_key
#define aes_expandkey crypto_aes_expand_key
#define aes_ecb_encrypt neon_aes_ecb_encrypt
#define aes_ecb_decrypt neon_aes_ecb_decrypt
#define aes_cbc_encrypt neon_aes_cbc_encrypt
#define aes_cbc_decrypt neon_aes_cbc_decrypt
#define aes_cbc_cts_encrypt neon_aes_cbc_cts_encrypt
#define aes_cbc_cts_decrypt neon_aes_cbc_cts_decrypt
#define aes_essiv_cbc_encrypt neon_aes_essiv_cbc_encrypt
#define aes_essiv_cbc_decrypt neon_aes_essiv_cbc_decrypt
#define aes_ctr_encrypt neon_aes_ctr_encrypt
#define aes_xts_encrypt neon_aes_xts_encrypt
#define aes_xts_decrypt neon_aes_xts_decrypt
#define aes_mac_update neon_aes_mac_update
MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
#endif
#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
MODULE_ALIAS_CRYPTO("ecb(aes)");
MODULE_ALIAS_CRYPTO("cbc(aes)");
MODULE_ALIAS_CRYPTO("ctr(aes)");
MODULE_ALIAS_CRYPTO("xts(aes)");
#endif
MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
MODULE_ALIAS_CRYPTO("cmac(aes)");
MODULE_ALIAS_CRYPTO("xcbc(aes)");
MODULE_ALIAS_CRYPTO("cbcmac(aes)");
#endif

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

/* defined in aes-modes.S */
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
asmlinkage void aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
				int rounds, int blocks);
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
asmlinkage void aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
				int rounds, int blocks);

asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
asmlinkage void aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
				int rounds, int blocks, u8 iv[]);
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
asmlinkage void aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
				int rounds, int blocks, u8 iv[]);

asmlinkage void aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
				    int rounds, int bytes, u8 const iv[]);
asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
				    int rounds, int bytes, u8 const iv[]);
asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
				int rounds, int blocks, u8 ctr[]);

asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
				int rounds, int bytes, u8 ctr[], u8 finalbuf[]);

asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
				int rounds, int bytes, u32 const rk2[], u8 iv[],
asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
				int rounds, int blocks, u8 const rk2[], u8 iv[],
				int first);
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
				int rounds, int bytes, u32 const rk2[], u8 iv[],
asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
				int rounds, int blocks, u8 const rk2[], u8 iv[],
				int first);

asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
				      int rounds, int blocks, u8 iv[],
				      u32 const rk2[]);
asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
				      int rounds, int blocks, u8 iv[],
				      u32 const rk2[]);

asmlinkage int aes_mac_update(u8 const in[], u32 const rk[], int rounds,
			      int blocks, u8 dg[], int enc_before,
			      int enc_after);
asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
			       int blocks, u8 dg[], int enc_before,
			       int enc_after);

struct crypto_aes_xts_ctx {
	struct crypto_aes_ctx key1;
	struct crypto_aes_ctx __aligned(8) key2;
};

struct crypto_aes_essiv_cbc_ctx {
	struct crypto_aes_ctx key1;
	struct crypto_aes_ctx __aligned(8) key2;
	struct crypto_shash *hash;
};

struct mac_tfm_ctx {
	struct crypto_aes_ctx key;
	u8 __aligned(8) consts[];
@@ -150,18 +105,11 @@ struct mac_desc_ctx {
static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
			       unsigned int key_len)
{
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int ret;

	ret = aes_expandkey(ctx, in_key, key_len);
	if (ret)
		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);

	return ret;
	return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
}

static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
				      const u8 *in_key, unsigned int key_len)
static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
		       unsigned int key_len)
{
	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int ret;
@@ -181,31 +129,7 @@ static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
	return -EINVAL;
}

static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
					    const u8 *in_key,
					    unsigned int key_len)
{
	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	u8 digest[SHA256_DIGEST_SIZE];
	int ret;

	ret = aes_expandkey(&ctx->key1, in_key, key_len);
	if (ret)
		goto out;

	crypto_shash_tfm_digest(ctx->hash, in_key, key_len, digest);

	ret = aes_expandkey(&ctx->key2, digest, sizeof(digest));
	if (ret)
		goto out;

	return 0;
out:
	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
	return -EINVAL;
}

static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
static int ecb_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -218,14 +142,14 @@ static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_enc, rounds, blocks);
				(u8 *)ctx->key_enc, rounds, blocks);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
static int ecb_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -238,243 +162,51 @@ static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key_dec, rounds, blocks);
				(u8 *)ctx->key_dec, rounds, blocks);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static int cbc_encrypt_walk(struct skcipher_request *req,
			    struct skcipher_walk *walk)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err = 0, rounds = 6 + ctx->key_length / 4;
	unsigned int blocks;

	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
				ctx->key_enc, rounds, blocks, walk->iv);
		kernel_neon_end();
		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static int __maybe_unused cbc_encrypt(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;
	return cbc_encrypt_walk(req, &walk);
}

static int cbc_decrypt_walk(struct skcipher_request *req,
			    struct skcipher_walk *walk)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err = 0, rounds = 6 + ctx->key_length / 4;
	unsigned int blocks;

	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
				ctx->key_dec, rounds, blocks, walk->iv);
		kernel_neon_end();
		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
	}
	return err;
}

static int __maybe_unused cbc_decrypt(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;
	return cbc_decrypt_walk(req, &walk);
}

static int cts_cbc_encrypt(struct skcipher_request *req)
static int cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, rounds = 6 + ctx->key_length / 4;
	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
	struct scatterlist *src = req->src, *dst = req->dst;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;
	unsigned int blocks;

	skcipher_request_set_tfm(&subreq, tfm);
	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
				      NULL, NULL);
	err = skcipher_walk_virt(&walk, req, false);

	if (req->cryptlen <= AES_BLOCK_SIZE) {
		if (req->cryptlen < AES_BLOCK_SIZE)
			return -EINVAL;
		cbc_blocks = 1;
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}

	if (cbc_blocks > 0) {
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   cbc_blocks * AES_BLOCK_SIZE,
					   req->iv);

		err = skcipher_walk_virt(&walk, &subreq, false) ?:
		      cbc_encrypt_walk(&subreq, &walk);
		if (err)
			return err;

		if (req->cryptlen == AES_BLOCK_SIZE)
			return 0;

		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
		if (req->dst != req->src)
			dst = scatterwalk_ffwd(sg_dst, req->dst,
					       subreq.cryptlen);
	}

	/* handle ciphertext stealing */
	skcipher_request_set_crypt(&subreq, src, dst,
				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_neon_begin();
	aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			    ctx->key_enc, rounds, walk.nbytes, walk.iv);
	kernel_neon_end();

	return skcipher_walk_done(&walk, 0);
	return err;
}

static int cts_cbc_decrypt(struct skcipher_request *req)
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, rounds = 6 + ctx->key_length / 4;
	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
	struct scatterlist *src = req->src, *dst = req->dst;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct skcipher_walk walk;

	skcipher_request_set_tfm(&subreq, tfm);
	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
				      NULL, NULL);

	if (req->cryptlen <= AES_BLOCK_SIZE) {
		if (req->cryptlen < AES_BLOCK_SIZE)
			return -EINVAL;
		cbc_blocks = 1;
	}

	if (cbc_blocks > 0) {
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   cbc_blocks * AES_BLOCK_SIZE,
					   req->iv);

		err = skcipher_walk_virt(&walk, &subreq, false) ?:
		      cbc_decrypt_walk(&subreq, &walk);
		if (err)
			return err;

		if (req->cryptlen == AES_BLOCK_SIZE)
			return 0;

		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
		if (req->dst != req->src)
			dst = scatterwalk_ffwd(sg_dst, req->dst,
					       subreq.cryptlen);
	}

	/* handle ciphertext stealing */
	skcipher_request_set_crypt(&subreq, src, dst,
				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_neon_begin();
	aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			    ctx->key_dec, rounds, walk.nbytes, walk.iv);
	kernel_neon_end();

	return skcipher_walk_done(&walk, 0);
}

static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
{
	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);

	ctx->hash = crypto_alloc_shash("sha256", 0, 0);

	return PTR_ERR_OR_ZERO(ctx->hash);
}

static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm)
{
	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);

	crypto_free_shash(ctx->hash);
}

static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, rounds = 6 + ctx->key1.key_length / 4;
	struct skcipher_walk walk;
	unsigned int blocks;

	err = skcipher_walk_virt(&walk, req, false);

	blocks = walk.nbytes / AES_BLOCK_SIZE;
	if (blocks) {
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				      ctx->key1.key_enc, rounds, blocks,
				      req->iv, ctx->key2.key_enc);
		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				(u8 *)ctx->key_dec, rounds, blocks, walk.iv);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err ?: cbc_encrypt_walk(req, &walk);
}

static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, rounds = 6 + ctx->key1.key_length / 4;
	struct skcipher_walk walk;
	unsigned int blocks;

	err = skcipher_walk_virt(&walk, req, false);

	blocks = walk.nbytes / AES_BLOCK_SIZE;
	if (blocks) {
		kernel_neon_begin();
		aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				      ctx->key1.key_dec, rounds, blocks,
				      req->iv, ctx->key2.key_enc);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}
	return err ?: cbc_decrypt_walk(req, &walk);
	return err;
}

static int ctr_encrypt(struct skcipher_request *req)
@@ -483,211 +215,95 @@ static int ctr_encrypt(struct skcipher_request *req)
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, rounds = 6 + ctx->key_length / 4;
	struct skcipher_walk walk;
	int blocks;

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes > 0) {
		const u8 *src = walk.src.virt.addr;
	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
		kernel_neon_begin();
		aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				(u8 *)ctx->key_enc, rounds, blocks, walk.iv);
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
		kernel_neon_end();
	}
	if (walk.nbytes) {
		u8 __aligned(8) tail[AES_BLOCK_SIZE];
		unsigned int nbytes = walk.nbytes;
		u8 *dst = walk.dst.virt.addr;
		u8 buf[AES_BLOCK_SIZE];
		unsigned int tail;
		u8 *tdst = walk.dst.virt.addr;
		u8 *tsrc = walk.src.virt.addr;

		if (unlikely(nbytes < AES_BLOCK_SIZE))
			src = memcpy(buf, src, nbytes);
		else if (nbytes < walk.total)
			nbytes &= ~(AES_BLOCK_SIZE - 1);
		/*
		 * Tell aes_ctr_encrypt() to process a tail block.
		 */
		blocks = -1;

		kernel_neon_begin();
		aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes,
				walk.iv, buf);
		aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc, rounds,
				blocks, walk.iv);
		kernel_neon_end();

		tail = nbytes % (STRIDE * AES_BLOCK_SIZE);
		if (tail > 0 && tail < AES_BLOCK_SIZE)
			/*
			 * The final partial block could not be returned using
			 * an overlapping store, so it was passed via buf[]
			 * instead.
			 */
			memcpy(dst + nbytes - tail, buf, tail);

		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}
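The reworked ctr_encrypt() hands everything, including a ragged tail, to the assembly in one call; when the tail is shorter than one block the keystream for it comes back through buf[] and only the tail bytes are copied out, so the destination buffer is never overrun. A small user-space sketch of that copy-back step (all sizes are illustrative):

#include <stdio.h>
#include <string.h>

#define BLK 16

int main(void)
{
	unsigned char buf[BLK];		/* bounce buffer, like buf[] above */
	unsigned char dst[3 * BLK];
	unsigned int nbytes = 37;	/* 2 full blocks + a 5-byte tail */
	unsigned int tail = nbytes % BLK;

	/* pretend the cipher left the final partial keystream block in buf */
	memset(buf, 0x5a, sizeof(buf));

	if (tail > 0 && tail < BLK)
		memcpy(dst + nbytes - tail, buf, tail);	/* never past dst + nbytes */

	printf("copied %u tail bytes\n", tail);
	return 0;
}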

static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
static int ctr_encrypt_sync(struct skcipher_request *req)
{
	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
	unsigned long flags;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);

	/*
	 * Temporarily disable interrupts to avoid races where
	 * cachelines are evicted when the CPU is interrupted
	 * to do something else.
	 */
	local_irq_save(flags);
	aes_encrypt(ctx, dst, src);
	local_irq_restore(flags);
}

static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req)
{
	if (!may_use_simd())
		return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
		return aes_ctr_encrypt_fallback(ctx, req);

	return ctr_encrypt(req);
}

static int __maybe_unused xts_encrypt(struct skcipher_request *req)
static int xts_encrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, first, rounds = 6 + ctx->key1.key_length / 4;
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct scatterlist *src, *dst;
	struct skcipher_walk walk;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;
	unsigned int blocks;

	err = skcipher_walk_virt(&walk, req, false);

	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
					      AES_BLOCK_SIZE) - 2;

		skcipher_walk_abort(&walk);

		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   xts_blocks * AES_BLOCK_SIZE,
					   req->iv);
		req = &subreq;
		err = skcipher_walk_virt(&walk, req, false);
	} else {
		tail = 0;
	}

	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
		int nbytes = walk.nbytes;

		if (walk.nbytes < walk.total)
			nbytes &= ~(AES_BLOCK_SIZE - 1);

	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
		kernel_neon_begin();
		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key1.key_enc, rounds, nbytes,
				ctx->key2.key_enc, walk.iv, first);
				(u8 *)ctx->key1.key_enc, rounds, blocks,
				(u8 *)ctx->key2.key_enc, walk.iv, first);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}

	if (err || likely(!tail))
		return err;

	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_neon_begin();
	aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
			ctx->key1.key_enc, rounds, walk.nbytes,
			ctx->key2.key_enc, walk.iv, first);
	kernel_neon_end();

	return skcipher_walk_done(&walk, 0);
	return err;
}

static int __maybe_unused xts_decrypt(struct skcipher_request *req)
static int xts_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err, first, rounds = 6 + ctx->key1.key_length / 4;
	int tail = req->cryptlen % AES_BLOCK_SIZE;
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct scatterlist *src, *dst;
	struct skcipher_walk walk;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;
	unsigned int blocks;

	err = skcipher_walk_virt(&walk, req, false);

	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
					      AES_BLOCK_SIZE) - 2;

		skcipher_walk_abort(&walk);

		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   xts_blocks * AES_BLOCK_SIZE,
					   req->iv);
		req = &subreq;
		err = skcipher_walk_virt(&walk, req, false);
	} else {
		tail = 0;
	}

	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
		int nbytes = walk.nbytes;

		if (walk.nbytes < walk.total)
			nbytes &= ~(AES_BLOCK_SIZE - 1);

	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
		kernel_neon_begin();
		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
				ctx->key1.key_dec, rounds, nbytes,
				ctx->key2.key_enc, walk.iv, first);
				(u8 *)ctx->key1.key_dec, rounds, blocks,
				(u8 *)ctx->key2.key_enc, walk.iv, first);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
	}

	if (err || likely(!tail))
		return err;

	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, &subreq, false);
	if (err)
		return err;

	kernel_neon_begin();
	aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
			ctx->key1.key_dec, rounds, walk.nbytes,
			ctx->key2.key_enc, walk.iv, first);
	kernel_neon_end();

	return skcipher_walk_done(&walk, 0);
	return err;
}

static struct skcipher_alg aes_algs[] = { {
#if defined(USE_V8_CRYPTO_EXTENSIONS) || !IS_ENABLED(CONFIG_CRYPTO_AES_ARM64_BS)
	.base = {
		.cra_name = "__ecb(aes)",
		.cra_driver_name = "__ecb-aes-" MODE,
@@ -764,46 +380,9 @@ static struct skcipher_alg aes_algs[] = { {
	.min_keysize = 2 * AES_MIN_KEY_SIZE,
	.max_keysize = 2 * AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.walksize = 2 * AES_BLOCK_SIZE,
	.setkey = xts_set_key,
	.encrypt = xts_encrypt,
	.decrypt = xts_decrypt,
}, {
#endif
	.base = {
		.cra_name = "__cts(cbc(aes))",
		.cra_driver_name = "__cts-cbc-aes-" MODE,
		.cra_priority = PRIO,
		.cra_flags = CRYPTO_ALG_INTERNAL,
		.cra_blocksize = AES_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct crypto_aes_ctx),
		.cra_module = THIS_MODULE,
	},
	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.walksize = 2 * AES_BLOCK_SIZE,
	.setkey = skcipher_aes_setkey,
	.encrypt = cts_cbc_encrypt,
	.decrypt = cts_cbc_decrypt,
}, {
	.base = {
		.cra_name = "__essiv(cbc(aes),sha256)",
		.cra_driver_name = "__essiv-cbc-aes-sha256-" MODE,
		.cra_priority = PRIO + 1,
		.cra_flags = CRYPTO_ALG_INTERNAL,
		.cra_blocksize = AES_BLOCK_SIZE,
		.cra_ctxsize = sizeof(struct crypto_aes_essiv_cbc_ctx),
		.cra_module = THIS_MODULE,
	},
	.min_keysize = AES_MIN_KEY_SIZE,
	.max_keysize = AES_MAX_KEY_SIZE,
	.ivsize = AES_BLOCK_SIZE,
	.setkey = essiv_cbc_set_key,
	.encrypt = essiv_cbc_encrypt,
	.decrypt = essiv_cbc_decrypt,
	.init = essiv_cbc_init_tfm,
	.exit = essiv_cbc_exit_tfm,
} };

static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
@@ -833,6 +412,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
{
	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	be128 *consts = (be128 *)ctx->consts;
	u8 *rk = (u8 *)ctx->key.key_enc;
	int rounds = 6 + key_len / 4;
	int err;

@@ -842,8 +422,7 @@ static int cmac_setkey(struct crypto_shash *tfm, const u8 *in_key,

	/* encrypt the zero vector */
	kernel_neon_begin();
	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, ctx->key.key_enc,
			rounds, 1);
	aes_ecb_encrypt(ctx->consts, (u8[AES_BLOCK_SIZE]){}, rk, rounds, 1);
	kernel_neon_end();

	cmac_gf128_mul_by_x(consts, consts);
@@ -862,6 +441,7 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
	};

	struct mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
	u8 *rk = (u8 *)ctx->key.key_enc;
	int rounds = 6 + key_len / 4;
	u8 key[AES_BLOCK_SIZE];
	int err;
@@ -871,8 +451,8 @@ static int xcbc_setkey(struct crypto_shash *tfm, const u8 *in_key,
		return err;

	kernel_neon_begin();
	aes_ecb_encrypt(key, ks[0], ctx->key.key_enc, rounds, 1);
	aes_ecb_encrypt(ctx->consts, ks[1], ctx->key.key_enc, rounds, 2);
	aes_ecb_encrypt(key, ks[0], rk, rounds, 1);
	aes_ecb_encrypt(ctx->consts, ks[1], rk, rounds, 2);
	kernel_neon_end();

	return cbcmac_setkey(tfm, key, sizeof(key));
@@ -894,27 +474,21 @@ static void mac_do_update(struct crypto_aes_ctx *ctx, u8 const in[], int blocks,
	int rounds = 6 + ctx->key_length / 4;

	if (may_use_simd()) {
		int rem;

		do {
			kernel_neon_begin();
			rem = aes_mac_update(in, ctx->key_enc, rounds, blocks,
					     dg, enc_before, enc_after);
			kernel_neon_end();
			in += (blocks - rem) * AES_BLOCK_SIZE;
			blocks = rem;
			enc_before = 0;
		} while (blocks);
		kernel_neon_begin();
		aes_mac_update(in, ctx->key_enc, rounds, blocks, dg, enc_before,
			       enc_after);
		kernel_neon_end();
	} else {
		if (enc_before)
			aes_encrypt(ctx, dg, dg);
			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);

		while (blocks--) {
			crypto_xor(dg, in, AES_BLOCK_SIZE);
			in += AES_BLOCK_SIZE;

			if (blocks || enc_after)
				aes_encrypt(ctx, dg, dg);
				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
						    rounds);
		}
	}
}
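On the !may_use_simd() branch this is textbook CBC-MAC: optionally encrypt the digest first, then xor in each message block and encrypt, leaving the final encryption to enc_after. A freestanding sketch of that scalar path, with a stub block cipher in place of __aes_arm64_encrypt (the stub is a placeholder, not AES):

#include <stdint.h>

#define BLK 16

/* stand-in for the real AES block encrypt */
static void block_encrypt(uint8_t blk[BLK])
{
	for (int i = 0; i < BLK; i++)
		blk[i] = (uint8_t)(blk[i] * 5 + 1);	/* placeholder transform */
}

/* CBC-MAC over full blocks, mirroring the scalar branch above */
static void cbc_mac(uint8_t dg[BLK], const uint8_t *in, int blocks,
		    int enc_before, int enc_after)
{
	if (enc_before)
		block_encrypt(dg);

	while (blocks--) {
		for (int i = 0; i < BLK; i++)
			dg[i] ^= in[i];		/* crypto_xor(dg, in, BLK) */
		in += BLK;

		if (blocks || enc_after)
			block_encrypt(dg);
	}
}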
@@ -964,7 +538,7 @@ static int cbcmac_final(struct shash_desc *desc, u8 *out)
	struct mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct mac_desc_ctx *ctx = shash_desc_ctx(desc);

	mac_do_update(&tctx->key, NULL, 0, ctx->dg, (ctx->len != 0), 0);
	mac_do_update(&tctx->key, NULL, 0, ctx->dg, 1, 0);

	memcpy(out, ctx->dg, AES_BLOCK_SIZE);

@@ -993,6 +567,7 @@ static struct shash_alg mac_algs[] = { {
	.base.cra_name = "cmac(aes)",
	.base.cra_driver_name = "cmac-aes-" MODE,
	.base.cra_priority = PRIO,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = AES_BLOCK_SIZE,
	.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
			    2 * AES_BLOCK_SIZE,
@@ -1008,6 +583,7 @@ static struct shash_alg mac_algs[] = { {
	.base.cra_name = "xcbc(aes)",
	.base.cra_driver_name = "xcbc-aes-" MODE,
	.base.cra_priority = PRIO,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = AES_BLOCK_SIZE,
	.base.cra_ctxsize = sizeof(struct mac_tfm_ctx) +
			    2 * AES_BLOCK_SIZE,
@@ -1023,6 +599,7 @@ static struct shash_alg mac_algs[] = { {
	.base.cra_name = "cbcmac(aes)",
	.base.cra_driver_name = "cbcmac-aes-" MODE,
	.base.cra_priority = PRIO,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = 1,
	.base.cra_ctxsize = sizeof(struct mac_tfm_ctx),
	.base.cra_module = THIS_MODULE,
@@ -1085,7 +662,6 @@ static int __init aes_init(void)

unregister_simds:
	aes_exit();
	return err;
unregister_ciphers:
	crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
	return err;
@@ -1097,7 +673,5 @@ module_cpu_feature_match(AES, aes_init);
module_init(aes_init);
EXPORT_SYMBOL(neon_aes_ecb_encrypt);
EXPORT_SYMBOL(neon_aes_cbc_encrypt);
EXPORT_SYMBOL(neon_aes_xts_encrypt);
EXPORT_SYMBOL(neon_aes_xts_decrypt);
#endif
module_exit(aes_exit);

@@ -1,356 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <crypto/aes.h>
#include <linux/crypto.h>
#include <linux/module.h>
#include <asm/unaligned.h>

/*
 * Emit the sbox as volatile const to prevent the compiler from doing
 * constant folding on sbox references involving fixed indexes.
 */
static volatile const u8 __cacheline_aligned aes_sbox[] = {
	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
};

static volatile const u8 __cacheline_aligned aes_inv_sbox[] = {
	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
};

extern const u8 crypto_aes_sbox[256] __alias(aes_sbox);
extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);

EXPORT_SYMBOL(crypto_aes_sbox);
EXPORT_SYMBOL(crypto_aes_inv_sbox);

static u32 mul_by_x(u32 w)
{
	u32 x = w & 0x7f7f7f7f;
	u32 y = w & 0x80808080;

	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
	return (x << 1) ^ (y >> 7) * 0x1b;
}

static u32 mul_by_x2(u32 w)
{
	u32 x = w & 0x3f3f3f3f;
	u32 y = w & 0x80808080;
	u32 z = w & 0x40404040;

	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
}
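mul_by_x() doubles all four byte lanes of a word at once; per byte this is the classic xtime() step, reducing by the AES polynomial x^8 + x^4 + x^3 + x + 1 (0x11b) whenever the shift overflows. A single-byte check against the worked values from FIPS-197, offered as a standalone sketch:

#include <assert.h>
#include <stdint.h>

/* same math as mul_by_x() above, applied to one byte for clarity */
static uint8_t xtime(uint8_t b)
{
	return (uint8_t)((b << 1) ^ ((b >> 7) * 0x1b));
}

int main(void)
{
	assert(xtime(0x57) == 0xae);	/* no overflow: a plain shift */
	assert(xtime(0x80) == 0x1b);	/* overflow: 0x100 reduced by 0x11b */
	assert(xtime(0xae) == 0x47);	/* 0x15c ^ 0x11b = 0x047 */
	return 0;
}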

static u32 mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
	 */
	u32 y = mul_by_x(x) ^ ror32(x, 16);

	return y ^ ror32(x ^ y, 8);
}

static u32 inv_mix_columns(u32 x)
{
	/*
	 * Perform the following matrix multiplication in GF(2^8)
	 *
	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
	 * | 0x9 0xe 0xb 0xd |   | x[1] |
	 * | 0xd 0x9 0xe 0xb | x | x[2] |
	 * | 0xb 0xd 0x9 0xe |   | x[3] |
	 *
	 * which can conveniently be reduced to
	 *
	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
	 */
	u32 y = mul_by_x2(x);

	return mix_columns(x ^ y ^ ror32(y, 16));
}
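The matrix factorisation in the comment is what lets inv_mix_columns() reuse mix_columns() after a single mul_by_x2() pre-multiplication. A quick user-space round-trip test of that inverse property, duplicating the two helpers verbatim so it builds on its own:

#include <assert.h>
#include <stdint.h>

static uint32_t ror32(uint32_t x, int n) { return (x >> n) | (x << (32 - n)); }

static uint32_t mul_by_x(uint32_t w)
{
	uint32_t x = w & 0x7f7f7f7f, y = w & 0x80808080;
	return (x << 1) ^ (y >> 7) * 0x1b;
}

static uint32_t mul_by_x2(uint32_t w)
{
	uint32_t x = w & 0x3f3f3f3f, y = w & 0x80808080, z = w & 0x40404040;
	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
}

static uint32_t mix_columns(uint32_t x)
{
	uint32_t y = mul_by_x(x) ^ ror32(x, 16);
	return y ^ ror32(x ^ y, 8);
}

static uint32_t inv_mix_columns(uint32_t x)
{
	uint32_t y = mul_by_x2(x);
	return mix_columns(x ^ y ^ ror32(y, 16));
}

int main(void)
{
	uint32_t w = 0x12345678;

	for (int i = 0; i < 1000; i++) {
		/* InvMixColumns must undo MixColumns for every column word */
		assert(inv_mix_columns(mix_columns(w)) == w);
		w = w * 1664525 + 1013904223;	/* LCG to vary the input */
	}
	return 0;
}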

static __always_inline u32 subshift(u32 in[], int pos)
{
	return (aes_sbox[in[pos] & 0xff]) ^
	       (aes_sbox[(in[(pos + 1) % 4] >> 8) & 0xff] << 8) ^
	       (aes_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       (aes_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
}

static __always_inline u32 inv_subshift(u32 in[], int pos)
{
	return (aes_inv_sbox[in[pos] & 0xff]) ^
	       (aes_inv_sbox[(in[(pos + 3) % 4] >> 8) & 0xff] << 8) ^
	       (aes_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
	       (aes_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
}

static u32 subw(u32 in)
{
	return (aes_sbox[in & 0xff]) ^
	       (aes_sbox[(in >> 8) & 0xff] << 8) ^
	       (aes_sbox[(in >> 16) & 0xff] << 16) ^
	       (aes_sbox[(in >> 24) & 0xff] << 24);
}

/**
 * aes_expandkey - Expands the AES key as described in FIPS-197
 * @ctx: The location where the computed key will be stored.
 * @in_key: The supplied key.
 * @key_len: The length of the supplied key.
 *
 * Returns 0 on success. The function fails only if an invalid key size (or
 * pointer) is supplied.
 * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
 * key schedule plus a 16 bytes key which is used before the first round).
 * The decryption key is prepared for the "Equivalent Inverse Cipher" as
 * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
 * for the initial combination, the second slot for the first round and so on.
 */
int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
		  unsigned int key_len)
{
	u32 kwords = key_len / sizeof(u32);
	u32 rc, i, j;

	if (key_len != AES_KEYSIZE_128 &&
	    key_len != AES_KEYSIZE_192 &&
	    key_len != AES_KEYSIZE_256)
		return -EINVAL;

	ctx->key_length = key_len;

	for (i = 0; i < kwords; i++)
		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));

	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
		u32 *rki = ctx->key_enc + (i * kwords);
		u32 *rko = rki + kwords;

		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
		rko[1] = rko[0] ^ rki[1];
		rko[2] = rko[1] ^ rki[2];
		rko[3] = rko[2] ^ rki[3];

		if (key_len == AES_KEYSIZE_192) {
			if (i >= 7)
				break;
			rko[4] = rko[3] ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
		} else if (key_len == AES_KEYSIZE_256) {
			if (i >= 6)
				break;
			rko[4] = subw(rko[3]) ^ rki[4];
			rko[5] = rko[4] ^ rki[5];
			rko[6] = rko[5] ^ rki[6];
			rko[7] = rko[6] ^ rki[7];
		}
	}

	/*
	 * Generate the decryption keys for the Equivalent Inverse Cipher.
	 * This involves reversing the order of the round keys, and applying
	 * the Inverse Mix Columns transformation to all but the first and
	 * the last one.
	 */
	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
	ctx->key_dec[3] = ctx->key_enc[key_len + 27];

	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
		ctx->key_dec[i] = inv_mix_columns(ctx->key_enc[j]);
		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
	}

	ctx->key_dec[i] = ctx->key_enc[0];
	ctx->key_dec[i + 1] = ctx->key_enc[1];
	ctx->key_dec[i + 2] = ctx->key_enc[2];
	ctx->key_dec[i + 3] = ctx->key_enc[3];

	return 0;
}
EXPORT_SYMBOL(aes_expandkey);
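The `6 + key_len / 4` formula used throughout the glue code follows directly from this schedule: 10, 12, or 14 rounds for 16-, 24-, or 32-byte keys, each needing 4 * (rounds + 1) scheduled words (44/52/60), which is why the 240-byte maximum above holds. A tiny arithmetic check:

#include <stdio.h>

int main(void)
{
	/* key sizes in bytes, as in AES_KEYSIZE_{128,192,256} */
	static const int key_len[] = { 16, 24, 32 };

	for (int i = 0; i < 3; i++) {
		int rounds = 6 + key_len[i] / 4;	/* same formula as the glue code */
		int rk_words = 4 * (rounds + 1);	/* round keys incl. the whitening key */

		printf("AES-%d: %d rounds, %d scheduled words (%d bytes)\n",
		       key_len[i] * 8, rounds, rk_words, rk_words * 4);
	}
	return 0;
}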

/**
 * aes_encrypt - Encrypt a single AES block
 * @ctx: Context struct containing the key schedule
 * @out: Buffer to store the ciphertext
 * @in: Buffer containing the plaintext
 */
void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
{
	const u32 *rkp = ctx->key_enc + 4;
	int rounds = 6 + ctx->key_length / 4;
	u32 st0[4], st1[4];
	int round;

	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);

	/*
	 * Force the compiler to emit data independent Sbox references,
	 * by xoring the input with Sbox values that are known to add up
	 * to zero. This pulls the entire Sbox into the D-cache before any
	 * data dependent lookups are done.
	 */
	st0[0] ^= aes_sbox[ 0] ^ aes_sbox[ 64] ^ aes_sbox[134] ^ aes_sbox[195];
	st0[1] ^= aes_sbox[16] ^ aes_sbox[ 82] ^ aes_sbox[158] ^ aes_sbox[221];
	st0[2] ^= aes_sbox[32] ^ aes_sbox[ 96] ^ aes_sbox[160] ^ aes_sbox[234];
	st0[3] ^= aes_sbox[48] ^ aes_sbox[112] ^ aes_sbox[186] ^ aes_sbox[241];

	for (round = 0;; round += 2, rkp += 8) {
		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];

		if (round == rounds - 2)
			break;

		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
	}

	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
}
EXPORT_SYMBOL(aes_encrypt);

/**
 * aes_decrypt - Decrypt a single AES block
 * @ctx: Context struct containing the key schedule
 * @out: Buffer to store the plaintext
 * @in: Buffer containing the ciphertext
 */
void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
{
	const u32 *rkp = ctx->key_dec + 4;
	int rounds = 6 + ctx->key_length / 4;
	u32 st0[4], st1[4];
	int round;

	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);

	/*
	 * Force the compiler to emit data independent Sbox references,
	 * by xoring the input with Sbox values that are known to add up
	 * to zero. This pulls the entire Sbox into the D-cache before any
	 * data dependent lookups are done.
	 */
	st0[0] ^= aes_inv_sbox[ 0] ^ aes_inv_sbox[ 64] ^ aes_inv_sbox[129] ^ aes_inv_sbox[200];
	st0[1] ^= aes_inv_sbox[16] ^ aes_inv_sbox[ 83] ^ aes_inv_sbox[150] ^ aes_inv_sbox[212];
	st0[2] ^= aes_inv_sbox[32] ^ aes_inv_sbox[ 96] ^ aes_inv_sbox[160] ^ aes_inv_sbox[236];
	st0[3] ^= aes_inv_sbox[48] ^ aes_inv_sbox[112] ^ aes_inv_sbox[187] ^ aes_inv_sbox[247];

	for (round = 0;; round += 2, rkp += 8) {
		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];

		if (round == rounds - 2)
			break;

		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
	}

	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
}
EXPORT_SYMBOL(aes_decrypt);

MODULE_DESCRIPTION("Generic AES library");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

@@ -13,39 +13,15 @@
	.text
	.align		4

#ifndef MAX_STRIDE
#define MAX_STRIDE	4
#endif

#if MAX_STRIDE == 4
#define ST4(x...) x
#define ST5(x...)
#else
#define ST4(x...)
#define ST5(x...) x
#endif
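ST4()/ST5() emit their argument only in the matching build, so a single source file carries both the 4-way and the 5-way instruction sequences and the unused one assembles to nothing. The same trick sketched in user-space C, purely as an illustration:

#include <stdio.h>

#ifndef MAX_STRIDE
#define MAX_STRIDE 4
#endif

#if MAX_STRIDE == 4
#define ST4(x) x	/* emitted only in the 4-way build */
#define ST5(x)		/* compiled out */
#else
#define ST4(x)
#define ST5(x) x
#endif

int main(void)
{
	ST4(puts("4-way path: process v0-v3");)
	ST5(puts("5-way path: process v0-v4");)
	return 0;
}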
|
||||
|
||||
SYM_FUNC_START_LOCAL(aes_encrypt_block4x)
|
||||
aes_encrypt_block4x:
|
||||
encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
ret
|
||||
SYM_FUNC_END(aes_encrypt_block4x)
|
||||
ENDPROC(aes_encrypt_block4x)
|
||||
|
||||
SYM_FUNC_START_LOCAL(aes_decrypt_block4x)
|
||||
aes_decrypt_block4x:
|
||||
decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7
|
||||
ret
|
||||
SYM_FUNC_END(aes_decrypt_block4x)
|
||||
|
||||
#if MAX_STRIDE == 5
|
||||
SYM_FUNC_START_LOCAL(aes_encrypt_block5x)
|
||||
encrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
|
||||
ret
|
||||
SYM_FUNC_END(aes_encrypt_block5x)
|
||||
|
||||
SYM_FUNC_START_LOCAL(aes_decrypt_block5x)
|
||||
decrypt_block5x v0, v1, v2, v3, v4, w3, x2, x8, w7
|
||||
ret
|
||||
SYM_FUNC_END(aes_decrypt_block5x)
|
||||
#endif
|
||||
ENDPROC(aes_decrypt_block4x)
|
||||
|
||||
/*
|
||||
* aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
|
||||
@ -54,24 +30,21 @@ SYM_FUNC_END(aes_decrypt_block5x)
|
||||
* int blocks)
|
||||
*/

AES_FUNC_START(aes_ecb_encrypt)
AES_ENTRY(aes_ecb_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

enc_prepare w3, x2, x5

.LecbencloopNx:
subs w4, w4, #MAX_STRIDE
subs w4, w4, #4
bmi .Lecbenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
ST4( bl aes_encrypt_block4x )
ST5( ld1 {v4.16b}, [x1], #16 )
ST5( bl aes_encrypt_block5x )
bl aes_encrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
b .LecbencloopNx
.Lecbenc1x:
adds w4, w4, #MAX_STRIDE
adds w4, w4, #4
beq .Lecbencout
.Lecbencloop:
ld1 {v0.16b}, [x1], #16 /* get next pt block */
@@ -82,27 +55,24 @@ ST5( st1 {v4.16b}, [x0], #16 )
.Lecbencout:
ldp x29, x30, [sp], #16
ret
AES_FUNC_END(aes_ecb_encrypt)
AES_ENDPROC(aes_ecb_encrypt)

AES_FUNC_START(aes_ecb_decrypt)
AES_ENTRY(aes_ecb_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

dec_prepare w3, x2, x5

.LecbdecloopNx:
subs w4, w4, #MAX_STRIDE
subs w4, w4, #4
bmi .Lecbdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
ST4( bl aes_decrypt_block4x )
ST5( ld1 {v4.16b}, [x1], #16 )
ST5( bl aes_decrypt_block5x )
bl aes_decrypt_block4x
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
b .LecbdecloopNx
.Lecbdec1x:
adds w4, w4, #MAX_STRIDE
adds w4, w4, #4
beq .Lecbdecout
.Lecbdecloop:
ld1 {v0.16b}, [x1], #16 /* get next ct block */
@@ -113,7 +83,7 @@ ST5( st1 {v4.16b}, [x0], #16 )
.Lecbdecout:
ldp x29, x30, [sp], #16
ret
AES_FUNC_END(aes_ecb_decrypt)
AES_ENDPROC(aes_ecb_decrypt)

/*
@@ -121,24 +91,9 @@ AES_FUNC_END(aes_ecb_decrypt)
 * int blocks, u8 iv[])
 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
 * int rounds, int blocks, u8 iv[],
 * u32 const rk2[]);
 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
 * int rounds, int blocks, u8 iv[],
 * u32 const rk2[]);
 */

AES_FUNC_START(aes_essiv_cbc_encrypt)
ld1 {v4.16b}, [x5] /* get iv */

mov w8, #14 /* AES-256: 14 rounds */
enc_prepare w8, x6, x7
encrypt_block v4, w8, x6, x7, w9
enc_switch_key w3, x2, x6
b .Lcbcencloop4x

AES_FUNC_START(aes_cbc_encrypt)
AES_ENTRY(aes_cbc_encrypt)
ld1 {v4.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6

@@ -170,360 +125,184 @@ AES_FUNC_START(aes_cbc_encrypt)
.Lcbcencout:
st1 {v4.16b}, [x5] /* return iv */
ret
AES_FUNC_END(aes_cbc_encrypt)
AES_FUNC_END(aes_essiv_cbc_encrypt)
AES_ENDPROC(aes_cbc_encrypt)

AES_FUNC_START(aes_essiv_cbc_decrypt)

AES_ENTRY(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

ld1 {cbciv.16b}, [x5] /* get iv */

mov w8, #14 /* AES-256: 14 rounds */
enc_prepare w8, x6, x7
encrypt_block cbciv, w8, x6, x7, w9
b .Lessivcbcdecstart

AES_FUNC_START(aes_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

ld1 {cbciv.16b}, [x5] /* get iv */
.Lessivcbcdecstart:
ld1 {v7.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6

.LcbcdecloopNx:
subs w4, w4, #MAX_STRIDE
subs w4, w4, #4
bmi .Lcbcdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
#if MAX_STRIDE == 5
ld1 {v4.16b}, [x1], #16 /* get 1 ct block */
mov v5.16b, v0.16b
mov v6.16b, v1.16b
mov v7.16b, v2.16b
bl aes_decrypt_block5x
sub x1, x1, #32
eor v0.16b, v0.16b, cbciv.16b
eor v1.16b, v1.16b, v5.16b
ld1 {v5.16b}, [x1], #16 /* reload 1 ct block */
ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v6.16b
eor v3.16b, v3.16b, v7.16b
eor v4.16b, v4.16b, v5.16b
#else
mov v4.16b, v0.16b
mov v5.16b, v1.16b
mov v6.16b, v2.16b
bl aes_decrypt_block4x
sub x1, x1, #16
eor v0.16b, v0.16b, cbciv.16b
eor v0.16b, v0.16b, v7.16b
eor v1.16b, v1.16b, v4.16b
ld1 {cbciv.16b}, [x1], #16 /* reload 1 ct block */
ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */
eor v2.16b, v2.16b, v5.16b
eor v3.16b, v3.16b, v6.16b
#endif
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
b .LcbcdecloopNx
.Lcbcdec1x:
adds w4, w4, #MAX_STRIDE
adds w4, w4, #4
beq .Lcbcdecout
.Lcbcdecloop:
ld1 {v1.16b}, [x1], #16 /* get next ct block */
mov v0.16b, v1.16b /* ...and copy to v0 */
decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, cbciv.16b /* xor with iv => pt */
mov cbciv.16b, v1.16b /* ct is next iv */
eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */
mov v7.16b, v1.16b /* ct is next iv */
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
bne .Lcbcdecloop
.Lcbcdecout:
st1 {cbciv.16b}, [x5] /* return iv */
st1 {v7.16b}, [x5] /* return iv */
ldp x29, x30, [sp], #16
ret
AES_FUNC_END(aes_cbc_decrypt)
AES_FUNC_END(aes_essiv_cbc_decrypt)

/*
 * aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
 * int rounds, int bytes, u8 const iv[])
 * aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
 * int rounds, int bytes, u8 const iv[])
 */

AES_FUNC_START(aes_cbc_cts_encrypt)
adr_l x8, .Lcts_permute_table
sub x4, x4, #16
add x9, x8, #32
add x8, x8, x4
sub x9, x9, x4
ld1 {v3.16b}, [x8]
ld1 {v4.16b}, [x9]

ld1 {v0.16b}, [x1], x4 /* overlapping loads */
ld1 {v1.16b}, [x1]

ld1 {v5.16b}, [x5] /* get iv */
enc_prepare w3, x2, x6

eor v0.16b, v0.16b, v5.16b /* xor with iv */
tbl v1.16b, {v1.16b}, v4.16b
encrypt_block v0, w3, x2, x6, w7

eor v1.16b, v1.16b, v0.16b
tbl v0.16b, {v0.16b}, v3.16b
encrypt_block v1, w3, x2, x6, w7

add x4, x0, x4
st1 {v0.16b}, [x4] /* overlapping stores */
st1 {v1.16b}, [x0]
ret
AES_FUNC_END(aes_cbc_cts_encrypt)

AES_FUNC_START(aes_cbc_cts_decrypt)
adr_l x8, .Lcts_permute_table
sub x4, x4, #16
add x9, x8, #32
add x8, x8, x4
sub x9, x9, x4
ld1 {v3.16b}, [x8]
ld1 {v4.16b}, [x9]

ld1 {v0.16b}, [x1], x4 /* overlapping loads */
ld1 {v1.16b}, [x1]

ld1 {v5.16b}, [x5] /* get iv */
dec_prepare w3, x2, x6

decrypt_block v0, w3, x2, x6, w7
tbl v2.16b, {v0.16b}, v3.16b
eor v2.16b, v2.16b, v1.16b

tbx v0.16b, {v1.16b}, v4.16b
decrypt_block v0, w3, x2, x6, w7
eor v0.16b, v0.16b, v5.16b /* xor with iv */

add x4, x0, x4
st1 {v2.16b}, [x4] /* overlapping stores */
st1 {v0.16b}, [x0]
ret
AES_FUNC_END(aes_cbc_cts_decrypt)
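
The two CTS routines above implement CBC ciphertext stealing with permute tables and overlapping loads/stores. As a reference for what the branchless tbl/tbx shuffling computes, here is a plain C sketch of the encrypt-side tail handling (CS3 block order; aes_encrypt_one is a hypothetical one-block primitive, not a kernel API):

#include <string.h>
#include <stdint.h>

#define AES_BLOCK 16

void aes_encrypt_one(uint8_t *out, const uint8_t *in);	/* hypothetical */

/* Encrypt the final (16 + tail) bytes, 1 <= tail <= 16. */
static void cbc_cts_tail(uint8_t *out, const uint8_t *in, size_t tail,
			 const uint8_t iv[AES_BLOCK])
{
	uint8_t pen[AES_BLOCK], last[AES_BLOCK] = { 0 };

	memcpy(last, in + AES_BLOCK, tail);	/* short final block, zero padded */
	for (int i = 0; i < AES_BLOCK; i++)
		pen[i] = in[i] ^ iv[i];
	aes_encrypt_one(pen, pen);		/* X = E(P_{n-1} ^ IV) */
	for (int i = 0; i < AES_BLOCK; i++)
		last[i] ^= pen[i];
	aes_encrypt_one(last, last);		/* E((P_n || 0-pad) ^ X) */
	memcpy(out, last, AES_BLOCK);		/* swapped: full block first */
	memcpy(out + AES_BLOCK, pen, tail);	/* truncated penultimate block */
}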

.section ".rodata", "a"
.align 6
.Lcts_permute_table:
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.previous
AES_ENDPROC(aes_cbc_decrypt)

/*
 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int bytes, u8 ctr[], u8 finalbuf[])
 * int blocks, u8 ctr[])
 */
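
The prototype above is the whole contract; CTR itself reduces to generating E_k(ctr) keystream blocks and XORing them into the data. A byte-oriented C reference, with aes_encrypt_one as a hypothetical stand-in for the expanded-key block cipher:

#include <string.h>
#include <stdint.h>
#include <stddef.h>

#define AES_BLOCK 16

void aes_encrypt_one(uint8_t *out, const uint8_t *in);	/* hypothetical */

static void ctr_inc_be(uint8_t ctr[AES_BLOCK])
{
	/* 128-bit big-endian increment, rippling the carry from the end */
	for (int i = AES_BLOCK - 1; i >= 0; i--)
		if (++ctr[i])
			break;
}

static void ctr_encrypt_sketch(uint8_t *out, const uint8_t *in, size_t len,
			       uint8_t ctr[AES_BLOCK])
{
	uint8_t ks[AES_BLOCK];

	while (len) {
		size_t n = len < AES_BLOCK ? len : AES_BLOCK;

		aes_encrypt_one(ks, ctr);	/* keystream = E_k(ctr) */
		for (size_t i = 0; i < n; i++)
			out[i] = in[i] ^ ks[i];
		ctr_inc_be(ctr);
		in += n; out += n; len -= n;
	}
}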

AES_FUNC_START(aes_ctr_encrypt)
AES_ENTRY(aes_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

enc_prepare w3, x2, x12
ld1 {vctr.16b}, [x5]

umov x12, vctr.d[1] /* keep swabbed ctr in reg */
rev x12, x12
enc_prepare w3, x2, x6
ld1 {v4.16b}, [x5]

umov x6, v4.d[1] /* keep swabbed ctr in reg */
rev x6, x6
cmn w6, w4 /* 32 bit overflow? */
bcs .Lctrloop
.LctrloopNx:
add w7, w4, #15
sub w4, w4, #MAX_STRIDE << 4
lsr w7, w7, #4
mov w8, #MAX_STRIDE
cmp w7, w8
csel w7, w7, w8, lt
adds x12, x12, x7

mov v0.16b, vctr.16b
mov v1.16b, vctr.16b
mov v2.16b, vctr.16b
mov v3.16b, vctr.16b
ST5( mov v4.16b, vctr.16b )
bcs 0f

.subsection 1
/* apply carry to outgoing counter */
0: umov x8, vctr.d[0]
rev x8, x8
add x8, x8, #1
rev x8, x8
ins vctr.d[0], x8

/* apply carry to N counter blocks for N := x12 */
cbz x12, 2f
adr x16, 1f
sub x16, x16, x12, lsl #3
br x16
hint 34 // bti c
mov v0.d[0], vctr.d[0]
hint 34 // bti c
mov v1.d[0], vctr.d[0]
hint 34 // bti c
mov v2.d[0], vctr.d[0]
hint 34 // bti c
mov v3.d[0], vctr.d[0]
ST5( hint 34 )
ST5( mov v4.d[0], vctr.d[0] )
1: b 2f
.previous

2: rev x7, x12
ins vctr.d[1], x7
sub x7, x12, #MAX_STRIDE - 1
sub x8, x12, #MAX_STRIDE - 2
sub x9, x12, #MAX_STRIDE - 3
rev x7, x7
rev x8, x8
mov v1.d[1], x7
rev x9, x9
ST5( sub x10, x12, #MAX_STRIDE - 4 )
mov v2.d[1], x8
ST5( rev x10, x10 )
mov v3.d[1], x9
ST5( mov v4.d[1], x10 )
tbnz w4, #31, .Lctrtail
ld1 {v5.16b-v7.16b}, [x1], #48
ST4( bl aes_encrypt_block4x )
ST5( bl aes_encrypt_block5x )
subs w4, w4, #4
bmi .Lctr1x
add w7, w6, #1
mov v0.16b, v4.16b
add w8, w6, #2
mov v1.16b, v4.16b
add w9, w6, #3
mov v2.16b, v4.16b
rev w7, w7
mov v3.16b, v4.16b
rev w8, w8
mov v1.s[3], w7
rev w9, w9
mov v2.s[3], w8
mov v3.s[3], w9
ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */
bl aes_encrypt_block4x
eor v0.16b, v5.16b, v0.16b
ST4( ld1 {v5.16b}, [x1], #16 )
ld1 {v5.16b}, [x1], #16 /* get 1 input block */
eor v1.16b, v6.16b, v1.16b
ST5( ld1 {v5.16b-v6.16b}, [x1], #32 )
eor v2.16b, v7.16b, v2.16b
eor v3.16b, v5.16b, v3.16b
ST5( eor v4.16b, v6.16b, v4.16b )
st1 {v0.16b-v3.16b}, [x0], #64
ST5( st1 {v4.16b}, [x0], #16 )
add x6, x6, #4
rev x7, x6
ins v4.d[1], x7
cbz w4, .Lctrout
b .LctrloopNx
.Lctr1x:
adds w4, w4, #4
beq .Lctrout
.Lctrloop:
mov v0.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7

adds x6, x6, #1 /* increment BE ctr */
rev x7, x6
ins v4.d[1], x7
bcs .Lctrcarry /* overflow? */

.Lctrcarrydone:
subs w4, w4, #1
bmi .Lctrtailblock /* blocks <0 means tail block */
ld1 {v3.16b}, [x1], #16
eor v3.16b, v0.16b, v3.16b
st1 {v3.16b}, [x0], #16
bne .Lctrloop

.Lctrout:
st1 {vctr.16b}, [x5] /* return next CTR value */
st1 {v4.16b}, [x5] /* return next CTR value */
ldp x29, x30, [sp], #16
ret

.Lctrtail:
/* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */
mov x16, #16
ands x13, x4, #0xf
csel x13, x13, x16, ne
.Lctrtailblock:
st1 {v0.16b}, [x0]
ldp x29, x30, [sp], #16
ret

ST5( cmp w4, #64 - (MAX_STRIDE << 4) )
ST5( csel x14, x16, xzr, gt )
cmp w4, #48 - (MAX_STRIDE << 4)
csel x15, x16, xzr, gt
cmp w4, #32 - (MAX_STRIDE << 4)
csel x16, x16, xzr, gt
cmp w4, #16 - (MAX_STRIDE << 4)
ble .Lctrtail1x

adr_l x12, .Lcts_permute_table
add x12, x12, x13

ST5( ld1 {v5.16b}, [x1], x14 )
ld1 {v6.16b}, [x1], x15
ld1 {v7.16b}, [x1], x16

ST4( bl aes_encrypt_block4x )
ST5( bl aes_encrypt_block5x )

ld1 {v8.16b}, [x1], x13
ld1 {v9.16b}, [x1]
ld1 {v10.16b}, [x12]

ST4( eor v6.16b, v6.16b, v0.16b )
ST4( eor v7.16b, v7.16b, v1.16b )
ST4( tbl v3.16b, {v3.16b}, v10.16b )
ST4( eor v8.16b, v8.16b, v2.16b )
ST4( eor v9.16b, v9.16b, v3.16b )

ST5( eor v5.16b, v5.16b, v0.16b )
ST5( eor v6.16b, v6.16b, v1.16b )
ST5( tbl v4.16b, {v4.16b}, v10.16b )
ST5( eor v7.16b, v7.16b, v2.16b )
ST5( eor v8.16b, v8.16b, v3.16b )
ST5( eor v9.16b, v9.16b, v4.16b )

ST5( st1 {v5.16b}, [x0], x14 )
st1 {v6.16b}, [x0], x15
st1 {v7.16b}, [x0], x16
add x13, x13, x0
st1 {v9.16b}, [x13] // overlapping stores
st1 {v8.16b}, [x0]
b .Lctrout

.Lctrtail1x:
csel x0, x0, x6, eq // use finalbuf if less than a full block
ld1 {v5.16b}, [x1]
ST5( mov v3.16b, v4.16b )
encrypt_block v3, w3, x2, x8, w7
eor v5.16b, v5.16b, v3.16b
st1 {v5.16b}, [x0]
b .Lctrout
AES_FUNC_END(aes_ctr_encrypt)
.Lctrcarry:
umov x7, v4.d[0] /* load upper word of ctr */
rev x7, x7 /* ... to handle the carry */
add x7, x7, #1
rev x7, x7
ins v4.d[0], x7
b .Lctrcarrydone
AES_ENDPROC(aes_ctr_encrypt)
.ltorg

/*
 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 * int bytes, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 * int bytes, u8 const rk2[], u8 iv[], int first)
 * int blocks, u8 const rk2[], u8 iv[], int first)
 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
 * int blocks, u8 const rk2[], u8 iv[], int first)
 */

.macro next_tweak, out, in, tmp
.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
and \tmp\().16b, \tmp\().16b, xtsmask.16b
and \tmp\().16b, \tmp\().16b, \const\().16b
add \out\().2d, \in\().2d, \in\().2d
ext \tmp\().16b, \tmp\().16b, \tmp\().16b, #8
eor \out\().16b, \out\().16b, \tmp\().16b
.endm
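
Both variants of next_tweak above compute the same thing: multiplication of the 128-bit XTS tweak by x in GF(2^128), folding in the reduction constant 0x87 when the top bit shifts out. An equivalent C sketch over a little-endian byte array:

#include <stdint.h>

/* Multiply an XTS tweak by x in GF(2^128); t[0] is the least significant byte. */
static void xts_next_tweak(uint8_t t[16])
{
	uint8_t carry = 0;

	for (int i = 0; i < 16; i++) {
		uint8_t next_carry = t[i] >> 7;

		t[i] = (uint8_t)(t[i] << 1) | carry;
		carry = next_carry;
	}
	if (carry)
		t[0] ^= 0x87;	/* reduction: x^128 + x^7 + x^2 + x + 1 */
}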

.macro xts_load_mask, tmp
movi xtsmask.2s, #0x1
movi \tmp\().2s, #0x87
uzp1 xtsmask.4s, xtsmask.4s, \tmp\().4s
.endm
.Lxts_mul_x:
CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )

AES_FUNC_START(aes_xts_encrypt)
AES_ENTRY(aes_xts_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

ld1 {v4.16b}, [x6]
xts_load_mask v8
cbz w7, .Lxtsencnotfirst

enc_prepare w3, x5, x8
xts_cts_skip_tw w7, .LxtsencNx
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
enc_switch_key w3, x2, x8
ldr q7, .Lxts_mul_x
b .LxtsencNx

.Lxtsencnotfirst:
enc_prepare w3, x2, x8
.LxtsencloopNx:
next_tweak v4, v4, v8
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsencNx:
subs w4, w4, #64
subs w4, w4, #4
bmi .Lxtsenc1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */
next_tweak v5, v4, v8
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v8
next_tweak v6, v5, v7, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v8
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
bl aes_encrypt_block4x
eor v3.16b, v3.16b, v7.16b
@@ -532,91 +311,56 @@ AES_FUNC_START(aes_xts_encrypt)
eor v2.16b, v2.16b, v6.16b
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsencret
xts_reload_mask v8
cbz w4, .Lxtsencout
b .LxtsencloopNx
.Lxtsenc1x:
adds w4, w4, #64
adds w4, w4, #4
beq .Lxtsencout
subs w4, w4, #16
bmi .LxtsencctsNx
.Lxtsencloop:
ld1 {v0.16b}, [x1], #16
.Lxtsencctsout:
eor v0.16b, v0.16b, v4.16b
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
encrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
cbz w4, .Lxtsencout
subs w4, w4, #16
next_tweak v4, v4, v8
bmi .Lxtsenccts
st1 {v0.16b}, [x0], #16
subs w4, w4, #1
beq .Lxtsencout
next_tweak v4, v4, v7, v8
b .Lxtsencloop
.Lxtsencout:
st1 {v0.16b}, [x0]
.Lxtsencret:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret
AES_ENDPROC(aes_xts_encrypt)

.LxtsencctsNx:
mov v0.16b, v3.16b
sub x0, x0, #16
.Lxtsenccts:
adr_l x8, .Lcts_permute_table

add x1, x1, w4, sxtw /* rewind input pointer */
add w4, w4, #16 /* # bytes in final block */
add x9, x8, #32
add x8, x8, x4
sub x9, x9, x4
add x4, x0, x4 /* output address of final block */

ld1 {v1.16b}, [x1] /* load final block */
ld1 {v2.16b}, [x8]
ld1 {v3.16b}, [x9]

tbl v2.16b, {v0.16b}, v2.16b
tbx v0.16b, {v1.16b}, v3.16b
st1 {v2.16b}, [x4] /* overlapping stores */
mov w4, wzr
b .Lxtsencctsout
AES_FUNC_END(aes_xts_encrypt)

AES_FUNC_START(aes_xts_decrypt)
AES_ENTRY(aes_xts_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

/* subtract 16 bytes if we are doing CTS */
sub w8, w4, #0x10
tst w4, #0xf
csel w4, w4, w8, eq

ld1 {v4.16b}, [x6]
xts_load_mask v8
xts_cts_skip_tw w7, .Lxtsdecskiptw
cbz w7, .Lxtsdecnotfirst

enc_prepare w3, x5, x8
encrypt_block v4, w3, x5, x8, w7 /* first tweak */
.Lxtsdecskiptw:
dec_prepare w3, x2, x8
ldr q7, .Lxts_mul_x
b .LxtsdecNx

.Lxtsdecnotfirst:
dec_prepare w3, x2, x8
.LxtsdecloopNx:
next_tweak v4, v4, v8
ldr q7, .Lxts_mul_x
next_tweak v4, v4, v7, v8
.LxtsdecNx:
subs w4, w4, #64
subs w4, w4, #4
bmi .Lxtsdec1x
ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */
next_tweak v5, v4, v8
next_tweak v5, v4, v7, v8
eor v0.16b, v0.16b, v4.16b
next_tweak v6, v5, v8
next_tweak v6, v5, v7, v8
eor v1.16b, v1.16b, v5.16b
eor v2.16b, v2.16b, v6.16b
next_tweak v7, v6, v8
next_tweak v7, v6, v7, v8
eor v3.16b, v3.16b, v7.16b
bl aes_decrypt_block4x
eor v3.16b, v3.16b, v7.16b
@@ -626,62 +370,31 @@ AES_FUNC_START(aes_xts_decrypt)
st1 {v0.16b-v3.16b}, [x0], #64
mov v4.16b, v7.16b
cbz w4, .Lxtsdecout
xts_reload_mask v8
b .LxtsdecloopNx
.Lxtsdec1x:
adds w4, w4, #64
adds w4, w4, #4
beq .Lxtsdecout
subs w4, w4, #16
.Lxtsdecloop:
ld1 {v0.16b}, [x1], #16
bmi .Lxtsdeccts
.Lxtsdecctsout:
eor v0.16b, v0.16b, v4.16b
ld1 {v1.16b}, [x1], #16
eor v0.16b, v1.16b, v4.16b
decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v4.16b
st1 {v0.16b}, [x0], #16
cbz w4, .Lxtsdecout
subs w4, w4, #16
next_tweak v4, v4, v8
subs w4, w4, #1
beq .Lxtsdecout
next_tweak v4, v4, v7, v8
b .Lxtsdecloop
.Lxtsdecout:
st1 {v4.16b}, [x6]
ldp x29, x30, [sp], #16
ret

.Lxtsdeccts:
adr_l x8, .Lcts_permute_table

add x1, x1, w4, sxtw /* rewind input pointer */
add w4, w4, #16 /* # bytes in final block */
add x9, x8, #32
add x8, x8, x4
sub x9, x9, x4
add x4, x0, x4 /* output address of final block */

next_tweak v5, v4, v8

ld1 {v1.16b}, [x1] /* load final block */
ld1 {v2.16b}, [x8]
ld1 {v3.16b}, [x9]

eor v0.16b, v0.16b, v5.16b
decrypt_block v0, w3, x2, x8, w7
eor v0.16b, v0.16b, v5.16b

tbl v2.16b, {v0.16b}, v2.16b
tbx v0.16b, {v1.16b}, v3.16b

st1 {v2.16b}, [x4] /* overlapping stores */
mov w4, wzr
b .Lxtsdecctsout
AES_FUNC_END(aes_xts_decrypt)
AES_ENDPROC(aes_xts_decrypt)

/*
 * aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 * int blocks, u8 dg[], int enc_before, int enc_after)
 */
AES_FUNC_START(aes_mac_update)
AES_ENTRY(aes_mac_update)
ld1 {v0.16b}, [x4] /* get dg */
enc_prepare w2, x1, x7
cbz w5, .Lmacloop4x
@@ -703,8 +416,6 @@ AES_FUNC_START(aes_mac_update)
csinv x5, x6, xzr, eq
cbz w5, .Lmacout
encrypt_block v0, w2, x1, x7, w8
st1 {v0.16b}, [x4] /* return dg */
cond_yield .Lmacout, x7
b .Lmacloop4x
.Lmac1x:
add w3, w3, #4
@@ -717,12 +428,10 @@ AES_FUNC_START(aes_mac_update)
csinv x5, x6, xzr, eq
cbz w5, .Lmacout

.Lmacenc:
encrypt_block v0, w2, x1, x7, w8
b .Lmacloop

.Lmacout:
st1 {v0.16b}, [x4] /* return dg */
mov w0, w3
ret
AES_FUNC_END(aes_mac_update)
AES_ENDPROC(aes_mac_update)
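
aes_mac_update is the CBC-MAC engine behind the CMAC/XCBC glue: each input block is XORed into the running digest dg, which is re-encrypted between blocks, with enc_before/enc_after controlling the encryptions at the boundaries. Roughly, in C (aes_encrypt_one is a hypothetical stand-in for encrypt_block):

#include <stdint.h>

#define AES_BLOCK 16

void aes_encrypt_one(uint8_t *out, const uint8_t *in);	/* hypothetical */

static void mac_update_sketch(const uint8_t *in, int blocks,
			      uint8_t dg[AES_BLOCK],
			      int enc_before, int enc_after)
{
	if (enc_before)
		aes_encrypt_one(dg, dg);

	while (blocks--) {
		for (int i = 0; i < AES_BLOCK; i++)
			dg[i] ^= in[i];			/* fold block into digest */
		in += AES_BLOCK;
		if (blocks || enc_after)
			aes_encrypt_one(dg, dg);	/* encrypt between blocks */
	}
}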

@@ -11,21 +11,8 @@
#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func) SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func) SYM_FUNC_END(neon_ ## func)

xtsmask .req v7
cbciv .req v7
vctr .req v4

.macro xts_reload_mask, tmp
xts_load_mask \tmp
.endm

/* special case for the neon-bs driver calling into this one for CTS */
.macro xts_cts_skip_tw, reg, lbl
tbnz \reg, #1, \lbl
.endm
#define AES_ENTRY(func) ENTRY(neon_ ## func)
#define AES_ENDPROC(func) ENDPROC(neon_ ## func)

/* multiply by polynomial 'x' in GF(2^8) */
.macro mul_by_x, out, in, temp, const
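
The mul_by_x macro is the GF(2^8) xtime step used by (Inv)MixColumns: shift left by one and conditionally XOR the AES reduction constant 0x1b (the movi v12.16b, #0x1b in the prepare macro below). In scalar C:

#include <stdint.h>

/* multiply a GF(2^8) element by the polynomial x (the AES "xtime" step) */
static uint8_t mul_by_x(uint8_t b)
{
	return (uint8_t)(b << 1) ^ ((b & 0x80) ? 0x1b : 0x00);
}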
@@ -45,10 +32,10 @@

/* preload the entire Sbox */
.macro prepare, sbox, shiftrows, temp
adr \temp, \sbox
movi v12.16b, #0x1b
ldr_l q13, \shiftrows, \temp
ldr_l q14, .Lror32by8, \temp
adr_l \temp, \sbox
ldr q13, \shiftrows
ldr q14, .Lror32by8
ld1 {v16.16b-v19.16b}, [\temp], #64
ld1 {v20.16b-v23.16b}, [\temp], #64
ld1 {v24.16b-v27.16b}, [\temp], #64
@@ -57,7 +44,7 @@

/* do preload for encryption */
.macro enc_prepare, ignore0, ignore1, temp
prepare crypto_aes_sbox, .LForward_ShiftRows, \temp
prepare .LForward_Sbox, .LForward_ShiftRows, \temp
.endm

.macro enc_switch_key, ignore0, ignore1, temp
@@ -66,7 +53,7 @@

/* do preload for decryption */
.macro dec_prepare, ignore0, ignore1, temp
prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
.endm

/* apply SubBytes transformation using the preloaded Sbox */
@@ -124,9 +111,26 @@

/*
 * Interleaved versions: functionally equivalent to the
 * ones above, but applied to AES states in parallel.
 * ones above, but applied to 2 or 4 AES states in parallel.
 */

.macro sub_bytes_2x, in0, in1
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
sub v9.16b, \in1\().16b, v15.16b
tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
sub v10.16b, v8.16b, v15.16b
tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
sub v11.16b, v9.16b, v15.16b
tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
sub v8.16b, v10.16b, v15.16b
tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
sub v9.16b, v11.16b, v15.16b
tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
.endm

.macro sub_bytes_4x, in0, in1, in2, in3
sub v8.16b, \in0\().16b, v15.16b
tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
@@ -205,6 +209,25 @@
eor \in1\().16b, \in1\().16b, v11.16b
.endm

.macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
mov \i, \rounds
1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
movi v15.16b, #0x40
tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
sub_bytes_2x \in0, \in1
subs \i, \i, #1
ld1 {v15.4s}, [\rkp], #16
beq 2222f
mix_columns_2x \in0, \in1, \enc
b 1111b
2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
.endm

.macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
ld1 {v15.4s}, [\rk]
add \rkp, \rk, #16
@@ -231,6 +254,14 @@
eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
.endm

.macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
.endm

.macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
.endm

.macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
.endm
@@ -241,8 +272,76 @@

#include "aes-modes.S"

.section ".rodata", "a"
.align 4
.text
.align 6
.LForward_Sbox:
.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16

.LReverse_Sbox:
.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d

.LForward_ShiftRows:
.octa 0x0b06010c07020d08030e09040f0a0500

@@ -383,7 +383,7 @@ ISRM0: .octa 0x0306090c00070a0d01040b0e0205080f
/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 */
SYM_FUNC_START(aesbs_convert_key)
ENTRY(aesbs_convert_key)
ld1 {v7.4s}, [x1], #16 // load round 0 key
ld1 {v17.4s}, [x1], #16 // load round 1 key

@@ -428,10 +428,10 @@ SYM_FUNC_START(aesbs_convert_key)
eor v17.16b, v17.16b, v7.16b
str q17, [x0]
ret
SYM_FUNC_END(aesbs_convert_key)
ENDPROC(aesbs_convert_key)

.align 4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
aesbs_encrypt8:
ldr q9, [bskey], #16 // round 0 key
ldr q8, M0SR
ldr q24, SR
@@ -491,10 +491,10 @@ SYM_FUNC_START_LOCAL(aesbs_encrypt8)
eor v2.16b, v2.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
SYM_FUNC_END(aesbs_encrypt8)
ENDPROC(aesbs_encrypt8)

.align 4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
aesbs_decrypt8:
lsl x9, rounds, #7
add bskey, bskey, x9

@@ -556,7 +556,7 @@ SYM_FUNC_START_LOCAL(aesbs_decrypt8)
eor v3.16b, v3.16b, v12.16b
eor v5.16b, v5.16b, v12.16b
ret
SYM_FUNC_END(aesbs_decrypt8)
ENDPROC(aesbs_decrypt8)

/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
@@ -565,122 +565,110 @@ SYM_FUNC_END(aesbs_decrypt8)
 * int blocks)
 */
.macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
frame_push 5

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
stp x29, x30, [sp, #-16]!
mov x29, sp

99: mov x5, #1
lsl x5, x5, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
lsl x5, x5, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
csel x5, x5, xzr, mi

ld1 {v0.16b}, [x20], #16
ld1 {v0.16b}, [x1], #16
tbnz x5, #1, 0f
ld1 {v1.16b}, [x20], #16
ld1 {v1.16b}, [x1], #16
tbnz x5, #2, 0f
ld1 {v2.16b}, [x20], #16
ld1 {v2.16b}, [x1], #16
tbnz x5, #3, 0f
ld1 {v3.16b}, [x20], #16
ld1 {v3.16b}, [x1], #16
tbnz x5, #4, 0f
ld1 {v4.16b}, [x20], #16
ld1 {v4.16b}, [x1], #16
tbnz x5, #5, 0f
ld1 {v5.16b}, [x20], #16
ld1 {v5.16b}, [x1], #16
tbnz x5, #6, 0f
ld1 {v6.16b}, [x20], #16
ld1 {v6.16b}, [x1], #16
tbnz x5, #7, 0f
ld1 {v7.16b}, [x20], #16
ld1 {v7.16b}, [x1], #16

0: mov bskey, x21
mov rounds, x22
0: mov bskey, x2
mov rounds, x3
bl \do8

st1 {\o0\().16b}, [x19], #16
st1 {\o0\().16b}, [x0], #16
tbnz x5, #1, 1f
st1 {\o1\().16b}, [x19], #16
st1 {\o1\().16b}, [x0], #16
tbnz x5, #2, 1f
st1 {\o2\().16b}, [x19], #16
st1 {\o2\().16b}, [x0], #16
tbnz x5, #3, 1f
st1 {\o3\().16b}, [x19], #16
st1 {\o3\().16b}, [x0], #16
tbnz x5, #4, 1f
st1 {\o4\().16b}, [x19], #16
st1 {\o4\().16b}, [x0], #16
tbnz x5, #5, 1f
st1 {\o5\().16b}, [x19], #16
st1 {\o5\().16b}, [x0], #16
tbnz x5, #6, 1f
st1 {\o6\().16b}, [x19], #16
st1 {\o6\().16b}, [x0], #16
tbnz x5, #7, 1f
st1 {\o7\().16b}, [x19], #16
st1 {\o7\().16b}, [x0], #16

cbz x23, 1f
b 99b
cbnz x4, 99b

1: frame_pop
1: ldp x29, x30, [sp], #16
ret
.endm
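
The `mov x5, #1 / lsl x5, x5, x4` prologue of __ecb_crypt builds a sentinel mask: when fewer than eight blocks remain, bit N of the mask is set for the first block that does not exist, and the tbnz tests after each ld1/st1 bail out at exactly that point. A C sketch of the same bookkeeping, with hypothetical load_block/store_block helpers standing in for the vector loads and stores:

#include <stdint.h>

void load_block(int idx);	/* hypothetical ld1 stand-in */
void store_block(int idx);	/* hypothetical st1 stand-in */

static void crypt8_sketch(int blocks)
{
	while (blocks > 0) {
		/* mask = 0 for a full batch of 8, else 1 << blocks:
		 * bit i set means "stop before block i" (the tbnz tests) */
		uint64_t mask = blocks >= 8 ? 0 : UINT64_C(1) << blocks;
		int n = blocks >= 8 ? 8 : blocks;

		for (int i = 0; i < 8; i++) {
			if (mask & (UINT64_C(1) << i))
				break;
			load_block(i);
		}
		/* ... bl aesbs_encrypt8 / aesbs_decrypt8 ... */
		for (int i = 0; i < 8; i++) {
			if (mask & (UINT64_C(1) << i))
				break;
			store_block(i);
		}
		blocks -= n;
	}
}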

.align 4
SYM_FUNC_START(aesbs_ecb_encrypt)
ENTRY(aesbs_ecb_encrypt)
__ecb_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_ecb_encrypt)
ENDPROC(aesbs_ecb_encrypt)

.align 4
SYM_FUNC_START(aesbs_ecb_decrypt)
ENTRY(aesbs_ecb_decrypt)
__ecb_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_ecb_decrypt)
ENDPROC(aesbs_ecb_decrypt)

/*
 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 */
.align 4
SYM_FUNC_START(aesbs_cbc_decrypt)
frame_push 6

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
ENTRY(aesbs_cbc_decrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

99: mov x6, #1
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
lsl x6, x6, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
csel x6, x6, xzr, mi

ld1 {v0.16b}, [x20], #16
ld1 {v0.16b}, [x1], #16
mov v25.16b, v0.16b
tbnz x6, #1, 0f
ld1 {v1.16b}, [x20], #16
ld1 {v1.16b}, [x1], #16
mov v26.16b, v1.16b
tbnz x6, #2, 0f
ld1 {v2.16b}, [x20], #16
ld1 {v2.16b}, [x1], #16
mov v27.16b, v2.16b
tbnz x6, #3, 0f
ld1 {v3.16b}, [x20], #16
ld1 {v3.16b}, [x1], #16
mov v28.16b, v3.16b
tbnz x6, #4, 0f
ld1 {v4.16b}, [x20], #16
ld1 {v4.16b}, [x1], #16
mov v29.16b, v4.16b
tbnz x6, #5, 0f
ld1 {v5.16b}, [x20], #16
ld1 {v5.16b}, [x1], #16
mov v30.16b, v5.16b
tbnz x6, #6, 0f
ld1 {v6.16b}, [x20], #16
ld1 {v6.16b}, [x1], #16
mov v31.16b, v6.16b
tbnz x6, #7, 0f
ld1 {v7.16b}, [x20]
ld1 {v7.16b}, [x1]

0: mov bskey, x21
mov rounds, x22
0: mov bskey, x2
mov rounds, x3
bl aesbs_decrypt8

ld1 {v24.16b}, [x24] // load IV
ld1 {v24.16b}, [x5] // load IV

eor v1.16b, v1.16b, v25.16b
eor v6.16b, v6.16b, v26.16b
@@ -691,37 +679,36 @@ SYM_FUNC_START(aesbs_cbc_decrypt)
eor v3.16b, v3.16b, v30.16b
eor v5.16b, v5.16b, v31.16b

st1 {v0.16b}, [x19], #16
st1 {v0.16b}, [x0], #16
mov v24.16b, v25.16b
tbnz x6, #1, 1f
st1 {v1.16b}, [x19], #16
st1 {v1.16b}, [x0], #16
mov v24.16b, v26.16b
tbnz x6, #2, 1f
st1 {v6.16b}, [x19], #16
st1 {v6.16b}, [x0], #16
mov v24.16b, v27.16b
tbnz x6, #3, 1f
st1 {v4.16b}, [x19], #16
st1 {v4.16b}, [x0], #16
mov v24.16b, v28.16b
tbnz x6, #4, 1f
st1 {v2.16b}, [x19], #16
st1 {v2.16b}, [x0], #16
mov v24.16b, v29.16b
tbnz x6, #5, 1f
st1 {v7.16b}, [x19], #16
st1 {v7.16b}, [x0], #16
mov v24.16b, v30.16b
tbnz x6, #6, 1f
st1 {v3.16b}, [x19], #16
st1 {v3.16b}, [x0], #16
mov v24.16b, v31.16b
tbnz x6, #7, 1f
ld1 {v24.16b}, [x20], #16
st1 {v5.16b}, [x19], #16
1: st1 {v24.16b}, [x24] // store IV
ld1 {v24.16b}, [x1], #16
st1 {v5.16b}, [x0], #16
1: st1 {v24.16b}, [x5] // store IV

cbz x23, 2f
b 99b
cbnz x4, 99b

2: frame_pop
ldp x29, x30, [sp], #16
ret
SYM_FUNC_END(aesbs_cbc_decrypt)
ENDPROC(aesbs_cbc_decrypt)

.macro next_tweak, out, in, const, tmp
sshr \tmp\().2d, \in\().2d, #63
@@ -731,103 +718,100 @@ SYM_FUNC_END(aesbs_cbc_decrypt)
eor \out\().16b, \out\().16b, \tmp\().16b
.endm

.align 4
.Lxts_mul_x:
CPU_LE( .quad 1, 0x87 )
CPU_BE( .quad 0x87, 1 )

/*
 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 * int blocks, u8 iv[])
 */
SYM_FUNC_START_LOCAL(__xts_crypt8)
__xts_crypt8:
mov x6, #1
lsl x6, x6, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
lsl x6, x6, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
csel x6, x6, xzr, mi

ld1 {v0.16b}, [x20], #16
ld1 {v0.16b}, [x1], #16
next_tweak v26, v25, v30, v31
eor v0.16b, v0.16b, v25.16b
tbnz x6, #1, 0f

ld1 {v1.16b}, [x20], #16
ld1 {v1.16b}, [x1], #16
next_tweak v27, v26, v30, v31
eor v1.16b, v1.16b, v26.16b
tbnz x6, #2, 0f

ld1 {v2.16b}, [x20], #16
ld1 {v2.16b}, [x1], #16
next_tweak v28, v27, v30, v31
eor v2.16b, v2.16b, v27.16b
tbnz x6, #3, 0f

ld1 {v3.16b}, [x20], #16
ld1 {v3.16b}, [x1], #16
next_tweak v29, v28, v30, v31
eor v3.16b, v3.16b, v28.16b
tbnz x6, #4, 0f

ld1 {v4.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset]
ld1 {v4.16b}, [x1], #16
str q29, [sp, #16]
eor v4.16b, v4.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #5, 0f

ld1 {v5.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 16]
ld1 {v5.16b}, [x1], #16
str q29, [sp, #32]
eor v5.16b, v5.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #6, 0f

ld1 {v6.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 32]
ld1 {v6.16b}, [x1], #16
str q29, [sp, #48]
eor v6.16b, v6.16b, v29.16b
next_tweak v29, v29, v30, v31
tbnz x6, #7, 0f

ld1 {v7.16b}, [x20], #16
str q29, [sp, #.Lframe_local_offset + 48]
ld1 {v7.16b}, [x1], #16
str q29, [sp, #64]
eor v7.16b, v7.16b, v29.16b
next_tweak v29, v29, v30, v31

0: mov bskey, x21
mov rounds, x22
br x16
SYM_FUNC_END(__xts_crypt8)
0: mov bskey, x2
mov rounds, x3
br x7
ENDPROC(__xts_crypt8)

.macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
frame_push 6, 64
stp x29, x30, [sp, #-80]!
mov x29, sp

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
ldr q30, .Lxts_mul_x
ld1 {v25.16b}, [x5]

movi v30.2s, #0x1
movi v25.2s, #0x87
uzp1 v30.4s, v30.4s, v25.4s
ld1 {v25.16b}, [x24]

99: adr x16, \do8
99: adr x7, \do8
bl __xts_crypt8

ldp q16, q17, [sp, #.Lframe_local_offset]
ldp q18, q19, [sp, #.Lframe_local_offset + 32]
ldp q16, q17, [sp, #16]
ldp q18, q19, [sp, #48]

eor \o0\().16b, \o0\().16b, v25.16b
eor \o1\().16b, \o1\().16b, v26.16b
eor \o2\().16b, \o2\().16b, v27.16b
eor \o3\().16b, \o3\().16b, v28.16b

st1 {\o0\().16b}, [x19], #16
st1 {\o0\().16b}, [x0], #16
mov v25.16b, v26.16b
tbnz x6, #1, 1f
st1 {\o1\().16b}, [x19], #16
st1 {\o1\().16b}, [x0], #16
mov v25.16b, v27.16b
tbnz x6, #2, 1f
st1 {\o2\().16b}, [x19], #16
st1 {\o2\().16b}, [x0], #16
mov v25.16b, v28.16b
tbnz x6, #3, 1f
st1 {\o3\().16b}, [x19], #16
st1 {\o3\().16b}, [x0], #16
mov v25.16b, v29.16b
tbnz x6, #4, 1f

@@ -836,31 +820,28 @@ SYM_FUNC_END(__xts_crypt8)
eor \o6\().16b, \o6\().16b, v18.16b
eor \o7\().16b, \o7\().16b, v19.16b

st1 {\o4\().16b}, [x19], #16
st1 {\o4\().16b}, [x0], #16
tbnz x6, #5, 1f
st1 {\o5\().16b}, [x19], #16
st1 {\o5\().16b}, [x0], #16
tbnz x6, #6, 1f
st1 {\o6\().16b}, [x19], #16
st1 {\o6\().16b}, [x0], #16
tbnz x6, #7, 1f
st1 {\o7\().16b}, [x19], #16
st1 {\o7\().16b}, [x0], #16

cbz x23, 1f
st1 {v25.16b}, [x24]
cbnz x4, 99b

b 99b

1: st1 {v25.16b}, [x24]
frame_pop
1: st1 {v25.16b}, [x5]
ldp x29, x30, [sp], #80
ret
.endm

SYM_FUNC_START(aesbs_xts_encrypt)
ENTRY(aesbs_xts_encrypt)
__xts_crypt aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
SYM_FUNC_END(aesbs_xts_encrypt)
ENDPROC(aesbs_xts_encrypt)

SYM_FUNC_START(aesbs_xts_decrypt)
ENTRY(aesbs_xts_decrypt)
__xts_crypt aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
SYM_FUNC_END(aesbs_xts_decrypt)
ENDPROC(aesbs_xts_decrypt)

.macro next_ctr, v
mov \v\().d[1], x8
@@ -874,32 +855,25 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
 * int rounds, int blocks, u8 iv[], u8 final[])
 */
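
aesbs_ctr_encrypt keeps the IV as two 64-bit halves so the 128-bit big-endian counter can be bumped with an adds/adc pair after byte-swapping (the ldp/rev/adds/adc sequence below). The same increment in portable C:

#include <stdint.h>

/* 128-bit big-endian counter increment via two 64-bit halves,
 * mirroring the ldp/rev/adds/adc sequence in aesbs_ctr_encrypt */
static void ctr128_inc(uint8_t iv[16])
{
	uint64_t hi = 0, lo = 0;

	for (int i = 0; i < 8; i++) {		/* big-endian load */
		hi = (hi << 8) | iv[i];
		lo = (lo << 8) | iv[i + 8];
	}
	if (++lo == 0)				/* adds x8, x8, #1 */
		hi++;				/* adc x7, x7, xzr */
	for (int i = 7; i >= 0; i--) {		/* big-endian store */
		iv[i] = (uint8_t)hi; hi >>= 8;
		iv[i + 8] = (uint8_t)lo; lo >>= 8;
	}
}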
SYM_FUNC_START(aesbs_ctr_encrypt)
frame_push 8
ENTRY(aesbs_ctr_encrypt)
stp x29, x30, [sp, #-16]!
mov x29, sp

mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
cmp x6, #0
cset x10, ne
add x4, x4, x10 // do one extra block if final

cmp x25, #0
cset x26, ne
add x23, x23, x26 // do one extra block if final

ldp x7, x8, [x24]
ld1 {v0.16b}, [x24]
ldp x7, x8, [x5]
ld1 {v0.16b}, [x5]
CPU_LE( rev x7, x7 )
CPU_LE( rev x8, x8 )
adds x8, x8, #1
adc x7, x7, xzr

99: mov x9, #1
lsl x9, x9, x23
subs w23, w23, #8
csel x23, x23, xzr, pl
lsl x9, x9, x4
subs w4, w4, #8
csel x4, x4, xzr, pl
csel x9, x9, xzr, le

tbnz x9, #1, 0f
@@ -917,88 +891,85 @@ CPU_LE( rev x8, x8 )
tbnz x9, #7, 0f
next_ctr v7

0: mov bskey, x21
mov rounds, x22
0: mov bskey, x2
mov rounds, x3
bl aesbs_encrypt8

lsr x9, x9, x26 // disregard the extra block
lsr x9, x9, x10 // disregard the extra block
tbnz x9, #0, 0f

ld1 {v8.16b}, [x20], #16
ld1 {v8.16b}, [x1], #16
eor v0.16b, v0.16b, v8.16b
st1 {v0.16b}, [x19], #16
st1 {v0.16b}, [x0], #16
tbnz x9, #1, 1f

ld1 {v9.16b}, [x20], #16
ld1 {v9.16b}, [x1], #16
eor v1.16b, v1.16b, v9.16b
st1 {v1.16b}, [x19], #16
st1 {v1.16b}, [x0], #16
tbnz x9, #2, 2f

ld1 {v10.16b}, [x20], #16
ld1 {v10.16b}, [x1], #16
eor v4.16b, v4.16b, v10.16b
st1 {v4.16b}, [x19], #16
st1 {v4.16b}, [x0], #16
tbnz x9, #3, 3f

ld1 {v11.16b}, [x20], #16
ld1 {v11.16b}, [x1], #16
eor v6.16b, v6.16b, v11.16b
st1 {v6.16b}, [x19], #16
st1 {v6.16b}, [x0], #16
tbnz x9, #4, 4f

ld1 {v12.16b}, [x20], #16
ld1 {v12.16b}, [x1], #16
eor v3.16b, v3.16b, v12.16b
st1 {v3.16b}, [x19], #16
st1 {v3.16b}, [x0], #16
tbnz x9, #5, 5f

ld1 {v13.16b}, [x20], #16
ld1 {v13.16b}, [x1], #16
eor v7.16b, v7.16b, v13.16b
st1 {v7.16b}, [x19], #16
st1 {v7.16b}, [x0], #16
tbnz x9, #6, 6f

ld1 {v14.16b}, [x20], #16
ld1 {v14.16b}, [x1], #16
eor v2.16b, v2.16b, v14.16b
st1 {v2.16b}, [x19], #16
st1 {v2.16b}, [x0], #16
tbnz x9, #7, 7f

ld1 {v15.16b}, [x20], #16
ld1 {v15.16b}, [x1], #16
eor v5.16b, v5.16b, v15.16b
st1 {v5.16b}, [x19], #16
st1 {v5.16b}, [x0], #16

8: next_ctr v0
st1 {v0.16b}, [x24]
cbz x23, .Lctr_done
cbnz x4, 99b

b 99b

.Lctr_done:
frame_pop
st1 {v0.16b}, [x5]
ldp x29, x30, [sp], #16
ret

/*
 * If we are handling the tail of the input (x6 != NULL), return the
 * final keystream block back to the caller.
 */
0: cbz x25, 8b
st1 {v0.16b}, [x25]
0: cbz x6, 8b
st1 {v0.16b}, [x6]
b 8b
1: cbz x25, 8b
st1 {v1.16b}, [x25]
1: cbz x6, 8b
st1 {v1.16b}, [x6]
b 8b
2: cbz x25, 8b
st1 {v4.16b}, [x25]
2: cbz x6, 8b
st1 {v4.16b}, [x6]
b 8b
3: cbz x25, 8b
st1 {v6.16b}, [x25]
3: cbz x6, 8b
st1 {v6.16b}, [x6]
b 8b
4: cbz x25, 8b
st1 {v3.16b}, [x25]
4: cbz x6, 8b
st1 {v3.16b}, [x6]
b 8b
5: cbz x25, 8b
st1 {v7.16b}, [x25]
5: cbz x6, 8b
st1 {v7.16b}, [x6]
b 8b
6: cbz x25, 8b
st1 {v2.16b}, [x25]
6: cbz x6, 8b
st1 {v2.16b}, [x6]
b 8b
7: cbz x25, 8b
st1 {v5.16b}, [x25]
7: cbz x6, 8b
st1 {v5.16b}, [x6]
b 8b
SYM_FUNC_END(aesbs_ctr_encrypt)
ENDPROC(aesbs_ctr_encrypt)
@ -11,13 +11,13 @@
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/aes.h>
|
||||
#include <crypto/ctr.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
#include "aes-ctr-fallback.h"
|
||||
|
||||
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
||||
@ -49,12 +49,6 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks);
|
||||
asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
|
||||
int rounds, int blocks, u8 iv[]);
|
||||
asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
|
||||
u32 const rk1[], int rounds, int bytes,
|
||||
u32 const rk2[], u8 iv[], int first);
|
||||
asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[],
|
||||
u32 const rk1[], int rounds, int bytes,
|
||||
u32 const rk2[], u8 iv[], int first);
|
||||
|
||||
struct aesbs_ctx {
|
||||
u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32];
|
||||
@ -74,7 +68,6 @@ struct aesbs_ctr_ctx {
|
||||
struct aesbs_xts_ctx {
|
||||
struct aesbs_ctx key;
|
||||
u32 twkey[AES_MAX_KEYLENGTH_U32];
|
||||
struct crypto_aes_ctx cts;
|
||||
};
|
||||
|
||||
static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
@ -84,7 +77,7 @@ static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = aes_expandkey(&rk, in_key, key_len);
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -106,8 +99,9 @@ static int __ecb_crypt(struct skcipher_request *req,
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
@ -115,13 +109,12 @@ static int __ecb_crypt(struct skcipher_request *req,
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
kernel_neon_begin();
|
||||
fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
|
||||
ctx->rounds, blocks);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -143,7 +136,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
struct crypto_aes_ctx rk;
|
||||
int err;
|
||||
|
||||
err = aes_expandkey(&rk, in_key, key_len);
|
||||
err = crypto_aes_expand_key(&rk, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -154,7 +147,6 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
kernel_neon_begin();
|
||||
aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds);
|
||||
kernel_neon_end();
|
||||
memzero_explicit(&rk, sizeof(rk));
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -166,19 +158,19 @@ static int cbc_encrypt(struct skcipher_request *req)
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
/* fall back to the non-bitsliced NEON implementation */
|
||||
kernel_neon_begin();
|
||||
neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->enc, ctx->key.rounds, blocks,
|
||||
walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -189,8 +181,9 @@ static int cbc_decrypt(struct skcipher_request *req)
|
||||
struct skcipher_walk walk;
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
@ -198,14 +191,13 @@ static int cbc_decrypt(struct skcipher_request *req)
|
||||
blocks = round_down(blocks,
|
||||
walk.stride / AES_BLOCK_SIZE);
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->key.rk, ctx->key.rounds, blocks,
|
||||
walk.iv);
|
||||
kernel_neon_end();
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -216,7 +208,7 @@ static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
int err;
|
||||
|
||||
err = aes_expandkey(&ctx->fallback, in_key, key_len);
|
||||
err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -237,8 +229,9 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
u8 buf[AES_BLOCK_SIZE];
|
||||
int err;
|
||||
|
||||
err = skcipher_walk_virt(&walk, req, false);
|
||||
err = skcipher_walk_virt(&walk, req, true);
|
||||
|
||||
kernel_neon_begin();
|
||||
while (walk.nbytes > 0) {
|
||||
unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
|
||||
@ -249,10 +242,8 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
final = NULL;
|
||||
}
|
||||
|
||||
kernel_neon_begin();
|
||||
aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
ctx->rk, ctx->rounds, blocks, walk.iv, final);
|
||||
kernel_neon_end();
|
||||
|
||||
if (final) {
|
||||
u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
|
||||
@ -267,6 +258,8 @@ static int ctr_encrypt(struct skcipher_request *req)
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes - blocks * AES_BLOCK_SIZE);
|
||||
}
|
||||
kernel_neon_end();
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
@ -282,11 +275,7 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
return err;
|
||||
|
||||
key_len /= 2;
|
||||
err = aes_expandkey(&ctx->cts, in_key, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = aes_expandkey(&rk, in_key + key_len, key_len);
|
||||
err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
@ -295,142 +284,60 @@ static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
|
||||
return aesbs_setkey(tfm, in_key, key_len);
|
||||
}
|
||||
|
||||
static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
|
||||
{
|
||||
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
unsigned long flags;
|
||||
|
||||
/*
|
||||
* Temporarily disable interrupts to avoid races where
|
||||
* cachelines are evicted when the CPU is interrupted
|
||||
* to do something else.
|
||||
*/
|
||||
local_irq_save(flags);
|
||||
aes_encrypt(&ctx->fallback, dst, src);
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
|
||||
static int ctr_encrypt_sync(struct skcipher_request *req)
|
||||
{
|
||||
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
|
||||
struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
|
||||
|
||||
if (!may_use_simd())
|
||||
return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
|
||||
return aes_ctr_encrypt_fallback(&ctx->fallback, req);
|
||||
|
||||
return ctr_encrypt(req);
|
||||
}
|
||||
|
||||
static int __xts_crypt(struct skcipher_request *req, bool encrypt,
|
||||
static int __xts_crypt(struct skcipher_request *req,
		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
				  int rounds, int blocks, u8 iv[]))
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
	int tail = req->cryptlen % (8 * AES_BLOCK_SIZE);
	struct scatterlist sg_src[2], sg_dst[2];
	struct skcipher_request subreq;
	struct scatterlist *src, *dst;
	struct skcipher_walk walk;
	int nbytes, err;
	int first = 1;
	u8 *out, *in;
	int err;

	if (req->cryptlen < AES_BLOCK_SIZE)
		return -EINVAL;

	/* ensure that the cts tail is covered by a single step */
	if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) {
		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
					      AES_BLOCK_SIZE) - 2;

		skcipher_request_set_tfm(&subreq, tfm);
		skcipher_request_set_callback(&subreq,
					      skcipher_request_flags(req),
					      NULL, NULL);
		skcipher_request_set_crypt(&subreq, req->src, req->dst,
					   xts_blocks * AES_BLOCK_SIZE,
					   req->iv);
		req = &subreq;
	} else {
		tail = 0;
	}

	err = skcipher_walk_virt(&walk, req, false);
	err = skcipher_walk_virt(&walk, req, true);
	if (err)
		return err;

	kernel_neon_begin();

	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey,
			     ctx->key.rounds, 1);

	while (walk.nbytes >= AES_BLOCK_SIZE) {
		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;

		if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
		if (walk.nbytes < walk.total)
			blocks = round_down(blocks,
					    walk.stride / AES_BLOCK_SIZE);

		out = walk.dst.virt.addr;
		in = walk.src.virt.addr;
		nbytes = walk.nbytes;

		kernel_neon_begin();
		if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
			if (first)
				neon_aes_ecb_encrypt(walk.iv, walk.iv,
						     ctx->twkey,
						     ctx->key.rounds, 1);
			first = 0;

			fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
			   walk.iv);

			out += blocks * AES_BLOCK_SIZE;
			in += blocks * AES_BLOCK_SIZE;
			nbytes -= blocks * AES_BLOCK_SIZE;
		}

		if (walk.nbytes == walk.total && nbytes > 0)
			goto xts_tail;

		kernel_neon_end();
		err = skcipher_walk_done(&walk, nbytes);
		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
		   ctx->key.rounds, blocks, walk.iv);
		err = skcipher_walk_done(&walk,
					 walk.nbytes - blocks * AES_BLOCK_SIZE);
	}

	if (err || likely(!tail))
		return err;

	/* handle ciphertext stealing */
	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
	if (req->dst != req->src)
		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);

	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
				   req->iv);

	err = skcipher_walk_virt(&walk, req, false);
	if (err)
		return err;

	out = walk.dst.virt.addr;
	in = walk.src.virt.addr;
	nbytes = walk.nbytes;

	kernel_neon_begin();
xts_tail:
	if (encrypt)
		neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
				     nbytes, ctx->twkey, walk.iv, first ?: 2);
	else
		neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
				     nbytes, ctx->twkey, walk.iv, first ?: 2);
	kernel_neon_end();

	return skcipher_walk_done(&walk, 0);
	return err;
}

static int xts_encrypt(struct skcipher_request *req)
{
	return __xts_crypt(req, true, aesbs_xts_encrypt);
	return __xts_crypt(req, aesbs_xts_encrypt);
}

static int xts_decrypt(struct skcipher_request *req)
{
	return __xts_crypt(req, false, aesbs_xts_decrypt);
	return __xts_crypt(req, aesbs_xts_decrypt);
}

static struct skcipher_alg aes_algs[] = { {
@@ -537,7 +444,7 @@ static int __init aes_init(void)
	int err;
	int i;

	if (!cpu_have_named_feature(ASIMD))
	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs));
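For reference, a minimal user-space model of the ciphertext-stealing split computed in __xts_crypt() above (an illustrative sketch only, not kernel code; cts_split() and its printf are hypothetical):

	#include <stdio.h>

	#define AES_BLOCK_SIZE 16

	/* Mirror of the subrequest split: when the tail of the 8-block
	 * stride is a partial block, the last two blocks are peeled off
	 * and handled by the CTS tail path. */
	static void cts_split(int cryptlen)
	{
		int tail = cryptlen % (8 * AES_BLOCK_SIZE);
		int bulk = cryptlen;

		if (tail > 0 && tail < AES_BLOCK_SIZE) {
			int xts_blocks = (cryptlen + AES_BLOCK_SIZE - 1) /
					 AES_BLOCK_SIZE - 2;
			bulk = xts_blocks * AES_BLOCK_SIZE;
		}
		printf("bulk=%d cts_tail=%d\n", bulk, cryptlen - bulk);
	}

	int main(void)
	{
		cts_split(140);	/* -> bulk=112, cts_tail=28 (one block + 12) */
		return 0;
	}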
@@ -1,199 +0,0 @@
/*
 * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
 * including ChaCha20 (RFC7539)
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha_block_xor_neon(u32 *state, u8 *dst, const u8 *src,
				      int nrounds);
asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
				       int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);

static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
			  int bytes, int nrounds)
{
	while (bytes > 0) {
		int l = min(bytes, CHACHA_BLOCK_SIZE * 5);

		if (l <= CHACHA_BLOCK_SIZE) {
			u8 buf[CHACHA_BLOCK_SIZE];

			memcpy(buf, src, l);
			chacha_block_xor_neon(state, buf, buf, nrounds);
			memcpy(dst, buf, l);
			state[12] += 1;
			break;
		}
		chacha_4block_xor_neon(state, dst, src, nrounds, l);
		bytes -= CHACHA_BLOCK_SIZE * 5;
		src += CHACHA_BLOCK_SIZE * 5;
		dst += CHACHA_BLOCK_SIZE * 5;
		state[12] += 5;
	}
}

static int chacha_neon_stream_xor(struct skcipher_request *req,
				  const struct chacha_ctx *ctx, const u8 *iv)
{
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	crypto_chacha_init(state, ctx, iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = rounddown(nbytes, walk.stride);

		kernel_neon_begin();
		chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
			      nbytes, ctx->nrounds);
		kernel_neon_end();
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}

	return err;
}

static int chacha_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);

	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
		return crypto_chacha_crypt(req);

	return chacha_neon_stream_xor(req, ctx, req->iv);
}

static int xchacha_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct chacha_ctx subctx;
	u32 state[16];
	u8 real_iv[16];

	if (req->cryptlen <= CHACHA_BLOCK_SIZE || !may_use_simd())
		return crypto_xchacha_crypt(req);

	crypto_chacha_init(state, ctx, req->iv);

	kernel_neon_begin();
	hchacha_block_neon(state, subctx.key, ctx->nrounds);
	kernel_neon_end();
	subctx.nrounds = ctx->nrounds;

	memcpy(&real_iv[0], req->iv + 24, 8);
	memcpy(&real_iv[8], req->iv + 16, 8);
	return chacha_neon_stream_xor(req, &subctx, real_iv);
}

static struct skcipher_alg algs[] = {
	{
		.base.cra_name		= "chacha20",
		.base.cra_driver_name	= "chacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= CHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= crypto_chacha20_setkey,
		.encrypt		= chacha_neon,
		.decrypt		= chacha_neon,
	}, {
		.base.cra_name		= "xchacha20",
		.base.cra_driver_name	= "xchacha20-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= crypto_chacha20_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}, {
		.base.cra_name		= "xchacha12",
		.base.cra_driver_name	= "xchacha12-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct chacha_ctx),
		.base.cra_module	= THIS_MODULE,

		.min_keysize		= CHACHA_KEY_SIZE,
		.max_keysize		= CHACHA_KEY_SIZE,
		.ivsize			= XCHACHA_IV_SIZE,
		.chunksize		= CHACHA_BLOCK_SIZE,
		.walksize		= 5 * CHACHA_BLOCK_SIZE,
		.setkey			= crypto_chacha12_setkey,
		.encrypt		= xchacha_neon,
		.decrypt		= xchacha_neon,
	}
};

static int __init chacha_simd_mod_init(void)
{
	if (!cpu_have_named_feature(ASIMD))
		return -ENODEV;

	return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}

static void __exit chacha_simd_mod_fini(void)
{
	crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}

module_init(chacha_simd_mod_init);
module_exit(chacha_simd_mod_fini);

MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
MODULE_ALIAS_CRYPTO("chacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha20");
MODULE_ALIAS_CRYPTO("xchacha20-neon");
MODULE_ALIAS_CRYPTO("xchacha12");
MODULE_ALIAS_CRYPTO("xchacha12-neon");
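A self-contained sketch of the XChaCha IV rearrangement performed by xchacha_neon() above (illustrative only; xchacha_real_iv() is a hypothetical helper, not part of the diff):

	#include <stdint.h>
	#include <string.h>

	/* Rearrange a 32-byte XChaCha IV into the 16-byte ChaCha IV that
	 * chacha_neon_stream_xor() consumes: the block counter comes from
	 * iv[24..31] and the nonce from iv[16..23]; the first 16 IV bytes
	 * are used up by the HChaCha subkey derivation. */
	static void xchacha_real_iv(const uint8_t iv[32], uint8_t real_iv[16])
	{
		memcpy(&real_iv[0], iv + 24, 8);
		memcpy(&real_iv[8], iv + 16, 8);
	}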
@@ -1,13 +1,13 @@
/*
 * ChaCha/XChaCha NEON helper functions
 * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
 *
 * Copyright (C) 2016-2018 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Originally based on:
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, x64 SSSE3 functions
 *
 * Copyright (C) 2015 Martin Willi
@@ -19,27 +19,29 @@
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

	.text
	.align		6

/*
 * chacha_permute - permute one block
 *
 * Permute one 64-byte block where the state matrix is stored in the four NEON
 * registers v0-v3. It performs matrix operations on four words in parallel,
 * but requires shuffling to rearrange the words after each round.
 *
 * The round count is given in w3.
 *
 * Clobbers: w3, x10, v4, v12
 */
SYM_FUNC_START_LOCAL(chacha_permute)
ENTRY(chacha20_block_xor_neon)
	// x0: Input state matrix, s
	// x1: 1 data block output, o
	// x2: 1 data block input, i

	adr_l		x10, ROT8
	ld1		{v12.4s}, [x10]
	//
	// This function encrypts one ChaCha20 block by loading the state matrix
	// in four NEON registers. It performs matrix operation on four words in
	// parallel, but requires shuffling to rearrange the words after each
	// round.
	//

	// x0..3 = s0..3
	adr		x3, ROT8
	ld1		{v0.4s-v3.4s}, [x0]
	ld1		{v8.4s-v11.4s}, [x0]
	ld1		{v12.4s}, [x3]

	mov		x3, #10

.Ldoubleround:
	// x0 += x1, x3 = rotl32(x3 ^ x0, 16)
@@ -100,27 +102,9 @@ SYM_FUNC_START_LOCAL(chacha_permute)
	// x3 = shuffle32(x3, MASK(0, 3, 2, 1))
	ext		v3.16b, v3.16b, v3.16b, #4

	subs		w3, w3, #2
	subs		x3, x3, #1
	b.ne		.Ldoubleround

	ret
SYM_FUNC_END(chacha_permute)

SYM_FUNC_START(chacha_block_xor_neon)
	// x0: Input state matrix, s
	// x1: 1 data block output, o
	// x2: 1 data block input, i
	// w3: nrounds

	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	// x0..3 = s0..3
	ld1		{v0.4s-v3.4s}, [x0]
	ld1		{v8.4s-v11.4s}, [x0]

	bl		chacha_permute

	ld1		{v4.16b-v7.16b}, [x2]

	// o0 = i0 ^ (x0 + s0)
@@ -141,155 +125,71 @@ SYM_FUNC_START(chacha_block_xor_neon)

	st1		{v0.16b-v3.16b}, [x1]

	ldp		x29, x30, [sp], #16
	ret
SYM_FUNC_END(chacha_block_xor_neon)

SYM_FUNC_START(hchacha_block_neon)
	// x0: Input state matrix, s
	// x1: output (8 32-bit words)
	// w2: nrounds

	stp		x29, x30, [sp, #-16]!
	mov		x29, sp

	ld1		{v0.4s-v3.4s}, [x0]

	mov		w3, w2
	bl		chacha_permute

	st1		{v0.4s}, [x1], #16
	st1		{v3.4s}, [x1]

	ldp		x29, x30, [sp], #16
	ret
SYM_FUNC_END(hchacha_block_neon)

	a0		.req	w12
	a1		.req	w13
	a2		.req	w14
	a3		.req	w15
	a4		.req	w16
	a5		.req	w17
	a6		.req	w19
	a7		.req	w20
	a8		.req	w21
	a9		.req	w22
	a10		.req	w23
	a11		.req	w24
	a12		.req	w25
	a13		.req	w26
	a14		.req	w27
	a15		.req	w28
ENDPROC(chacha20_block_xor_neon)

	.align		6
SYM_FUNC_START(chacha_4block_xor_neon)
	frame_push	10

ENTRY(chacha20_4block_xor_neon)
	// x0: Input state matrix, s
	// x1: 4 data blocks output, o
	// x2: 4 data blocks input, i
	// w3: nrounds
	// x4: byte count

	adr_l		x10, .Lpermute
	and		x5, x4, #63
	add		x10, x10, x5

	//
	// This function encrypts four consecutive ChaCha blocks by loading
	// This function encrypts four consecutive ChaCha20 blocks by loading
	// the state matrix in NEON registers four times. The algorithm performs
	// each operation on the corresponding word of each state matrix, hence
	// requires no word shuffling. For final XORing step we transpose the
	// matrix by interleaving 32- and then 64-bit words, which allows us to
	// do XOR in NEON registers.
	//
	// At the same time, a fifth block is encrypted in parallel using
	// scalar registers
	//
	adr_l		x9, CTRINC		// ... and ROT8
	ld1		{v30.4s-v31.4s}, [x9]
	adr		x3, CTRINC		// ... and ROT8
	ld1		{v30.4s-v31.4s}, [x3]

	// x0..15[0-3] = s0..3[0..3]
	add		x8, x0, #16
	ld4r		{ v0.4s- v3.4s}, [x0]
	ld4r		{ v4.4s- v7.4s}, [x8], #16
	ld4r		{ v8.4s-v11.4s}, [x8], #16
	ld4r		{v12.4s-v15.4s}, [x8]
	mov		x4, x0
	ld4r		{ v0.4s- v3.4s}, [x4], #16
	ld4r		{ v4.4s- v7.4s}, [x4], #16
	ld4r		{ v8.4s-v11.4s}, [x4], #16
	ld4r		{v12.4s-v15.4s}, [x4]

	mov		a0, v0.s[0]
	mov		a1, v1.s[0]
	mov		a2, v2.s[0]
	mov		a3, v3.s[0]
	mov		a4, v4.s[0]
	mov		a5, v5.s[0]
	mov		a6, v6.s[0]
	mov		a7, v7.s[0]
	mov		a8, v8.s[0]
	mov		a9, v9.s[0]
	mov		a10, v10.s[0]
	mov		a11, v11.s[0]
	mov		a12, v12.s[0]
	mov		a13, v13.s[0]
	mov		a14, v14.s[0]
	mov		a15, v15.s[0]

	// x12 += counter values 1-4
	// x12 += counter values 0-3
	add		v12.4s, v12.4s, v30.4s

	mov		x3, #10

.Ldoubleround4:
	// x0 += x4, x12 = rotl32(x12 ^ x0, 16)
	// x1 += x5, x13 = rotl32(x13 ^ x1, 16)
	// x2 += x6, x14 = rotl32(x14 ^ x2, 16)
	// x3 += x7, x15 = rotl32(x15 ^ x3, 16)
	add		v0.4s, v0.4s, v4.4s
	add		a0, a0, a4
	add		v1.4s, v1.4s, v5.4s
	add		a1, a1, a5
	add		v2.4s, v2.4s, v6.4s
	add		a2, a2, a6
	add		v3.4s, v3.4s, v7.4s
	add		a3, a3, a7

	eor		v12.16b, v12.16b, v0.16b
	eor		a12, a12, a0
	eor		v13.16b, v13.16b, v1.16b
	eor		a13, a13, a1
	eor		v14.16b, v14.16b, v2.16b
	eor		a14, a14, a2
	eor		v15.16b, v15.16b, v3.16b
	eor		a15, a15, a3

	rev32		v12.8h, v12.8h
	ror		a12, a12, #16
	rev32		v13.8h, v13.8h
	ror		a13, a13, #16
	rev32		v14.8h, v14.8h
	ror		a14, a14, #16
	rev32		v15.8h, v15.8h
	ror		a15, a15, #16

	// x8 += x12, x4 = rotl32(x4 ^ x8, 12)
	// x9 += x13, x5 = rotl32(x5 ^ x9, 12)
	// x10 += x14, x6 = rotl32(x6 ^ x10, 12)
	// x11 += x15, x7 = rotl32(x7 ^ x11, 12)
	add		v8.4s, v8.4s, v12.4s
	add		a8, a8, a12
	add		v9.4s, v9.4s, v13.4s
	add		a9, a9, a13
	add		v10.4s, v10.4s, v14.4s
	add		a10, a10, a14
	add		v11.4s, v11.4s, v15.4s
	add		a11, a11, a15

	eor		v16.16b, v4.16b, v8.16b
	eor		a4, a4, a8
	eor		v17.16b, v5.16b, v9.16b
	eor		a5, a5, a9
	eor		v18.16b, v6.16b, v10.16b
	eor		a6, a6, a10
	eor		v19.16b, v7.16b, v11.16b
	eor		a7, a7, a11

	shl		v4.4s, v16.4s, #12
	shl		v5.4s, v17.4s, #12
@@ -297,66 +197,42 @@ SYM_FUNC_START(chacha_4block_xor_neon)
	shl		v7.4s, v19.4s, #12

	sri		v4.4s, v16.4s, #20
	ror		a4, a4, #20
	sri		v5.4s, v17.4s, #20
	ror		a5, a5, #20
	sri		v6.4s, v18.4s, #20
	ror		a6, a6, #20
	sri		v7.4s, v19.4s, #20
	ror		a7, a7, #20

	// x0 += x4, x12 = rotl32(x12 ^ x0, 8)
	// x1 += x5, x13 = rotl32(x13 ^ x1, 8)
	// x2 += x6, x14 = rotl32(x14 ^ x2, 8)
	// x3 += x7, x15 = rotl32(x15 ^ x3, 8)
	add		v0.4s, v0.4s, v4.4s
	add		a0, a0, a4
	add		v1.4s, v1.4s, v5.4s
	add		a1, a1, a5
	add		v2.4s, v2.4s, v6.4s
	add		a2, a2, a6
	add		v3.4s, v3.4s, v7.4s
	add		a3, a3, a7

	eor		v12.16b, v12.16b, v0.16b
	eor		a12, a12, a0
	eor		v13.16b, v13.16b, v1.16b
	eor		a13, a13, a1
	eor		v14.16b, v14.16b, v2.16b
	eor		a14, a14, a2
	eor		v15.16b, v15.16b, v3.16b
	eor		a15, a15, a3

	tbl		v12.16b, {v12.16b}, v31.16b
	ror		a12, a12, #24
	tbl		v13.16b, {v13.16b}, v31.16b
	ror		a13, a13, #24
	tbl		v14.16b, {v14.16b}, v31.16b
	ror		a14, a14, #24
	tbl		v15.16b, {v15.16b}, v31.16b
	ror		a15, a15, #24

	// x8 += x12, x4 = rotl32(x4 ^ x8, 7)
	// x9 += x13, x5 = rotl32(x5 ^ x9, 7)
	// x10 += x14, x6 = rotl32(x6 ^ x10, 7)
	// x11 += x15, x7 = rotl32(x7 ^ x11, 7)
	add		v8.4s, v8.4s, v12.4s
	add		a8, a8, a12
	add		v9.4s, v9.4s, v13.4s
	add		a9, a9, a13
	add		v10.4s, v10.4s, v14.4s
	add		a10, a10, a14
	add		v11.4s, v11.4s, v15.4s
	add		a11, a11, a15

	eor		v16.16b, v4.16b, v8.16b
	eor		a4, a4, a8
	eor		v17.16b, v5.16b, v9.16b
	eor		a5, a5, a9
	eor		v18.16b, v6.16b, v10.16b
	eor		a6, a6, a10
	eor		v19.16b, v7.16b, v11.16b
	eor		a7, a7, a11

	shl		v4.4s, v16.4s, #7
	shl		v5.4s, v17.4s, #7
@@ -364,66 +240,42 @@ SYM_FUNC_START(chacha_4block_xor_neon)
	shl		v7.4s, v19.4s, #7

	sri		v4.4s, v16.4s, #25
	ror		a4, a4, #25
	sri		v5.4s, v17.4s, #25
	ror		a5, a5, #25
	sri		v6.4s, v18.4s, #25
	ror		a6, a6, #25
	sri		v7.4s, v19.4s, #25
	ror		a7, a7, #25

	// x0 += x5, x15 = rotl32(x15 ^ x0, 16)
	// x1 += x6, x12 = rotl32(x12 ^ x1, 16)
	// x2 += x7, x13 = rotl32(x13 ^ x2, 16)
	// x3 += x4, x14 = rotl32(x14 ^ x3, 16)
	add		v0.4s, v0.4s, v5.4s
	add		a0, a0, a5
	add		v1.4s, v1.4s, v6.4s
	add		a1, a1, a6
	add		v2.4s, v2.4s, v7.4s
	add		a2, a2, a7
	add		v3.4s, v3.4s, v4.4s
	add		a3, a3, a4

	eor		v15.16b, v15.16b, v0.16b
	eor		a15, a15, a0
	eor		v12.16b, v12.16b, v1.16b
	eor		a12, a12, a1
	eor		v13.16b, v13.16b, v2.16b
	eor		a13, a13, a2
	eor		v14.16b, v14.16b, v3.16b
	eor		a14, a14, a3

	rev32		v15.8h, v15.8h
	ror		a15, a15, #16
	rev32		v12.8h, v12.8h
	ror		a12, a12, #16
	rev32		v13.8h, v13.8h
	ror		a13, a13, #16
	rev32		v14.8h, v14.8h
	ror		a14, a14, #16

	// x10 += x15, x5 = rotl32(x5 ^ x10, 12)
	// x11 += x12, x6 = rotl32(x6 ^ x11, 12)
	// x8 += x13, x7 = rotl32(x7 ^ x8, 12)
	// x9 += x14, x4 = rotl32(x4 ^ x9, 12)
	add		v10.4s, v10.4s, v15.4s
	add		a10, a10, a15
	add		v11.4s, v11.4s, v12.4s
	add		a11, a11, a12
	add		v8.4s, v8.4s, v13.4s
	add		a8, a8, a13
	add		v9.4s, v9.4s, v14.4s
	add		a9, a9, a14

	eor		v16.16b, v5.16b, v10.16b
	eor		a5, a5, a10
	eor		v17.16b, v6.16b, v11.16b
	eor		a6, a6, a11
	eor		v18.16b, v7.16b, v8.16b
	eor		a7, a7, a8
	eor		v19.16b, v4.16b, v9.16b
	eor		a4, a4, a9

	shl		v5.4s, v16.4s, #12
	shl		v6.4s, v17.4s, #12
@@ -431,66 +283,42 @@ SYM_FUNC_START(chacha_4block_xor_neon)
	shl		v4.4s, v19.4s, #12

	sri		v5.4s, v16.4s, #20
	ror		a5, a5, #20
	sri		v6.4s, v17.4s, #20
	ror		a6, a6, #20
	sri		v7.4s, v18.4s, #20
	ror		a7, a7, #20
	sri		v4.4s, v19.4s, #20
	ror		a4, a4, #20

	// x0 += x5, x15 = rotl32(x15 ^ x0, 8)
	// x1 += x6, x12 = rotl32(x12 ^ x1, 8)
	// x2 += x7, x13 = rotl32(x13 ^ x2, 8)
	// x3 += x4, x14 = rotl32(x14 ^ x3, 8)
	add		v0.4s, v0.4s, v5.4s
	add		a0, a0, a5
	add		v1.4s, v1.4s, v6.4s
	add		a1, a1, a6
	add		v2.4s, v2.4s, v7.4s
	add		a2, a2, a7
	add		v3.4s, v3.4s, v4.4s
	add		a3, a3, a4

	eor		v15.16b, v15.16b, v0.16b
	eor		a15, a15, a0
	eor		v12.16b, v12.16b, v1.16b
	eor		a12, a12, a1
	eor		v13.16b, v13.16b, v2.16b
	eor		a13, a13, a2
	eor		v14.16b, v14.16b, v3.16b
	eor		a14, a14, a3

	tbl		v15.16b, {v15.16b}, v31.16b
	ror		a15, a15, #24
	tbl		v12.16b, {v12.16b}, v31.16b
	ror		a12, a12, #24
	tbl		v13.16b, {v13.16b}, v31.16b
	ror		a13, a13, #24
	tbl		v14.16b, {v14.16b}, v31.16b
	ror		a14, a14, #24

	// x10 += x15, x5 = rotl32(x5 ^ x10, 7)
	// x11 += x12, x6 = rotl32(x6 ^ x11, 7)
	// x8 += x13, x7 = rotl32(x7 ^ x8, 7)
	// x9 += x14, x4 = rotl32(x4 ^ x9, 7)
	add		v10.4s, v10.4s, v15.4s
	add		a10, a10, a15
	add		v11.4s, v11.4s, v12.4s
	add		a11, a11, a12
	add		v8.4s, v8.4s, v13.4s
	add		a8, a8, a13
	add		v9.4s, v9.4s, v14.4s
	add		a9, a9, a14

	eor		v16.16b, v5.16b, v10.16b
	eor		a5, a5, a10
	eor		v17.16b, v6.16b, v11.16b
	eor		a6, a6, a11
	eor		v18.16b, v7.16b, v8.16b
	eor		a7, a7, a8
	eor		v19.16b, v4.16b, v9.16b
	eor		a4, a4, a9

	shl		v5.4s, v16.4s, #7
	shl		v6.4s, v17.4s, #7
@@ -498,15 +326,11 @@ SYM_FUNC_START(chacha_4block_xor_neon)
	shl		v4.4s, v19.4s, #7

	sri		v5.4s, v16.4s, #25
	ror		a5, a5, #25
	sri		v6.4s, v17.4s, #25
	ror		a6, a6, #25
	sri		v7.4s, v18.4s, #25
	ror		a7, a7, #25
	sri		v4.4s, v19.4s, #25
	ror		a4, a4, #25

	subs		w3, w3, #2
	subs		x3, x3, #1
	b.ne		.Ldoubleround4

	ld4r		{v16.4s-v19.4s}, [x0], #16
@@ -520,21 +344,9 @@ SYM_FUNC_START(chacha_4block_xor_neon)
	// x2[0-3] += s0[2]
	// x3[0-3] += s0[3]
	add		v0.4s, v0.4s, v16.4s
	mov		w6, v16.s[0]
	mov		w7, v17.s[0]
	add		v1.4s, v1.4s, v17.4s
	mov		w8, v18.s[0]
	mov		w9, v19.s[0]
	add		v2.4s, v2.4s, v18.4s
	add		a0, a0, w6
	add		a1, a1, w7
	add		v3.4s, v3.4s, v19.4s
	add		a2, a2, w8
	add		a3, a3, w9
CPU_BE(	rev		a0, a0 )
CPU_BE(	rev		a1, a1 )
CPU_BE(	rev		a2, a2 )
CPU_BE(	rev		a3, a3 )

	ld4r		{v24.4s-v27.4s}, [x0], #16
	ld4r		{v28.4s-v31.4s}, [x0]
@@ -544,154 +356,69 @@ CPU_BE( rev a3, a3 )
	// x6[0-3] += s1[2]
	// x7[0-3] += s1[3]
	add		v4.4s, v4.4s, v20.4s
	mov		w6, v20.s[0]
	mov		w7, v21.s[0]
	add		v5.4s, v5.4s, v21.4s
	mov		w8, v22.s[0]
	mov		w9, v23.s[0]
	add		v6.4s, v6.4s, v22.4s
	add		a4, a4, w6
	add		a5, a5, w7
	add		v7.4s, v7.4s, v23.4s
	add		a6, a6, w8
	add		a7, a7, w9
CPU_BE(	rev		a4, a4 )
CPU_BE(	rev		a5, a5 )
CPU_BE(	rev		a6, a6 )
CPU_BE(	rev		a7, a7 )

	// x8[0-3] += s2[0]
	// x9[0-3] += s2[1]
	// x10[0-3] += s2[2]
	// x11[0-3] += s2[3]
	add		v8.4s, v8.4s, v24.4s
	mov		w6, v24.s[0]
	mov		w7, v25.s[0]
	add		v9.4s, v9.4s, v25.4s
	mov		w8, v26.s[0]
	mov		w9, v27.s[0]
	add		v10.4s, v10.4s, v26.4s
	add		a8, a8, w6
	add		a9, a9, w7
	add		v11.4s, v11.4s, v27.4s
	add		a10, a10, w8
	add		a11, a11, w9
CPU_BE(	rev		a8, a8 )
CPU_BE(	rev		a9, a9 )
CPU_BE(	rev		a10, a10 )
CPU_BE(	rev		a11, a11 )

	// x12[0-3] += s3[0]
	// x13[0-3] += s3[1]
	// x14[0-3] += s3[2]
	// x15[0-3] += s3[3]
	add		v12.4s, v12.4s, v28.4s
	mov		w6, v28.s[0]
	mov		w7, v29.s[0]
	add		v13.4s, v13.4s, v29.4s
	mov		w8, v30.s[0]
	mov		w9, v31.s[0]
	add		v14.4s, v14.4s, v30.4s
	add		a12, a12, w6
	add		a13, a13, w7
	add		v15.4s, v15.4s, v31.4s
	add		a14, a14, w8
	add		a15, a15, w9
CPU_BE(	rev		a12, a12 )
CPU_BE(	rev		a13, a13 )
CPU_BE(	rev		a14, a14 )
CPU_BE(	rev		a15, a15 )

	// interleave 32-bit words in state n, n+1
	ldp		w6, w7, [x2], #64
	zip1		v16.4s, v0.4s, v1.4s
	ldp		w8, w9, [x2, #-56]
	eor		a0, a0, w6
	zip2		v17.4s, v0.4s, v1.4s
	eor		a1, a1, w7
	zip1		v18.4s, v2.4s, v3.4s
	eor		a2, a2, w8
	zip2		v19.4s, v2.4s, v3.4s
	eor		a3, a3, w9
	ldp		w6, w7, [x2, #-48]
	zip1		v20.4s, v4.4s, v5.4s
	ldp		w8, w9, [x2, #-40]
	eor		a4, a4, w6
	zip2		v21.4s, v4.4s, v5.4s
	eor		a5, a5, w7
	zip1		v22.4s, v6.4s, v7.4s
	eor		a6, a6, w8
	zip2		v23.4s, v6.4s, v7.4s
	eor		a7, a7, w9
	ldp		w6, w7, [x2, #-32]
	zip1		v24.4s, v8.4s, v9.4s
	ldp		w8, w9, [x2, #-24]
	eor		a8, a8, w6
	zip2		v25.4s, v8.4s, v9.4s
	eor		a9, a9, w7
	zip1		v26.4s, v10.4s, v11.4s
	eor		a10, a10, w8
	zip2		v27.4s, v10.4s, v11.4s
	eor		a11, a11, w9
	ldp		w6, w7, [x2, #-16]
	zip1		v28.4s, v12.4s, v13.4s
	ldp		w8, w9, [x2, #-8]
	eor		a12, a12, w6
	zip2		v29.4s, v12.4s, v13.4s
	eor		a13, a13, w7
	zip1		v30.4s, v14.4s, v15.4s
	eor		a14, a14, w8
	zip2		v31.4s, v14.4s, v15.4s
	eor		a15, a15, w9

	add		x3, x2, x4
	sub		x3, x3, #128		// start of last block

	subs		x5, x4, #128
	csel		x2, x2, x3, ge

	// interleave 64-bit words in state n, n+2
	zip1		v0.2d, v16.2d, v18.2d
	zip2		v4.2d, v16.2d, v18.2d
	stp		a0, a1, [x1], #64
	zip1		v8.2d, v17.2d, v19.2d
	zip2		v12.2d, v17.2d, v19.2d
	stp		a2, a3, [x1, #-56]

	subs		x6, x4, #192
	ld1		{v16.16b-v19.16b}, [x2], #64
	csel		x2, x2, x3, ge

	zip1		v1.2d, v20.2d, v22.2d
	zip2		v5.2d, v20.2d, v22.2d
	stp		a4, a5, [x1, #-48]
	zip1		v9.2d, v21.2d, v23.2d
	zip2		v13.2d, v21.2d, v23.2d
	stp		a6, a7, [x1, #-40]

	subs		x7, x4, #256
	ld1		{v20.16b-v23.16b}, [x2], #64
	csel		x2, x2, x3, ge

	zip1		v2.2d, v24.2d, v26.2d
	zip2		v6.2d, v24.2d, v26.2d
	stp		a8, a9, [x1, #-32]
	zip1		v10.2d, v25.2d, v27.2d
	zip2		v14.2d, v25.2d, v27.2d
	stp		a10, a11, [x1, #-24]

	subs		x8, x4, #320
	ld1		{v24.16b-v27.16b}, [x2], #64
	csel		x2, x2, x3, ge

	zip1		v3.2d, v28.2d, v30.2d
	zip2		v7.2d, v28.2d, v30.2d
	stp		a12, a13, [x1, #-16]
	zip1		v11.2d, v29.2d, v31.2d
	zip2		v15.2d, v29.2d, v31.2d
	stp		a14, a15, [x1, #-8]

	tbnz		x5, #63, .Lt128
	ld1		{v28.16b-v31.16b}, [x2]

	// xor with corresponding input, write to output
@@ -699,107 +426,25 @@ CPU_BE( rev a15, a15 )
	eor		v17.16b, v17.16b, v1.16b
	eor		v18.16b, v18.16b, v2.16b
	eor		v19.16b, v19.16b, v3.16b

	tbnz		x6, #63, .Lt192

	eor		v20.16b, v20.16b, v4.16b
	eor		v21.16b, v21.16b, v5.16b
	st1		{v16.16b-v19.16b}, [x1], #64
	eor		v22.16b, v22.16b, v6.16b
	eor		v23.16b, v23.16b, v7.16b

	st1		{v16.16b-v19.16b}, [x1], #64
	tbnz		x7, #63, .Lt256

	eor		v24.16b, v24.16b, v8.16b
	eor		v25.16b, v25.16b, v9.16b
	st1		{v20.16b-v23.16b}, [x1], #64
	eor		v26.16b, v26.16b, v10.16b
	eor		v27.16b, v27.16b, v11.16b

	st1		{v20.16b-v23.16b}, [x1], #64
	tbnz		x8, #63, .Lt320

	eor		v28.16b, v28.16b, v12.16b
	st1		{v24.16b-v27.16b}, [x1], #64
	eor		v29.16b, v29.16b, v13.16b
	eor		v30.16b, v30.16b, v14.16b
	eor		v31.16b, v31.16b, v15.16b

	st1		{v24.16b-v27.16b}, [x1], #64
	st1		{v28.16b-v31.16b}, [x1]

.Lout:	frame_pop
	ret
ENDPROC(chacha20_4block_xor_neon)

	// fewer than 192 bytes of in/output
.Lt192:	cbz		x5, 1f			// exactly 128 bytes?
	ld1		{v28.16b-v31.16b}, [x10]
	add		x5, x5, x1
	tbl		v28.16b, {v4.16b-v7.16b}, v28.16b
	tbl		v29.16b, {v4.16b-v7.16b}, v29.16b
	tbl		v30.16b, {v4.16b-v7.16b}, v30.16b
	tbl		v31.16b, {v4.16b-v7.16b}, v31.16b

0:	eor		v20.16b, v20.16b, v28.16b
	eor		v21.16b, v21.16b, v29.16b
	eor		v22.16b, v22.16b, v30.16b
	eor		v23.16b, v23.16b, v31.16b
	st1		{v20.16b-v23.16b}, [x5]	// overlapping stores
1:	st1		{v16.16b-v19.16b}, [x1]
	b		.Lout

	// fewer than 128 bytes of in/output
.Lt128:	ld1		{v28.16b-v31.16b}, [x10]
	add		x5, x5, x1
	sub		x1, x1, #64
	tbl		v28.16b, {v0.16b-v3.16b}, v28.16b
	tbl		v29.16b, {v0.16b-v3.16b}, v29.16b
	tbl		v30.16b, {v0.16b-v3.16b}, v30.16b
	tbl		v31.16b, {v0.16b-v3.16b}, v31.16b
	ld1		{v16.16b-v19.16b}, [x1]	// reload first output block
	b		0b

	// fewer than 256 bytes of in/output
.Lt256:	cbz		x6, 2f			// exactly 192 bytes?
	ld1		{v4.16b-v7.16b}, [x10]
	add		x6, x6, x1
	tbl		v0.16b, {v8.16b-v11.16b}, v4.16b
	tbl		v1.16b, {v8.16b-v11.16b}, v5.16b
	tbl		v2.16b, {v8.16b-v11.16b}, v6.16b
	tbl		v3.16b, {v8.16b-v11.16b}, v7.16b

	eor		v28.16b, v28.16b, v0.16b
	eor		v29.16b, v29.16b, v1.16b
	eor		v30.16b, v30.16b, v2.16b
	eor		v31.16b, v31.16b, v3.16b
	st1		{v28.16b-v31.16b}, [x6]	// overlapping stores
2:	st1		{v20.16b-v23.16b}, [x1]
	b		.Lout

	// fewer than 320 bytes of in/output
.Lt320:	cbz		x7, 3f			// exactly 256 bytes?
	ld1		{v4.16b-v7.16b}, [x10]
	add		x7, x7, x1
	tbl		v0.16b, {v12.16b-v15.16b}, v4.16b
	tbl		v1.16b, {v12.16b-v15.16b}, v5.16b
	tbl		v2.16b, {v12.16b-v15.16b}, v6.16b
	tbl		v3.16b, {v12.16b-v15.16b}, v7.16b

	eor		v28.16b, v28.16b, v0.16b
	eor		v29.16b, v29.16b, v1.16b
	eor		v30.16b, v30.16b, v2.16b
	eor		v31.16b, v31.16b, v3.16b
	st1		{v28.16b-v31.16b}, [x7]	// overlapping stores
3:	st1		{v24.16b-v27.16b}, [x1]
	b		.Lout
SYM_FUNC_END(chacha_4block_xor_neon)

	.section	".rodata", "a", %progbits
	.align		L1_CACHE_SHIFT
.Lpermute:
	.set		.Li, 0
	.rept		128
	.byte		(.Li - 64)
	.set		.Li, .Li + 1
	.endr

CTRINC:	.word		1, 2, 3, 4
CTRINC:	.word		0, 1, 2, 3
ROT8:	.word		0x02010003, 0x06050407, 0x0a09080b, 0x0e0d0c0f
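Both versions of the assembly above implement the same ChaCha quarter-round with add/eor/rotate sequences; a plain C rendering for reference (illustrative sketch only, not part of the diff):

	#include <stdint.h>

	#define ROTL32(v, n) (((v) << (n)) | ((v) >> (32 - (n))))

	/* One ChaCha quarter-round; a double round applies this to the
	 * four columns, then to the four diagonals, of the 4x4 state. */
	static void quarter_round(uint32_t x[16], int a, int b, int c, int d)
	{
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 16);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 12);
		x[a] += x[b]; x[d] = ROTL32(x[d] ^ x[a], 8);
		x[c] += x[d]; x[b] = ROTL32(x[b] ^ x[c], 7);
	}

In the NEON code the 16-bit rotate is a rev32 on .8h lanes and the 8-bit rotate is a tbl byte shuffle through the ROT8 table; only the 12- and 7-bit rotates need the shl/sri pair.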
arch/arm64/crypto/chacha20-neon-glue.c (new file, 127 lines)
@@ -0,0 +1,127 @@
/*
 * ChaCha20 256-bit cipher algorithm, RFC7539, arm64 NEON functions
 *
 * Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * Based on:
 * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
 *
 * Copyright (C) 2015 Martin Willi
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 */

#include <crypto/algapi.h>
#include <crypto/chacha.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
#include <linux/module.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>

asmlinkage void chacha20_block_xor_neon(u32 *state, u8 *dst, const u8 *src);
asmlinkage void chacha20_4block_xor_neon(u32 *state, u8 *dst, const u8 *src);

static void chacha20_doneon(u32 *state, u8 *dst, const u8 *src,
			    unsigned int bytes)
{
	u8 buf[CHACHA_BLOCK_SIZE];

	while (bytes >= CHACHA_BLOCK_SIZE * 4) {
		chacha20_4block_xor_neon(state, dst, src);
		bytes -= CHACHA_BLOCK_SIZE * 4;
		src += CHACHA_BLOCK_SIZE * 4;
		dst += CHACHA_BLOCK_SIZE * 4;
		state[12] += 4;
	}
	while (bytes >= CHACHA_BLOCK_SIZE) {
		chacha20_block_xor_neon(state, dst, src);
		bytes -= CHACHA_BLOCK_SIZE;
		src += CHACHA_BLOCK_SIZE;
		dst += CHACHA_BLOCK_SIZE;
		state[12]++;
	}
	if (bytes) {
		memcpy(buf, src, bytes);
		chacha20_block_xor_neon(state, buf, buf);
		memcpy(dst, buf, bytes);
	}
}

static int chacha20_neon(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	u32 state[16];
	int err;

	if (!may_use_simd() || req->cryptlen <= CHACHA_BLOCK_SIZE)
		return crypto_chacha_crypt(req);

	err = skcipher_walk_virt(&walk, req, true);

	crypto_chacha_init(state, ctx, walk.iv);

	kernel_neon_begin();
	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;

		if (nbytes < walk.total)
			nbytes = round_down(nbytes, walk.stride);

		chacha20_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
				nbytes);
		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
	}
	kernel_neon_end();

	return err;
}

static struct skcipher_alg alg = {
	.base.cra_name		= "chacha20",
	.base.cra_driver_name	= "chacha20-neon",
	.base.cra_priority	= 300,
	.base.cra_blocksize	= 1,
	.base.cra_ctxsize	= sizeof(struct chacha_ctx),
	.base.cra_module	= THIS_MODULE,

	.min_keysize		= CHACHA_KEY_SIZE,
	.max_keysize		= CHACHA_KEY_SIZE,
	.ivsize			= CHACHA_IV_SIZE,
	.chunksize		= CHACHA_BLOCK_SIZE,
	.walksize		= 4 * CHACHA_BLOCK_SIZE,
	.setkey			= crypto_chacha20_setkey,
	.encrypt		= chacha20_neon,
	.decrypt		= chacha20_neon,
};

static int __init chacha20_simd_mod_init(void)
{
	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;

	return crypto_register_skcipher(&alg);
}

static void __exit chacha20_simd_mod_fini(void)
{
	crypto_unregister_skcipher(&alg);
}

module_init(chacha20_simd_mod_init);
module_exit(chacha20_simd_mod_fini);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("chacha20");
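The glue code treats state[12] as the block counter because of the RFC 7539 state layout that crypto_chacha_init() is assumed to produce; a little-endian sketch for reference (illustrative only, not the kernel helper itself):

	#include <stdint.h>
	#include <string.h>

	/* RFC 7539 layout: words 0..3 are the "expand 32-byte k" constants,
	 * 4..11 the 256-bit key, 12 the block counter, 13..15 the nonce.
	 * The kernel's 16-byte IV carries the counter in its first 4 bytes. */
	static void chacha20_init_le(uint32_t state[16], const uint32_t key[8],
				     const uint8_t iv[16])
	{
		state[0] = 0x61707865;	/* "expa" */
		state[1] = 0x3320646e;	/* "nd 3" */
		state[2] = 0x79622d32;	/* "2-by" */
		state[3] = 0x6b206574;	/* "te k" */
		memcpy(&state[4], key, 32);
		memcpy(&state[12], iv, 16);
	}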
arch/arm64/crypto/crc32-ce-core.S (new file, 266 lines)
@@ -0,0 +1,266 @@
/*
 * Accelerated CRC32(C) using arm64 CRC, NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see http://www.gnu.org/licenses
 *
 * Please visit http://www.xyratex.com/contact if you need additional
 * information or have any questions.
 *
 * GPL HEADER END
 */

/*
 * Copyright 2012 Xyratex Technology Limited
 *
 * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
 * calculation.
 * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
 * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
 * at:
 * http://www.intel.com/products/processor/manuals/
 * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
 * Volume 2B: Instruction Set Reference, N-Z
 *
 * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
 *	    Alexander Boyko <Alexander_Boyko@xyratex.com>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.align		6
	.cpu		generic+crypto+crc

.Lcrc32_constants:
	/*
	 * [x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
	 * #define CONSTANT_R1 0x154442bd4LL
	 *
	 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
	 * #define CONSTANT_R2 0x1c6e41596LL
	 */
	.octa		0x00000001c6e415960000000154442bd4

	/*
	 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
	 * #define CONSTANT_R3 0x1751997d0LL
	 *
	 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
	 * #define CONSTANT_R4 0x0ccaa009eLL
	 */
	.octa		0x00000000ccaa009e00000001751997d0

	/*
	 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
	 * #define CONSTANT_R5 0x163cd6124LL
	 */
	.quad		0x0000000163cd6124
	.quad		0x00000000FFFFFFFF

	/*
	 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
	 *
	 * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))`
	 *                                        = 0x1F7011641LL
	 * #define CONSTANT_RU 0x1F7011641LL
	 */
	.octa		0x00000001F701164100000001DB710641

.Lcrc32c_constants:
	.octa		0x000000009e4addf800000000740eef02
	.octa		0x000000014cd00bd600000000f20c0dfe
	.quad		0x00000000dd45aab8
	.quad		0x00000000FFFFFFFF
	.octa		0x00000000dea713f10000000105ec76f0

	vCONSTANT	.req	v0
	dCONSTANT	.req	d0
	qCONSTANT	.req	q0

	BUF		.req	x0
	LEN		.req	x1
	CRC		.req	x2

	vzr		.req	v9

/**
 * Calculate crc32
 * BUF - buffer
 * LEN - sizeof buffer (multiple of 16 bytes), LEN should be > 63
 * CRC - initial crc32
 * return %eax crc32
 * uint crc32_pmull_le(unsigned char const *buffer,
 *                     size_t len, uint crc32)
 */
ENTRY(crc32_pmull_le)
	adr		x3, .Lcrc32_constants
	b		0f

ENTRY(crc32c_pmull_le)
	adr		x3, .Lcrc32c_constants

0:	bic		LEN, LEN, #15
	ld1		{v1.16b-v4.16b}, [BUF], #0x40
	movi		vzr.16b, #0
	fmov		dCONSTANT, CRC
	eor		v1.16b, v1.16b, vCONSTANT.16b
	sub		LEN, LEN, #0x40
	cmp		LEN, #0x40
	b.lt		less_64

	ldr		qCONSTANT, [x3]

loop_64:		/* 64 bytes Full cache line folding */
	sub		LEN, LEN, #0x40

	pmull2		v5.1q, v1.2d, vCONSTANT.2d
	pmull2		v6.1q, v2.2d, vCONSTANT.2d
	pmull2		v7.1q, v3.2d, vCONSTANT.2d
	pmull2		v8.1q, v4.2d, vCONSTANT.2d

	pmull		v1.1q, v1.1d, vCONSTANT.1d
	pmull		v2.1q, v2.1d, vCONSTANT.1d
	pmull		v3.1q, v3.1d, vCONSTANT.1d
	pmull		v4.1q, v4.1d, vCONSTANT.1d

	eor		v1.16b, v1.16b, v5.16b
	ld1		{v5.16b}, [BUF], #0x10
	eor		v2.16b, v2.16b, v6.16b
	ld1		{v6.16b}, [BUF], #0x10
	eor		v3.16b, v3.16b, v7.16b
	ld1		{v7.16b}, [BUF], #0x10
	eor		v4.16b, v4.16b, v8.16b
	ld1		{v8.16b}, [BUF], #0x10

	eor		v1.16b, v1.16b, v5.16b
	eor		v2.16b, v2.16b, v6.16b
	eor		v3.16b, v3.16b, v7.16b
	eor		v4.16b, v4.16b, v8.16b

	cmp		LEN, #0x40
	b.ge		loop_64

less_64:		/* Folding cache line into 128bit */
	ldr		qCONSTANT, [x3, #16]

	pmull2		v5.1q, v1.2d, vCONSTANT.2d
	pmull		v1.1q, v1.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v5.16b
	eor		v1.16b, v1.16b, v2.16b

	pmull2		v5.1q, v1.2d, vCONSTANT.2d
	pmull		v1.1q, v1.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v5.16b
	eor		v1.16b, v1.16b, v3.16b

	pmull2		v5.1q, v1.2d, vCONSTANT.2d
	pmull		v1.1q, v1.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v5.16b
	eor		v1.16b, v1.16b, v4.16b

	cbz		LEN, fold_64

loop_16:		/* Folding rest buffer into 128bit */
	subs		LEN, LEN, #0x10

	ld1		{v2.16b}, [BUF], #0x10
	pmull2		v5.1q, v1.2d, vCONSTANT.2d
	pmull		v1.1q, v1.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v5.16b
	eor		v1.16b, v1.16b, v2.16b

	b.ne		loop_16

fold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	ext		v2.16b, v1.16b, v1.16b, #8
	pmull2		v2.1q, v2.2d, vCONSTANT.2d
	ext		v1.16b, v1.16b, vzr.16b, #8
	eor		v1.16b, v1.16b, v2.16b

	/* final 32-bit fold */
	ldr		dCONSTANT, [x3, #32]
	ldr		d3, [x3, #40]

	ext		v2.16b, v1.16b, vzr.16b, #4
	and		v1.16b, v1.16b, v3.16b
	pmull		v1.1q, v1.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v2.16b

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
	ldr		qCONSTANT, [x3, #48]

	and		v2.16b, v1.16b, v3.16b
	ext		v2.16b, vzr.16b, v2.16b, #8
	pmull2		v2.1q, v2.2d, vCONSTANT.2d
	and		v2.16b, v2.16b, v3.16b
	pmull		v2.1q, v2.1d, vCONSTANT.1d
	eor		v1.16b, v1.16b, v2.16b
	mov		w0, v1.s[1]

	ret
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)

	.macro		__crc32, c
0:	subs		x2, x2, #16
	b.mi		8f
	ldp		x3, x4, [x1], #16
CPU_BE(	rev		x3, x3 )
CPU_BE(	rev		x4, x4 )
	crc32\c\()x	w0, w0, x3
	crc32\c\()x	w0, w0, x4
	b.ne		0b
	ret

8:	tbz		x2, #3, 4f
	ldr		x3, [x1], #8
CPU_BE(	rev		x3, x3 )
	crc32\c\()x	w0, w0, x3
4:	tbz		x2, #2, 2f
	ldr		w3, [x1], #4
CPU_BE(	rev		w3, w3 )
	crc32\c\()w	w0, w0, w3
2:	tbz		x2, #1, 1f
	ldrh		w3, [x1], #2
CPU_BE(	rev16		w3, w3 )
	crc32\c\()h	w0, w0, w3
1:	tbz		x2, #0, 0f
	ldrb		w3, [x1]
	crc32\c\()b	w0, w0, w3
0:	ret
	.endm

	.align		5
ENTRY(crc32_armv8_le)
	__crc32
ENDPROC(crc32_armv8_le)

	.align		5
ENTRY(crc32c_armv8_le)
	__crc32		c
ENDPROC(crc32c_armv8_le)
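The __crc32 macro above is a 16-byte main loop followed by an 8/4/2/1-byte tail ladder; a C rendering using the ARMv8 CRC32 ACLE intrinsics, for reference (illustrative sketch only; compile with -march=armv8-a+crc):

	#include <arm_acle.h>
	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	static uint32_t crc32_armv8(uint32_t crc, const uint8_t *p, size_t len)
	{
		uint64_t d;
		uint32_t w;
		uint16_t h;

		while (len >= 16) {	/* two 64-bit words per iteration */
			memcpy(&d, p, 8);
			crc = __crc32d(crc, d);
			memcpy(&d, p + 8, 8);
			crc = __crc32d(crc, d);
			p += 16;
			len -= 16;
		}
		if (len & 8) { memcpy(&d, p, 8); crc = __crc32d(crc, d); p += 8; }
		if (len & 4) { memcpy(&w, p, 4); crc = __crc32w(crc, w); p += 4; }
		if (len & 2) { memcpy(&h, p, 2); crc = __crc32h(crc, h); p += 2; }
		if (len & 1) crc = __crc32b(crc, *p);
		return crc;
	}

The memcpy loads stand in for the assembly's ldp/ldr; on big-endian the asm additionally byte-reverses each word (the CPU_BE rev lines), which this little-endian sketch omits.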
arch/arm64/crypto/crc32-ce-glue.c (new file, 244 lines)
@@ -0,0 +1,244 @@
/*
 * Accelerated CRC32(C) using arm64 NEON and Crypto Extensions instructions
 *
 * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/cpufeature.h>
#include <linux/crc32.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/string.h>

#include <crypto/internal/hash.h>

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>

#define PMULL_MIN_LEN		64L	/* minimum size of buffer
					 * for crc32_pmull_le_16 */
#define SCALE_F			16L	/* size of NEON register */

asmlinkage u32 crc32_pmull_le(const u8 buf[], u64 len, u32 init_crc);
asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], size_t len);

asmlinkage u32 crc32c_pmull_le(const u8 buf[], u64 len, u32 init_crc);
asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], size_t len);

static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], size_t len);
static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], size_t len);

static int crc32_pmull_cra_init(struct crypto_tfm *tfm)
{
	u32 *key = crypto_tfm_ctx(tfm);

	*key = 0;
	return 0;
}

static int crc32c_pmull_cra_init(struct crypto_tfm *tfm)
{
	u32 *key = crypto_tfm_ctx(tfm);

	*key = ~0;
	return 0;
}

static int crc32_pmull_setkey(struct crypto_shash *hash, const u8 *key,
			      unsigned int keylen)
{
	u32 *mctx = crypto_shash_ctx(hash);

	if (keylen != sizeof(u32)) {
		crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN);
		return -EINVAL;
	}
	*mctx = le32_to_cpup((__le32 *)key);
	return 0;
}

static int crc32_pmull_init(struct shash_desc *desc)
{
	u32 *mctx = crypto_shash_ctx(desc->tfm);
	u32 *crc = shash_desc_ctx(desc);

	*crc = *mctx;
	return 0;
}

static int crc32_update(struct shash_desc *desc, const u8 *data,
			unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);

	*crc = crc32_armv8_le(*crc, data, length);
	return 0;
}

static int crc32c_update(struct shash_desc *desc, const u8 *data,
			 unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);

	*crc = crc32c_armv8_le(*crc, data, length);
	return 0;
}

static int crc32_pmull_update(struct shash_desc *desc, const u8 *data,
			      unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);
	unsigned int l;

	if ((u64)data % SCALE_F) {
		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));

		*crc = fallback_crc32(*crc, data, l);

		data += l;
		length -= l;
	}

	if (length >= PMULL_MIN_LEN && may_use_simd()) {
		l = round_down(length, SCALE_F);

		kernel_neon_begin();
		*crc = crc32_pmull_le(data, l, *crc);
		kernel_neon_end();

		data += l;
		length -= l;
	}

	if (length > 0)
		*crc = fallback_crc32(*crc, data, length);

	return 0;
}

static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data,
			       unsigned int length)
{
	u32 *crc = shash_desc_ctx(desc);
	unsigned int l;

	if ((u64)data % SCALE_F) {
		l = min_t(u32, length, SCALE_F - ((u64)data % SCALE_F));

		*crc = fallback_crc32c(*crc, data, l);

		data += l;
		length -= l;
	}

	if (length >= PMULL_MIN_LEN && may_use_simd()) {
		l = round_down(length, SCALE_F);

		kernel_neon_begin();
		*crc = crc32c_pmull_le(data, l, *crc);
		kernel_neon_end();

		data += l;
		length -= l;
	}

	if (length > 0) {
		*crc = fallback_crc32c(*crc, data, length);
	}

	return 0;
}

static int crc32_pmull_final(struct shash_desc *desc, u8 *out)
{
	u32 *crc = shash_desc_ctx(desc);

	put_unaligned_le32(*crc, out);
	return 0;
}

static int crc32c_pmull_final(struct shash_desc *desc, u8 *out)
{
	u32 *crc = shash_desc_ctx(desc);

	put_unaligned_le32(~*crc, out);
	return 0;
}

static struct shash_alg crc32_pmull_algs[] = { {
	.setkey			= crc32_pmull_setkey,
	.init			= crc32_pmull_init,
	.update			= crc32_update,
	.final			= crc32_pmull_final,
	.descsize		= sizeof(u32),
	.digestsize		= sizeof(u32),

	.base.cra_ctxsize	= sizeof(u32),
	.base.cra_init		= crc32_pmull_cra_init,
	.base.cra_name		= "crc32",
	.base.cra_driver_name	= "crc32-arm64-ce",
	.base.cra_priority	= 200,
	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
	.base.cra_blocksize	= 1,
	.base.cra_module	= THIS_MODULE,
}, {
	.setkey			= crc32_pmull_setkey,
	.init			= crc32_pmull_init,
	.update			= crc32c_update,
	.final			= crc32c_pmull_final,
	.descsize		= sizeof(u32),
	.digestsize		= sizeof(u32),

	.base.cra_ctxsize	= sizeof(u32),
	.base.cra_init		= crc32c_pmull_cra_init,
	.base.cra_name		= "crc32c",
	.base.cra_driver_name	= "crc32c-arm64-ce",
	.base.cra_priority	= 200,
	.base.cra_flags		= CRYPTO_ALG_OPTIONAL_KEY,
	.base.cra_blocksize	= 1,
	.base.cra_module	= THIS_MODULE,
} };

static int __init crc32_pmull_mod_init(void)
{
	if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_PMULL)) {
		crc32_pmull_algs[0].update = crc32_pmull_update;
		crc32_pmull_algs[1].update = crc32c_pmull_update;

		if (elf_hwcap & HWCAP_CRC32) {
			fallback_crc32 = crc32_armv8_le;
			fallback_crc32c = crc32c_armv8_le;
		} else {
			fallback_crc32 = crc32_le;
			fallback_crc32c = __crc32c_le;
		}
	} else if (!(elf_hwcap & HWCAP_CRC32)) {
		return -ENODEV;
	}
	return crypto_register_shashes(crc32_pmull_algs,
				       ARRAY_SIZE(crc32_pmull_algs));
}

static void __exit crc32_pmull_mod_exit(void)
{
	crypto_unregister_shashes(crc32_pmull_algs,
				  ARRAY_SIZE(crc32_pmull_algs));
}

static const struct cpu_feature crc32_cpu_feature[] = {
	{ cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { }
};
MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature);

module_init(crc32_pmull_mod_init);
module_exit(crc32_pmull_mod_exit);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
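A sketch of how a kernel user would drive the "crc32c" shash registered above, via the generic crypto API (illustrative only; crc32c_digest() is a hypothetical caller, and error handling is trimmed):

	#include <crypto/hash.h>
	#include <linux/err.h>

	static u32 crc32c_digest(const u8 *data, unsigned int len)
	{
		struct crypto_shash *tfm = crypto_alloc_shash("crc32c", 0, 0);
		SHASH_DESC_ON_STACK(desc, tfm);
		u32 out = 0;

		if (IS_ERR(tfm))
			return 0;
		desc->tfm = tfm;
		crypto_shash_digest(desc, data, len, (u8 *)&out);
		crypto_free_shash(tfm);
		return out;
	}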
@ -2,14 +2,12 @@
|
||||
// Accelerated CRC-T10DIF using arm64 NEON and Crypto Extensions instructions
|
||||
//
|
||||
// Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
// Copyright (C) 2019 Google LLC <ebiggers@google.com>
|
||||
//
|
||||
// This program is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License version 2 as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
|
||||
// Derived from the x86 version:
|
||||
//
|
||||
// Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
|
||||
//
|
||||
@ -56,176 +54,109 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Function API:
|
||||
// UINT16 crc_t10dif_pcl(
|
||||
// UINT16 init_crc, //initial CRC value, 16 bits
|
||||
// const unsigned char *buf, //buffer pointer to calculate CRC on
|
||||
// UINT64 len //buffer length in bytes (64-bit data)
|
||||
// );
|
||||
//
|
||||
// Reference paper titled "Fast CRC Computation for Generic
|
||||
// Polynomials Using PCLMULQDQ Instruction"
|
||||
// URL: http://www.intel.com/content/dam/www/public/us/en/documents
|
||||
// /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
|
||||
//
|
||||
//
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.text
|
||||
.arch armv8-a+crypto
|
||||
.cpu generic+crypto
|
||||
|
||||
init_crc .req w0
|
||||
buf .req x1
|
||||
len .req x2
|
||||
fold_consts_ptr .req x3
|
||||
arg1_low32 .req w0
|
||||
arg2 .req x1
|
||||
arg3 .req x2
|
||||
|
||||
fold_consts .req v10
|
||||
vzr .req v13
|
||||
|
||||
ad .req v14
|
||||
ENTRY(crc_t10dif_pmull)
|
||||
movi vzr.16b, #0 // init zero register
|
||||
|
||||
k00_16 .req v15
|
||||
k32_48 .req v16
|
||||
// adjust the 16-bit initial_crc value, scale it to 32 bits
|
||||
lsl arg1_low32, arg1_low32, #16
|
||||
|
||||
t3 .req v17
|
||||
t4 .req v18
|
||||
t5 .req v19
|
||||
t6 .req v20
|
||||
t7 .req v21
|
||||
t8 .req v22
|
||||
t9 .req v23
|
||||
// check if smaller than 256
|
||||
cmp arg3, #256
|
||||
|
||||
perm1 .req v24
|
||||
perm2 .req v25
|
||||
perm3 .req v26
|
||||
perm4 .req v27
|
||||
// for sizes less than 128, we can't fold 64B at a time...
|
||||
b.lt _less_than_128
|
||||
|
||||
bd1 .req v28
|
||||
bd2 .req v29
|
||||
bd3 .req v30
|
||||
bd4 .req v31
|
||||
// load the initial crc value
|
||||
// crc value does not need to be byte-reflected, but it needs
|
||||
// to be moved to the high part of the register.
|
||||
// because data will be byte-reflected and will align with
|
||||
// initial crc at correct place.
|
||||
movi v10.16b, #0
|
||||
mov v10.s[3], arg1_low32 // initial crc
|
||||
|
||||
.macro __pmull_init_p64
|
||||
.endm
|
||||
// receive the initial 64B data, xor the initial crc value
|
||||
ldp q0, q1, [arg2]
|
||||
ldp q2, q3, [arg2, #0x20]
|
||||
ldp q4, q5, [arg2, #0x40]
|
||||
ldp q6, q7, [arg2, #0x60]
|
||||
add arg2, arg2, #0x80
|
||||
|
||||
.macro __pmull_pre_p64, bd
|
||||
.endm
|
||||
CPU_LE( rev64 v0.16b, v0.16b )
|
||||
CPU_LE( rev64 v1.16b, v1.16b )
|
||||
CPU_LE( rev64 v2.16b, v2.16b )
|
||||
CPU_LE( rev64 v3.16b, v3.16b )
|
||||
CPU_LE( rev64 v4.16b, v4.16b )
|
||||
CPU_LE( rev64 v5.16b, v5.16b )
|
||||
CPU_LE( rev64 v6.16b, v6.16b )
|
||||
CPU_LE( rev64 v7.16b, v7.16b )
|
||||
|
||||
.macro __pmull_init_p8
|
||||
// k00_16 := 0x0000000000000000_000000000000ffff
|
||||
// k32_48 := 0x00000000ffffffff_0000ffffffffffff
|
||||
movi k32_48.2d, #0xffffffff
|
||||
mov k32_48.h[2], k32_48.h[0]
|
||||
ushr k00_16.2d, k32_48.2d, #32
|
||||
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
|
||||
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
|
||||
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )

// prepare the permutation vectors
mov_q x5, 0x080f0e0d0c0b0a09
movi perm4.8b, #8
dup perm1.2d, x5
eor perm1.16b, perm1.16b, perm4.16b
ushr perm2.2d, perm1.2d, #8
ushr perm3.2d, perm1.2d, #16
ushr perm4.2d, perm1.2d, #24
sli perm2.2d, perm1.2d, #56
sli perm3.2d, perm1.2d, #48
sli perm4.2d, perm1.2d, #40
.endm
// XOR the initial_crc value
eor v0.16b, v0.16b, v10.16b

.macro __pmull_pre_p8, bd
tbl bd1.16b, {\bd\().16b}, perm1.16b
tbl bd2.16b, {\bd\().16b}, perm2.16b
tbl bd3.16b, {\bd\().16b}, perm3.16b
tbl bd4.16b, {\bd\().16b}, perm4.16b
.endm
ldr q10, rk3 // xmm10 has rk3 and rk4
// type of pmull instruction
// will determine which constant to use

SYM_FUNC_START_LOCAL(__pmull_p8_core)
.L__pmull_p8_core:
ext t4.8b, ad.8b, ad.8b, #1 // A1
ext t5.8b, ad.8b, ad.8b, #2 // A2
ext t6.8b, ad.8b, ad.8b, #3 // A3
//
// we subtract 256 instead of 128 to save one instruction from the loop
//
sub arg3, arg3, #256

pmull t4.8h, t4.8b, fold_consts.8b // F = A1*B
pmull t8.8h, ad.8b, bd1.8b // E = A*B1
pmull t5.8h, t5.8b, fold_consts.8b // H = A2*B
pmull t7.8h, ad.8b, bd2.8b // G = A*B2
pmull t6.8h, t6.8b, fold_consts.8b // J = A3*B
pmull t9.8h, ad.8b, bd3.8b // I = A*B3
pmull t3.8h, ad.8b, bd4.8b // K = A*B4
b 0f
// at this section of the code, there is 64*x+y (0<=y<64) bytes of
// buffer. The _fold_64_B_loop will fold 64B at a time
// until we have 64+y Bytes of buffer

.L__pmull_p8_core2:
tbl t4.16b, {ad.16b}, perm1.16b // A1
tbl t5.16b, {ad.16b}, perm2.16b // A2
tbl t6.16b, {ad.16b}, perm3.16b // A3

pmull2 t4.8h, t4.16b, fold_consts.16b // F = A1*B
pmull2 t8.8h, ad.16b, bd1.16b // E = A*B1
pmull2 t5.8h, t5.16b, fold_consts.16b // H = A2*B
pmull2 t7.8h, ad.16b, bd2.16b // G = A*B2
pmull2 t6.8h, t6.16b, fold_consts.16b // J = A3*B
pmull2 t9.8h, ad.16b, bd3.16b // I = A*B3
pmull2 t3.8h, ad.16b, bd4.16b // K = A*B4
// fold 64B at a time. This section of the code folds 4 vector
// registers in parallel
_fold_64_B_loop:

0: eor t4.16b, t4.16b, t8.16b // L = E + F
eor t5.16b, t5.16b, t7.16b // M = G + H
eor t6.16b, t6.16b, t9.16b // N = I + J
.macro fold64, reg1, reg2
ldp q11, q12, [arg2], #0x20

uzp1 t8.2d, t4.2d, t5.2d
uzp2 t4.2d, t4.2d, t5.2d
uzp1 t7.2d, t6.2d, t3.2d
uzp2 t6.2d, t6.2d, t3.2d

// t4 = (L) (P0 + P1) << 8
// t5 = (M) (P2 + P3) << 16
eor t8.16b, t8.16b, t4.16b
and t4.16b, t4.16b, k32_48.16b

// t6 = (N) (P4 + P5) << 24
// t7 = (K) (P6 + P7) << 32
eor t7.16b, t7.16b, t6.16b
and t6.16b, t6.16b, k00_16.16b

eor t8.16b, t8.16b, t4.16b
eor t7.16b, t7.16b, t6.16b

zip2 t5.2d, t8.2d, t4.2d
zip1 t4.2d, t8.2d, t4.2d
zip2 t3.2d, t7.2d, t6.2d
zip1 t6.2d, t7.2d, t6.2d

ext t4.16b, t4.16b, t4.16b, #15
ext t5.16b, t5.16b, t5.16b, #14
ext t6.16b, t6.16b, t6.16b, #13
ext t3.16b, t3.16b, t3.16b, #12

eor t4.16b, t4.16b, t5.16b
eor t6.16b, t6.16b, t3.16b
ret
SYM_FUNC_END(__pmull_p8_core)

.macro __pmull_p8, rq, ad, bd, i
.ifnc \bd, fold_consts
.err
.endif
mov ad.16b, \ad\().16b
.ifb \i
pmull \rq\().8h, \ad\().8b, \bd\().8b // D = A*B
.else
pmull2 \rq\().8h, \ad\().16b, \bd\().16b // D = A*B
.endif

bl .L__pmull_p8_core\i

eor \rq\().16b, \rq\().16b, t4.16b
eor \rq\().16b, \rq\().16b, t6.16b
.endm
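For readers cross-checking the p8 fallback above: pmull on .8b lanes only provides eight lanewise 8x8-bit polynomial multiplies, so __pmull_p8 has to assemble a full 64x64-bit carryless product out of shifted partial products (the A1*B, A*B1, ... terms in the comments). As a hedged, illustrative reference only (not the NEON algorithm itself), this C sketch shows the bit-at-a-time semantics the fallback must reproduce:

```c
#include <stdint.h>

struct clmul128 { uint64_t lo, hi; };

/* Reference semantics of a 64x64 -> 128 bit carryless multiply: XOR in a
 * copy of 'a' shifted left by i for every set bit i of 'b'. */
static struct clmul128 clmul64(uint64_t a, uint64_t b)
{
	struct clmul128 r = { 0, 0 };
	int i;

	for (i = 0; i < 64; i++) {
		if (b & (1ULL << i)) {
			r.lo ^= a << i;
			if (i)	/* bits shifted out of the low half */
				r.hi ^= a >> (64 - i);
		}
	}
	return r;
}
```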

// Fold reg1, reg2 into the next 32 data bytes, storing the result back
// into reg1, reg2.
.macro fold_32_bytes, p, reg1, reg2
ldp q11, q12, [buf], #0x20

__pmull_\p v8, \reg1, fold_consts, 2
__pmull_\p \reg1, \reg1, fold_consts
pmull2 v8.1q, \reg1\().2d, v10.2d
pmull \reg1\().1q, \reg1\().1d, v10.1d

CPU_LE( rev64 v11.16b, v11.16b )
CPU_LE( rev64 v12.16b, v12.16b )

__pmull_\p v9, \reg2, fold_consts, 2
__pmull_\p \reg2, \reg2, fold_consts
pmull2 v9.1q, \reg2\().2d, v10.2d
pmull \reg2\().1q, \reg2\().1d, v10.1d

CPU_LE( ext v11.16b, v11.16b, v11.16b, #8 )
CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
@@ -236,279 +167,225 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
eor \reg2\().16b, \reg2\().16b, v12.16b
.endm

// Fold src_reg into dst_reg, optionally loading the next fold constants
.macro fold_16_bytes, p, src_reg, dst_reg, load_next_consts
__pmull_\p v8, \src_reg, fold_consts
__pmull_\p \src_reg, \src_reg, fold_consts, 2
.ifnb \load_next_consts
ld1 {fold_consts.2d}, [fold_consts_ptr], #16
__pmull_pre_\p fold_consts
fold64 v0, v1
fold64 v2, v3
fold64 v4, v5
fold64 v6, v7

subs arg3, arg3, #128

// check if there is another 64B in the buffer to be able to fold
b.ge _fold_64_B_loop

// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer; the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3

// fold the 8 vector registers to 1 vector register with different
// constants

ldr q10, rk9

.macro fold16, reg, rk
pmull v8.1q, \reg\().1d, v10.1d
pmull2 \reg\().1q, \reg\().2d, v10.2d
.ifnb \rk
ldr q10, \rk
.endif
eor \dst_reg\().16b, \dst_reg\().16b, v8.16b
eor \dst_reg\().16b, \dst_reg\().16b, \src_reg\().16b
.endm

.macro __pmull_p64, rd, rn, rm, n
.ifb \n
pmull \rd\().1q, \rn\().1d, \rm\().1d
.else
pmull2 \rd\().1q, \rn\().2d, \rm\().2d
.endif
.endm

.macro crc_t10dif_pmull, p
__pmull_init_\p

// For sizes less than 256 bytes, we can't fold 128 bytes at a time.
cmp len, #256
b.lt .Lless_than_256_bytes_\@

adr_l fold_consts_ptr, .Lfold_across_128_bytes_consts

// Load the first 128 data bytes. Byte swapping is necessary to make
// the bit order match the polynomial coefficient order.
ldp q0, q1, [buf]
ldp q2, q3, [buf, #0x20]
ldp q4, q5, [buf, #0x40]
ldp q6, q7, [buf, #0x60]
add buf, buf, #0x80
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( rev64 v1.16b, v1.16b )
CPU_LE( rev64 v2.16b, v2.16b )
CPU_LE( rev64 v3.16b, v3.16b )
CPU_LE( rev64 v4.16b, v4.16b )
CPU_LE( rev64 v5.16b, v5.16b )
CPU_LE( rev64 v6.16b, v6.16b )
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )
CPU_LE( ext v2.16b, v2.16b, v2.16b, #8 )
CPU_LE( ext v3.16b, v3.16b, v3.16b, #8 )
CPU_LE( ext v4.16b, v4.16b, v4.16b, #8 )
CPU_LE( ext v5.16b, v5.16b, v5.16b, #8 )
CPU_LE( ext v6.16b, v6.16b, v6.16b, #8 )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )

// XOR the first 16 data *bits* with the initial CRC value.
movi v8.16b, #0
mov v8.h[7], init_crc
eor v0.16b, v0.16b, v8.16b

// Load the constants for folding across 128 bytes.
ld1 {fold_consts.2d}, [fold_consts_ptr]
__pmull_pre_\p fold_consts

// Subtract 128 for the 128 data bytes just consumed. Subtract another
// 128 to simplify the termination condition of the following loop.
sub len, len, #256

// While >= 128 data bytes remain (not counting v0-v7), fold the 128
// bytes v0-v7 into them, storing the result back into v0-v7.
.Lfold_128_bytes_loop_\@:
fold_32_bytes \p, v0, v1
fold_32_bytes \p, v2, v3
fold_32_bytes \p, v4, v5
fold_32_bytes \p, v6, v7

subs len, len, #128
b.ge .Lfold_128_bytes_loop_\@

// Now fold the 112 bytes in v0-v6 into the 16 bytes in v7.

// Fold across 64 bytes.
add fold_consts_ptr, fold_consts_ptr, #16
ld1 {fold_consts.2d}, [fold_consts_ptr], #16
__pmull_pre_\p fold_consts
fold_16_bytes \p, v0, v4
fold_16_bytes \p, v1, v5
fold_16_bytes \p, v2, v6
fold_16_bytes \p, v3, v7, 1
// Fold across 32 bytes.
fold_16_bytes \p, v4, v6
fold_16_bytes \p, v5, v7, 1
// Fold across 16 bytes.
fold_16_bytes \p, v6, v7

// Add 128 to get the correct number of data bytes remaining in 0...127
// (not counting v7), following the previous extra subtraction by 128.
// Then subtract 16 to simplify the termination condition of the
// following loop.
adds len, len, #(128-16)

// While >= 16 data bytes remain (not counting v7), fold the 16 bytes v7
// into them, storing the result back into v7.
b.lt .Lfold_16_bytes_loop_done_\@
.Lfold_16_bytes_loop_\@:
__pmull_\p v8, v7, fold_consts
__pmull_\p v7, v7, fold_consts, 2
eor v7.16b, v7.16b, v8.16b
ldr q0, [buf], #16
eor v7.16b, v7.16b, \reg\().16b
.endm

fold16 v0, rk11
fold16 v1, rk13
fold16 v2, rk15
fold16 v3, rk17
fold16 v4, rk19
fold16 v5, rk1
fold16 v6

// instead of 64, we add 48 to the loop counter to save 1 instruction
// from the loop; instead of a cmp instruction, we use the negative
// flag with the jl instruction
adds arg3, arg3, #(128-16)
b.lt _final_reduction_for_128

// now we have 16+y bytes left to reduce. 16 Bytes is in register v7
// and the rest is in memory. We can fold 16 bytes at a time if y>=16
// continue folding 16B at a time

_16B_reduction_loop:
pmull v8.1q, v7.1d, v10.1d
pmull2 v7.1q, v7.2d, v10.2d
eor v7.16b, v7.16b, v8.16b

ldr q0, [arg2], #16
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
eor v7.16b, v7.16b, v0.16b
subs len, len, #16
b.ge .Lfold_16_bytes_loop_\@
subs arg3, arg3, #16

.Lfold_16_bytes_loop_done_\@:
// Add 16 to get the correct number of data bytes remaining in 0...15
// (not counting v7), following the previous extra subtraction by 16.
adds len, len, #16
b.eq .Lreduce_final_16_bytes_\@
// instead of a cmp instruction, we utilize the flags with the
// jge instruction equivalent of: cmp arg3, 16-16
// check if there is any more 16B in the buffer to be able to fold
b.ge _16B_reduction_loop

.Lhandle_partial_segment_\@:
// Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first
// 16 bytes are in v7 and the rest are the remaining data in 'buf'. To
// do this without needing a fold constant for each possible 'len',
// redivide the bytes into a first chunk of 'len' bytes and a second
// chunk of 16 bytes, then fold the first chunk into the second.
// now we have 16+z bytes left to reduce, where 0 <= z < 16.
// first, we reduce the data in the xmm7 register

// v0 = last 16 original data bytes
add buf, buf, len
ldr q0, [buf, #-16]
CPU_LE( rev64 v0.16b, v0.16b )
CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 )
_final_reduction_for_128:
// check if any more data to fold. If not, compute the CRC of
// the final 128 bits
adds arg3, arg3, #16
b.eq _128_done

// v1 = high order part of second chunk: v7 left-shifted by 'len' bytes.
adr_l x4, .Lbyteshift_table + 16
sub x4, x4, len
ld1 {v2.16b}, [x4]
tbl v1.16b, {v7.16b}, v2.16b
// here we are getting data that is less than 16 bytes.
// since we know that there was data before the pointer, we can
// offset the input pointer before the actual point, to receive
// exactly 16 bytes. after that the registers need to be adjusted.
_get_last_two_regs:
add arg2, arg2, arg3
ldr q1, [arg2, #-16]
CPU_LE( rev64 v1.16b, v1.16b )
CPU_LE( ext v1.16b, v1.16b, v1.16b, #8 )

// v3 = first chunk: v7 right-shifted by '16-len' bytes.
movi v3.16b, #0x80
eor v2.16b, v2.16b, v3.16b
tbl v3.16b, {v7.16b}, v2.16b
// get rid of the extra data that was loaded before
// load the shift constant
adr x4, tbl_shf_table + 16
sub x4, x4, arg3
ld1 {v0.16b}, [x4]

// Convert to 8-bit masks: 'len' 0x00 bytes, then '16-len' 0xff bytes.
sshr v2.16b, v2.16b, #7
// shift v2 to the left by arg3 bytes
tbl v2.16b, {v7.16b}, v0.16b

// v2 = second chunk: 'len' bytes from v0 (low-order bytes),
// then '16-len' bytes from v1 (high-order bytes).
bsl v2.16b, v1.16b, v0.16b
// shift v7 to the right by 16-arg3 bytes
movi v9.16b, #0x80
eor v0.16b, v0.16b, v9.16b
tbl v7.16b, {v7.16b}, v0.16b

// Fold the first chunk into the second chunk, storing the result in v7.
__pmull_\p v0, v3, fold_consts
__pmull_\p v7, v3, fold_consts, 2
// blend
sshr v0.16b, v0.16b, #7 // convert to 8-bit mask
bsl v0.16b, v2.16b, v1.16b

// fold 16 Bytes
pmull v8.1q, v7.1d, v10.1d
pmull2 v7.1q, v7.2d, v10.2d
eor v7.16b, v7.16b, v8.16b
eor v7.16b, v7.16b, v0.16b
eor v7.16b, v7.16b, v2.16b

.Lreduce_final_16_bytes_\@:
// Reduce the 128-bit value M(x), stored in v7, to the final 16-bit CRC.
_128_done:
// compute crc of a 128-bit value
ldr q10, rk5 // rk5 and rk6 in xmm10

movi v2.16b, #0 // init zero register
// 64b fold
ext v0.16b, vzr.16b, v7.16b, #8
mov v7.d[0], v7.d[1]
pmull v7.1q, v7.1d, v10.1d
eor v7.16b, v7.16b, v0.16b

// Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
ld1 {fold_consts.2d}, [fold_consts_ptr], #16
__pmull_pre_\p fold_consts
// 32b fold
ext v0.16b, v7.16b, vzr.16b, #4
mov v7.s[3], vzr.s[0]
pmull2 v0.1q, v0.2d, v10.2d
eor v7.16b, v7.16b, v0.16b

// Fold the high 64 bits into the low 64 bits, while also multiplying by
// x^64. This produces a 128-bit value congruent to x^64 * M(x) and
// whose low 48 bits are 0.
ext v0.16b, v2.16b, v7.16b, #8
__pmull_\p v7, v7, fold_consts, 2 // high bits * x^48 * (x^80 mod G(x))
eor v0.16b, v0.16b, v7.16b // + low bits * x^64
// barrett reduction
_barrett:
ldr q10, rk7
mov v0.d[0], v7.d[1]

// Fold the high 32 bits into the low 96 bits. This produces a 96-bit
// value congruent to x^64 * M(x) and whose low 48 bits are 0.
ext v1.16b, v0.16b, v2.16b, #12 // extract high 32 bits
mov v0.s[3], v2.s[0] // zero high 32 bits
__pmull_\p v1, v1, fold_consts // high 32 bits * x^48 * (x^48 mod G(x))
eor v0.16b, v0.16b, v1.16b // + low bits
pmull v0.1q, v0.1d, v10.1d
ext v0.16b, vzr.16b, v0.16b, #12
pmull2 v0.1q, v0.2d, v10.2d
ext v0.16b, vzr.16b, v0.16b, #12
eor v7.16b, v7.16b, v0.16b
mov w0, v7.s[1]

// Load G(x) and floor(x^48 / G(x)).
ld1 {fold_consts.2d}, [fold_consts_ptr]
__pmull_pre_\p fold_consts

// Use Barrett reduction to compute the final CRC value.
__pmull_\p v1, v0, fold_consts, 2 // high 32 bits * floor(x^48 / G(x))
ushr v1.2d, v1.2d, #32 // /= x^32
__pmull_\p v1, v1, fold_consts // *= G(x)
ushr v0.2d, v0.2d, #48
eor v0.16b, v0.16b, v1.16b // + low 16 nonzero bits
// Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of v0.

umov w0, v0.h[0]
.ifc \p, p8
ldp x29, x30, [sp], #16
.endif
_cleanup:
// scale the result back to 16 bits
lsr x0, x0, #16
ret

.Lless_than_256_bytes_\@:
// Checksumming a buffer of length 16...255 bytes
_less_than_128:
cbz arg3, _cleanup

adr_l fold_consts_ptr, .Lfold_across_16_bytes_consts
movi v0.16b, #0
mov v0.s[3], arg1_low32 // get the initial crc value

// Load the first 16 data bytes.
ldr q7, [buf], #0x10
ldr q7, [arg2], #0x10
CPU_LE( rev64 v7.16b, v7.16b )
CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 )
eor v7.16b, v7.16b, v0.16b // xor the initial crc value

// XOR the first 16 data *bits* with the initial CRC value.
movi v0.16b, #0
mov v0.h[7], init_crc
eor v7.16b, v7.16b, v0.16b
cmp arg3, #16
b.eq _128_done // exactly 16 left
b.lt _less_than_16_left

// Load the fold-across-16-bytes constants.
ld1 {fold_consts.2d}, [fold_consts_ptr], #16
__pmull_pre_\p fold_consts
ldr q10, rk1 // rk1 and rk2 in xmm10

cmp len, #16
b.eq .Lreduce_final_16_bytes_\@ // len == 16
subs len, len, #32
b.ge .Lfold_16_bytes_loop_\@ // 32 <= len <= 255
add len, len, #16
b .Lhandle_partial_segment_\@ // 17 <= len <= 31
.endm
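The folding and Barrett-reduction steps above are hard to eyeball; as a sanity anchor, here is the plain bit-at-a-time CRC16 T10 DIF (generator 0x18bb7, MSB-first, zero initial value) that any of these accelerated paths must agree with. This is an illustrative reference sketch only, not kernel code:

```c
#include <stddef.h>
#include <stdint.h>

static uint16_t crc_t10dif_ref(uint16_t crc, const uint8_t *buf, size_t len)
{
	while (len--) {
		int i;

		crc ^= (uint16_t)(*buf++) << 8;	/* feed the next byte, MSB first */
		for (i = 0; i < 8; i++)		/* one shift/conditional XOR per bit */
			crc = (crc & 0x8000) ? (crc << 1) ^ 0x8bb7 : crc << 1;
	}
	return crc;
}
```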
// update the counter. subtract 32 instead of 16 to save one
// instruction from the loop
subs arg3, arg3, #32
b.ge _16B_reduction_loop

//
// u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
//
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p8)
stp x29, x30, [sp, #-16]!
mov x29, sp
crc_t10dif_pmull p8
SYM_FUNC_END(crc_t10dif_pmull_p8)
add arg3, arg3, #16
b _get_last_two_regs

.align 5
//
// u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
//
// Assumes len >= 16.
//
SYM_FUNC_START(crc_t10dif_pmull_p64)
crc_t10dif_pmull p64
SYM_FUNC_END(crc_t10dif_pmull_p64)
_less_than_16_left:
// shl r9, 4
adr x0, tbl_shf_table + 16
sub x0, x0, arg3
ld1 {v0.16b}, [x0]
movi v9.16b, #0x80
eor v0.16b, v0.16b, v9.16b
tbl v7.16b, {v7.16b}, v0.16b
b _128_done
ENDPROC(crc_t10dif_pmull)

.section ".rodata", "a"
// precomputed constants
// these constants are precomputed from the poly:
// 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 4
// Q = 0x18BB70000
// rk1 = 2^(32*3) mod Q << 32
// rk2 = 2^(32*5) mod Q << 32
// rk3 = 2^(32*15) mod Q << 32
// rk4 = 2^(32*17) mod Q << 32
// rk5 = 2^(32*3) mod Q << 32
// rk6 = 2^(32*2) mod Q << 32
// rk7 = floor(2^64/Q)
// rk8 = Q

// Fold constants precomputed from the polynomial 0x18bb7
// G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
.Lfold_across_128_bytes_consts:
.quad 0x0000000000006123 // x^(8*128) mod G(x)
.quad 0x0000000000002295 // x^(8*128+64) mod G(x)
// .Lfold_across_64_bytes_consts:
.quad 0x0000000000001069 // x^(4*128) mod G(x)
.quad 0x000000000000dd31 // x^(4*128+64) mod G(x)
// .Lfold_across_32_bytes_consts:
.quad 0x000000000000857d // x^(2*128) mod G(x)
.quad 0x0000000000007acc // x^(2*128+64) mod G(x)
.Lfold_across_16_bytes_consts:
.quad 0x000000000000a010 // x^(1*128) mod G(x)
.quad 0x0000000000001faa // x^(1*128+64) mod G(x)
// .Lfinal_fold_consts:
.quad 0x1368000000000000 // x^48 * (x^48 mod G(x))
.quad 0x2d56000000000000 // x^48 * (x^80 mod G(x))
// .Lbarrett_reduction_consts:
.quad 0x0000000000018bb7 // G(x)
.quad 0x00000001f65a57f8 // floor(x^48 / G(x))
rk1: .octa 0x06df0000000000002d56000000000000
rk3: .octa 0x7cf50000000000009d9d000000000000
rk5: .octa 0x13680000000000002d56000000000000
rk7: .octa 0x000000018bb7000000000001f65a57f8
rk9: .octa 0xbfd6000000000000ceae000000000000
rk11: .octa 0x713c0000000000001e16000000000000
rk13: .octa 0x80a6000000000000f7f9000000000000
rk15: .octa 0xe658000000000000044c000000000000
rk17: .octa 0xa497000000000000ad18000000000000
rk19: .octa 0xe7b50000000000006ee3000000000000
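The x^(k*128) mod G(x) values above (and the legacy rk* constants kept by the old code path) can be re-derived with a few lines of C: start from the polynomial 1 and repeatedly multiply by x, reducing modulo G(x) = 0x18bb7 whenever degree 16 is reached. A checking sketch, assuming the comments above describe the constants accurately:

```c
#include <stdint.h>

/* x^n mod G(x) for G(x) = 0x18bb7, returned as a 16-bit polynomial. */
static uint16_t x_pow_n_mod_g(unsigned int n)
{
	uint32_t r = 1;		/* the polynomial "1" */
	unsigned int i;

	for (i = 0; i < n; i++) {
		r <<= 1;		/* multiply by x */
		if (r & 0x10000)
			r ^= 0x18bb7;	/* subtract (XOR) G(x) */
	}
	return (uint16_t)r;
}
```

If the table comments hold, x_pow_n_mod_g(8*128) should reproduce 0x6123 and x_pow_n_mod_g(128) should reproduce 0xa010.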

tbl_shf_table:
// use these values for shift constants for the tbl/tbx instruction
// different alignments result in values as shown:
// DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
// DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-2) / shr2
// DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-3) / shr3
// DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
// DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
// DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
// DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
// DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
// DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
// DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
// DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
// DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
// DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
// DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
// DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15

// For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 -
// len] is the index vector to shift left by 'len' bytes, and is also {0x80,
// ..., 0x80} XOR the index vector to shift right by '16 - len' bytes.
.Lbyteshift_table:
.byte 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
.byte 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
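To make the indexing above concrete: tbl returns a zero byte for any index outside 0..15, which is what the 0x8x entries exploit, and XORing the index vector with 0x80 flips which entries are in range. A hypothetical C model of one lookup (names here are illustrative, not kernel APIs):

```c
#include <stdint.h>

/* Emulates: tbl out.16b, {in.16b}, idx.16b, with idx taken from
 * &byteshift_table[16 - len] for 1 <= len <= 15; this shifts the vector
 * left by 'len' bytes (idx XOR 0x80 would shift right by 16 - len). */
static void tbl_byteshift(uint8_t out[16], const uint8_t in[16],
			  const uint8_t *byteshift_table, unsigned int len)
{
	const uint8_t *idx = &byteshift_table[16 - len];
	int i;

	for (i = 0; i < 16; i++)
		out[i] = (idx[i] > 15) ? 0 : in[idx[i]];	/* tbl: out-of-range -> 0 */
}
```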

@@ -16,15 +16,13 @@
#include <linux/string.h>

#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>

#include <asm/neon.h>
#include <asm/simd.h>

#define CRC_T10DIF_PMULL_CHUNK_SIZE 16U

asmlinkage u16 crc_t10dif_pmull_p8(u16 init_crc, const u8 *buf, size_t len);
asmlinkage u16 crc_t10dif_pmull_p64(u16 init_crc, const u8 *buf, size_t len);
asmlinkage u16 crc_t10dif_pmull(u16 init_crc, const u8 buf[], u64 len);

static int crct10dif_init(struct shash_desc *desc)
{
@@ -34,49 +32,15 @@ static int crct10dif_init(struct shash_desc *desc)
return 0;
}

static int crct10dif_update_pmull_p8(struct shash_desc *desc, const u8 *data,
static int crct10dif_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u16 *crc = shash_desc_ctx(desc);

if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
do {
unsigned int chunk = length;

if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
chunk = SZ_4K;

kernel_neon_begin();
*crc = crc_t10dif_pmull_p8(*crc, data, chunk);
kernel_neon_end();
data += chunk;
length -= chunk;
} while (length);
} else {
*crc = crc_t10dif_generic(*crc, data, length);
}

return 0;
}

static int crct10dif_update_pmull_p64(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
u16 *crc = shash_desc_ctx(desc);

if (length >= CRC_T10DIF_PMULL_CHUNK_SIZE && may_use_simd()) {
do {
unsigned int chunk = length;

if (chunk > SZ_4K + CRC_T10DIF_PMULL_CHUNK_SIZE)
chunk = SZ_4K;

kernel_neon_begin();
*crc = crc_t10dif_pmull_p64(*crc, data, chunk);
kernel_neon_end();
data += chunk;
length -= chunk;
} while (length);
kernel_neon_begin();
*crc = crc_t10dif_pmull(*crc, data, length);
kernel_neon_end();
} else {
*crc = crc_t10dif_generic(*crc, data, length);
}
@@ -92,22 +56,10 @@ static int crct10dif_final(struct shash_desc *desc, u8 *out)
return 0;
}

static struct shash_alg crc_t10dif_alg[] = {{
static struct shash_alg crc_t10dif_alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = crct10dif_init,
.update = crct10dif_update_pmull_p8,
.final = crct10dif_final,
.descsize = CRC_T10DIF_DIGEST_SIZE,

.base.cra_name = "crct10dif",
.base.cra_driver_name = "crct10dif-arm64-neon",
.base.cra_priority = 100,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}, {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = crct10dif_init,
.update = crct10dif_update_pmull_p64,
.update = crct10dif_update,
.final = crct10dif_final,
.descsize = CRC_T10DIF_DIGEST_SIZE,

@@ -116,31 +68,20 @@ static struct shash_alg crc_t10dif_alg[] = {{
.base.cra_priority = 200,
.base.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
}};
};

static int __init crc_t10dif_mod_init(void)
{
if (cpu_have_named_feature(PMULL))
return crypto_register_shashes(crc_t10dif_alg,
ARRAY_SIZE(crc_t10dif_alg));
else
/* only register the first array element */
return crypto_register_shash(crc_t10dif_alg);
return crypto_register_shash(&crc_t10dif_alg);
}

static void __exit crc_t10dif_mod_exit(void)
{
if (cpu_have_named_feature(PMULL))
crypto_unregister_shashes(crc_t10dif_alg,
ARRAY_SIZE(crc_t10dif_alg));
else
crypto_unregister_shash(crc_t10dif_alg);
crypto_unregister_shash(&crc_t10dif_alg);
}

module_cpu_feature_match(ASIMD, crc_t10dif_mod_init);
module_cpu_feature_match(PMULL, crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_exit);

MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("crct10dif");
MODULE_ALIAS_CRYPTO("crct10dif-arm64-ce");

@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -16,8 +16,8 @@
T1 .req v2
T2 .req v3
MASK .req v4
XM .req v5
XL .req v6
XL .req v5
XM .req v6
XH .req v7
IN1 .req v7

@@ -46,19 +46,6 @@
ss3 .req v26
ss4 .req v27

XL2 .req v8
XM2 .req v9
XH2 .req v10
XL3 .req v11
XM3 .req v12
XH3 .req v13
TT3 .req v14
TT4 .req v15
HH .req v16
HH3 .req v17
HH4 .req v18
HH34 .req v19

.text
.arch armv8-a+crypto

@@ -147,25 +134,11 @@
.endm

.macro __pmull_pre_p64
add x8, x3, #16
ld1 {HH.2d-HH4.2d}, [x8]

trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
eor SHASH2.16b, SHASH2.16b, T1.16b

trn1 HH34.2d, HH3.2d, HH4.2d
trn2 T1.2d, HH3.2d, HH4.2d
eor HH34.16b, HH34.16b, T1.16b

movi MASK.16b, #0xe1
shl MASK.2d, MASK.2d, #57
.endm

.macro __pmull_pre_p8
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b

// k00_16 := 0x0000000000000000_000000000000ffff
// k32_48 := 0x00000000ffffffff_0000ffffffffffff
movi k32_48.2d, #0xffffffff
@@ -242,86 +215,20 @@
.macro __pmull_ghash, pn
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b

__pmull_pre_\pn

/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
mov x4, xzr
b 3f
b 1f

0: .ifc \pn, p64
tbnz w0, #0, 2f // skip until #blocks is a
tbnz w0, #1, 2f // round multiple of 4

1: ld1 {XM3.16b-TT4.16b}, [x2], #64

sub w0, w0, #4

rev64 T1.16b, XM3.16b
rev64 T2.16b, XH3.16b
rev64 TT4.16b, TT4.16b
rev64 TT3.16b, TT3.16b

ext IN1.16b, TT4.16b, TT4.16b, #8
ext XL3.16b, TT3.16b, TT3.16b, #8

eor TT4.16b, TT4.16b, IN1.16b
pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
pmull XM2.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)

eor TT3.16b, TT3.16b, XL3.16b
pmull2 XH3.1q, HH.2d, XL3.2d // a1 * b1
pmull XL3.1q, HH.1d, XL3.1d // a0 * b0
pmull2 XM3.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)

ext IN1.16b, T2.16b, T2.16b, #8
eor XL2.16b, XL2.16b, XL3.16b
eor XH2.16b, XH2.16b, XH3.16b
eor XM2.16b, XM2.16b, XM3.16b

eor T2.16b, T2.16b, IN1.16b
pmull2 XH3.1q, HH3.2d, IN1.2d // a1 * b1
pmull XL3.1q, HH3.1d, IN1.1d // a0 * b0
pmull XM3.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)

eor XL2.16b, XL2.16b, XL3.16b
eor XH2.16b, XH2.16b, XH3.16b
eor XM2.16b, XM2.16b, XM3.16b

ext IN1.16b, T1.16b, T1.16b, #8
ext TT3.16b, XL.16b, XL.16b, #8
eor XL.16b, XL.16b, IN1.16b
eor T1.16b, T1.16b, TT3.16b

pmull2 XH.1q, HH4.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b
pmull XL.1q, HH4.1d, XL.1d // a0 * b0
pmull2 XM.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)

eor XL.16b, XL.16b, XL2.16b
eor XH.16b, XH.16b, XH2.16b
eor XM.16b, XM.16b, XM2.16b

eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b

__pmull_reduce_p64

eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b

cbz w0, 5f
b 1b
.endif

2: ld1 {T1.2d}, [x2], #16
0: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1

3: /* multiply XL by SHASH in GF(2^128) */
1: /* multiply XL by SHASH in GF(2^128) */
CPU_LE( rev64 T1.16b, T1.16b )

ext T2.16b, XL.16b, XL.16b, #8
@@ -334,7 +241,7 @@ CPU_LE( rev64 T1.16b, T1.16b )
__pmull_\pn XL, XL, SHASH // a0 * b0
__pmull_\pn XM, T1, SHASH2 // (a1 + a0)(b1 + b0)

4: eor T2.16b, XL.16b, XH.16b
eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor XM.16b, XM.16b, T2.16b

@@ -345,7 +252,7 @@ CPU_LE( rev64 T1.16b, T1.16b )

cbnz w0, 0b

5: st1 {XL.2d}, [x1]
st1 {XL.2d}, [x1]
ret
.endm
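Two identities drive the unrolled loop above. Each 128-bit product is split Karatsuba-style into the a1*b1, a0*b0, and (a1 + a0)(b1 + b0) terms the comments name, and four GHASH iterations are aggregated against precomputed powers of H (SHASH, HH, HH3, HH4). As a worked summary in GF(2) arithmetic, where every addition is XOR:

```latex
% Karatsuba split of one carryless product, with a = a_1 x^{64} + a_0:
a \cdot b = a_1 b_1\, x^{128}
          + \bigl[(a_1 + a_0)(b_1 + b_0) + a_1 b_1 + a_0 b_0\bigr] x^{64}
          + a_0 b_0

% Four GHASH steps Y_{j} = (Y_{j-1} + X_j) H folded into one pass:
Y_{i+4} = (Y_i + X_{i+1})\,H^4 + X_{i+2}\,H^3 + X_{i+3}\,H^2 + X_{i+4}\,H
```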

@@ -353,45 +260,27 @@ CPU_LE( rev64 T1.16b, T1.16b )
* void pmull_ghash_update(int blocks, u64 dg[], const char *src,
* struct ghash_key const *k, const char *head)
*/
SYM_FUNC_START(pmull_ghash_update_p64)
ENTRY(pmull_ghash_update_p64)
__pmull_ghash p64
SYM_FUNC_END(pmull_ghash_update_p64)
ENDPROC(pmull_ghash_update_p64)

SYM_FUNC_START(pmull_ghash_update_p8)
ENTRY(pmull_ghash_update_p8)
__pmull_ghash p8
SYM_FUNC_END(pmull_ghash_update_p8)
ENDPROC(pmull_ghash_update_p8)

KS0 .req v8
KS1 .req v9
KS2 .req v10
KS3 .req v11
KS .req v8
CTR .req v9
INP .req v10

INP0 .req v21
INP1 .req v22
INP2 .req v23
INP3 .req v24

K0 .req v25
K1 .req v26
K2 .req v27
K3 .req v28
K4 .req v12
K5 .req v13
K6 .req v4
K7 .req v5
K8 .req v14
K9 .req v15
KK .req v29
KL .req v30
KM .req v31

.macro load_round_keys, rounds, rk, tmp
add \tmp, \rk, #64
ld1 {K0.4s-K3.4s}, [\rk]
ld1 {K4.4s-K5.4s}, [\tmp]
add \tmp, \rk, \rounds, lsl #4
sub \tmp, \tmp, #32
ld1 {KK.4s-KM.4s}, [\tmp]
.macro load_round_keys, rounds, rk
cmp \rounds, #12
blo 2222f /* 128 bits */
beq 1111f /* 192 bits */
ld1 {v17.4s-v18.4s}, [\rk], #32
1111: ld1 {v19.4s-v20.4s}, [\rk], #32
2222: ld1 {v21.4s-v24.4s}, [\rk], #64
ld1 {v25.4s-v28.4s}, [\rk], #64
ld1 {v29.4s-v31.4s}, [\rk]
.endm

.macro enc_round, state, key
@@ -399,382 +288,157 @@ SYM_FUNC_END(pmull_ghash_update_p8)
aesmc \state\().16b, \state\().16b
.endm

.macro enc_qround, s0, s1, s2, s3, key
enc_round \s0, \key
enc_round \s1, \key
enc_round \s2, \key
enc_round \s3, \key
.endm

.macro enc_block, state, rounds, rk, tmp
add \tmp, \rk, #96
ld1 {K6.4s-K7.4s}, [\tmp], #32
.irp key, K0, K1, K2, K3, K4, K5
.macro enc_block, state, rounds
cmp \rounds, #12
b.lo 2222f /* 128 bits */
b.eq 1111f /* 192 bits */
enc_round \state, v17
enc_round \state, v18
1111: enc_round \state, v19
enc_round \state, v20
2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
enc_round \state, \key
.endr

tbnz \rounds, #2, .Lnot128_\@
.Lout256_\@:
enc_round \state, K6
enc_round \state, K7

.Lout192_\@:
enc_round \state, KK
aese \state\().16b, KL.16b
eor \state\().16b, \state\().16b, KM.16b

.subsection 1
.Lnot128_\@:
ld1 {K8.4s-K9.4s}, [\tmp], #32
enc_round \state, K6
enc_round \state, K7
ld1 {K6.4s-K7.4s}, [\tmp]
enc_round \state, K8
enc_round \state, K9
tbz \rounds, #1, .Lout192_\@
b .Lout256_\@
.previous
aese \state\().16b, v30.16b
eor \state\().16b, \state\().16b, v31.16b
.endm

.align 6
.macro pmull_gcm_do_crypt, enc
stp x29, x30, [sp, #-32]!
mov x29, sp
str x19, [sp, #24]
ld1 {SHASH.2d}, [x4]
ld1 {XL.2d}, [x1]
ldr x8, [x5, #8] // load lower counter

load_round_keys x7, x6, x8

ld1 {SHASH.2d}, [x3], #16
ld1 {HH.2d-HH4.2d}, [x3]

trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
eor SHASH2.16b, SHASH2.16b, T1.16b

trn1 HH34.2d, HH3.2d, HH4.2d
trn2 T1.2d, HH3.2d, HH4.2d
eor HH34.16b, HH34.16b, T1.16b

ld1 {XL.2d}, [x4]

cbz x0, 3f // tag only?

ldr w8, [x5, #12] // load lower counter
CPU_LE( rev w8, w8 )

0: mov w9, #4 // max blocks per round
add x10, x0, #0xf
lsr x10, x10, #4 // remaining blocks

subs x0, x0, #64
csel w9, w10, w9, mi
add w8, w8, w9

bmi 1f
ld1 {INP0.16b-INP3.16b}, [x2], #64
.subsection 1
/*
* Populate the four input registers right to left with up to 63 bytes
* of data, using overlapping loads to avoid branches.
*
* INP0 INP1 INP2 INP3
* 1 byte | | | |x |
* 16 bytes | | | |xxxxxxxx|
* 17 bytes | | |xxxxxxxx|x |
* 47 bytes | |xxxxxxxx|xxxxxxxx|xxxxxxx |
* etc etc
*
* Note that this code may read up to 15 bytes before the start of
* the input. It is up to the calling code to ensure this is safe if
* this happens in the first iteration of the loop (i.e., when the
* input size is < 16 bytes)
*/
1: mov x15, #16
ands x19, x0, #0xf
csel x19, x19, x15, ne
adr_l x17, .Lpermute_table + 16

sub x11, x15, x19
add x12, x17, x11
sub x17, x17, x11
ld1 {T1.16b}, [x12]
sub x10, x1, x11
sub x11, x2, x11

cmp x0, #-16
csel x14, x15, xzr, gt
cmp x0, #-32
csel x15, x15, xzr, gt
cmp x0, #-48
csel x16, x19, xzr, gt
csel x1, x1, x10, gt
csel x2, x2, x11, gt

ld1 {INP0.16b}, [x2], x14
ld1 {INP1.16b}, [x2], x15
ld1 {INP2.16b}, [x2], x16
ld1 {INP3.16b}, [x2]
tbl INP3.16b, {INP3.16b}, T1.16b
b 2f
.previous

2: .if \enc == 0
bl pmull_gcm_ghash_4x
.endif

bl pmull_gcm_enc_4x

tbnz x0, #63, 6f
st1 {INP0.16b-INP3.16b}, [x1], #64
.if \enc == 1
bl pmull_gcm_ghash_4x
.endif
bne 0b

3: ldp x19, x10, [sp, #24]
cbz x10, 5f // output tag?

ld1 {INP3.16b}, [x10] // load lengths[]
mov w9, #1
bl pmull_gcm_ghash_4x

mov w11, #(0x1 << 24) // BE '1U'
ld1 {KS0.16b}, [x5]
mov KS0.s[3], w11

enc_block KS0, x7, x6, x12

ext XL.16b, XL.16b, XL.16b, #8
rev64 XL.16b, XL.16b
eor XL.16b, XL.16b, KS0.16b

.if \enc == 1
st1 {XL.16b}, [x10] // store tag
.else
ldp x11, x12, [sp, #40] // load tag pointer and authsize
adr_l x17, .Lpermute_table
ld1 {KS0.16b}, [x11] // load supplied tag
add x17, x17, x12
ld1 {KS1.16b}, [x17] // load permute vector

cmeq XL.16b, XL.16b, KS0.16b // compare tags
mvn XL.16b, XL.16b // -1 for fail, 0 for pass
tbl XL.16b, {XL.16b}, KS1.16b // keep authsize bytes only
sminv b0, XL.16b // signed minimum across XL
smov w0, v0.b[0] // return b0
.endif

4: ldp x29, x30, [sp], #32
ret

5:
CPU_LE( rev w8, w8 )
str w8, [x5, #12] // store lower counter
st1 {XL.2d}, [x4]
b 4b

6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
sub x17, x17, x19, lsl #1

cmp w9, #1
beq 7f
.subsection 1
7: ld1 {INP2.16b}, [x1]
tbx INP2.16b, {INP3.16b}, T1.16b
mov INP3.16b, INP2.16b
b 8f
.previous

st1 {INP0.16b}, [x1], x14
st1 {INP1.16b}, [x1], x15
st1 {INP2.16b}, [x1], x16
tbl INP3.16b, {INP3.16b}, T1.16b
tbx INP3.16b, {INP2.16b}, T2.16b
8: st1 {INP3.16b}, [x1]

.if \enc == 1
ld1 {T1.16b}, [x17]
tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
bl pmull_gcm_ghash_4x
.endif
b 3b
.endm

/*
* void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[],
* struct ghash_key const *k, u64 dg[], u8 ctr[],
* int rounds, u8 tag)
*/
SYM_FUNC_START(pmull_gcm_encrypt)
pmull_gcm_do_crypt 1
SYM_FUNC_END(pmull_gcm_encrypt)

/*
* void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
* struct ghash_key const *k, u64 dg[], u8 ctr[],
* int rounds, u8 tag)
*/
SYM_FUNC_START(pmull_gcm_decrypt)
pmull_gcm_do_crypt 0
SYM_FUNC_END(pmull_gcm_decrypt)

SYM_FUNC_START_LOCAL(pmull_gcm_ghash_4x)
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE( rev x8, x8 )
shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b

rev64 T1.16b, INP0.16b
rev64 T2.16b, INP1.16b
rev64 TT3.16b, INP2.16b
rev64 TT4.16b, INP3.16b
.if \enc == 1
ld1 {KS.16b}, [x7]
.endif

ext XL.16b, XL.16b, XL.16b, #8
0: ld1 {CTR.8b}, [x5] // load upper counter
ld1 {INP.16b}, [x3], #16
rev x9, x8
add x8, x8, #1
sub w0, w0, #1
ins CTR.d[1], x9 // set lower counter

tbz w9, #2, 0f // <4 blocks?
.subsection 1
0: movi XH2.16b, #0
movi XM2.16b, #0
movi XL2.16b, #0
.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
st1 {INP.16b}, [x2], #16
.endif

tbz w9, #0, 1f // 2 blocks?
tbz w9, #1, 2f // 1 block?
rev64 T1.16b, INP.16b

eor T2.16b, T2.16b, XL.16b
ext T1.16b, T2.16b, T2.16b, #8
b .Lgh3
cmp w6, #12
b.ge 2f // AES-192/256?

1: eor TT3.16b, TT3.16b, XL.16b
ext T2.16b, TT3.16b, TT3.16b, #8
b .Lgh2
1: enc_round CTR, v21

2: eor TT4.16b, TT4.16b, XL.16b
ext IN1.16b, TT4.16b, TT4.16b, #8
b .Lgh1
.previous

eor T1.16b, T1.16b, XL.16b
ext T2.16b, XL.16b, XL.16b, #8
ext IN1.16b, T1.16b, T1.16b, #8

pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
eor T1.16b, T1.16b, IN1.16b
pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
enc_round CTR, v22

ext T1.16b, T2.16b, T2.16b, #8
.Lgh3: eor T2.16b, T2.16b, T1.16b
pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
pmull XL.1q, HH3.1d, T1.1d // a0 * b0
pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
eor T1.16b, T1.16b, T2.16b
eor XL.16b, XL.16b, IN1.16b

eor XH2.16b, XH2.16b, XH.16b
eor XL2.16b, XL2.16b, XL.16b
eor XM2.16b, XM2.16b, XM.16b
enc_round CTR, v23

ext T2.16b, TT3.16b, TT3.16b, #8
.Lgh2: eor TT3.16b, TT3.16b, T2.16b
pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
pmull XL.1q, HH.1d, T2.1d // a0 * b0
pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
pmull2 XH.1q, SHASH.2d, XL.2d // a1 * b1
eor T1.16b, T1.16b, XL.16b

eor XH2.16b, XH2.16b, XH.16b
eor XL2.16b, XL2.16b, XL.16b
eor XM2.16b, XM2.16b, XM.16b
enc_round CTR, v24

ext IN1.16b, TT4.16b, TT4.16b, #8
.Lgh1: eor TT4.16b, TT4.16b, IN1.16b
pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
pmull XL.1q, SHASH.1d, XL.1d // a0 * b0
pmull XM.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)

eor XH.16b, XH.16b, XH2.16b
eor XL.16b, XL.16b, XL2.16b
eor XM.16b, XM.16b, XM2.16b
enc_round CTR, v25

eor T2.16b, XL.16b, XH.16b
ext T1.16b, XL.16b, XH.16b, #8
eor T2.16b, XL.16b, XH.16b
eor XM.16b, XM.16b, T1.16b

enc_round CTR, v26

eor XM.16b, XM.16b, T2.16b
pmull T2.1q, XL.1d, MASK.1d

__pmull_reduce_p64
enc_round CTR, v27

mov XH.d[0], XM.d[1]
mov XM.d[1], XL.d[0]

enc_round CTR, v28

eor XL.16b, XM.16b, T2.16b

enc_round CTR, v29

ext T2.16b, XL.16b, XL.16b, #8

aese CTR.16b, v30.16b

pmull XL.1q, XL.1d, MASK.1d
eor T2.16b, T2.16b, XH.16b

eor KS.16b, CTR.16b, v31.16b

eor XL.16b, XL.16b, T2.16b

ret
SYM_FUNC_END(pmull_gcm_ghash_4x)
.if \enc == 0
eor INP.16b, INP.16b, KS.16b
st1 {INP.16b}, [x2], #16
.endif

SYM_FUNC_START_LOCAL(pmull_gcm_enc_4x)
ld1 {KS0.16b}, [x5] // load upper counter
sub w10, w8, #4
sub w11, w8, #3
sub w12, w8, #2
sub w13, w8, #1
rev w10, w10
rev w11, w11
rev w12, w12
rev w13, w13
mov KS1.16b, KS0.16b
mov KS2.16b, KS0.16b
mov KS3.16b, KS0.16b
ins KS0.s[3], w10 // set lower counter
ins KS1.s[3], w11
ins KS2.s[3], w12
ins KS3.s[3], w13
cbnz w0, 0b

add x10, x6, #96 // round key pointer
ld1 {K6.4s-K7.4s}, [x10], #32
.irp key, K0, K1, K2, K3, K4, K5
enc_qround KS0, KS1, KS2, KS3, \key
.endr
CPU_LE( rev x8, x8 )
st1 {XL.2d}, [x1]
str x8, [x5, #8] // store lower counter

tbnz x7, #2, .Lnot128
.subsection 1
.Lnot128:
ld1 {K8.4s-K9.4s}, [x10], #32
.irp key, K6, K7
enc_qround KS0, KS1, KS2, KS3, \key
.endr
ld1 {K6.4s-K7.4s}, [x10]
.irp key, K8, K9
enc_qround KS0, KS1, KS2, KS3, \key
.endr
tbz x7, #1, .Lout192
b .Lout256
.previous

.Lout256:
.irp key, K6, K7
enc_qround KS0, KS1, KS2, KS3, \key
.endr

.Lout192:
enc_qround KS0, KS1, KS2, KS3, KK

aese KS0.16b, KL.16b
aese KS1.16b, KL.16b
aese KS2.16b, KL.16b
aese KS3.16b, KL.16b

eor KS0.16b, KS0.16b, KM.16b
eor KS1.16b, KS1.16b, KM.16b
eor KS2.16b, KS2.16b, KM.16b
eor KS3.16b, KS3.16b, KM.16b

eor INP0.16b, INP0.16b, KS0.16b
eor INP1.16b, INP1.16b, KS1.16b
eor INP2.16b, INP2.16b, KS2.16b
eor INP3.16b, INP3.16b, KS3.16b
.if \enc == 1
st1 {KS.16b}, [x7]
.endif

ret
SYM_FUNC_END(pmull_gcm_enc_4x)

.section ".rodata", "a"
.align 6
.Lpermute_table:
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
.byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
.byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
.previous
2: b.eq 3f // AES-192?
enc_round CTR, v17
enc_round CTR, v18
3: enc_round CTR, v19
enc_round CTR, v20
b 1b
.endm

/*
* void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
* struct ghash_key const *k, u8 ctr[],
* int rounds, u8 ks[])
*/
ENTRY(pmull_gcm_encrypt)
pmull_gcm_do_crypt 1
ENDPROC(pmull_gcm_encrypt)

/*
* void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
* struct ghash_key const *k, u8 ctr[],
* int rounds)
*/
ENTRY(pmull_gcm_decrypt)
pmull_gcm_do_crypt 0
ENDPROC(pmull_gcm_decrypt)

/*
* void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
*/
ENTRY(pmull_gcm_encrypt_block)
cbz x2, 0f
load_round_keys w3, x2
0: ld1 {v0.16b}, [x1]
enc_block v0, w3
st1 {v0.16b}, [x0]
ret
ENDPROC(pmull_gcm_encrypt_block)

@@ -1,7 +1,7 @@
/*
* Accelerated GHASH implementation with ARMv8 PMULL instructions.
*
* Copyright (C) 2014 - 2018 Linaro Ltd. <ard.biesheuvel@linaro.org>
* Copyright (C) 2014 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 as published
@@ -17,7 +17,6 @@
#include <crypto/gf128mul.h>
#include <crypto/internal/aead.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <crypto/scatterwalk.h>
#include <linux/cpufeature.h>
@@ -34,8 +33,9 @@ MODULE_ALIAS_CRYPTO("ghash");
#define GCM_IV_SIZE 12

struct ghash_key {
be128 k;
u64 h[][2];
u64 a;
u64 b;
be128 k;
};

struct ghash_desc_ctx {
@@ -50,18 +50,29 @@ struct gcm_aes_ctx {
};

asmlinkage void pmull_ghash_update_p64(int blocks, u64 dg[], const char *src,
u64 const h[][2], const char *head);
struct ghash_key const *k,
const char *head);

asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
u64 const h[][2], const char *head);
struct ghash_key const *k,
const char *head);

asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
u64 const h[][2], u64 dg[], u8 ctr[],
u32 const rk[], int rounds, u8 tag[]);
asmlinkage int pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
u64 const h[][2], u64 dg[], u8 ctr[],
u32 const rk[], int rounds, const u8 l[],
const u8 tag[], u64 authsize);
static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head);

asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
u8 ctr[], int rounds, u8 ks[]);

asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
const u8 src[], struct ghash_key const *k,
u8 ctr[], int rounds);

asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
u32 const rk[], int rounds);

asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);

static int ghash_init(struct shash_desc *desc)
{
@@ -73,48 +84,34 @@ static int ghash_init(struct shash_desc *desc)

static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head)
{
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

do {
const u8 *in = src;

if (head) {
in = head;
blocks++;
head = NULL;
} else {
src += GHASH_BLOCK_SIZE;
}

crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
gf128mul_lle(&dst, &key->k);
} while (--blocks);

dg[0] = be64_to_cpu(dst.b);
dg[1] = be64_to_cpu(dst.a);
}

static __always_inline
void ghash_do_simd_update(int blocks, u64 dg[], const char *src,
struct ghash_key *key, const char *head,
void (*simd_update)(int blocks, u64 dg[],
const char *src,
u64 const h[][2],
const char *head))
{
if (likely(may_use_simd())) {
kernel_neon_begin();
simd_update(blocks, dg, src, key->h, head);
pmull_ghash_update(blocks, dg, src, key, head);
kernel_neon_end();
} else {
ghash_do_update(blocks, dg, src, key, head);
be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };

do {
const u8 *in = src;

if (head) {
in = head;
blocks++;
head = NULL;
} else {
src += GHASH_BLOCK_SIZE;
}

crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
gf128mul_lle(&dst, &key->k);
} while (--blocks);

dg[0] = be64_to_cpu(dst.b);
dg[1] = be64_to_cpu(dst.a);
}
}

/* avoid hogging the CPU for too long */
#define MAX_BLOCKS (SZ_64K / GHASH_BLOCK_SIZE)

static int ghash_update(struct shash_desc *desc, const u8 *src,
unsigned int len)
{
@@ -138,17 +135,11 @@ static int ghash_update(struct shash_desc *desc, const u8 *src,
blocks = len / GHASH_BLOCK_SIZE;
len %= GHASH_BLOCK_SIZE;

do {
int chunk = min(blocks, MAX_BLOCKS);
ghash_do_update(blocks, ctx->digest, src, key,
partial ? ctx->buf : NULL);

ghash_do_simd_update(chunk, ctx->digest, src, key,
partial ? ctx->buf : NULL,
pmull_ghash_update_p8);

blocks -= chunk;
src += chunk * GHASH_BLOCK_SIZE;
partial = 0;
} while (unlikely(blocks > 0));
src += blocks * GHASH_BLOCK_SIZE;
partial = 0;
}
if (len)
memcpy(ctx->buf + partial, src, len);
@@ -165,25 +156,34 @@ static int ghash_final(struct shash_desc *desc, u8 *dst)

memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);

ghash_do_simd_update(1, ctx->digest, ctx->buf, key, NULL,
pmull_ghash_update_p8);
ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
}
put_unaligned_be64(ctx->digest[1], dst);
put_unaligned_be64(ctx->digest[0], dst + 8);

memzero_explicit(ctx, sizeof(*ctx));
*ctx = (struct ghash_desc_ctx){};
return 0;
}

static void ghash_reflect(u64 h[], const be128 *k)
static int __ghash_setkey(struct ghash_key *key,
const u8 *inkey, unsigned int keylen)
{
u64 carry = be64_to_cpu(k->a) & BIT(63) ? 1 : 0;
u64 a, b;

h[0] = (be64_to_cpu(k->b) << 1) | carry;
h[1] = (be64_to_cpu(k->a) << 1) | (be64_to_cpu(k->b) >> 63);
/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

if (carry)
h[1] ^= 0xc200000000000000UL;
/* perform multiplication by 'x' in GF(2^128) */
b = get_unaligned_be64(inkey);
a = get_unaligned_be64(inkey + 8);

key->a = (a << 1) | (b >> 63);
key->b = (b << 1) | (a >> 63);

if (b >> 63)
key->b ^= 0xc200000000000000UL;

return 0;
}
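For reference, the shift-and-conditional-XOR in ghash_reflect/__ghash_setkey above is multiplication of the hash key by x in GF(2^128) under GCM's bit-reflected conventions, with the carry out of the top bit folded back via 0xC200000000000000. That constant is 0xE1 shifted to the top of a 64-bit lane (0xe1 << 57, exactly how the assembly builds MASK), where 0xE1... is the conventional bit-reflected encoding of the field polynomial's low terms. A hedged restatement:

```latex
% GCM field polynomial, and the pre-shifted key used by the PMULL paths:
g(x) = x^{128} + x^7 + x^2 + x + 1, \qquad H' = H \cdot x \bmod g(x)
% A carry out of bit 127 is reduced by folding in 0xC200000000000000
% (= 0xE1 << 57 within a 64-bit lane), the shifted bit-reflected form of
% the low terms x^7 + x^2 + x + 1 of g(x).
```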

static int ghash_setkey(struct crypto_shash *tfm,
@@ -196,19 +196,16 @@ static int ghash_setkey(struct crypto_shash *tfm,
return -EINVAL;
}

/* needed for the fallback */
memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);

ghash_reflect(key->h[0], &key->k);
return 0;
return __ghash_setkey(key, inkey, keylen);
}

static struct shash_alg ghash_alg = {
.base.cra_name = "ghash",
.base.cra_driver_name = "ghash-neon",
.base.cra_priority = 150,
.base.cra_driver_name = "ghash-ce",
.base.cra_priority = 200,
.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.base.cra_blocksize = GHASH_BLOCK_SIZE,
.base.cra_ctxsize = sizeof(struct ghash_key) + sizeof(u64[2]),
.base.cra_ctxsize = sizeof(struct ghash_key),
.base.cra_module = THIS_MODULE,

.digestsize = GHASH_DIGEST_SIZE,
@@ -236,33 +233,18 @@ static int gcm_setkey(struct crypto_aead *tfm, const u8 *inkey,
{
struct gcm_aes_ctx *ctx = crypto_aead_ctx(tfm);
u8 key[GHASH_BLOCK_SIZE];
be128 h;
int ret;

ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
if (ret) {
tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
return -EINVAL;
}

aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
num_rounds(&ctx->aes_key));

/* needed for the fallback */
memcpy(&ctx->ghash_key.k, key, GHASH_BLOCK_SIZE);

ghash_reflect(ctx->ghash_key.h[0], &ctx->ghash_key.k);

h = ctx->ghash_key.k;
gf128mul_lle(&h, &ctx->ghash_key.k);
ghash_reflect(ctx->ghash_key.h[1], &h);

gf128mul_lle(&h, &ctx->ghash_key.k);
ghash_reflect(ctx->ghash_key.h[2], &h);

gf128mul_lle(&h, &ctx->ghash_key.k);
ghash_reflect(ctx->ghash_key.h[3], &h);

return 0;
return __ghash_setkey(&ctx->ghash_key, key, sizeof(key));
}

static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
@@ -294,9 +276,8 @@ static void gcm_update_mac(u64 dg[], const u8 *src, int count, u8 buf[],
if (count >= GHASH_BLOCK_SIZE || *buf_count == GHASH_BLOCK_SIZE) {
int blocks = count / GHASH_BLOCK_SIZE;

ghash_do_simd_update(blocks, dg, src, &ctx->ghash_key,
*buf_count ? buf : NULL,
pmull_ghash_update_p64);
ghash_do_update(blocks, dg, src, &ctx->ghash_key,
*buf_count ? buf : NULL);

src += blocks * GHASH_BLOCK_SIZE;
count %= GHASH_BLOCK_SIZE;
@@ -340,117 +321,121 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])

if (buf_count) {
memset(&buf[buf_count], 0, GHASH_BLOCK_SIZE - buf_count);
ghash_do_simd_update(1, dg, buf, &ctx->ghash_key, NULL,
pmull_ghash_update_p64);
ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
}
}

static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
u64 dg[], u8 tag[], int cryptlen)
{
u8 mac[AES_BLOCK_SIZE];
u128 lengths;

lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(cryptlen * 8);

ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL);

put_unaligned_be64(dg[1], mac);
put_unaligned_be64(dg[0], mac + 8);

crypto_xor(tag, mac, AES_BLOCK_SIZE);
}

static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
u8 ks[AES_BLOCK_SIZE];
u8 tag[AES_BLOCK_SIZE];
u64 dg[2] = {};
be128 lengths;
u8 *tag;
int err;

lengths.a = cpu_to_be64(req->assoclen * 8);
lengths.b = cpu_to_be64(req->cryptlen * 8);

if (req->assoclen)
gcm_calculate_auth_mac(req, dg);

memcpy(iv, req->iv, GCM_IV_SIZE);
put_unaligned_be32(2, iv + GCM_IV_SIZE);

err = skcipher_walk_aead_encrypt(&walk, req, false);
put_unaligned_be32(1, iv + GCM_IV_SIZE);

if (likely(may_use_simd())) {
do {
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
int nbytes = walk.nbytes;
kernel_neon_begin();

tag = (u8 *)&lengths;
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);
pmull_gcm_encrypt_block(ks, iv, NULL,
num_rounds(&ctx->aes_key));
put_unaligned_be32(3, iv + GCM_IV_SIZE);

if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
src = dst = memcpy(buf + sizeof(buf) - nbytes,
src, nbytes);
} else if (nbytes < walk.total) {
nbytes &= ~(AES_BLOCK_SIZE - 1);
tag = NULL;
}
err = skcipher_walk_aead_encrypt(&walk, req, true);

kernel_neon_begin();
pmull_gcm_encrypt(nbytes, dst, src, ctx->ghash_key.h,
dg, iv, ctx->aes_key.key_enc, nrounds,
tag);
kernel_neon_end();

if (unlikely(!nbytes))
break;

if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
memcpy(walk.dst.virt.addr,
buf + sizeof(buf) - nbytes, nbytes);

err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
} while (walk.nbytes);
} else {
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
int remaining = blocks;

do {
aes_encrypt(&ctx->aes_key, buf, iv);
crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);

dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
} while (--remaining > 0);

ghash_do_update(blocks, dg, walk.dst.virt.addr,
&ctx->ghash_key, NULL);
pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
walk.src.virt.addr, &ctx->ghash_key,
iv, num_rounds(&ctx->aes_key), ks);

err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}
kernel_neon_end();
} else {
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
num_rounds(&ctx->aes_key));
put_unaligned_be32(2, iv + GCM_IV_SIZE);

/* handle the tail */
if (walk.nbytes) {
aes_encrypt(&ctx->aes_key, buf, iv);
err = skcipher_walk_aead_encrypt(&walk, req, true);

crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
buf, walk.nbytes);
while (walk.nbytes >= AES_BLOCK_SIZE) {
int blocks = walk.nbytes / AES_BLOCK_SIZE;
u8 *dst = walk.dst.virt.addr;
u8 *src = walk.src.virt.addr;

memcpy(buf, walk.dst.virt.addr, walk.nbytes);
memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
do {
__aes_arm64_encrypt(ctx->aes_key.key_enc,
ks, iv,
num_rounds(&ctx->aes_key));
crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);

dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
} while (--blocks > 0);

ghash_do_update(walk.nbytes / AES_BLOCK_SIZE, dg,
walk.dst.virt.addr, &ctx->ghash_key,
NULL);

err = skcipher_walk_done(&walk,
walk.nbytes % AES_BLOCK_SIZE);
}

tag = (u8 *)&lengths;
ghash_do_update(1, dg, tag, &ctx->ghash_key,
walk.nbytes ? buf : NULL);

if (walk.nbytes)
err = skcipher_walk_done(&walk, 0);
__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
num_rounds(&ctx->aes_key));
}

put_unaligned_be64(dg[1], tag);
put_unaligned_be64(dg[0], tag + 8);
put_unaligned_be32(1, iv + GCM_IV_SIZE);
aes_encrypt(&ctx->aes_key, iv, iv);
crypto_xor(tag, iv, AES_BLOCK_SIZE);
/* handle the tail */
if (walk.nbytes) {
u8 buf[GHASH_BLOCK_SIZE];

crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
walk.nbytes);

memcpy(buf, walk.dst.virt.addr, walk.nbytes);
memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);

err = skcipher_walk_done(&walk, 0);
}

if (err)
return err;

gcm_final(req, ctx, dg, tag, req->cryptlen);

/* copy authtag to end of dst */
scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
@@ -463,81 +448,62 @@ static int gcm_decrypt(struct aead_request *req)
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
u8 otag[AES_BLOCK_SIZE];
u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
u8 tag[AES_BLOCK_SIZE];
|
||||
u8 buf[GHASH_BLOCK_SIZE];
|
||||
u64 dg[2] = {};
|
||||
be128 lengths;
|
||||
u8 *tag;
|
||||
int err;
|
||||
|
||||
lengths.a = cpu_to_be64(req->assoclen * 8);
|
||||
lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
|
||||
|
||||
if (req->assoclen)
|
||||
gcm_calculate_auth_mac(req, dg);
|
||||
|
||||
memcpy(iv, req->iv, GCM_IV_SIZE);
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
|
||||
scatterwalk_map_and_copy(otag, req->src,
|
||||
req->assoclen + req->cryptlen - authsize,
|
||||
authsize, 0);
|
||||
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, false);
|
||||
put_unaligned_be32(1, iv + GCM_IV_SIZE);
|
||||
|
||||
if (likely(may_use_simd())) {
|
||||
int ret;
|
||||
kernel_neon_begin();
|
||||
|
||||
do {
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
int nbytes = walk.nbytes;
|
||||
pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
|
||||
tag = (u8 *)&lengths;
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, true);
|
||||
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
|
||||
pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
|
||||
walk.src.virt.addr, &ctx->ghash_key,
|
||||
iv, num_rounds(&ctx->aes_key));
|
||||
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
if (walk.nbytes)
|
||||
pmull_gcm_encrypt_block(iv, iv, NULL,
|
||||
num_rounds(&ctx->aes_key));
|
||||
|
||||
kernel_neon_end();
|
||||
} else {
|
||||
__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv,
|
||||
num_rounds(&ctx->aes_key));
|
||||
put_unaligned_be32(2, iv + GCM_IV_SIZE);
|
||||
|
||||
err = skcipher_walk_aead_decrypt(&walk, req, true);
|
||||
|
||||
if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
|
||||
src = dst = memcpy(buf + sizeof(buf) - nbytes,
|
||||
src, nbytes);
|
||||
} else if (nbytes < walk.total) {
|
||||
nbytes &= ~(AES_BLOCK_SIZE - 1);
|
||||
tag = NULL;
|
||||
}
|
||||
|
||||
kernel_neon_begin();
|
||||
ret = pmull_gcm_decrypt(nbytes, dst, src,
|
||||
ctx->ghash_key.h,
|
||||
dg, iv, ctx->aes_key.key_enc,
|
||||
nrounds, tag, otag, authsize);
|
||||
kernel_neon_end();
|
||||
|
||||
if (unlikely(!nbytes))
|
||||
break;
|
||||
|
||||
if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
|
||||
memcpy(walk.dst.virt.addr,
|
||||
buf + sizeof(buf) - nbytes, nbytes);
|
||||
|
||||
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
|
||||
} while (walk.nbytes);
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
if (ret)
|
||||
return -EBADMSG;
|
||||
} else {
|
||||
while (walk.nbytes >= AES_BLOCK_SIZE) {
|
||||
int blocks = walk.nbytes / AES_BLOCK_SIZE;
|
||||
const u8 *src = walk.src.virt.addr;
|
||||
u8 *dst = walk.dst.virt.addr;
|
||||
u8 *src = walk.src.virt.addr;
|
||||
|
||||
ghash_do_update(blocks, dg, walk.src.virt.addr,
|
||||
&ctx->ghash_key, NULL);
|
||||
|
||||
do {
|
||||
aes_encrypt(&ctx->aes_key, buf, iv);
|
||||
__aes_arm64_encrypt(ctx->aes_key.key_enc,
|
||||
buf, iv,
|
||||
num_rounds(&ctx->aes_key));
|
||||
crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
|
||||
crypto_inc(iv, AES_BLOCK_SIZE);
|
||||
|
||||
@ -548,40 +514,35 @@ static int gcm_decrypt(struct aead_request *req)
|
||||
err = skcipher_walk_done(&walk,
|
||||
walk.nbytes % AES_BLOCK_SIZE);
|
||||
}
|
||||
|
||||
/* handle the tail */
|
||||
if (walk.nbytes) {
|
||||
memcpy(buf, walk.src.virt.addr, walk.nbytes);
|
||||
memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
|
||||
}
|
||||
|
||||
tag = (u8 *)&lengths;
|
||||
ghash_do_update(1, dg, tag, &ctx->ghash_key,
|
||||
walk.nbytes ? buf : NULL);
|
||||
|
||||
if (walk.nbytes) {
|
||||
aes_encrypt(&ctx->aes_key, buf, iv);
|
||||
|
||||
crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
|
||||
buf, walk.nbytes);
|
||||
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
put_unaligned_be64(dg[1], tag);
|
||||
put_unaligned_be64(dg[0], tag + 8);
|
||||
put_unaligned_be32(1, iv + GCM_IV_SIZE);
|
||||
aes_encrypt(&ctx->aes_key, iv, iv);
|
||||
crypto_xor(tag, iv, AES_BLOCK_SIZE);
|
||||
|
||||
if (crypto_memneq(tag, otag, authsize)) {
|
||||
memzero_explicit(tag, AES_BLOCK_SIZE);
|
||||
return -EBADMSG;
|
||||
}
|
||||
if (walk.nbytes)
|
||||
__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
|
||||
num_rounds(&ctx->aes_key));
|
||||
}
|
||||
|
||||
/* handle the tail */
|
||||
if (walk.nbytes) {
|
||||
memcpy(buf, walk.src.virt.addr, walk.nbytes);
|
||||
memset(buf + walk.nbytes, 0, GHASH_BLOCK_SIZE - walk.nbytes);
|
||||
ghash_do_update(1, dg, buf, &ctx->ghash_key, NULL);
|
||||
|
||||
crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
|
||||
walk.nbytes);
|
||||
|
||||
err = skcipher_walk_done(&walk, 0);
|
||||
}
|
||||
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
|
||||
|
||||
/* compare calculated auth tag with the stored one */
|
||||
scatterwalk_map_and_copy(buf, req->src,
|
||||
req->assoclen + req->cryptlen - authsize,
|
||||
authsize, 0);
|
||||
|
||||
if (crypto_memneq(tag, buf, authsize))
|
||||
return -EBADMSG;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -598,28 +559,39 @@ static struct aead_alg gcm_aes_alg = {
|
||||
.base.cra_driver_name = "gcm-aes-ce",
|
||||
.base.cra_priority = 300,
|
||||
.base.cra_blocksize = 1,
|
||||
.base.cra_ctxsize = sizeof(struct gcm_aes_ctx) +
|
||||
4 * sizeof(u64[2]),
|
||||
.base.cra_ctxsize = sizeof(struct gcm_aes_ctx),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
};
|
||||
|
||||
static int __init ghash_ce_mod_init(void)
|
||||
{
|
||||
if (!cpu_have_named_feature(ASIMD))
|
||||
int ret;
|
||||
|
||||
if (!(elf_hwcap & HWCAP_ASIMD))
|
||||
return -ENODEV;
|
||||
|
||||
if (cpu_have_named_feature(PMULL))
|
||||
return crypto_register_aead(&gcm_aes_alg);
|
||||
if (elf_hwcap & HWCAP_PMULL)
|
||||
pmull_ghash_update = pmull_ghash_update_p64;
|
||||
|
||||
return crypto_register_shash(&ghash_alg);
|
||||
else
|
||||
pmull_ghash_update = pmull_ghash_update_p8;
|
||||
|
||||
ret = crypto_register_shash(&ghash_alg);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (elf_hwcap & HWCAP_PMULL) {
|
||||
ret = crypto_register_aead(&gcm_aes_alg);
|
||||
if (ret)
|
||||
crypto_unregister_shash(&ghash_alg);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void __exit ghash_ce_mod_exit(void)
|
||||
{
|
||||
if (cpu_have_named_feature(PMULL))
|
||||
crypto_unregister_aead(&gcm_aes_alg);
|
||||
else
|
||||
crypto_unregister_shash(&ghash_alg);
|
||||
crypto_unregister_shash(&ghash_alg);
|
||||
crypto_unregister_aead(&gcm_aes_alg);
|
||||
}
|
||||
|
||||
static const struct cpu_feature ghash_cpu_feature[] = {
|
||||
|
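The init/exit hunks above restore the older two-stage registration: the ghash shash is registered first, the GCM AEAD is added only when PMULL is present, and the shash is unwound if the AEAD registration fails. A minimal standalone C sketch of that register-then-roll-back pattern (the stub functions below are illustrative stand-ins for crypto_register_shash()/crypto_register_aead(), not the kernel API):

#include <stdio.h>

/* illustrative stand-ins for the real registration calls */
static int register_shash(void)    { puts("ghash registered"); return 0; }
static int register_aead(void)     { puts("gcm-aes registered"); return 0; }
static void unregister_shash(void) { puts("ghash unregistered"); }

static int mod_init(int have_pmull)
{
	int ret = register_shash();

	if (ret)
		return ret;

	if (have_pmull) {
		ret = register_aead();
		if (ret)
			unregister_shash();	/* roll back the first step */
	}
	return ret;
}

int main(void)
{
	return mod_init(1);
}

Unregistering both unconditionally in the exit path, as the new code above does, only works because the AEAD is always registered when PMULL is available; the old code mirrored the conditional instead.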
@@ -1,103 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * NH - ε-almost-universal hash function, ARM64 NEON accelerated version
 *
 * Copyright 2018 Google LLC
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	KEY		.req	x0
	MESSAGE		.req	x1
	MESSAGE_LEN	.req	x2
	HASH		.req	x3

	PASS0_SUMS	.req	v0
	PASS1_SUMS	.req	v1
	PASS2_SUMS	.req	v2
	PASS3_SUMS	.req	v3
	K0		.req	v4
	K1		.req	v5
	K2		.req	v6
	K3		.req	v7
	T0		.req	v8
	T1		.req	v9
	T2		.req	v10
	T3		.req	v11
	T4		.req	v12
	T5		.req	v13
	T6		.req	v14
	T7		.req	v15

.macro	_nh_stride	k0, k1, k2, k3

	// Load next message stride
	ld1	{T3.16b}, [MESSAGE], #16

	// Load next key stride
	ld1	{\k3\().4s}, [KEY], #16

	// Add message words to key words
	add	T0.4s, T3.4s, \k0\().4s
	add	T1.4s, T3.4s, \k1\().4s
	add	T2.4s, T3.4s, \k2\().4s
	add	T3.4s, T3.4s, \k3\().4s

	// Multiply 32x32 => 64 and accumulate
	mov	T4.d[0], T0.d[1]
	mov	T5.d[0], T1.d[1]
	mov	T6.d[0], T2.d[1]
	mov	T7.d[0], T3.d[1]
	umlal	PASS0_SUMS.2d, T0.2s, T4.2s
	umlal	PASS1_SUMS.2d, T1.2s, T5.2s
	umlal	PASS2_SUMS.2d, T2.2s, T6.2s
	umlal	PASS3_SUMS.2d, T3.2s, T7.2s
.endm

/*
 * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
 *		u8 hash[NH_HASH_BYTES])
 *
 * It's guaranteed that message_len % 16 == 0.
 */
SYM_FUNC_START(nh_neon)

	ld1	{K0.4s,K1.4s}, [KEY], #32
	movi	PASS0_SUMS.2d, #0
	movi	PASS1_SUMS.2d, #0
	ld1	{K2.4s}, [KEY], #16
	movi	PASS2_SUMS.2d, #0
	movi	PASS3_SUMS.2d, #0

	subs	MESSAGE_LEN, MESSAGE_LEN, #64
	blt	.Lloop4_done
.Lloop4:
	_nh_stride	K0, K1, K2, K3
	_nh_stride	K1, K2, K3, K0
	_nh_stride	K2, K3, K0, K1
	_nh_stride	K3, K0, K1, K2
	subs	MESSAGE_LEN, MESSAGE_LEN, #64
	bge	.Lloop4

.Lloop4_done:
	ands	MESSAGE_LEN, MESSAGE_LEN, #63
	beq	.Ldone
	_nh_stride	K0, K1, K2, K3

	subs	MESSAGE_LEN, MESSAGE_LEN, #16
	beq	.Ldone
	_nh_stride	K1, K2, K3, K0

	subs	MESSAGE_LEN, MESSAGE_LEN, #16
	beq	.Ldone
	_nh_stride	K2, K3, K0, K1

.Ldone:
	// Sum the accumulators for each pass, then store the sums to 'hash'
	addp	T0.2d, PASS0_SUMS.2d, PASS1_SUMS.2d
	addp	T1.2d, PASS2_SUMS.2d, PASS3_SUMS.2d
	st1	{T0.16b,T1.16b}, [HASH]
	ret
SYM_FUNC_END(nh_neon)
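For reference, what the NEON loops above compute per 16-byte stride: the four message words are added to a pass-specific slice of the key, and the 32x32->64 products are accumulated into one 64-bit sum per pass. A portable C sketch, assuming the NH parameters used by Adiantum (16-byte strides, four passes, keys advancing one stride per block) and a little-endian host; the function name is illustrative, not a kernel symbol:

#include <stdint.h>
#include <string.h>

static void nh_ref(const uint32_t *key, const uint8_t *msg, size_t len,
		   uint64_t sums[4])
{
	memset(sums, 0, 4 * sizeof(sums[0]));

	while (len >= 16) {		/* message_len % 16 == 0 */
		uint32_t m[4];

		memcpy(m, msg, 16);	/* little-endian load assumed */

		for (int pass = 0; pass < 4; pass++) {
			/* add message words to this pass's key words,
			 * then multiply 32x32 -> 64 and accumulate,
			 * mirroring the add + umlal pairs above */
			uint32_t a = m[0] + key[4 * pass + 0];
			uint32_t b = m[1] + key[4 * pass + 1];
			uint32_t c = m[2] + key[4 * pass + 2];
			uint32_t d = m[3] + key[4 * pass + 3];

			sums[pass] += (uint64_t)a * c + (uint64_t)b * d;
		}
		key += 4;	/* key slides one 16-byte stride per block */
		msg += 16;
		len -= 16;
	}
}

The K0..K3 rotation in the assembly is just this sliding key window kept in registers; the final addp pair folds each accumulator's two lanes into the per-pass sums stored to 'hash'.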
@ -1,78 +0,0 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* NHPoly1305 - ε-almost-∆-universal hash function for Adiantum
|
||||
* (ARM64 NEON accelerated version)
|
||||
*
|
||||
* Copyright 2018 Google LLC
|
||||
*/
|
||||
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/nhpoly1305.h>
|
||||
#include <linux/module.h>
|
||||
|
||||
asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
|
||||
u8 hash[NH_HASH_BYTES]);
|
||||
|
||||
/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
|
||||
static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
|
||||
__le64 hash[NH_NUM_PASSES])
|
||||
{
|
||||
nh_neon(key, message, message_len, (u8 *)hash);
|
||||
}
|
||||
|
||||
static int nhpoly1305_neon_update(struct shash_desc *desc,
|
||||
const u8 *src, unsigned int srclen)
|
||||
{
|
||||
if (srclen < 64 || !may_use_simd())
|
||||
return crypto_nhpoly1305_update(desc, src, srclen);
|
||||
|
||||
do {
|
||||
unsigned int n = min_t(unsigned int, srclen, PAGE_SIZE);
|
||||
|
||||
kernel_neon_begin();
|
||||
crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
|
||||
kernel_neon_end();
|
||||
src += n;
|
||||
srclen -= n;
|
||||
} while (srclen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct shash_alg nhpoly1305_alg = {
|
||||
.base.cra_name = "nhpoly1305",
|
||||
.base.cra_driver_name = "nhpoly1305-neon",
|
||||
.base.cra_priority = 200,
|
||||
.base.cra_ctxsize = sizeof(struct nhpoly1305_key),
|
||||
.base.cra_module = THIS_MODULE,
|
||||
.digestsize = POLY1305_DIGEST_SIZE,
|
||||
.init = crypto_nhpoly1305_init,
|
||||
.update = nhpoly1305_neon_update,
|
||||
.final = crypto_nhpoly1305_final,
|
||||
.setkey = crypto_nhpoly1305_setkey,
|
||||
.descsize = sizeof(struct nhpoly1305_state),
|
||||
};
|
||||
|
||||
static int __init nhpoly1305_mod_init(void)
|
||||
{
|
||||
if (!cpu_have_named_feature(ASIMD))
|
||||
return -ENODEV;
|
||||
|
||||
return crypto_register_shash(&nhpoly1305_alg);
|
||||
}
|
||||
|
||||
static void __exit nhpoly1305_mod_exit(void)
|
||||
{
|
||||
crypto_unregister_shash(&nhpoly1305_alg);
|
||||
}
|
||||
|
||||
module_init(nhpoly1305_mod_init);
|
||||
module_exit(nhpoly1305_mod_exit);
|
||||
|
||||
MODULE_DESCRIPTION("NHPoly1305 ε-almost-∆-universal hash function (NEON-accelerated)");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
|
||||
MODULE_ALIAS_CRYPTO("nhpoly1305");
|
||||
MODULE_ALIAS_CRYPTO("nhpoly1305-neon");
|
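The update path above bounds each kernel_neon_begin()/kernel_neon_end() section to at most PAGE_SIZE of input, so the NEON unit is never held (and preemption never deferred) across arbitrarily large requests. A minimal standalone sketch of that chunking loop, with illustrative stand-ins for the kernel primitives:

#include <stdio.h>

#define PAGE_SIZE 4096u

/* stand-ins for kernel_neon_begin()/kernel_neon_end() and the NH helper */
static void simd_begin(void) {}
static void simd_end(void)   {}
static void process(const unsigned char *src, unsigned int n)
{
	printf("hashing %u bytes inside one SIMD section\n", n);
}

static void update(const unsigned char *src, unsigned int srclen)
{
	do {
		unsigned int n = srclen < PAGE_SIZE ? srclen : PAGE_SIZE;

		simd_begin();
		process(src, n);
		simd_end();
		src += n;
		srclen -= n;
	} while (srclen);
}

int main(void)
{
	static unsigned char buf[10000];

	update(buf, sizeof(buf));
	return 0;
}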
@@ -58,22 +58,24 @@
	sha1su1		v\s0\().4s, v\s3\().4s
	.endm

	.macro		loadrc, k, val, tmp
	movz		\tmp, :abs_g0_nc:\val
	movk		\tmp, :abs_g1:\val
	dup		\k, \tmp
	.endm
	/*
	 * The SHA1 round constants
	 */
	.align		4
.Lsha1_rcon:
	.word		0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6

	/*
	 * int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			 int blocks)
	 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
	 *			  int blocks)
	 */
SYM_FUNC_START(sha1_ce_transform)
ENTRY(sha1_ce_transform)
	/* load round constants */
	loadrc		k0.4s, 0x5a827999, w6
	loadrc		k1.4s, 0x6ed9eba1, w6
	loadrc		k2.4s, 0x8f1bbcdc, w6
	loadrc		k3.4s, 0xca62c1d6, w6
	adr		x6, .Lsha1_rcon
	ld1r		{k0.4s}, [x6], #4
	ld1r		{k1.4s}, [x6], #4
	ld1r		{k2.4s}, [x6], #4
	ld1r		{k3.4s}, [x6]

	/* load state */
	ld1		{dgav.4s}, [x0]
@@ -123,16 +125,14 @@ CPU_LE(	rev32	v11.16b, v11.16b	)
	add		dgbv.2s, dgbv.2s, dg1v.2s
	add		dgav.4s, dgav.4s, dg0v.4s

	cbz		w2, 2f
	cond_yield	3f, x5
	b		0b
	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f
	cbz		x4, 3f
	ldr_l		w4, sha1_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v9.2d, #0
@@ -148,6 +148,5 @@ CPU_LE(	rev32	v11.16b, v11.16b	)
	/* store new state */
3:	st1		{dgav.4s}, [x0]
	str		dgb, [x0, #16]
	mov		w0, w2
	ret
SYM_FUNC_END(sha1_ce_transform)
ENDPROC(sha1_ce_transform)
@@ -12,7 +12,6 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha1_base.h>
#include <linux/cpufeature.h>
@@ -22,18 +21,14 @@
MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha1");

struct sha1_ce_state {
	struct sha1_state	sst;
	u32			finalize;
};

extern const u32 sha1_ce_offsetof_count;
extern const u32 sha1_ce_offsetof_finalize;

asmlinkage int sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
				 int blocks);
asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
				  int blocks);
#ifdef CONFIG_CFI_CLANG
static inline void __cfi_sha1_ce_transform(struct sha1_state *sst,
					   u8 const *src, int blocks)
@@ -43,23 +38,6 @@ static inline void __cfi_sha1_ce_transform(struct sha1_state *sst,
#define sha1_ce_transform __cfi_sha1_ce_transform
#endif

static void __sha1_ce_transform(struct sha1_state *sst, u8 const *src,
				int blocks)
{
	sha1_ce_transform(container_of(sst, struct sha1_ce_state, sst), src,
			  blocks);
	while (blocks) {
		int rem;

		kernel_neon_begin();
		rem = sha1_ce_transform(container_of(sst, struct sha1_ce_state,
						     sst), src, blocks);
		kernel_neon_end();
		src += (blocks - rem) * SHA1_BLOCK_SIZE;
		blocks = rem;
	}
}

const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);

@@ -72,7 +50,10 @@ static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
		return crypto_sha1_update(desc, data, len);

	sctx->finalize = 0;
	sha1_base_do_update(desc, data, len, __sha1_ce_transform);
	kernel_neon_begin();
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();

	return 0;
}
@@ -92,9 +73,12 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
	 */
	sctx->finalize = finalize;

	sha1_base_do_update(desc, data, len, __sha1_ce_transform);
	kernel_neon_begin();
	sha1_base_do_update(desc, data, len,
			    (sha1_block_fn *)sha1_ce_transform);
	if (!finalize)
		sha1_base_do_finalize(desc, __sha1_ce_transform);
		sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	return sha1_base_finish(desc, out);
}

@@ -106,41 +90,24 @@ static int sha1_ce_final(struct shash_desc *desc, u8 *out)
		return crypto_sha1_finup(desc, NULL, 0, out);

	sctx->finalize = 0;
	sha1_base_do_finalize(desc, __sha1_ce_transform);
	kernel_neon_begin();
	sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform);
	kernel_neon_end();
	return sha1_base_finish(desc, out);
}

static int sha1_ce_export(struct shash_desc *desc, void *out)
{
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);

	memcpy(out, &sctx->sst, sizeof(struct sha1_state));
	return 0;
}

static int sha1_ce_import(struct shash_desc *desc, const void *in)
{
	struct sha1_ce_state *sctx = shash_desc_ctx(desc);

	memcpy(&sctx->sst, in, sizeof(struct sha1_state));
	sctx->finalize = 0;
	return 0;
}

static struct shash_alg alg = {
	.init = sha1_base_init,
	.update = sha1_ce_update,
	.final = sha1_ce_final,
	.finup = sha1_ce_finup,
	.import = sha1_ce_import,
	.export = sha1_ce_export,
	.descsize = sizeof(struct sha1_ce_state),
	.statesize = sizeof(struct sha1_state),
	.digestsize = SHA1_DIGEST_SIZE,
	.base = {
		.cra_name = "sha1",
		.cra_driver_name = "sha1-ce",
		.cra_priority = 200,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA1_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
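The removed __sha1_ce_transform() wrapper above drove the assembly in slices: the transform could return early (to yield the NEON unit), reporting how many blocks remain, and the wrapper re-entered the SIMD section until everything was consumed. A minimal standalone sketch of that loop, with an illustrative stub transform that handles at most 8 blocks per call:

#include <stdio.h>

#define BLOCK_SIZE 64

/* illustrative stand-in for a yielding sha1_ce_transform() */
static int transform(const unsigned char *src, int blocks)
{
	int done = blocks > 8 ? 8 : blocks;

	printf("processed %d block(s)\n", done);
	return blocks - done;	/* blocks still outstanding */
}

static void do_update(const unsigned char *src, int blocks)
{
	while (blocks) {
		int rem;

		/* kernel_neon_begin() would go here */
		rem = transform(src, blocks);
		/* kernel_neon_end() would go here */
		src += (blocks - rem) * BLOCK_SIZE;
		blocks = rem;
	}
}

int main(void)
{
	static unsigned char data[20 * BLOCK_SIZE];

	do_update(data, 20);
	return 0;
}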
@@ -53,7 +53,6 @@
	/*
	 * The SHA-256 round constants
	 */
	.section	".rodata", "a"
	.align		4
.Lsha2_rcon:
	.word		0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
@@ -77,10 +76,9 @@
 * void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 *			  int blocks)
 */
	.text
SYM_FUNC_START(sha2_ce_transform)
ENTRY(sha2_ce_transform)
	/* load round constants */
	adr_l		x8, .Lsha2_rcon
	adr		x8, .Lsha2_rcon
	ld1		{ v0.4s- v3.4s}, [x8], #64
	ld1		{ v4.4s- v7.4s}, [x8], #64
	ld1		{ v8.4s-v11.4s}, [x8], #64
@@ -131,16 +129,14 @@ CPU_LE(	rev32	v19.16b, v19.16b	)
	add		dgbv.4s, dgbv.4s, dg1v.4s

	/* handled all input blocks? */
	cbz		w2, 2f
	cond_yield	3f, x5
	b		0b
	cbnz		w2, 0b

	/*
	 * Final block: add padding and total bit count.
	 * Skip if the input size was not a round multiple of the block size,
	 * the padding is handled by the C code in that case.
	 */
2:	cbz		x4, 3f
	cbz		x4, 3f
	ldr_l		w4, sha256_ce_offsetof_count, x4
	ldr		x4, [x0, x4]
	movi		v17.2d, #0
@@ -155,6 +151,5 @@ CPU_LE(	rev32	v19.16b, v19.16b	)

	/* store new state */
3:	st1		{dgav.4s, dgbv.4s}, [x0]
	mov		w0, w2
	ret
SYM_FUNC_END(sha2_ce_transform)
ENDPROC(sha2_ce_transform)
@@ -12,7 +12,6 @@
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cpufeature.h>
@@ -22,19 +21,14 @@
MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha224");
MODULE_ALIAS_CRYPTO("sha256");

struct sha256_ce_state {
	struct sha256_state	sst;
	u32			finalize;
};

extern const u32 sha256_ce_offsetof_count;
extern const u32 sha256_ce_offsetof_finalize;

asmlinkage int sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
				 int blocks);
asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
				  int blocks);
#ifdef CONFIG_CFI_CLANG
static inline void __cfi_sha2_ce_transform(struct sha256_state *sst,
					   u8 const *src, int blocks)
@@ -44,23 +38,6 @@ static inline void __cfi_sha2_ce_transform(struct sha256_state *sst,
#define sha2_ce_transform __cfi_sha2_ce_transform
#endif

static void __sha2_ce_transform(struct sha256_state *sst, u8 const *src,
				int blocks)
{
	sha2_ce_transform(container_of(sst, struct sha256_ce_state, sst), src,
			  blocks);
	while (blocks) {
		int rem;

		kernel_neon_begin();
		rem = sha2_ce_transform(container_of(sst, struct sha256_ce_state,
						     sst), src, blocks);
		kernel_neon_end();
		src += (blocks - rem) * SHA256_BLOCK_SIZE;
		blocks = rem;
	}
}

const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
					      sst.count);
const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
@@ -68,12 +45,6 @@ const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,

asmlinkage void sha256_block_data_order(u32 *digest, u8 const *src, int blocks);

static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src,
				      int blocks)
{
	sha256_block_data_order(sst->state, src, blocks);
}

static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
@@ -81,10 +52,13 @@ static int sha256_ce_update(struct shash_desc *desc, const u8 *data,

	if (!may_use_simd())
		return sha256_base_do_update(desc, data, len,
					     __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);

	sctx->finalize = 0;
	sha256_base_do_update(desc, data, len, __sha2_ce_transform);
	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();

	return 0;
}
@@ -98,8 +72,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
	if (!may_use_simd()) {
		if (len)
			sha256_base_do_update(desc, data, len,
					      __sha256_block_data_order);
		sha256_base_do_finalize(desc, __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);
		return sha256_base_finish(desc, out);
	}

@@ -109,9 +84,13 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
	 */
	sctx->finalize = finalize;

	sha256_base_do_update(desc, data, len, __sha2_ce_transform);
	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha2_ce_transform);
	if (!finalize)
		sha256_base_do_finalize(desc, __sha2_ce_transform);
		sha256_base_do_finalize(desc,
					(sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();
	return sha256_base_finish(desc, out);
}

@@ -120,46 +99,30 @@ static int sha256_ce_final(struct shash_desc *desc, u8 *out)
	struct sha256_ce_state *sctx = shash_desc_ctx(desc);

	if (!may_use_simd()) {
		sha256_base_do_finalize(desc, __sha256_block_data_order);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);
		return sha256_base_finish(desc, out);
	}

	sctx->finalize = 0;
	sha256_base_do_finalize(desc, __sha2_ce_transform);
	kernel_neon_begin();
	sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform);
	kernel_neon_end();
	return sha256_base_finish(desc, out);
}

static int sha256_ce_export(struct shash_desc *desc, void *out)
{
	struct sha256_ce_state *sctx = shash_desc_ctx(desc);

	memcpy(out, &sctx->sst, sizeof(struct sha256_state));
	return 0;
}

static int sha256_ce_import(struct shash_desc *desc, const void *in)
{
	struct sha256_ce_state *sctx = shash_desc_ctx(desc);

	memcpy(&sctx->sst, in, sizeof(struct sha256_state));
	sctx->finalize = 0;
	return 0;
}

static struct shash_alg algs[] = { {
	.init = sha224_base_init,
	.update = sha256_ce_update,
	.final = sha256_ce_final,
	.finup = sha256_ce_finup,
	.export = sha256_ce_export,
	.import = sha256_ce_import,
	.descsize = sizeof(struct sha256_ce_state),
	.statesize = sizeof(struct sha256_state),
	.digestsize = SHA224_DIGEST_SIZE,
	.base = {
		.cra_name = "sha224",
		.cra_driver_name = "sha224-ce",
		.cra_priority = 200,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
@@ -168,15 +131,13 @@ static struct shash_alg algs[] = { {
	.update = sha256_ce_update,
	.final = sha256_ce_final,
	.finup = sha256_ce_finup,
	.export = sha256_ce_export,
	.import = sha256_ce_import,
	.descsize = sizeof(struct sha256_ce_state),
	.statesize = sizeof(struct sha256_state),
	.digestsize = SHA256_DIGEST_SIZE,
	.base = {
		.cra_name = "sha256",
		.cra_driver_name = "sha256-ce",
		.cra_priority = 200,
		.cra_flags = CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize = SHA256_BLOCK_SIZE,
		.cra_module = THIS_MODULE,
	}
@@ -1,13 +1,3 @@
// SPDX-License-Identifier: GPL-2.0

// This code is taken from the OpenSSL project but the author (Andy Polyakov)
// has relicensed it under the GPLv2. Therefore this program is free software;
// you can redistribute it and/or modify it under the terms of the GNU General
// Public License version 2 as published by the Free Software Foundation.
//
// The original headers, including the original license headers, are
// included below for completeness.

// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@@ -20,6 +10,8 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
@@ -14,7 +14,6 @@
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha256_base.h>
#include <linux/cryptohash.h>
@@ -32,66 +31,57 @@ asmlinkage void sha256_block_data_order(u32 *digest, const void *data,
			unsigned int num_blks);
EXPORT_SYMBOL(sha256_block_data_order);

static void __sha256_block_data_order(struct sha256_state *sst, u8 const *src,
				      int blocks)
{
	sha256_block_data_order(sst->state, src, blocks);
}

asmlinkage void sha256_block_neon(u32 *digest, const void *data,
				  unsigned int num_blks);

static void __sha256_block_neon(struct sha256_state *sst, u8 const *src,
				int blocks)
{
	sha256_block_neon(sst->state, src, blocks);
}

static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data,
				      unsigned int len)
static int sha256_update(struct shash_desc *desc, const u8 *data,
			 unsigned int len)
{
	return sha256_base_do_update(desc, data, len,
				     __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);
}

static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data,
				     unsigned int len, u8 *out)
static int sha256_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	if (len)
		sha256_base_do_update(desc, data, len,
				      __sha256_block_data_order);
	sha256_base_do_finalize(desc, __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);
	sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);

	return sha256_base_finish(desc, out);
}

static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out)
static int sha256_final(struct shash_desc *desc, u8 *out)
{
	return crypto_sha256_arm64_finup(desc, NULL, 0, out);
	return sha256_finup(desc, NULL, 0, out);
}

static struct shash_alg algs[] = { {
	.digestsize = SHA256_DIGEST_SIZE,
	.init = sha256_base_init,
	.update = crypto_sha256_arm64_update,
	.final = crypto_sha256_arm64_final,
	.finup = crypto_sha256_arm64_finup,
	.update = sha256_update,
	.final = sha256_final,
	.finup = sha256_finup,
	.descsize = sizeof(struct sha256_state),
	.base.cra_name = "sha256",
	.base.cra_driver_name = "sha256-arm64",
	.base.cra_priority = 125,
	.base.cra_priority = 100,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = SHA256_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
}, {
	.digestsize = SHA224_DIGEST_SIZE,
	.init = sha224_base_init,
	.update = crypto_sha256_arm64_update,
	.final = crypto_sha256_arm64_final,
	.finup = crypto_sha256_arm64_finup,
	.update = sha256_update,
	.final = sha256_final,
	.finup = sha256_finup,
	.descsize = sizeof(struct sha256_state),
	.base.cra_name = "sha224",
	.base.cra_driver_name = "sha224-arm64",
	.base.cra_priority = 125,
	.base.cra_priority = 100,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = SHA224_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
} };
@@ -99,31 +89,21 @@ static struct shash_alg algs[] = { {
static int sha256_update_neon(struct shash_desc *desc, const u8 *data,
			      unsigned int len)
{
	struct sha256_state *sctx = shash_desc_ctx(desc);

	/*
	 * Stacking and unstacking a substantial slice of the NEON register
	 * file may significantly affect performance for small updates when
	 * executing in interrupt context, so fall back to the scalar code
	 * in that case.
	 */
	if (!may_use_simd())
		return sha256_base_do_update(desc, data, len,
					     __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);

	while (len > 0) {
		unsigned int chunk = len;
	kernel_neon_begin();
	sha256_base_do_update(desc, data, len,
			      (sha256_block_fn *)sha256_block_neon);
	kernel_neon_end();

		/*
		 * Don't hog the CPU for the entire time it takes to process all
		 * input when running on a preemptible kernel, but process the
		 * data block by block instead.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT) &&
		    chunk + sctx->count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
			chunk = SHA256_BLOCK_SIZE -
				sctx->count % SHA256_BLOCK_SIZE;

		kernel_neon_begin();
		sha256_base_do_update(desc, data, chunk, __sha256_block_neon);
		kernel_neon_end();
		data += chunk;
		len -= chunk;
	}
	return 0;
}

@@ -133,13 +113,16 @@ static int sha256_finup_neon(struct shash_desc *desc, const u8 *data,
	if (!may_use_simd()) {
		if (len)
			sha256_base_do_update(desc, data, len,
					      __sha256_block_data_order);
		sha256_base_do_finalize(desc, __sha256_block_data_order);
				(sha256_block_fn *)sha256_block_data_order);
		sha256_base_do_finalize(desc,
				(sha256_block_fn *)sha256_block_data_order);
	} else {
		if (len)
			sha256_update_neon(desc, data, len);
		kernel_neon_begin();
		sha256_base_do_finalize(desc, __sha256_block_neon);
		if (len)
			sha256_base_do_update(desc, data, len,
					(sha256_block_fn *)sha256_block_neon);
		sha256_base_do_finalize(desc,
					(sha256_block_fn *)sha256_block_neon);
		kernel_neon_end();
	}
	return sha256_base_finish(desc, out);
@@ -160,6 +143,7 @@ static struct shash_alg neon_algs[] = { {
	.base.cra_name = "sha256",
	.base.cra_driver_name = "sha256-arm64-neon",
	.base.cra_priority = 150,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = SHA256_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
}, {
@@ -172,6 +156,7 @@ static struct shash_alg neon_algs[] = { {
	.base.cra_name = "sha224",
	.base.cra_driver_name = "sha224-arm64-neon",
	.base.cra_priority = 150,
	.base.cra_flags = CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize = SHA224_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
} };
@@ -182,7 +167,7 @@ static int __init sha256_mod_init(void)
	if (ret)
		return ret;

	if (cpu_have_named_feature(ASIMD)) {
	if (elf_hwcap & HWCAP_ASIMD) {
		ret = crypto_register_shashes(neon_algs, ARRAY_SIZE(neon_algs));
		if (ret)
			crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
@@ -192,7 +177,7 @@ static int __init sha256_mod_init(void)

static void __exit sha256_mod_fini(void)
{
	if (cpu_have_named_feature(ASIMD))
	if (elf_hwcap & HWCAP_ASIMD)
		crypto_unregister_shashes(neon_algs, ARRAY_SIZE(neon_algs));
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
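The removed sha256_update_neon() loop above clamps each pass so that, on a preemptible kernel, no single NEON section carries more than one block boundary. A standalone sketch of just that clamp ('count' plays the role of sctx->count, the total bytes hashed so far; names are illustrative):

#include <stdio.h>

#define SHA256_BLOCK_SIZE 64u

static unsigned int clamp_chunk(unsigned int len, unsigned long count,
				int preemptible)
{
	unsigned int chunk = len;

	/* if taking all of 'len' would cross a block boundary, take only
	 * enough to complete the current block, then yield */
	if (preemptible &&
	    chunk + count % SHA256_BLOCK_SIZE > SHA256_BLOCK_SIZE)
		chunk = SHA256_BLOCK_SIZE - count % SHA256_BLOCK_SIZE;
	return chunk;
}

int main(void)
{
	/* 40 bytes already buffered, 1000 more arriving: only 24 bytes
	 * are taken this pass, completing exactly one block */
	printf("%u\n", clamp_chunk(1000, 40, 1));
	return 0;
}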
@@ -1,212 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-core.S - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.irp	b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
	.set	.Lv\b\().2d, \b
	.set	.Lv\b\().16b, \b
	.endr

	/*
	 * ARMv8.2 Crypto Extensions instructions
	 */
	.macro	eor3, rd, rn, rm, ra
	.inst	0xce000000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	rax1, rd, rn, rm
	.inst	0xce608c00 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	bcax, rd, rn, rm, ra
	.inst	0xce200000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	xar, rd, rn, rm, imm6
	.inst	0xce800000 | .L\rd | (.L\rn << 5) | ((\imm6) << 10) | (.L\rm << 16)
	.endm

	/*
	 * int sha3_ce_transform(u64 *st, const u8 *data, int blocks, int dg_size)
	 */
	.text
SYM_FUNC_START(sha3_ce_transform)
	/* load state */
	add	x8, x0, #32
	ld1	{ v0.1d- v3.1d}, [x0]
	ld1	{ v4.1d- v7.1d}, [x8], #32
	ld1	{ v8.1d-v11.1d}, [x8], #32
	ld1	{v12.1d-v15.1d}, [x8], #32
	ld1	{v16.1d-v19.1d}, [x8], #32
	ld1	{v20.1d-v23.1d}, [x8], #32
	ld1	{v24.1d}, [x8]

0:	sub	w2, w2, #1
	mov	w8, #24
	adr_l	x9, .Lsha3_rcon

	/* load input */
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v31.8b}, [x1], #24
	eor	v0.8b, v0.8b, v25.8b
	eor	v1.8b, v1.8b, v26.8b
	eor	v2.8b, v2.8b, v27.8b
	eor	v3.8b, v3.8b, v28.8b
	eor	v4.8b, v4.8b, v29.8b
	eor	v5.8b, v5.8b, v30.8b
	eor	v6.8b, v6.8b, v31.8b

	tbnz	x3, #6, 2f		// SHA3-512

	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b-v30.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b
	eor	v9.8b, v9.8b, v27.8b
	eor	v10.8b, v10.8b, v28.8b
	eor	v11.8b, v11.8b, v29.8b
	eor	v12.8b, v12.8b, v30.8b

	tbnz	x3, #4, 1f		// SHA3-384 or SHA3-224

	// SHA3-256
	ld1	{v25.8b-v28.8b}, [x1], #32
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	b	3f

1:	tbz	x3, #2, 3f		// bit 2 cleared? SHA-384

	// SHA3-224
	ld1	{v25.8b-v28.8b}, [x1], #32
	ld1	{v29.8b}, [x1], #8
	eor	v13.8b, v13.8b, v25.8b
	eor	v14.8b, v14.8b, v26.8b
	eor	v15.8b, v15.8b, v27.8b
	eor	v16.8b, v16.8b, v28.8b
	eor	v17.8b, v17.8b, v29.8b
	b	3f

	// SHA3-512
2:	ld1	{v25.8b-v26.8b}, [x1], #16
	eor	v7.8b, v7.8b, v25.8b
	eor	v8.8b, v8.8b, v26.8b

3:	sub	w8, w8, #1

	eor3	v29.16b, v4.16b, v9.16b, v14.16b
	eor3	v26.16b, v1.16b, v6.16b, v11.16b
	eor3	v28.16b, v3.16b, v8.16b, v13.16b
	eor3	v25.16b, v0.16b, v5.16b, v10.16b
	eor3	v27.16b, v2.16b, v7.16b, v12.16b
	eor3	v29.16b, v29.16b, v19.16b, v24.16b
	eor3	v26.16b, v26.16b, v16.16b, v21.16b
	eor3	v28.16b, v28.16b, v18.16b, v23.16b
	eor3	v25.16b, v25.16b, v15.16b, v20.16b
	eor3	v27.16b, v27.16b, v17.16b, v22.16b

	rax1	v30.2d, v29.2d, v26.2d	// bc[0]
	rax1	v26.2d, v26.2d, v28.2d	// bc[2]
	rax1	v28.2d, v28.2d, v25.2d	// bc[4]
	rax1	v25.2d, v25.2d, v27.2d	// bc[1]
	rax1	v27.2d, v27.2d, v29.2d	// bc[3]

	eor	v0.16b, v0.16b, v30.16b
	xar	v29.2d, v1.2d, v25.2d, (64 - 1)
	xar	v1.2d, v6.2d, v25.2d, (64 - 44)
	xar	v6.2d, v9.2d, v28.2d, (64 - 20)
	xar	v9.2d, v22.2d, v26.2d, (64 - 61)
	xar	v22.2d, v14.2d, v28.2d, (64 - 39)
	xar	v14.2d, v20.2d, v30.2d, (64 - 18)
	xar	v31.2d, v2.2d, v26.2d, (64 - 62)
	xar	v2.2d, v12.2d, v26.2d, (64 - 43)
	xar	v12.2d, v13.2d, v27.2d, (64 - 25)
	xar	v13.2d, v19.2d, v28.2d, (64 - 8)
	xar	v19.2d, v23.2d, v27.2d, (64 - 56)
	xar	v23.2d, v15.2d, v30.2d, (64 - 41)
	xar	v15.2d, v4.2d, v28.2d, (64 - 27)
	xar	v28.2d, v24.2d, v28.2d, (64 - 14)
	xar	v24.2d, v21.2d, v25.2d, (64 - 2)
	xar	v8.2d, v8.2d, v27.2d, (64 - 55)
	xar	v4.2d, v16.2d, v25.2d, (64 - 45)
	xar	v16.2d, v5.2d, v30.2d, (64 - 36)
	xar	v5.2d, v3.2d, v27.2d, (64 - 28)
	xar	v27.2d, v18.2d, v27.2d, (64 - 21)
	xar	v3.2d, v17.2d, v26.2d, (64 - 15)
	xar	v25.2d, v11.2d, v25.2d, (64 - 10)
	xar	v26.2d, v7.2d, v26.2d, (64 - 6)
	xar	v30.2d, v10.2d, v30.2d, (64 - 3)

	bcax	v20.16b, v31.16b, v22.16b, v8.16b
	bcax	v21.16b, v8.16b, v23.16b, v22.16b
	bcax	v22.16b, v22.16b, v24.16b, v23.16b
	bcax	v23.16b, v23.16b, v31.16b, v24.16b
	bcax	v24.16b, v24.16b, v8.16b, v31.16b

	ld1r	{v31.2d}, [x9], #8

	bcax	v17.16b, v25.16b, v19.16b, v3.16b
	bcax	v18.16b, v3.16b, v15.16b, v19.16b
	bcax	v19.16b, v19.16b, v16.16b, v15.16b
	bcax	v15.16b, v15.16b, v25.16b, v16.16b
	bcax	v16.16b, v16.16b, v3.16b, v25.16b

	bcax	v10.16b, v29.16b, v12.16b, v26.16b
	bcax	v11.16b, v26.16b, v13.16b, v12.16b
	bcax	v12.16b, v12.16b, v14.16b, v13.16b
	bcax	v13.16b, v13.16b, v29.16b, v14.16b
	bcax	v14.16b, v14.16b, v26.16b, v29.16b

	bcax	v7.16b, v30.16b, v9.16b, v4.16b
	bcax	v8.16b, v4.16b, v5.16b, v9.16b
	bcax	v9.16b, v9.16b, v6.16b, v5.16b
	bcax	v5.16b, v5.16b, v30.16b, v6.16b
	bcax	v6.16b, v6.16b, v4.16b, v30.16b

	bcax	v3.16b, v27.16b, v0.16b, v28.16b
	bcax	v4.16b, v28.16b, v1.16b, v0.16b
	bcax	v0.16b, v0.16b, v2.16b, v1.16b
	bcax	v1.16b, v1.16b, v27.16b, v2.16b
	bcax	v2.16b, v2.16b, v28.16b, v27.16b

	eor	v0.16b, v0.16b, v31.16b

	cbnz	w8, 3b
	cond_yield	3f, x8
	cbnz	w2, 0b

	/* save state */
3:	st1	{ v0.1d- v3.1d}, [x0], #32
	st1	{ v4.1d- v7.1d}, [x0], #32
	st1	{ v8.1d-v11.1d}, [x0], #32
	st1	{v12.1d-v15.1d}, [x0], #32
	st1	{v16.1d-v19.1d}, [x0], #32
	st1	{v20.1d-v23.1d}, [x0], #32
	st1	{v24.1d}, [x0]
	mov	w0, w2
	ret
SYM_FUNC_END(sha3_ce_transform)

	.section	".rodata", "a"
	.align	8
.Lsha3_rcon:
	.quad	0x0000000000000001, 0x0000000000008082, 0x800000000000808a
	.quad	0x8000000080008000, 0x000000000000808b, 0x0000000080000001
	.quad	0x8000000080008081, 0x8000000000008009, 0x000000000000008a
	.quad	0x0000000000000088, 0x0000000080008009, 0x000000008000000a
	.quad	0x000000008000808b, 0x800000000000008b, 0x8000000000008089
	.quad	0x8000000000008003, 0x8000000000008002, 0x8000000000000080
	.quad	0x000000000000800a, 0x800000008000000a, 0x8000000080008081
	.quad	0x8000000000008080, 0x0000000080000001, 0x8000000080008008
@@ -1,166 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha3.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("SHA3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha3-224");
MODULE_ALIAS_CRYPTO("sha3-256");
MODULE_ALIAS_CRYPTO("sha3-384");
MODULE_ALIAS_CRYPTO("sha3-512");

asmlinkage int sha3_ce_transform(u64 *st, const u8 *data, int blocks,
				 int md_len);

static int sha3_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len)
{
	struct sha3_state *sctx = shash_desc_ctx(desc);
	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);

	if (!may_use_simd())
		return crypto_sha3_update(desc, data, len);

	if ((sctx->partial + len) >= sctx->rsiz) {
		int blocks;

		if (sctx->partial) {
			int p = sctx->rsiz - sctx->partial;

			memcpy(sctx->buf + sctx->partial, data, p);
			kernel_neon_begin();
			sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
			kernel_neon_end();

			data += p;
			len -= p;
			sctx->partial = 0;
		}

		blocks = len / sctx->rsiz;
		len %= sctx->rsiz;

		while (blocks) {
			int rem;

			kernel_neon_begin();
			rem = sha3_ce_transform(sctx->st, data, blocks,
						digest_size);
			kernel_neon_end();
			data += (blocks - rem) * sctx->rsiz;
			blocks = rem;
		}
	}

	if (len) {
		memcpy(sctx->buf + sctx->partial, data, len);
		sctx->partial += len;
	}
	return 0;
}

static int sha3_final(struct shash_desc *desc, u8 *out)
{
	struct sha3_state *sctx = shash_desc_ctx(desc);
	unsigned int digest_size = crypto_shash_digestsize(desc->tfm);
	__le64 *digest = (__le64 *)out;
	int i;

	if (!may_use_simd())
		return crypto_sha3_final(desc, out);

	sctx->buf[sctx->partial++] = 0x06;
	memset(sctx->buf + sctx->partial, 0, sctx->rsiz - sctx->partial);
	sctx->buf[sctx->rsiz - 1] |= 0x80;

	kernel_neon_begin();
	sha3_ce_transform(sctx->st, sctx->buf, 1, digest_size);
	kernel_neon_end();

	for (i = 0; i < digest_size / 8; i++)
		put_unaligned_le64(sctx->st[i], digest++);

	if (digest_size & 4)
		put_unaligned_le32(sctx->st[i], (__le32 *)digest);

	memzero_explicit(sctx, sizeof(*sctx));
	return 0;
}

static struct shash_alg algs[] = { {
	.digestsize = SHA3_224_DIGEST_SIZE,
	.init = crypto_sha3_init,
	.update = sha3_update,
	.final = sha3_final,
	.descsize = sizeof(struct sha3_state),
	.base.cra_name = "sha3-224",
	.base.cra_driver_name = "sha3-224-ce",
	.base.cra_blocksize = SHA3_224_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
	.base.cra_priority = 200,
}, {
	.digestsize = SHA3_256_DIGEST_SIZE,
	.init = crypto_sha3_init,
	.update = sha3_update,
	.final = sha3_final,
	.descsize = sizeof(struct sha3_state),
	.base.cra_name = "sha3-256",
	.base.cra_driver_name = "sha3-256-ce",
	.base.cra_blocksize = SHA3_256_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
	.base.cra_priority = 200,
}, {
	.digestsize = SHA3_384_DIGEST_SIZE,
	.init = crypto_sha3_init,
	.update = sha3_update,
	.final = sha3_final,
	.descsize = sizeof(struct sha3_state),
	.base.cra_name = "sha3-384",
	.base.cra_driver_name = "sha3-384-ce",
	.base.cra_blocksize = SHA3_384_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
	.base.cra_priority = 200,
}, {
	.digestsize = SHA3_512_DIGEST_SIZE,
	.init = crypto_sha3_init,
	.update = sha3_update,
	.final = sha3_final,
	.descsize = sizeof(struct sha3_state),
	.base.cra_name = "sha3-512",
	.base.cra_driver_name = "sha3-512-ce",
	.base.cra_blocksize = SHA3_512_BLOCK_SIZE,
	.base.cra_module = THIS_MODULE,
	.base.cra_priority = 200,
} };

static int __init sha3_neon_mod_init(void)
{
	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}

static void __exit sha3_neon_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_cpu_feature_match(SHA3, sha3_neon_mod_init);
module_exit(sha3_neon_mod_fini);
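The sha3_final() removed above applies the SHA-3 padding directly: a 0x06 domain-separation byte after the buffered message bytes, zero fill, and the 0x80 bit set in the last byte of the rate block (both bytes coincide when only one byte of room remains). A standalone sketch of that padding step, with an illustrative rate of 136 bytes (SHA3-256):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* 'partial' message bytes already sit in buf[0..partial-1];
 * 'rsiz' is the sponge rate in bytes */
static void sha3_pad(uint8_t *buf, unsigned int partial, unsigned int rsiz)
{
	buf[partial++] = 0x06;			/* domain separation */
	memset(buf + partial, 0, rsiz - partial);
	buf[rsiz - 1] |= 0x80;			/* final padding bit */
}

int main(void)
{
	uint8_t block[136] = { 0 };

	sha3_pad(block, 5, sizeof(block));	/* 5 message bytes buffered */
	printf("%02x ... %02x\n", block[5], block[135]);
	return 0;
}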
@@ -1,14 +1,4 @@
#! /usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0

# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.

# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
@@ -21,6 +11,8 @@
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPLv2 terms is granted.
# ====================================================================
#
# SHA256/512 for ARMv8.
@ -1,206 +0,0 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* sha512-ce-core.S - core SHA-384/SHA-512 transform using v8 Crypto Extensions
|
||||
*
|
||||
* Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License version 2 as
|
||||
* published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
.irp b,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
|
||||
.set .Lq\b, \b
|
||||
.set .Lv\b\().2d, \b
|
||||
.endr
|
||||
|
||||
.macro sha512h, rd, rn, rm
|
||||
.inst 0xce608000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
|
||||
.endm
|
||||
|
||||
.macro sha512h2, rd, rn, rm
|
||||
.inst 0xce608400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
|
||||
.endm
|
||||
|
||||
.macro sha512su0, rd, rn
|
||||
.inst 0xcec08000 | .L\rd | (.L\rn << 5)
|
||||
.endm
|
||||
|
||||
.macro sha512su1, rd, rn, rm
|
||||
	.inst	0xce608800 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	/*
	 * The SHA-512 round constants
	 */
	.section	".rodata", "a"
	.align		4
.Lsha512_rcon:
	.quad	0x428a2f98d728ae22, 0x7137449123ef65cd
	.quad	0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc
	.quad	0x3956c25bf348b538, 0x59f111f1b605d019
	.quad	0x923f82a4af194f9b, 0xab1c5ed5da6d8118
	.quad	0xd807aa98a3030242, 0x12835b0145706fbe
	.quad	0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2
	.quad	0x72be5d74f27b896f, 0x80deb1fe3b1696b1
	.quad	0x9bdc06a725c71235, 0xc19bf174cf692694
	.quad	0xe49b69c19ef14ad2, 0xefbe4786384f25e3
	.quad	0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65
	.quad	0x2de92c6f592b0275, 0x4a7484aa6ea6e483
	.quad	0x5cb0a9dcbd41fbd4, 0x76f988da831153b5
	.quad	0x983e5152ee66dfab, 0xa831c66d2db43210
	.quad	0xb00327c898fb213f, 0xbf597fc7beef0ee4
	.quad	0xc6e00bf33da88fc2, 0xd5a79147930aa725
	.quad	0x06ca6351e003826f, 0x142929670a0e6e70
	.quad	0x27b70a8546d22ffc, 0x2e1b21385c26c926
	.quad	0x4d2c6dfc5ac42aed, 0x53380d139d95b3df
	.quad	0x650a73548baf63de, 0x766a0abb3c77b2a8
	.quad	0x81c2c92e47edaee6, 0x92722c851482353b
	.quad	0xa2bfe8a14cf10364, 0xa81a664bbc423001
	.quad	0xc24b8b70d0f89791, 0xc76c51a30654be30
	.quad	0xd192e819d6ef5218, 0xd69906245565a910
	.quad	0xf40e35855771202a, 0x106aa07032bbd1b8
	.quad	0x19a4c116b8d2d0c8, 0x1e376c085141ab53
	.quad	0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8
	.quad	0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb
	.quad	0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3
	.quad	0x748f82ee5defb2fc, 0x78a5636f43172f60
	.quad	0x84c87814a1f0ab72, 0x8cc702081a6439ec
	.quad	0x90befffa23631e28, 0xa4506cebde82bde9
	.quad	0xbef9a3f7b2c67915, 0xc67178f2e372532b
	.quad	0xca273eceea26619c, 0xd186b8c721c0c207
	.quad	0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178
	.quad	0x06f067aa72176fba, 0x0a637dc5a2c898a6
	.quad	0x113f9804bef90dae, 0x1b710b35131c471b
	.quad	0x28db77f523047d84, 0x32caab7b40c72493
	.quad	0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c
	.quad	0x4cc5d4becb3e42b6, 0x597f299cfc657e2a
	.quad	0x5fcb6fab3ad6faec, 0x6c44198c4a475817

	.macro	dround, i0, i1, i2, i3, i4, rc0, rc1, in0, in1, in2, in3, in4
	.ifnb		\rc1
	ld1		{v\rc1\().2d}, [x4], #16
	.endif
	add		v5.2d, v\rc0\().2d, v\in0\().2d
	ext		v6.16b, v\i2\().16b, v\i3\().16b, #8
	ext		v5.16b, v5.16b, v5.16b, #8
	ext		v7.16b, v\i1\().16b, v\i2\().16b, #8
	add		v\i3\().2d, v\i3\().2d, v5.2d
	.ifnb		\in1
	ext		v5.16b, v\in3\().16b, v\in4\().16b, #8
	sha512su0	v\in0\().2d, v\in1\().2d
	.endif
	sha512h		q\i3, q6, v7.2d
	.ifnb		\in1
	sha512su1	v\in0\().2d, v\in2\().2d, v5.2d
	.endif
	add		v\i4\().2d, v\i1\().2d, v\i3\().2d
	sha512h2	q\i3, q\i1, v\i0\().2d
	.endm

	/*
	 * void sha512_ce_transform(struct sha512_state *sst, u8 const *src,
	 *			    int blocks)
	 */
	.text
SYM_FUNC_START(sha512_ce_transform)
	/* load state */
	ld1		{v8.2d-v11.2d}, [x0]

	/* load first 4 round constants */
	adr_l		x3, .Lsha512_rcon
	ld1		{v20.2d-v23.2d}, [x3], #64

	/* load input */
0:	ld1		{v12.2d-v15.2d}, [x1], #64
	ld1		{v16.2d-v19.2d}, [x1], #64
	sub		w2, w2, #1

CPU_LE(	rev64		v12.16b, v12.16b	)
CPU_LE(	rev64		v13.16b, v13.16b	)
CPU_LE(	rev64		v14.16b, v14.16b	)
CPU_LE(	rev64		v15.16b, v15.16b	)
CPU_LE(	rev64		v16.16b, v16.16b	)
CPU_LE(	rev64		v17.16b, v17.16b	)
CPU_LE(	rev64		v18.16b, v18.16b	)
CPU_LE(	rev64		v19.16b, v19.16b	)

	mov		x4, x3		// rc pointer

	mov		v0.16b, v8.16b
	mov		v1.16b, v9.16b
	mov		v2.16b, v10.16b
	mov		v3.16b, v11.16b

	// v0  ab  cd  --  ef  gh  ab
	// v1  cd  --  ef  gh  ab  cd
	// v2  ef  gh  ab  cd  --  ef
	// v3  gh  ab  cd  --  ef  gh
	// v4  --  ef  gh  ab  cd  --

	dround	0, 1, 2, 3, 4, 20, 24, 12, 13, 19, 16, 17
	dround	3, 0, 4, 2, 1, 21, 25, 13, 14, 12, 17, 18
	dround	2, 3, 1, 4, 0, 22, 26, 14, 15, 13, 18, 19
	dround	4, 2, 0, 1, 3, 23, 27, 15, 16, 14, 19, 12
	dround	1, 4, 3, 0, 2, 24, 28, 16, 17, 15, 12, 13

	dround	0, 1, 2, 3, 4, 25, 29, 17, 18, 16, 13, 14
	dround	3, 0, 4, 2, 1, 26, 30, 18, 19, 17, 14, 15
	dround	2, 3, 1, 4, 0, 27, 31, 19, 12, 18, 15, 16
	dround	4, 2, 0, 1, 3, 28, 24, 12, 13, 19, 16, 17
	dround	1, 4, 3, 0, 2, 29, 25, 13, 14, 12, 17, 18

	dround	0, 1, 2, 3, 4, 30, 26, 14, 15, 13, 18, 19
	dround	3, 0, 4, 2, 1, 31, 27, 15, 16, 14, 19, 12
	dround	2, 3, 1, 4, 0, 24, 28, 16, 17, 15, 12, 13
	dround	4, 2, 0, 1, 3, 25, 29, 17, 18, 16, 13, 14
	dround	1, 4, 3, 0, 2, 26, 30, 18, 19, 17, 14, 15

	dround	0, 1, 2, 3, 4, 27, 31, 19, 12, 18, 15, 16
	dround	3, 0, 4, 2, 1, 28, 24, 12, 13, 19, 16, 17
	dround	2, 3, 1, 4, 0, 29, 25, 13, 14, 12, 17, 18
	dround	4, 2, 0, 1, 3, 30, 26, 14, 15, 13, 18, 19
	dround	1, 4, 3, 0, 2, 31, 27, 15, 16, 14, 19, 12

	dround	0, 1, 2, 3, 4, 24, 28, 16, 17, 15, 12, 13
	dround	3, 0, 4, 2, 1, 25, 29, 17, 18, 16, 13, 14
	dround	2, 3, 1, 4, 0, 26, 30, 18, 19, 17, 14, 15
	dround	4, 2, 0, 1, 3, 27, 31, 19, 12, 18, 15, 16
	dround	1, 4, 3, 0, 2, 28, 24, 12, 13, 19, 16, 17

	dround	0, 1, 2, 3, 4, 29, 25, 13, 14, 12, 17, 18
	dround	3, 0, 4, 2, 1, 30, 26, 14, 15, 13, 18, 19
	dround	2, 3, 1, 4, 0, 31, 27, 15, 16, 14, 19, 12
	dround	4, 2, 0, 1, 3, 24, 28, 16, 17, 15, 12, 13
	dround	1, 4, 3, 0, 2, 25, 29, 17, 18, 16, 13, 14

	dround	0, 1, 2, 3, 4, 26, 30, 18, 19, 17, 14, 15
	dround	3, 0, 4, 2, 1, 27, 31, 19, 12, 18, 15, 16
	dround	2, 3, 1, 4, 0, 28, 24, 12
	dround	4, 2, 0, 1, 3, 29, 25, 13
	dround	1, 4, 3, 0, 2, 30, 26, 14

	dround	0, 1, 2, 3, 4, 31, 27, 15
	dround	3, 0, 4, 2, 1, 24, , 16
	dround	2, 3, 1, 4, 0, 25, , 17
	dround	4, 2, 0, 1, 3, 26, , 18
	dround	1, 4, 3, 0, 2, 27, , 19

	/* update state */
	add		v8.2d, v8.2d, v0.2d
	add		v9.2d, v9.2d, v1.2d
	add		v10.2d, v10.2d, v2.2d
	add		v11.2d, v11.2d, v3.2d

	cond_yield	3f, x4
	/* handled all input blocks? */
	cbnz		w2, 0b

	/* store new state */
3:	st1		{v8.2d-v11.2d}, [x0]
	mov		w0, w2
	ret
SYM_FUNC_END(sha512_ce_transform)
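For orientation, here is a minimal scalar C sketch of what one SHA-512 round computes; the sha512h/sha512h2 instructions driven by the dround macro above perform two such rounds on a 128-bit register pair. This is an illustration per FIPS 180-4, not kernel code; all names are hypothetical.

#include <stdint.h>

/* One scalar SHA-512 round: s[0..7] = a..h, K is the .Lsha512_rcon
 * constant for this round, W the expanded message word. */
static inline uint64_t ror64(uint64_t x, unsigned int n)
{
	return (x >> n) | (x << (64 - n));
}

static void sha512_round(uint64_t s[8], uint64_t K, uint64_t W)
{
	uint64_t t1 = s[7] + (ror64(s[4], 14) ^ ror64(s[4], 18) ^ ror64(s[4], 41))
			   + ((s[4] & s[5]) ^ (~s[4] & s[6])) + K + W;
	uint64_t t2 = (ror64(s[0], 28) ^ ror64(s[0], 34) ^ ror64(s[0], 39))
			   + ((s[0] & s[1]) ^ (s[0] & s[2]) ^ (s[1] & s[2]));

	/* rotate the working variables down by one position */
	s[7] = s[6]; s[6] = s[5]; s[5] = s[4]; s[4] = s[3] + t1;
	s[3] = s[2]; s[2] = s[1]; s[1] = s[0]; s[0] = t1 + t2;
}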
@@ -1,121 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sha.h>
#include <crypto/sha512_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("SHA-384/SHA-512 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");

asmlinkage int sha512_ce_transform(struct sha512_state *sst, u8 const *src,
				   int blocks);

asmlinkage void sha512_block_data_order(u64 *digest, u8 const *src, int blocks);

static void __sha512_ce_transform(struct sha512_state *sst, u8 const *src,
				  int blocks)
{
	while (blocks) {
		int rem;

		kernel_neon_begin();
		rem = sha512_ce_transform(sst, src, blocks);
		kernel_neon_end();
		src += (blocks - rem) * SHA512_BLOCK_SIZE;
		blocks = rem;
	}
}

static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
				      int blocks)
{
	sha512_block_data_order(sst->state, src, blocks);
}

static int sha512_ce_update(struct shash_desc *desc, const u8 *data,
			    unsigned int len)
{
	sha512_block_fn *fn = may_use_simd() ? __sha512_ce_transform
					     : __sha512_block_data_order;

	sha512_base_do_update(desc, data, len, fn);
	return 0;
}

static int sha512_ce_finup(struct shash_desc *desc, const u8 *data,
			   unsigned int len, u8 *out)
{
	sha512_block_fn *fn = may_use_simd() ? __sha512_ce_transform
					     : __sha512_block_data_order;

	sha512_base_do_update(desc, data, len, fn);
	sha512_base_do_finalize(desc, fn);
	return sha512_base_finish(desc, out);
}

static int sha512_ce_final(struct shash_desc *desc, u8 *out)
{
	sha512_block_fn *fn = may_use_simd() ? __sha512_ce_transform
					     : __sha512_block_data_order;

	sha512_base_do_finalize(desc, fn);
	return sha512_base_finish(desc, out);
}

static struct shash_alg algs[] = { {
	.init			= sha384_base_init,
	.update			= sha512_ce_update,
	.final			= sha512_ce_final,
	.finup			= sha512_ce_finup,
	.descsize		= sizeof(struct sha512_state),
	.digestsize		= SHA384_DIGEST_SIZE,
	.base.cra_name		= "sha384",
	.base.cra_driver_name	= "sha384-ce",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
}, {
	.init			= sha512_base_init,
	.update			= sha512_ce_update,
	.final			= sha512_ce_final,
	.finup			= sha512_ce_finup,
	.descsize		= sizeof(struct sha512_state),
	.digestsize		= SHA512_DIGEST_SIZE,
	.base.cra_name		= "sha512",
	.base.cra_driver_name	= "sha512-ce",
	.base.cra_priority	= 200,
	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
} };

static int __init sha512_ce_mod_init(void)
{
	return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}

static void __exit sha512_ce_mod_fini(void)
{
	crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}

module_cpu_feature_match(SHA512, sha512_ce_mod_init);
module_exit(sha512_ce_mod_fini);
@@ -1,13 +1,3 @@
// SPDX-License-Identifier: GPL-2.0

// This code is taken from the OpenSSL project but the author (Andy Polyakov)
// has relicensed it under the GPLv2. Therefore this program is free software;
// you can redistribute it and/or modify it under the terms of the GNU General
// Public License version 2 as published by the Free Software Foundation.
//
// The original headers, including the original license headers, are
// included below for completeness.

// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the OpenSSL license (the "License"). You may not use
@@ -20,6 +10,8 @@
// project. The module is, however, dual licensed under OpenSSL and
// CRYPTOGAMS licenses depending on where you obtain it. For further
// details see http://www.openssl.org/~appro/cryptogams/.
//
// Permission to use under GPLv2 terms is granted.
// ====================================================================
//
// SHA256/512 for ARMv8.
@@ -25,21 +25,14 @@ MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("sha384");
MODULE_ALIAS_CRYPTO("sha512");

asmlinkage void sha512_block_data_order(u64 *digest, const void *data,
asmlinkage void sha512_block_data_order(u32 *digest, const void *data,
					unsigned int num_blks);
EXPORT_SYMBOL(sha512_block_data_order);

static void __sha512_block_data_order(struct sha512_state *sst, u8 const *src,
				      int blocks)
{
	sha512_block_data_order(sst->state, src, blocks);
}

static int sha512_update(struct shash_desc *desc, const u8 *data,
			 unsigned int len)
{
	return sha512_base_do_update(desc, data, len,
				     __sha512_block_data_order);
			(sha512_block_fn *)sha512_block_data_order);
}

static int sha512_finup(struct shash_desc *desc, const u8 *data,
@@ -47,8 +40,9 @@ static int sha512_finup(struct shash_desc *desc, const u8 *data,
{
	if (len)
		sha512_base_do_update(desc, data, len,
				      __sha512_block_data_order);
	sha512_base_do_finalize(desc, __sha512_block_data_order);
			(sha512_block_fn *)sha512_block_data_order);
	sha512_base_do_finalize(desc,
			(sha512_block_fn *)sha512_block_data_order);

	return sha512_base_finish(desc, out);
}
@@ -68,6 +62,7 @@ static struct shash_alg algs[] = { {
	.base.cra_name		= "sha512",
	.base.cra_driver_name	= "sha512-arm64",
	.base.cra_priority	= 150,
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA512_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
}, {
@@ -80,6 +75,7 @@ static struct shash_alg algs[] = { {
	.base.cra_name		= "sha384",
	.base.cra_driver_name	= "sha384-arm64",
	.base.cra_priority	= 150,
	.base.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
	.base.cra_blocksize	= SHA384_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
} };
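A side note on the `(sha512_block_fn *)sha512_block_data_order` casts this revert reinstates: the OpenSSL-derived asm takes a bare digest pointer while sha512_block_fn expects struct sha512_state *. The cast is tolerated in practice because the digest words sit at offset zero of the state struct, so both pointers alias the same address. A hedged sketch (field sizes mirror SHA-512; this is a model, not the crypto/sha.h definition):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* model of struct sha512_state: state[] must stay the first member
 * for the function-pointer cast above to keep working */
struct sha512_state_model {
	uint64_t state[8];	/* 64-byte digest */
	uint64_t count[2];
	uint8_t buf[128];	/* one block */
};

int main(void)
{
	assert(offsetof(struct sha512_state_model, state) == 0);
	return 0;
}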
@@ -1,141 +0,0 @@
/*
 * sm3-ce-core.S - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.irp	b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
	.set	.Lv\b\().4s, \b
	.endr

	.macro	sm3partw1, rd, rn, rm
	.inst	0xce60c000 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	sm3partw2, rd, rn, rm
	.inst	0xce60c400 | .L\rd | (.L\rn << 5) | (.L\rm << 16)
	.endm

	.macro	sm3ss1, rd, rn, rm, ra
	.inst	0xce400000 | .L\rd | (.L\rn << 5) | (.L\ra << 10) | (.L\rm << 16)
	.endm

	.macro	sm3tt1a, rd, rn, rm, imm2
	.inst	0xce408000 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro	sm3tt1b, rd, rn, rm, imm2
	.inst	0xce408400 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro	sm3tt2a, rd, rn, rm, imm2
	.inst	0xce408800 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro	sm3tt2b, rd, rn, rm, imm2
	.inst	0xce408c00 | .L\rd | (.L\rn << 5) | ((\imm2) << 12) | (.L\rm << 16)
	.endm

	.macro	round, ab, s0, t0, t1, i
	sm3ss1		v5.4s, v8.4s, \t0\().4s, v9.4s
	shl		\t1\().4s, \t0\().4s, #1
	sri		\t1\().4s, \t0\().4s, #31
	sm3tt1\ab	v8.4s, v5.4s, v10.4s, \i
	sm3tt2\ab	v9.4s, v5.4s, \s0\().4s, \i
	.endm

	.macro	qround, ab, s0, s1, s2, s3, s4
	.ifnb		\s4
	ext		\s4\().16b, \s1\().16b, \s2\().16b, #12
	ext		v6.16b, \s0\().16b, \s1\().16b, #12
	ext		v7.16b, \s2\().16b, \s3\().16b, #8
	sm3partw1	\s4\().4s, \s0\().4s, \s3\().4s
	.endif

	eor		v10.16b, \s0\().16b, \s1\().16b

	round		\ab, \s0, v11, v12, 0
	round		\ab, \s0, v12, v11, 1
	round		\ab, \s0, v11, v12, 2
	round		\ab, \s0, v12, v11, 3

	.ifnb		\s4
	sm3partw2	\s4\().4s, v7.4s, v6.4s
	.endif
	.endm

	/*
	 * void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
	 *			 int blocks)
	 */
	.text
SYM_FUNC_START(sm3_ce_transform)
	/* load state */
	ld1		{v8.4s-v9.4s}, [x0]
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8

	adr_l		x8, .Lt
	ldp		s13, s14, [x8]

	/* load input */
0:	ld1		{v0.16b-v3.16b}, [x1], #64
	sub		w2, w2, #1

	mov		v15.16b, v8.16b
	mov		v16.16b, v9.16b

CPU_LE(	rev32		v0.16b, v0.16b	)
CPU_LE(	rev32		v1.16b, v1.16b	)
CPU_LE(	rev32		v2.16b, v2.16b	)
CPU_LE(	rev32		v3.16b, v3.16b	)

	ext		v11.16b, v13.16b, v13.16b, #4

	qround		a, v0, v1, v2, v3, v4
	qround		a, v1, v2, v3, v4, v0
	qround		a, v2, v3, v4, v0, v1
	qround		a, v3, v4, v0, v1, v2

	ext		v11.16b, v14.16b, v14.16b, #4

	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4, v0, v1, v2
	qround		b, v4, v0, v1, v2, v3
	qround		b, v0, v1, v2, v3, v4
	qround		b, v1, v2, v3, v4, v0
	qround		b, v2, v3, v4, v0, v1
	qround		b, v3, v4
	qround		b, v4, v0
	qround		b, v0, v1

	eor		v8.16b, v8.16b, v15.16b
	eor		v9.16b, v9.16b, v16.16b

	/* handled all input blocks? */
	cbnz		w2, 0b

	/* save state */
	rev64		v8.4s, v8.4s
	rev64		v9.4s, v9.4s
	ext		v8.16b, v8.16b, v8.16b, #8
	ext		v9.16b, v9.16b, v9.16b, #8
	st1		{v8.4s-v9.4s}, [x0]
	ret
SYM_FUNC_END(sm3_ce_transform)

	.section	".rodata", "a"
	.align		3
.Lt:	.word		0x79cc4519, 0x9d8a7a87
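The sm3* macros above emit raw `.inst` words so the file assembles even when the toolchain predates ARMv8.2 SM3 support. A small illustrative C program reproducing one encoding (the operand values are an example of mine, not from the source):

#include <stdint.h>
#include <stdio.h>

/* opcode the sm3partw1 macro would emit for: sm3partw1 v4.4s, v0.4s, v3.4s */
int main(void)
{
	uint32_t rd = 4, rn = 0, rm = 3;
	uint32_t insn = 0xce60c000u | rd | (rn << 5) | (rm << 16);
	printf("0x%08x\n", insn);	/* prints 0xce63c004 */
	return 0;
}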
@@ -1,92 +0,0 @@
/*
 * sm3-ce-glue.c - SM3 secure hash using ARMv8.2 Crypto Extensions
 *
 * Copyright (C) 2018 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/unaligned.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/simd.h>
#include <crypto/sm3.h>
#include <crypto/sm3_base.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>

MODULE_DESCRIPTION("SM3 secure hash using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src,
				 int blocks);

static int sm3_ce_update(struct shash_desc *desc, const u8 *data,
			 unsigned int len)
{
	if (!may_use_simd())
		return crypto_sm3_update(desc, data, len);

	kernel_neon_begin();
	sm3_base_do_update(desc, data, len, sm3_ce_transform);
	kernel_neon_end();

	return 0;
}

static int sm3_ce_final(struct shash_desc *desc, u8 *out)
{
	if (!may_use_simd())
		return crypto_sm3_finup(desc, NULL, 0, out);

	kernel_neon_begin();
	sm3_base_do_finalize(desc, sm3_ce_transform);
	kernel_neon_end();

	return sm3_base_finish(desc, out);
}

static int sm3_ce_finup(struct shash_desc *desc, const u8 *data,
			unsigned int len, u8 *out)
{
	if (!may_use_simd())
		return crypto_sm3_finup(desc, data, len, out);

	kernel_neon_begin();
	sm3_base_do_update(desc, data, len, sm3_ce_transform);
	kernel_neon_end();

	return sm3_ce_final(desc, out);
}

static struct shash_alg sm3_alg = {
	.digestsize		= SM3_DIGEST_SIZE,
	.init			= sm3_base_init,
	.update			= sm3_ce_update,
	.final			= sm3_ce_final,
	.finup			= sm3_ce_finup,
	.descsize		= sizeof(struct sm3_state),
	.base.cra_name		= "sm3",
	.base.cra_driver_name	= "sm3-ce",
	.base.cra_blocksize	= SM3_BLOCK_SIZE,
	.base.cra_module	= THIS_MODULE,
	.base.cra_priority	= 200,
};

static int __init sm3_ce_mod_init(void)
{
	return crypto_register_shash(&sm3_alg);
}

static void __exit sm3_ce_mod_fini(void)
{
	crypto_unregister_shash(&sm3_alg);
}

module_cpu_feature_match(SM3, sm3_ce_mod_init);
module_exit(sm3_ce_mod_fini);
@@ -1,36 +0,0 @@
// SPDX-License-Identifier: GPL-2.0

#include <linux/linkage.h>
#include <asm/assembler.h>

	.irp	b, 0, 1, 2, 3, 4, 5, 6, 7, 8
	.set	.Lv\b\().4s, \b
	.endr

	.macro	sm4e, rd, rn
	.inst	0xcec08400 | .L\rd | (.L\rn << 5)
	.endm

	/*
	 * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in);
	 */
	.text
SYM_FUNC_START(sm4_ce_do_crypt)
	ld1		{v8.4s}, [x2]
	ld1		{v0.4s-v3.4s}, [x0], #64
CPU_LE(	rev32		v8.16b, v8.16b	)
	ld1		{v4.4s-v7.4s}, [x0]
	sm4e		v8.4s, v0.4s
	sm4e		v8.4s, v1.4s
	sm4e		v8.4s, v2.4s
	sm4e		v8.4s, v3.4s
	sm4e		v8.4s, v4.4s
	sm4e		v8.4s, v5.4s
	sm4e		v8.4s, v6.4s
	sm4e		v8.4s, v7.4s
	rev64		v8.4s, v8.4s
	ext		v8.16b, v8.16b, v8.16b, #8
CPU_LE(	rev32		v8.16b, v8.16b	)
	st1		{v8.4s}, [x1]
	ret
SYM_FUNC_END(sm4_ce_do_crypt)
@@ -1,74 +0,0 @@
// SPDX-License-Identifier: GPL-2.0

#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/sm4.h>
#include <crypto/internal/simd.h>
#include <linux/module.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/types.h>

MODULE_ALIAS_CRYPTO("sm4");
MODULE_ALIAS_CRYPTO("sm4-ce");
MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions");
MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
MODULE_LICENSE("GPL v2");

asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in);

static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!may_use_simd()) {
		crypto_sm4_encrypt(tfm, out, in);
	} else {
		kernel_neon_begin();
		sm4_ce_do_crypt(ctx->rkey_enc, out, in);
		kernel_neon_end();
	}
}

static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
	const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm);

	if (!may_use_simd()) {
		crypto_sm4_decrypt(tfm, out, in);
	} else {
		kernel_neon_begin();
		sm4_ce_do_crypt(ctx->rkey_dec, out, in);
		kernel_neon_end();
	}
}

static struct crypto_alg sm4_ce_alg = {
	.cra_name			= "sm4",
	.cra_driver_name		= "sm4-ce",
	.cra_priority			= 200,
	.cra_flags			= CRYPTO_ALG_TYPE_CIPHER,
	.cra_blocksize			= SM4_BLOCK_SIZE,
	.cra_ctxsize			= sizeof(struct crypto_sm4_ctx),
	.cra_module			= THIS_MODULE,
	.cra_u.cipher = {
		.cia_min_keysize	= SM4_KEY_SIZE,
		.cia_max_keysize	= SM4_KEY_SIZE,
		.cia_setkey		= crypto_sm4_set_key,
		.cia_encrypt		= sm4_ce_encrypt,
		.cia_decrypt		= sm4_ce_decrypt
	}
};

static int __init sm4_ce_mod_init(void)
{
	return crypto_register_alg(&sm4_ce_alg);
}

static void __exit sm4_ce_mod_fini(void)
{
	crypto_unregister_alg(&sm4_ce_alg);
}

module_cpu_feature_match(SM4, sm4_ce_mod_init);
module_exit(sm4_ce_mod_fini);
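The sm3 and sm4 glue files above share one shape: test may_use_simd(), fall back to the generic implementation when NEON is unusable (e.g. in interrupt context), and otherwise bracket the accelerated call with kernel_neon_begin()/kernel_neon_end(). A hedged skeleton of the pattern; generic_op/neon_op are placeholder names, not real kernel symbols:

#include <stdbool.h>
#include <stdint.h>

/* stand-ins for illustration only */
extern bool may_use_simd(void);
extern void kernel_neon_begin(void);
extern void kernel_neon_end(void);
extern void generic_op(void *ctx, uint8_t *out, const uint8_t *in);
extern void neon_op(void *ctx, uint8_t *out, const uint8_t *in);

static void crypt_one_block(void *ctx, uint8_t *out, const uint8_t *in)
{
	if (!may_use_simd()) {
		generic_op(ctx, out, in);	/* scalar fallback */
		return;
	}
	kernel_neon_begin();			/* claim the FP/SIMD register file */
	neon_op(ctx, out, in);
	kernel_neon_end();
}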
@@ -1,352 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * ARM64 NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
 *
 * Copyright (c) 2018 Google, Inc
 *
 * Author: Eric Biggers <ebiggers@google.com>
 */

#include <linux/linkage.h>

	.text

	// arguments
	ROUND_KEYS	.req	x0	// const {u64,u32} *round_keys
	NROUNDS		.req	w1	// int nrounds
	NROUNDS_X	.req	x1
	DST		.req	x2	// void *dst
	SRC		.req	x3	// const void *src
	NBYTES		.req	w4	// unsigned int nbytes
	TWEAK		.req	x5	// void *tweak

	// registers which hold the data being encrypted/decrypted
	// (underscores avoid a naming collision with ARM64 registers x0-x3)
	X_0		.req	v0
	Y_0		.req	v1
	X_1		.req	v2
	Y_1		.req	v3
	X_2		.req	v4
	Y_2		.req	v5
	X_3		.req	v6
	Y_3		.req	v7

	// the round key, duplicated in all lanes
	ROUND_KEY	.req	v8

	// index vector for tbl-based 8-bit rotates
	ROTATE_TABLE	.req	v9
	ROTATE_TABLE_Q	.req	q9

	// temporary registers
	TMP0		.req	v10
	TMP1		.req	v11
	TMP2		.req	v12
	TMP3		.req	v13

	// multiplication table for updating XTS tweaks
	GFMUL_TABLE	.req	v14
	GFMUL_TABLE_Q	.req	q14

	// next XTS tweak value(s)
	TWEAKV_NEXT	.req	v15

	// XTS tweaks for the blocks currently being encrypted/decrypted
	TWEAKV0		.req	v16
	TWEAKV1		.req	v17
	TWEAKV2		.req	v18
	TWEAKV3		.req	v19
	TWEAKV4		.req	v20
	TWEAKV5		.req	v21
	TWEAKV6		.req	v22
	TWEAKV7		.req	v23

	.align		4
.Lror64_8_table:
	.octa		0x080f0e0d0c0b0a090007060504030201
.Lror32_8_table:
	.octa		0x0c0f0e0d080b0a090407060500030201
.Lrol64_8_table:
	.octa		0x0e0d0c0b0a09080f0605040302010007
.Lrol32_8_table:
	.octa		0x0e0d0c0f0a09080b0605040702010003
.Lgf128mul_table:
	.octa		0x00000000000000870000000000000001
.Lgf64mul_table:
	.octa		0x0000000000000000000000002d361b00

/*
 * _speck_round_128bytes() - Speck encryption round on 128 bytes at a time
 *
 * Do one Speck encryption round on the 128 bytes (8 blocks for Speck128, 16 for
 * Speck64) stored in X0-X3 and Y0-Y3, using the round key stored in all lanes
 * of ROUND_KEY. 'n' is the lane size: 64 for Speck128, or 32 for Speck64.
 * 'lanes' is the lane specifier: "2d" for Speck128 or "4s" for Speck64.
 */
.macro _speck_round_128bytes	n, lanes

	// x = ror(x, 8)
	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b

	// x += y
	add		X_0.\lanes, X_0.\lanes, Y_0.\lanes
	add		X_1.\lanes, X_1.\lanes, Y_1.\lanes
	add		X_2.\lanes, X_2.\lanes, Y_2.\lanes
	add		X_3.\lanes, X_3.\lanes, Y_3.\lanes

	// x ^= k
	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
	eor		X_3.16b, X_3.16b, ROUND_KEY.16b

	// y = rol(y, 3)
	shl		TMP0.\lanes, Y_0.\lanes, #3
	shl		TMP1.\lanes, Y_1.\lanes, #3
	shl		TMP2.\lanes, Y_2.\lanes, #3
	shl		TMP3.\lanes, Y_3.\lanes, #3
	sri		TMP0.\lanes, Y_0.\lanes, #(\n - 3)
	sri		TMP1.\lanes, Y_1.\lanes, #(\n - 3)
	sri		TMP2.\lanes, Y_2.\lanes, #(\n - 3)
	sri		TMP3.\lanes, Y_3.\lanes, #(\n - 3)

	// y ^= x
	eor		Y_0.16b, TMP0.16b, X_0.16b
	eor		Y_1.16b, TMP1.16b, X_1.16b
	eor		Y_2.16b, TMP2.16b, X_2.16b
	eor		Y_3.16b, TMP3.16b, X_3.16b
.endm

/*
 * _speck_unround_128bytes() - Speck decryption round on 128 bytes at a time
 *
 * This is the inverse of _speck_round_128bytes().
 */
.macro _speck_unround_128bytes	n, lanes

	// y ^= x
	eor		TMP0.16b, Y_0.16b, X_0.16b
	eor		TMP1.16b, Y_1.16b, X_1.16b
	eor		TMP2.16b, Y_2.16b, X_2.16b
	eor		TMP3.16b, Y_3.16b, X_3.16b

	// y = ror(y, 3)
	ushr		Y_0.\lanes, TMP0.\lanes, #3
	ushr		Y_1.\lanes, TMP1.\lanes, #3
	ushr		Y_2.\lanes, TMP2.\lanes, #3
	ushr		Y_3.\lanes, TMP3.\lanes, #3
	sli		Y_0.\lanes, TMP0.\lanes, #(\n - 3)
	sli		Y_1.\lanes, TMP1.\lanes, #(\n - 3)
	sli		Y_2.\lanes, TMP2.\lanes, #(\n - 3)
	sli		Y_3.\lanes, TMP3.\lanes, #(\n - 3)

	// x ^= k
	eor		X_0.16b, X_0.16b, ROUND_KEY.16b
	eor		X_1.16b, X_1.16b, ROUND_KEY.16b
	eor		X_2.16b, X_2.16b, ROUND_KEY.16b
	eor		X_3.16b, X_3.16b, ROUND_KEY.16b

	// x -= y
	sub		X_0.\lanes, X_0.\lanes, Y_0.\lanes
	sub		X_1.\lanes, X_1.\lanes, Y_1.\lanes
	sub		X_2.\lanes, X_2.\lanes, Y_2.\lanes
	sub		X_3.\lanes, X_3.\lanes, Y_3.\lanes

	// x = rol(x, 8)
	tbl		X_0.16b, {X_0.16b}, ROTATE_TABLE.16b
	tbl		X_1.16b, {X_1.16b}, ROTATE_TABLE.16b
	tbl		X_2.16b, {X_2.16b}, ROTATE_TABLE.16b
	tbl		X_3.16b, {X_3.16b}, ROTATE_TABLE.16b
.endm

.macro _next_xts_tweak	next, cur, tmp, n
.if \n == 64
	/*
	 * Calculate the next tweak by multiplying the current one by x,
	 * modulo p(x) = x^128 + x^7 + x^2 + x + 1.
	 */
	sshr		\tmp\().2d, \cur\().2d, #63
	and		\tmp\().16b, \tmp\().16b, GFMUL_TABLE.16b
	shl		\next\().2d, \cur\().2d, #1
	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
	eor		\next\().16b, \next\().16b, \tmp\().16b
.else
	/*
	 * Calculate the next two tweaks by multiplying the current ones by x^2,
	 * modulo p(x) = x^64 + x^4 + x^3 + x + 1.
	 */
	ushr		\tmp\().2d, \cur\().2d, #62
	shl		\next\().2d, \cur\().2d, #2
	tbl		\tmp\().16b, {GFMUL_TABLE.16b}, \tmp\().16b
	eor		\next\().16b, \next\().16b, \tmp\().16b
.endif
.endm

/*
 * _speck_xts_crypt() - Speck-XTS encryption/decryption
 *
 * Encrypt or decrypt NBYTES bytes of data from the SRC buffer to the DST buffer
 * using Speck-XTS, specifically the variant with a block size of '2n' and round
 * count given by NROUNDS. The expanded round keys are given in ROUND_KEYS, and
 * the current XTS tweak value is given in TWEAK. It's assumed that NBYTES is a
 * nonzero multiple of 128.
 */
.macro _speck_xts_crypt	n, lanes, decrypting

	/*
	 * If decrypting, modify the ROUND_KEYS parameter to point to the last
	 * round key rather than the first, since for decryption the round keys
	 * are used in reverse order.
	 */
.if \decrypting
	mov		NROUNDS, NROUNDS	/* zero the high 32 bits */
.if \n == 64
	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #3
	sub		ROUND_KEYS, ROUND_KEYS, #8
.else
	add		ROUND_KEYS, ROUND_KEYS, NROUNDS_X, lsl #2
	sub		ROUND_KEYS, ROUND_KEYS, #4
.endif
.endif

	// Load the index vector for tbl-based 8-bit rotates
.if \decrypting
	ldr		ROTATE_TABLE_Q, .Lrol\n\()_8_table
.else
	ldr		ROTATE_TABLE_Q, .Lror\n\()_8_table
.endif

	// One-time XTS preparation
.if \n == 64
	// Load first tweak
	ld1		{TWEAKV0.16b}, [TWEAK]

	// Load GF(2^128) multiplication table
	ldr		GFMUL_TABLE_Q, .Lgf128mul_table
.else
	// Load first tweak
	ld1		{TWEAKV0.8b}, [TWEAK]

	// Load GF(2^64) multiplication table
	ldr		GFMUL_TABLE_Q, .Lgf64mul_table

	// Calculate second tweak, packing it together with the first
	ushr		TMP0.2d, TWEAKV0.2d, #63
	shl		TMP1.2d, TWEAKV0.2d, #1
	tbl		TMP0.8b, {GFMUL_TABLE.16b}, TMP0.8b
	eor		TMP0.8b, TMP0.8b, TMP1.8b
	mov		TWEAKV0.d[1], TMP0.d[0]
.endif

.Lnext_128bytes_\@:

	// Calculate XTS tweaks for next 128 bytes
	_next_xts_tweak	TWEAKV1, TWEAKV0, TMP0, \n
	_next_xts_tweak	TWEAKV2, TWEAKV1, TMP0, \n
	_next_xts_tweak	TWEAKV3, TWEAKV2, TMP0, \n
	_next_xts_tweak	TWEAKV4, TWEAKV3, TMP0, \n
	_next_xts_tweak	TWEAKV5, TWEAKV4, TMP0, \n
	_next_xts_tweak	TWEAKV6, TWEAKV5, TMP0, \n
	_next_xts_tweak	TWEAKV7, TWEAKV6, TMP0, \n
	_next_xts_tweak	TWEAKV_NEXT, TWEAKV7, TMP0, \n

	// Load the next source blocks into {X,Y}[0-3]
	ld1		{X_0.16b-Y_1.16b}, [SRC], #64
	ld1		{X_2.16b-Y_3.16b}, [SRC], #64

	// XOR the source blocks with their XTS tweaks
	eor		TMP0.16b, X_0.16b, TWEAKV0.16b
	eor		Y_0.16b, Y_0.16b, TWEAKV1.16b
	eor		TMP1.16b, X_1.16b, TWEAKV2.16b
	eor		Y_1.16b, Y_1.16b, TWEAKV3.16b
	eor		TMP2.16b, X_2.16b, TWEAKV4.16b
	eor		Y_2.16b, Y_2.16b, TWEAKV5.16b
	eor		TMP3.16b, X_3.16b, TWEAKV6.16b
	eor		Y_3.16b, Y_3.16b, TWEAKV7.16b

	/*
	 * De-interleave the 'x' and 'y' elements of each block, i.e. make it so
	 * that the X[0-3] registers contain only the second halves of blocks,
	 * and the Y[0-3] registers contain only the first halves of blocks.
	 * (Speck uses the order (y, x) rather than the more intuitive (x, y).)
	 */
	uzp2		X_0.\lanes, TMP0.\lanes, Y_0.\lanes
	uzp1		Y_0.\lanes, TMP0.\lanes, Y_0.\lanes
	uzp2		X_1.\lanes, TMP1.\lanes, Y_1.\lanes
	uzp1		Y_1.\lanes, TMP1.\lanes, Y_1.\lanes
	uzp2		X_2.\lanes, TMP2.\lanes, Y_2.\lanes
	uzp1		Y_2.\lanes, TMP2.\lanes, Y_2.\lanes
	uzp2		X_3.\lanes, TMP3.\lanes, Y_3.\lanes
	uzp1		Y_3.\lanes, TMP3.\lanes, Y_3.\lanes

	// Do the cipher rounds
	mov		x6, ROUND_KEYS
	mov		w7, NROUNDS
.Lnext_round_\@:
.if \decrypting
	ld1r		{ROUND_KEY.\lanes}, [x6]
	sub		x6, x6, #( \n / 8 )
	_speck_unround_128bytes	\n, \lanes
.else
	ld1r		{ROUND_KEY.\lanes}, [x6], #( \n / 8 )
	_speck_round_128bytes	\n, \lanes
.endif
	subs		w7, w7, #1
	bne		.Lnext_round_\@

	// Re-interleave the 'x' and 'y' elements of each block
	zip1		TMP0.\lanes, Y_0.\lanes, X_0.\lanes
	zip2		Y_0.\lanes, Y_0.\lanes, X_0.\lanes
	zip1		TMP1.\lanes, Y_1.\lanes, X_1.\lanes
	zip2		Y_1.\lanes, Y_1.\lanes, X_1.\lanes
	zip1		TMP2.\lanes, Y_2.\lanes, X_2.\lanes
	zip2		Y_2.\lanes, Y_2.\lanes, X_2.\lanes
	zip1		TMP3.\lanes, Y_3.\lanes, X_3.\lanes
	zip2		Y_3.\lanes, Y_3.\lanes, X_3.\lanes

	// XOR the encrypted/decrypted blocks with the tweaks calculated earlier
	eor		X_0.16b, TMP0.16b, TWEAKV0.16b
	eor		Y_0.16b, Y_0.16b, TWEAKV1.16b
	eor		X_1.16b, TMP1.16b, TWEAKV2.16b
	eor		Y_1.16b, Y_1.16b, TWEAKV3.16b
	eor		X_2.16b, TMP2.16b, TWEAKV4.16b
	eor		Y_2.16b, Y_2.16b, TWEAKV5.16b
	eor		X_3.16b, TMP3.16b, TWEAKV6.16b
	eor		Y_3.16b, Y_3.16b, TWEAKV7.16b
	mov		TWEAKV0.16b, TWEAKV_NEXT.16b

	// Store the ciphertext in the destination buffer
	st1		{X_0.16b-Y_1.16b}, [DST], #64
	st1		{X_2.16b-Y_3.16b}, [DST], #64

	// Continue if there are more 128-byte chunks remaining
	subs		NBYTES, NBYTES, #128
	bne		.Lnext_128bytes_\@

	// Store the next tweak and return
.if \n == 64
	st1		{TWEAKV_NEXT.16b}, [TWEAK]
.else
	st1		{TWEAKV_NEXT.8b}, [TWEAK]
.endif
	ret
.endm

ENTRY(speck128_xts_encrypt_neon)
	_speck_xts_crypt	n=64, lanes=2d, decrypting=0
ENDPROC(speck128_xts_encrypt_neon)

ENTRY(speck128_xts_decrypt_neon)
	_speck_xts_crypt	n=64, lanes=2d, decrypting=1
ENDPROC(speck128_xts_decrypt_neon)

ENTRY(speck64_xts_encrypt_neon)
	_speck_xts_crypt	n=32, lanes=4s, decrypting=0
ENDPROC(speck64_xts_encrypt_neon)

ENTRY(speck64_xts_decrypt_neon)
	_speck_xts_crypt	n=32, lanes=4s, decrypting=1
ENDPROC(speck64_xts_decrypt_neon)
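The comments in _speck_round_128bytes spell out the scalar round the NEON code vectorizes eight blocks at a time. In C, one Speck128 round (64-bit words) looks like this; it follows directly from those comments:

#include <stdint.h>

/* one scalar Speck128 round, matching the x/y steps in the macro above */
static inline void speck128_round(uint64_t *x, uint64_t *y, uint64_t k)
{
	*x = (*x >> 8) | (*x << 56);	/* x = ror(x, 8) */
	*x += *y;			/* x += y */
	*x ^= k;			/* x ^= k */
	*y = (*y << 3) | (*y >> 61);	/* y = rol(y, 3) */
	*y ^= *x;			/* y ^= x */
}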
@@ -1,282 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * NEON-accelerated implementation of Speck128-XTS and Speck64-XTS
 * (64-bit version; based on the 32-bit version)
 *
 * Copyright (c) 2018 Google, Inc
 */

#include <asm/hwcap.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <crypto/algapi.h>
#include <crypto/gf128mul.h>
#include <crypto/internal/skcipher.h>
#include <crypto/speck.h>
#include <crypto/xts.h>
#include <linux/kernel.h>
#include <linux/module.h>

/* The assembly functions only handle multiples of 128 bytes */
#define SPECK_NEON_CHUNK_SIZE	128

/* Speck128 */

struct speck128_xts_tfm_ctx {
	struct speck128_tfm_ctx main_key;
	struct speck128_tfm_ctx tweak_key;
};

asmlinkage void speck128_xts_encrypt_neon(const u64 *round_keys, int nrounds,
					  void *dst, const void *src,
					  unsigned int nbytes, void *tweak);

asmlinkage void speck128_xts_decrypt_neon(const u64 *round_keys, int nrounds,
					  void *dst, const void *src,
					  unsigned int nbytes, void *tweak);

typedef void (*speck128_crypt_one_t)(const struct speck128_tfm_ctx *,
				     u8 *, const u8 *);
typedef void (*speck128_xts_crypt_many_t)(const u64 *, int, void *,
					  const void *, unsigned int, void *);

static __always_inline int
__speck128_xts_crypt(struct skcipher_request *req,
		     speck128_crypt_one_t crypt_one,
		     speck128_xts_crypt_many_t crypt_many)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	le128 tweak;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	crypto_speck128_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;
		u8 *dst = walk.dst.virt.addr;
		const u8 *src = walk.src.virt.addr;

		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			unsigned int count;

			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			kernel_neon_begin();
			(*crypt_many)(ctx->main_key.round_keys,
				      ctx->main_key.nrounds,
				      dst, src, count, &tweak);
			kernel_neon_end();
			dst += count;
			src += count;
			nbytes -= count;
		}

		/* Handle any remainder with generic code */
		while (nbytes >= sizeof(tweak)) {
			le128_xor((le128 *)dst, (const le128 *)src, &tweak);
			(*crypt_one)(&ctx->main_key, dst, dst);
			le128_xor((le128 *)dst, (const le128 *)dst, &tweak);
			gf128mul_x_ble(&tweak, &tweak);

			dst += sizeof(tweak);
			src += sizeof(tweak);
			nbytes -= sizeof(tweak);
		}
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int speck128_xts_encrypt(struct skcipher_request *req)
{
	return __speck128_xts_crypt(req, crypto_speck128_encrypt,
				    speck128_xts_encrypt_neon);
}

static int speck128_xts_decrypt(struct skcipher_request *req)
{
	return __speck128_xts_crypt(req, crypto_speck128_decrypt,
				    speck128_xts_decrypt_neon);
}

static int speck128_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int keylen)
{
	struct speck128_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	keylen /= 2;

	err = crypto_speck128_setkey(&ctx->main_key, key, keylen);
	if (err)
		return err;

	return crypto_speck128_setkey(&ctx->tweak_key, key + keylen, keylen);
}

/* Speck64 */

struct speck64_xts_tfm_ctx {
	struct speck64_tfm_ctx main_key;
	struct speck64_tfm_ctx tweak_key;
};

asmlinkage void speck64_xts_encrypt_neon(const u32 *round_keys, int nrounds,
					 void *dst, const void *src,
					 unsigned int nbytes, void *tweak);

asmlinkage void speck64_xts_decrypt_neon(const u32 *round_keys, int nrounds,
					 void *dst, const void *src,
					 unsigned int nbytes, void *tweak);

typedef void (*speck64_crypt_one_t)(const struct speck64_tfm_ctx *,
				    u8 *, const u8 *);
typedef void (*speck64_xts_crypt_many_t)(const u32 *, int, void *,
					 const void *, unsigned int, void *);

static __always_inline int
__speck64_xts_crypt(struct skcipher_request *req, speck64_crypt_one_t crypt_one,
		    speck64_xts_crypt_many_t crypt_many)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	const struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	__le64 tweak;
	int err;

	err = skcipher_walk_virt(&walk, req, true);

	crypto_speck64_encrypt(&ctx->tweak_key, (u8 *)&tweak, walk.iv);

	while (walk.nbytes > 0) {
		unsigned int nbytes = walk.nbytes;
		u8 *dst = walk.dst.virt.addr;
		const u8 *src = walk.src.virt.addr;

		if (nbytes >= SPECK_NEON_CHUNK_SIZE && may_use_simd()) {
			unsigned int count;

			count = round_down(nbytes, SPECK_NEON_CHUNK_SIZE);
			kernel_neon_begin();
			(*crypt_many)(ctx->main_key.round_keys,
				      ctx->main_key.nrounds,
				      dst, src, count, &tweak);
			kernel_neon_end();
			dst += count;
			src += count;
			nbytes -= count;
		}

		/* Handle any remainder with generic code */
		while (nbytes >= sizeof(tweak)) {
			*(__le64 *)dst = *(__le64 *)src ^ tweak;
			(*crypt_one)(&ctx->main_key, dst, dst);
			*(__le64 *)dst ^= tweak;
			tweak = cpu_to_le64((le64_to_cpu(tweak) << 1) ^
					    ((tweak & cpu_to_le64(1ULL << 63)) ?
					     0x1B : 0));
			dst += sizeof(tweak);
			src += sizeof(tweak);
			nbytes -= sizeof(tweak);
		}
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}

static int speck64_xts_encrypt(struct skcipher_request *req)
{
	return __speck64_xts_crypt(req, crypto_speck64_encrypt,
				   speck64_xts_encrypt_neon);
}

static int speck64_xts_decrypt(struct skcipher_request *req)
{
	return __speck64_xts_crypt(req, crypto_speck64_decrypt,
				   speck64_xts_decrypt_neon);
}

static int speck64_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
			      unsigned int keylen)
{
	struct speck64_xts_tfm_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	err = xts_verify_key(tfm, key, keylen);
	if (err)
		return err;

	keylen /= 2;

	err = crypto_speck64_setkey(&ctx->main_key, key, keylen);
	if (err)
		return err;

	return crypto_speck64_setkey(&ctx->tweak_key, key + keylen, keylen);
}

static struct skcipher_alg speck_algs[] = {
	{
		.base.cra_name		= "xts(speck128)",
		.base.cra_driver_name	= "xts-speck128-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= SPECK128_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct speck128_xts_tfm_ctx),
		.base.cra_alignmask	= 7,
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= 2 * SPECK128_128_KEY_SIZE,
		.max_keysize		= 2 * SPECK128_256_KEY_SIZE,
		.ivsize			= SPECK128_BLOCK_SIZE,
		.walksize		= SPECK_NEON_CHUNK_SIZE,
		.setkey			= speck128_xts_setkey,
		.encrypt		= speck128_xts_encrypt,
		.decrypt		= speck128_xts_decrypt,
	}, {
		.base.cra_name		= "xts(speck64)",
		.base.cra_driver_name	= "xts-speck64-neon",
		.base.cra_priority	= 300,
		.base.cra_blocksize	= SPECK64_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct speck64_xts_tfm_ctx),
		.base.cra_alignmask	= 7,
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= 2 * SPECK64_96_KEY_SIZE,
		.max_keysize		= 2 * SPECK64_128_KEY_SIZE,
		.ivsize			= SPECK64_BLOCK_SIZE,
		.walksize		= SPECK_NEON_CHUNK_SIZE,
		.setkey			= speck64_xts_setkey,
		.encrypt		= speck64_xts_encrypt,
		.decrypt		= speck64_xts_decrypt,
	}
};

static int __init speck_neon_module_init(void)
{
	if (!(elf_hwcap & HWCAP_ASIMD))
		return -ENODEV;
	return crypto_register_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
}

static void __exit speck_neon_module_exit(void)
{
	crypto_unregister_skciphers(speck_algs, ARRAY_SIZE(speck_algs));
}

module_init(speck_neon_module_init);
module_exit(speck_neon_module_exit);

MODULE_DESCRIPTION("Speck block cipher (NEON-accelerated)");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
MODULE_ALIAS_CRYPTO("xts(speck128)");
MODULE_ALIAS_CRYPTO("xts-speck128-neon");
MODULE_ALIAS_CRYPTO("xts(speck64)");
MODULE_ALIAS_CRYPTO("xts-speck64-neon");
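Both the NEON path (_next_xts_tweak with n == 64) and the gf128mul_x_ble() remainder path above advance the XTS tweak by multiplying it by x in GF(2^128) modulo x^128 + x^7 + x^2 + x + 1. A scalar sketch on two little-endian 64-bit halves:

#include <stdint.h>

/* multiply the 128-bit XTS tweak by x; t[0] = low half, t[1] = high half */
static void xts_tweak_double(uint64_t t[2])
{
	uint64_t carry = t[1] >> 63;		/* bit that falls off the top */

	t[1] = (t[1] << 1) | (t[0] >> 63);
	t[0] = (t[0] << 1) ^ (carry * 0x87);	/* fold back: x^7 + x^2 + x + 1 */
}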
@@ -532,22 +532,6 @@ alternative_endif
	and	\phys, \pte, #(((1 << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT)
	.endm

/*
 * Check whether preempt-disabled code should yield as soon as it
 * is able. This is the case if re-enabling preemption a single
 * time results in a preempt count of zero, and the TIF_NEED_RESCHED
 * flag is set. (Note that the latter is stored negated in the
 * top word of the thread_info::preempt_count field)
 */
	.macro	cond_yield, lbl:req, tmp:req
#ifdef CONFIG_PREEMPTION
	get_current_task \tmp
	ldr	\tmp, [\tmp, #TSK_TI_PREEMPT]
	sub	\tmp, \tmp, #PREEMPT_DISABLE_OFFSET
	cbz	\tmp, \lbl
#endif
	.endm

/*
 * Check the MIDR_EL1 of the current CPU for a given model and a range of
 * variant/revision. See asm/cputype.h for the macros used below.
@@ -587,67 +571,4 @@
.Ldone\@:
	.endm

/*
 * frame_push - Push @regcount callee saved registers to the stack,
 *              starting at x19, as well as x29/x30, and set x29 to
 *              the new value of sp. Add @extra bytes of stack space
 *              for locals.
 */
	.macro	frame_push, regcount:req, extra
	__frame	st, \regcount, \extra
	.endm

/*
 * frame_pop  - Pop the callee saved registers from the stack that were
 *              pushed in the most recent call to frame_push, as well
 *              as x29/x30 and any extra stack space that may have been
 *              allocated.
 */
	.macro	frame_pop
	__frame	ld
	.endm

	.macro	__frame_regs, reg1, reg2, op, num
	.if	.Lframe_regcount == \num
	\op\()r	\reg1, [sp, #(\num + 1) * 8]
	.elseif	.Lframe_regcount > \num
	\op\()p	\reg1, \reg2, [sp, #(\num + 1) * 8]
	.endif
	.endm

	.macro	__frame, op, regcount, extra=0
	.ifc	\op, st
	.if	(\regcount) < 0 || (\regcount) > 10
	.error	"regcount should be in the range [0 ... 10]"
	.endif
	.if	((\extra) % 16) != 0
	.error	"extra should be a multiple of 16 bytes"
	.endif
	.ifdef	.Lframe_regcount
	.if	.Lframe_regcount != -1
	.error	"frame_push/frame_pop may not be nested"
	.endif
	.endif
	.set	.Lframe_regcount, \regcount
	.set	.Lframe_extra, \extra
	.set	.Lframe_local_offset, ((\regcount + 3) / 2) * 16
	stp	x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
	mov	x29, sp
	.endif

	__frame_regs	x19, x20, \op, 1
	__frame_regs	x21, x22, \op, 3
	__frame_regs	x23, x24, \op, 5
	__frame_regs	x25, x26, \op, 7
	__frame_regs	x27, x28, \op, 9

	.ifc	\op, ld
	.if	.Lframe_regcount == -1
	.error	"frame_push/frame_pop may not be nested"
	.endif
	ldp	x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
	.set	.Lframe_regcount, -1
	.endif
	.endm

#endif /* __ASM_ASSEMBLER_H */
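The cond_yield macro removed above folds two tests into one subtraction, as its comment explains: because TIF_NEED_RESCHED is stored negated in the top 32 bits of thread_info::preempt_count, the result is zero only when the count would drop to zero and a reschedule is pending. An illustrative C model (all names and the offset value here are hypothetical stand-ins, not the kernel's definitions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PREEMPT_DISABLE_OFFSET_MODEL	1ULL	/* model: one disable level */

/* top 32 bits of count hold the negated need_resched flag word */
static bool should_yield(uint64_t preempt_count)
{
	return preempt_count - PREEMPT_DISABLE_OFFSET_MODEL == 0;
}

int main(void)
{
	/* count 1, need_resched set (negated flag word == 0): yield */
	printf("%d\n", should_yield(1));			/* 1 */
	/* count 1, need_resched clear (top word nonzero): no yield */
	printf("%d\n", should_yield((1ULL << 32) | 1));		/* 0 */
	return 0;
}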
@@ -14,8 +15,8 @@
#include <asm/hwcap.h>
#include <asm/sysreg.h>

#define MAX_CPU_FEATURES	64
#define cpu_feature(x)		KERNEL_HWCAP_ ## x
/*
 * In the arm64 world (as in the ARM world), elf_hwcap is used both internally
 * in the kernel and for user space to keep track of which optional features
 * are supported by the current system. So let's map feature 'x' to HWCAP_x.
 * Note that HWCAP_x constants are bit fields so we need to take the log.
 */

#define MAX_CPU_FEATURES	(8 * sizeof(elf_hwcap))
#define cpu_feature(x)		ilog2(HWCAP_ ## x)

#define ARM64_SSBD_UNKNOWN		-1
#define ARM64_SSBD_FORCE_DISABLE	0
@@ -340,19 +347,10 @@ extern struct static_key_false arm64_const_caps_ready;

bool this_cpu_has_cap(unsigned int cap);

static inline void cpu_set_feature(unsigned int num)
{
	WARN_ON(num >= MAX_CPU_FEATURES);
	elf_hwcap |= BIT(num);
}
#define cpu_set_named_feature(name) cpu_set_feature(cpu_feature(name))

static inline bool cpu_have_feature(unsigned int num)
{
	WARN_ON(num >= MAX_CPU_FEATURES);
	return elf_hwcap & BIT(num);
	return elf_hwcap & (1UL << num);
}
#define cpu_have_named_feature(name) cpu_have_feature(cpu_feature(name))

/* System capability check for constant caps */
static inline bool __cpus_have_const_cap(int num)
@@ -40,61 +40,11 @@
#define COMPAT_HWCAP2_CRC32	(1 << 4)

#ifndef __ASSEMBLY__
#include <linux/kernel.h>
#include <linux/log2.h>

/*
 * For userspace we represent hwcaps as a collection of HWCAP{,2}_x bitfields
 * as described in uapi/asm/hwcap.h. For the kernel we represent hwcaps as
 * natural numbers (in a single range of size MAX_CPU_FEATURES) defined here
 * with prefix KERNEL_HWCAP_ mapped to their HWCAP{,2}_x counterpart.
 *
 * Hwcaps should be set and tested within the kernel via the
 * cpu_{set,have}_named_feature(feature) where feature is the unique suffix
 * of KERNEL_HWCAP_{feature}.
 */
#define __khwcap_feature(x)	const_ilog2(HWCAP_ ## x)
#define KERNEL_HWCAP_FP		__khwcap_feature(FP)
#define KERNEL_HWCAP_ASIMD	__khwcap_feature(ASIMD)
#define KERNEL_HWCAP_EVTSTRM	__khwcap_feature(EVTSTRM)
#define KERNEL_HWCAP_AES	__khwcap_feature(AES)
#define KERNEL_HWCAP_PMULL	__khwcap_feature(PMULL)
#define KERNEL_HWCAP_SHA1	__khwcap_feature(SHA1)
#define KERNEL_HWCAP_SHA2	__khwcap_feature(SHA2)
#define KERNEL_HWCAP_CRC32	__khwcap_feature(CRC32)
#define KERNEL_HWCAP_ATOMICS	__khwcap_feature(ATOMICS)
#define KERNEL_HWCAP_FPHP	__khwcap_feature(FPHP)
#define KERNEL_HWCAP_ASIMDHP	__khwcap_feature(ASIMDHP)
#define KERNEL_HWCAP_CPUID	__khwcap_feature(CPUID)
#define KERNEL_HWCAP_ASIMDRDM	__khwcap_feature(ASIMDRDM)
#define KERNEL_HWCAP_JSCVT	__khwcap_feature(JSCVT)
#define KERNEL_HWCAP_FCMA	__khwcap_feature(FCMA)
#define KERNEL_HWCAP_LRCPC	__khwcap_feature(LRCPC)
#define KERNEL_HWCAP_DCPOP	__khwcap_feature(DCPOP)
#define KERNEL_HWCAP_SHA3	__khwcap_feature(SHA3)
#define KERNEL_HWCAP_SM3	__khwcap_feature(SM3)
#define KERNEL_HWCAP_SM4	__khwcap_feature(SM4)
#define KERNEL_HWCAP_ASIMDDP	__khwcap_feature(ASIMDDP)
#define KERNEL_HWCAP_SHA512	__khwcap_feature(SHA512)
#define KERNEL_HWCAP_SVE	__khwcap_feature(SVE)
#define KERNEL_HWCAP_ASIMDFHM	__khwcap_feature(ASIMDFHM)
#define KERNEL_HWCAP_DIT	__khwcap_feature(DIT)
#define KERNEL_HWCAP_USCAT	__khwcap_feature(USCAT)
#define KERNEL_HWCAP_ILRCPC	__khwcap_feature(ILRCPC)
#define KERNEL_HWCAP_FLAGM	__khwcap_feature(FLAGM)
#define KERNEL_HWCAP_SSBS	__khwcap_feature(SSBS)
#define KERNEL_HWCAP_SB		__khwcap_feature(SB)
#define KERNEL_HWCAP_PACA	__khwcap_feature(PACA)
#define KERNEL_HWCAP_PACG	__khwcap_feature(PACG)

#define __khwcap2_feature(x)	(const_ilog2(HWCAP2_ ## x) + 32)

/*
 * This yields a mask that user programs can use to figure out what
 * instruction set this cpu supports.
 */
#define ELF_HWCAP		lower_32_bits(elf_hwcap)
#define ELF_HWCAP2		upper_32_bits(elf_hwcap)
#define ELF_HWCAP		(elf_hwcap)

#ifdef CONFIG_COMPAT
#define COMPAT_ELF_HWCAP	(compat_elf_hwcap)
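The mapping the revert restores is direct: a feature number is just the log2 of its HWCAP bit, so cpu_feature(x) and the elf_hwcap mask stay in one 64-bit namespace. A small hedged demonstration (assuming HWCAP_SHA2 is (1 << 6), as in the mainline arm64 uapi header; only FP and ASIMD appear in the excerpt above):

#include <stdint.h>
#include <stdio.h>

#define HWCAP_SHA2	(1UL << 6)	/* assumed value, see lead-in */

/* runtime equivalent of the ilog2() used by cpu_feature() */
static unsigned int ilog2_u64(uint64_t x)
{
	unsigned int n = 0;

	while (x >>= 1)
		n++;
	return n;
}

int main(void)
{
	uint64_t elf_hwcap = HWCAP_SHA2;
	unsigned int feature = ilog2_u64(HWCAP_SHA2);	/* == 6 */

	printf("feature %u present: %d\n", feature,
	       (elf_hwcap & (1UL << feature)) != 0);
	return 0;
}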
@@ -18,7 +18,7 @@
#define _UAPI__ASM_HWCAP_H

/*
 * HWCAP flags - for AT_HWCAP
 * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP
 */
#define HWCAP_FP		(1 << 0)
#define HWCAP_ASIMD		(1 << 1)
@ -1258,32 +1258,32 @@ static const struct arm64_cpu_capabilities arm64_features[] = {
|
||||
}
|
||||
|
||||
static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = {
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC),
|
||||
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_PMULL),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_AES),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA1),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA2),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_SHA512),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_CRC32),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ATOMICS),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDRDM),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SHA3),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM3),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_SM4),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDDP),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_ASIMDFHM),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FLAGM),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_FP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_FPHP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, HWCAP_ASIMD),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_ASIMDHP),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, HWCAP_DIT),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_DCPOP),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_JSCVT),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_FCMA),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_LRCPC),
|
||||
HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, HWCAP_ILRCPC),
|
||||
HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, HWCAP_USCAT),
|
||||
HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, HWCAP_SSBS),
|
||||
{},
|
||||
};
|
||||
|
||||
@@ -1329,7 +1329,7 @@ static void __init cap_set_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 {
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
-		cpu_set_feature(cap->hwcap);
+		elf_hwcap |= cap->hwcap;
 		break;
 #ifdef CONFIG_COMPAT
 	case CAP_COMPAT_HWCAP:
@@ -1352,7 +1352,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 
 	switch (cap->hwcap_type) {
 	case CAP_HWCAP:
-		rc = cpu_have_feature(cap->hwcap);
+		rc = (elf_hwcap & cap->hwcap) != 0;
 		break;
 #ifdef CONFIG_COMPAT
 	case CAP_COMPAT_HWCAP:
@@ -1373,7 +1373,7 @@ static bool cpus_have_elf_hwcap(const struct arm64_cpu_capabilities *cap)
 static void __init setup_elf_hwcaps(const struct arm64_cpu_capabilities *hwcaps)
 {
 	/* We support emulation of accesses to CPU ID feature registers */
-	cpu_set_named_feature(CPUID);
+	elf_hwcap |= HWCAP_CPUID;
 	for (; hwcaps->matches; hwcaps++)
 		if (hwcaps->matches(hwcaps, cpucap_default_scope(hwcaps)))
 			cap_set_elf_hwcap(hwcaps);
@@ -172,7 +172,7 @@ static int c_show(struct seq_file *m, void *v)
 #endif /* CONFIG_COMPAT */
 	} else {
 		for (j = 0; hwcap_str[j]; j++)
-			if (cpu_have_feature(j))
+			if (elf_hwcap & (1 << j))
 				seq_printf(m, " %s", hwcap_str[j]);
 	}
 	seq_puts(m, "\n");
@@ -444,14 +444,14 @@ static inline void fpsimd_hotplug_init(void) { }
  */
 static int __init fpsimd_init(void)
 {
-	if (cpu_have_named_feature(FP)) {
+	if (elf_hwcap & HWCAP_FP) {
 		fpsimd_pm_init();
 		fpsimd_hotplug_init();
 	} else {
 		pr_notice("Floating-point is not implemented\n");
 	}
 
-	if (!cpu_have_named_feature(ASIMD))
+	if (!(elf_hwcap & HWCAP_ASIMD))
 		pr_notice("Advanced SIMD is not implemented\n");
 
 	return 0;
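The hunks above swap the backported cpu_set_feature()/cpu_have_feature() helpers back to the plain elf_hwcap mask; what userspace observes through the ELF auxiliary vector is the same either way. A minimal userspace sketch (not part of this diff; assumes an arm64 system where <asm/hwcap.h> defines the HWCAP_* bits) of probing the capabilities the table above publishes:

/* sketch: read the hwcaps set by the kernel via AT_HWCAP */
#include <stdio.h>
#include <sys/auxv.h>
#include <asm/hwcap.h>	/* HWCAP_AES, HWCAP_SHA2, HWCAP_CRC32 (arm64) */

int main(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	printf("aes   : %s\n", (hwcap & HWCAP_AES)   ? "yes" : "no");
	printf("sha2  : %s\n", (hwcap & HWCAP_SHA2)  ? "yes" : "no");
	printf("crc32 : %s\n", (hwcap & HWCAP_CRC32) ? "yes" : "no");
	return 0;
}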
@@ -12,7 +12,7 @@
 #include <asm/alternative.h>
 #include <asm/assembler.h>
 
-	.cpu		generic+crc
+	.arch		armv8-a+crc
 
 	.macro		__crc32, c
 	cmp		x2, #16
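For context, the CRC32 instructions gated by the directive above are also reachable from C through the ACLE intrinsics. A hypothetical userspace sketch, assuming GCC or Clang on arm64 and a -march=armv8-a+crc build:

/* sketch: byte-wise CRC32 using the armv8 crc32b instruction */
#include <stdint.h>
#include <stdio.h>
#include <arm_acle.h>	/* __crc32b() ACLE intrinsic */

int main(void)
{
	const uint8_t data[] = "hello";
	uint32_t crc = 0xffffffffu;

	for (unsigned int i = 0; i < sizeof(data) - 1; i++)
		crc = __crc32b(crc, data[i]);
	printf("crc32: 0x%08x\n", crc ^ 0xffffffffu);
	return 0;
}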
@@ -182,6 +182,7 @@ static struct shash_alg alg = {
 		.cra_name	= "md5",
 		.cra_driver_name= "octeon-md5",
 		.cra_priority	= OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= MD5_HMAC_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -215,6 +215,7 @@ static struct shash_alg octeon_sha1_alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name= "octeon-sha1",
 		.cra_priority	= OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -239,6 +239,7 @@ static struct shash_alg octeon_sha256_algs[2] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name= "octeon-sha256",
 		.cra_priority	= OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -251,6 +252,7 @@ static struct shash_alg octeon_sha256_algs[2] = { {
 	.base = {
 		.cra_name	= "sha224",
 		.cra_driver_name= "octeon-sha224",
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -235,6 +235,7 @@ static struct shash_alg octeon_sha512_algs[2] = { {
 		.cra_name	= "sha512",
 		.cra_driver_name= "octeon-sha512",
 		.cra_priority	= OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -248,6 +249,7 @@ static struct shash_alg octeon_sha512_algs[2] = { {
 		.cra_name	= "sha384",
 		.cra_driver_name= "octeon-sha384",
 		.cra_priority	= OCTEON_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -139,6 +139,7 @@ static struct shash_alg alg = {
 		.cra_name	= "md5",
 		.cra_driver_name= "md5-ppc",
 		.cra_priority	= 200,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= MD5_HMAC_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -185,6 +185,7 @@ static struct shash_alg alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name= "sha1-ppc-spe",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -132,6 +132,7 @@ static struct shash_alg alg = {
 	.base = {
 		.cra_name	= "sha1",
 		.cra_driver_name= "sha1-powerpc",
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -231,6 +231,7 @@ static struct shash_alg algs[2] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name= "sha256-ppc-spe",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -247,6 +248,7 @@ static struct shash_alg algs[2] = { {
 		.cra_name	= "sha224",
 		.cra_driver_name= "sha224-ppc-spe",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -127,6 +127,7 @@ static struct shash_alg ghash_alg = {
 		.cra_name		= "ghash",
 		.cra_driver_name	= "ghash-s390",
 		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
@@ -83,6 +83,7 @@ static struct shash_alg alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name= "sha1-s390",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -76,6 +76,7 @@ static struct shash_alg sha256_alg = {
 		.cra_name	= "sha256",
 		.cra_driver_name= "sha256-s390",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -112,6 +113,7 @@ static struct shash_alg sha224_alg = {
 		.cra_name	= "sha224",
 		.cra_driver_name= "sha224-s390",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -81,6 +81,7 @@ static struct shash_alg sha512_alg = {
 		.cra_name	= "sha512",
 		.cra_driver_name= "sha512-s390",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -119,6 +120,7 @@ static struct shash_alg sha384_alg = {
 		.cra_name	= "sha384",
 		.cra_driver_name= "sha384-s390",
 		.cra_priority	= 300,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_ctxsize	= sizeof(struct s390_sha_ctx),
 		.cra_module	= THIS_MODULE,
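Each hunk above restores the 4.14-era `.cra_flags = CRYPTO_ALG_TYPE_SHASH,` initializer that the backported commits had dropped. A minimal sketch of the registration pattern these structs feed into (hypothetical "demo" algorithm, not taken from this diff):

/* sketch: 4.14-style shash registration with an explicit type flag */
#include <linux/module.h>
#include <linux/string.h>
#include <crypto/internal/hash.h>

static int demo_init(struct shash_desc *desc) { return 0; }
static int demo_update(struct shash_desc *desc, const u8 *data,
		       unsigned int len) { return 0; }
static int demo_final(struct shash_desc *desc, u8 *out)
{
	memset(out, 0, 4);	/* dummy 32-bit digest */
	return 0;
}

static struct shash_alg demo_alg = {
	.digestsize	= 4,
	.init		= demo_init,
	.update		= demo_update,
	.final		= demo_final,
	.base		= {
		.cra_name	= "demo-digest",
		.cra_driver_name= "demo-digest-generic",
		.cra_priority	= 100,
		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
		.cra_blocksize	= 8,
		.cra_module	= THIS_MODULE,
	},
};

static int __init demo_mod_init(void) { return crypto_register_shash(&demo_alg); }
static void __exit demo_mod_exit(void) { crypto_unregister_shash(&demo_alg); }
module_init(demo_mod_init);
module_exit(demo_mod_exit);
MODULE_LICENSE("GPL");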
@@ -196,14 +196,14 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return 0;
 }
 
-static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
 }
 
-static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
@@ -395,8 +395,8 @@ static struct crypto_alg algs[] = { {
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
 			.cia_max_keysize	= AES_MAX_KEY_SIZE,
 			.cia_setkey		= aes_set_key,
-			.cia_encrypt		= crypto_aes_encrypt,
-			.cia_decrypt		= crypto_aes_decrypt
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
 		}
 	}
 }, {
@@ -144,6 +144,7 @@ static struct shash_alg alg = {
 		.cra_name	= "md5",
 		.cra_driver_name= "md5-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= MD5_HMAC_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -139,6 +139,7 @@ static struct shash_alg alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name= "sha1-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -169,6 +169,7 @@ static struct shash_alg sha256 = {
 		.cra_name	= "sha256",
 		.cra_driver_name= "sha256-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -184,6 +185,7 @@ static struct shash_alg sha224 = {
 		.cra_name	= "sha224",
 		.cra_driver_name= "sha224-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -154,6 +154,7 @@ static struct shash_alg sha512 = {
 		.cra_name	= "sha512",
 		.cra_driver_name= "sha512-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -169,6 +170,7 @@ static struct shash_alg sha384 = {
 		.cra_name	= "sha384",
 		.cra_driver_name= "sha384-sparc64",
 		.cra_priority	= SPARC_CR_OPCODE_PRIORITY,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -328,7 +328,7 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 	return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
 }
 
-static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
@@ -341,7 +341,7 @@ static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 	}
 }
 
-static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
@@ -973,8 +973,8 @@ static struct crypto_alg aesni_algs[] = { {
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
 			.cia_max_keysize	= AES_MAX_KEY_SIZE,
 			.cia_setkey		= aes_set_key,
-			.cia_encrypt		= aesni_encrypt,
-			.cia_decrypt		= aesni_decrypt
+			.cia_encrypt		= aes_encrypt,
+			.cia_decrypt		= aes_decrypt
 		}
 	}
 }, {
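The renames above restore the pre-backport function names; the single-block cipher interface they implement is unchanged. A hedged in-kernel sketch (hypothetical demo module, not from this diff) of how such a `.cia_encrypt` callback is reached through the cipher API:

/* sketch: one AES block through the cipher interface (.cia_encrypt) */
#include <linux/module.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/aes.h>

static int __init aes_block_demo_init(void)
{
	static const u8 key[AES_KEYSIZE_128];		/* all-zero demo key */
	u8 in[AES_BLOCK_SIZE] = {}, out[AES_BLOCK_SIZE];
	struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);
	int err;

	if (IS_ERR(tfm))
		return PTR_ERR(tfm);
	err = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!err)
		crypto_cipher_encrypt_one(tfm, out, in); /* -> .cia_encrypt */
	crypto_free_cipher(tfm);
	return err;
}

static void __exit aes_block_demo_exit(void) { }

module_init(aes_block_demo_init);
module_exit(aes_block_demo_exit);
MODULE_LICENSE("GPL");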
@@ -154,7 +154,8 @@ static struct shash_alg ghash_alg = {
 		.cra_name		= "__ghash",
 		.cra_driver_name	= "__ghash-pclmulqdqni",
 		.cra_priority		= 0,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH |
+					  CRYPTO_ALG_INTERNAL,
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
@@ -171,6 +171,7 @@ static struct shash_alg alg = {
 		.cra_name		= "poly1305",
 		.cra_driver_name	= "poly1305-simd",
 		.cra_priority		= 300,
+		.cra_flags		= CRYPTO_ALG_TYPE_SHASH,
 		.cra_alignmask		= sizeof(u32) - 1,
 		.cra_blocksize		= POLY1305_BLOCK_SIZE,
 		.cra_module		= THIS_MODULE,
@@ -100,6 +100,7 @@ static struct shash_alg sha1_ssse3_alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name = "sha1-ssse3",
 		.cra_priority	= 150,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -150,6 +151,7 @@ static struct shash_alg sha1_avx_alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name = "sha1-avx",
 		.cra_priority	= 160,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -239,6 +241,7 @@ static struct shash_alg sha1_avx2_alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name = "sha1-avx2",
 		.cra_priority	= 170,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -294,6 +297,7 @@ static struct shash_alg sha1_ni_alg = {
 		.cra_name	= "sha1",
 		.cra_driver_name = "sha1-ni",
 		.cra_priority	= 250,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA1_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -109,6 +109,7 @@ static struct shash_alg sha256_ssse3_algs[] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name = "sha256-ssse3",
 		.cra_priority	= 150,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -123,6 +124,7 @@ static struct shash_alg sha256_ssse3_algs[] = { {
 		.cra_name	= "sha224",
 		.cra_driver_name = "sha224-ssse3",
 		.cra_priority	= 150,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -175,6 +177,7 @@ static struct shash_alg sha256_avx_algs[] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name = "sha256-avx",
 		.cra_priority	= 160,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -189,6 +192,7 @@ static struct shash_alg sha256_avx_algs[] = { {
 		.cra_name	= "sha224",
 		.cra_driver_name = "sha224-avx",
 		.cra_priority	= 160,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -257,6 +261,7 @@ static struct shash_alg sha256_avx2_algs[] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name = "sha256-avx2",
 		.cra_priority	= 170,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -271,6 +276,7 @@ static struct shash_alg sha256_avx2_algs[] = { {
 		.cra_name	= "sha224",
 		.cra_driver_name = "sha224-avx2",
 		.cra_priority	= 170,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -337,6 +343,7 @@ static struct shash_alg sha256_ni_algs[] = { {
 		.cra_name	= "sha256",
 		.cra_driver_name = "sha256-ni",
 		.cra_priority	= 250,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA256_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -351,6 +358,7 @@ static struct shash_alg sha256_ni_algs[] = { {
 		.cra_name	= "sha224",
 		.cra_driver_name = "sha224-ni",
 		.cra_priority	= 250,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA224_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -108,6 +108,7 @@ static struct shash_alg sha512_ssse3_algs[] = { {
 		.cra_name	= "sha512",
 		.cra_driver_name = "sha512-ssse3",
 		.cra_priority	= 150,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -122,6 +123,7 @@ static struct shash_alg sha512_ssse3_algs[] = { {
 		.cra_name	= "sha384",
 		.cra_driver_name = "sha384-ssse3",
 		.cra_priority	= 150,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -185,6 +187,7 @@ static struct shash_alg sha512_avx_algs[] = { {
 		.cra_name	= "sha512",
 		.cra_driver_name = "sha512-avx",
 		.cra_priority	= 160,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -199,6 +202,7 @@ static struct shash_alg sha512_avx_algs[] = { {
 		.cra_name	= "sha384",
 		.cra_driver_name = "sha384-avx",
 		.cra_priority	= 160,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -256,6 +260,7 @@ static struct shash_alg sha512_avx2_algs[] = { {
 		.cra_name	= "sha512",
 		.cra_driver_name = "sha512-avx2",
 		.cra_priority	= 170,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA512_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -270,6 +275,7 @@ static struct shash_alg sha512_avx2_algs[] = { {
 		.cra_name	= "sha384",
 		.cra_driver_name = "sha384-avx2",
 		.cra_priority	= 170,
+		.cra_flags	= CRYPTO_ALG_TYPE_SHASH,
 		.cra_blocksize	= SHA384_BLOCK_SIZE,
 		.cra_module	= THIS_MODULE,
 	}
@@ -195,7 +195,7 @@ config BLK_INLINE_ENCRYPTION_FALLBACK
 	bool "Enable crypto API fallback for blk-crypto"
 	depends on BLK_INLINE_ENCRYPTION
 	select CRYPTO
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  Enabling this lets the block layer handle inline encryption
 	  by falling back to the kernel crypto API when inline
@@ -52,12 +52,12 @@ config CRYPTO_AEAD2
 	select CRYPTO_NULL2
 	select CRYPTO_RNG2
 
-config CRYPTO_SKCIPHER
+config CRYPTO_BLKCIPHER
 	tristate
-	select CRYPTO_SKCIPHER2
+	select CRYPTO_BLKCIPHER2
 	select CRYPTO_ALGAPI
 
-config CRYPTO_SKCIPHER2
+config CRYPTO_BLKCIPHER2
 	tristate
 	select CRYPTO_ALGAPI2
 	select CRYPTO_RNG2
@@ -146,7 +146,7 @@ config CRYPTO_MANAGER2
 	def_tristate CRYPTO_MANAGER || (CRYPTO_MANAGER!=n && CRYPTO_ALGAPI=y)
 	select CRYPTO_AEAD2
 	select CRYPTO_HASH2
-	select CRYPTO_SKCIPHER2
+	select CRYPTO_BLKCIPHER2
 	select CRYPTO_AKCIPHER2
 	select CRYPTO_KPP2
 	select CRYPTO_ACOMP2
@@ -185,7 +185,7 @@ config CRYPTO_NULL
 config CRYPTO_NULL2
 	tristate
 	select CRYPTO_ALGAPI2
-	select CRYPTO_SKCIPHER2
+	select CRYPTO_BLKCIPHER2
 	select CRYPTO_HASH2
 
 config CRYPTO_PCRYPT
@@ -203,7 +203,7 @@ config CRYPTO_WORKQUEUE
 
 config CRYPTO_CRYPTD
 	tristate "Software async crypto daemon"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_MANAGER
 	select CRYPTO_WORKQUEUE
@@ -214,7 +214,7 @@ config CRYPTO_CRYPTD
 
 config CRYPTO_MCRYPTD
 	tristate "Software async multi-buffer crypto daemon"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select CRYPTO_MANAGER
 	select CRYPTO_WORKQUEUE
@@ -229,7 +229,7 @@ config CRYPTO_MCRYPTD
 config CRYPTO_AUTHENC
 	tristate "Authenc support"
 	select CRYPTO_AEAD
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_HASH
 	select CRYPTO_NULL
@@ -255,7 +255,7 @@ config CRYPTO_SIMD
 config CRYPTO_GLUE_HELPER_X86
 	tristate
 	depends on X86
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 
 config CRYPTO_ENGINE
 	tristate
@@ -295,7 +295,7 @@ config CRYPTO_CHACHA20POLY1305
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_NULL
 	select CRYPTO_RNG_DEFAULT
 	help
@@ -317,7 +317,7 @@ comment "Block modes"
 
 config CRYPTO_CBC
 	tristate "CBC support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  CBC: Cipher Block Chaining mode
@@ -325,7 +325,7 @@ config CRYPTO_CBC
 
 config CRYPTO_CTR
 	tristate "CTR support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_SEQIV
 	select CRYPTO_MANAGER
 	help
@@ -334,7 +334,7 @@ config CRYPTO_CTR
 
 config CRYPTO_CTS
 	tristate "CTS support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  CTS: Cipher Text Stealing
 	  This is the Cipher Text Stealing mode as described by
@@ -345,7 +345,7 @@ config CRYPTO_CTS
 
 config CRYPTO_ECB
 	tristate "ECB support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  ECB: Electronic CodeBook mode
@@ -354,7 +354,7 @@ config CRYPTO_ECB
 
 config CRYPTO_LRW
 	tristate "LRW support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_GF128MUL
 	help
@@ -366,7 +366,7 @@ config CRYPTO_LRW
 
 config CRYPTO_PCBC
 	tristate "PCBC support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	help
 	  PCBC: Propagating Cipher Block Chaining mode
@@ -374,7 +374,7 @@ config CRYPTO_PCBC
 
 config CRYPTO_XTS
 	tristate "XTS support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_MANAGER
 	select CRYPTO_ECB
 	help
@@ -384,7 +384,7 @@ config CRYPTO_XTS
 
 config CRYPTO_KEYWRAP
 	tristate "Key wrapping support"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  Support for key wrapping (NIST SP800-38F / RFC3394) without
 	  padding.
@@ -888,17 +888,6 @@ config CRYPTO_SHA3
 	  References:
 	  http://keccak.noekeon.org/
 
-config CRYPTO_SM3
-	tristate "SM3 digest algorithm"
-	select CRYPTO_HASH
-	help
-	  SM3 secure hash function as defined by OSCCA GM/T 0004-2012 SM3).
-	  It is part of the Chinese Commercial Cryptography suite.
-
-	  References:
-	  http://www.oscca.gov.cn/UpFile/20101222141857786.pdf
-	  https://datatracker.ietf.org/doc/html/draft-shen-sm3-hash
-
 config CRYPTO_TGR192
 	tristate "Tiger digest algorithms"
 	select CRYPTO_HASH
@@ -934,9 +923,6 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL
 
 comment "Ciphers"
 
-config CRYPTO_LIB_AES
-	tristate
-
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
 	select CRYPTO_ALGAPI
@@ -960,7 +946,6 @@ config CRYPTO_AES
 config CRYPTO_AES_TI
 	tristate "Fixed time AES cipher"
 	select CRYPTO_ALGAPI
-	select CRYPTO_LIB_AES
 	help
 	  This is a generic implementation of AES that attempts to eliminate
 	  data dependent latencies as much as possible without affecting
@@ -1027,7 +1012,7 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_AES_X86_64 if 64BIT
 	select CRYPTO_AES_586 if !64BIT
 	select CRYPTO_ALGAPI
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86 if 64BIT
 	select CRYPTO_SIMD
 	help
@@ -1111,7 +1096,7 @@ config CRYPTO_ANUBIS
 
 config CRYPTO_ARC4
 	tristate "ARC4 cipher algorithm"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  ARC4 cipher algorithm.
 
@@ -1339,7 +1324,7 @@ config CRYPTO_DES3_EDE_X86_64
 config CRYPTO_FCRYPT
 	tristate "FCrypt cipher algorithm"
 	select CRYPTO_ALGAPI
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  FCrypt algorithm used by RxRPC.
 
@@ -1358,7 +1343,7 @@ config CRYPTO_KHAZAD
 
 config CRYPTO_SALSA20
 	tristate "Salsa20 stream cipher algorithm"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  Salsa20 stream cipher algorithm.
 
@@ -1370,7 +1355,7 @@ config CRYPTO_SALSA20
 
 config CRYPTO_CHACHA20
 	tristate "ChaCha stream cipher algorithms"
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	help
 	  The ChaCha20, XChaCha20, and XChaCha12 stream cipher algorithms.
 
@@ -1392,7 +1377,7 @@ config CRYPTO_CHACHA20
 config CRYPTO_CHACHA20_X86_64
 	tristate "ChaCha20 cipher algorithm (x86_64/SSSE3/AVX2)"
 	depends on X86 && 64BIT
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_CHACHA20
 	help
 	  ChaCha20 cipher algorithm, RFC7539.
@@ -1431,31 +1416,6 @@ config CRYPTO_SERPENT
 	  See also:
 	  <http://www.cl.cam.ac.uk/~rja14/serpent.html>
 
-config CRYPTO_SM4
-	tristate "SM4 cipher algorithm"
-	select CRYPTO_ALGAPI
-	help
-	  SM4 cipher algorithms (OSCCA GB/T 32907-2016).
-
-	  SM4 (GBT.32907-2016) is a cryptographic standard issued by the
-	  Organization of State Commercial Administration of China (OSCCA)
-	  as an authorized cryptographic algorithms for the use within China.
-
-	  SMS4 was originally created for use in protecting wireless
-	  networks, and is mandated in the Chinese National Standard for
-	  Wireless LAN WAPI (Wired Authentication and Privacy Infrastructure)
-	  (GB.15629.11-2003).
-
-	  The latest SM4 standard (GBT.32907-2016) was proposed by OSCCA and
-	  standardized through TC 260 of the Standardization Administration
-	  of the People's Republic of China (SAC).
-
-	  The input, output, and key of SMS4 are each 128 bits.
-
-	  See also: <https://eprint.iacr.org/2008/329.pdf>
-
-	  If unsure, say N.
-
 config CRYPTO_SERPENT_SSE2_X86_64
 	tristate "Serpent cipher algorithm (x86_64/SSE2)"
 	depends on X86 && 64BIT
@@ -1795,7 +1755,7 @@ config CRYPTO_USER_API_HASH
 config CRYPTO_USER_API_SKCIPHER
 	tristate "User-space interface for symmetric key cipher algorithms"
 	depends on NET
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_USER_API
 	help
 	  This option enables the user-spaces interface for symmetric
@@ -1814,7 +1774,7 @@ config CRYPTO_USER_API_AEAD
 	tristate "User-space interface for AEAD cipher algorithms"
 	depends on NET
 	select CRYPTO_AEAD
-	select CRYPTO_SKCIPHER
+	select CRYPTO_BLKCIPHER
 	select CRYPTO_NULL
 	select CRYPTO_USER_API
 	help
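The user-space API options above keep their behaviour across the SKCIPHER/BLKCIPHER rename; only the internal Kconfig symbols they select change. A hedged userspace sketch (assumes a kernel with CRYPTO_USER_API_SKCIPHER enabled) probing that interface via AF_ALG:

/* sketch: check whether cbc(aes) is reachable through AF_ALG */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

int main(void)
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "skcipher",
		.salg_name   = "cbc(aes)",
	};
	int tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);

	if (tfm < 0 || bind(tfm, (struct sockaddr *)&sa, sizeof(sa))) {
		perror("cbc(aes) unavailable");
		return 1;
	}
	puts("cbc(aes) available via AF_ALG");
	close(tfm);
	return 0;
}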
@@ -17,9 +17,10 @@ obj-$(CONFIG_CRYPTO_ALGAPI2) += crypto_algapi.o
 
 obj-$(CONFIG_CRYPTO_AEAD2) += aead.o
 
-crypto_skcipher-y := ablkcipher.o blkcipher.o
-crypto_skcipher-y += skcipher.o
-obj-$(CONFIG_CRYPTO_SKCIPHER2) += crypto_skcipher.o
+crypto_blkcipher-y := ablkcipher.o
+crypto_blkcipher-y += blkcipher.o
+crypto_blkcipher-y += skcipher.o
+obj-$(CONFIG_CRYPTO_BLKCIPHER2) += crypto_blkcipher.o
 obj-$(CONFIG_CRYPTO_SEQIV) += seqiv.o
 obj-$(CONFIG_CRYPTO_ECHAINIV) += echainiv.o
 
@@ -70,7 +71,6 @@ obj-$(CONFIG_CRYPTO_SHA1) += sha1_generic.o
 obj-$(CONFIG_CRYPTO_SHA256) += sha256_generic.o
 obj-$(CONFIG_CRYPTO_SHA512) += sha512_generic.o
 obj-$(CONFIG_CRYPTO_SHA3) += sha3_generic.o
-obj-$(CONFIG_CRYPTO_SM3) += sm3_generic.o
 obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_TGR192) += tgr192.o
@@ -101,7 +101,6 @@ obj-$(CONFIG_CRYPTO_SERPENT) += serpent_generic.o
 CFLAGS_serpent_generic.o := $(call cc-option,-fsched-pressure) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_AES) += aes_generic.o
 CFLAGS_aes_generic.o := $(call cc-option,-fno-code-hoisting) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83356
-obj-$(CONFIG_CRYPTO_SM4) += sm4_generic.o
 obj-$(CONFIG_CRYPTO_AES_TI) += aes_ti.o
 obj-$(CONFIG_CRYPTO_CAMELLIA) += camellia_generic.o
 obj-$(CONFIG_CRYPTO_CAST_COMMON) += cast_common.o
Some files were not shown because too many files have changed in this diff.