gnutls: amd64 and aarch64 assembly fixes.

This is basically perlasm-generated code with a few sprinkles of GNU.
Passes regress except for three tests that are unrelated to these changes.
This commit is contained in:
tb
2023-01-19 12:41:35 +00:00
parent cd7beb5c69
commit cb2bf76c52
16 changed files with 336 additions and 0 deletions
+1
View File
@@ -3,6 +3,7 @@ COMMENT= GNU Transport Layer Security library
V= 3.7.8
DISTNAME= gnutls-${V}
EXTRACT_SUFX= .tar.xz
REVISION= 0
CATEGORIES= security
@@ -0,0 +1,30 @@
Index: lib/accelerated/aarch64/elf/aes-aarch64.s
--- lib/accelerated/aarch64/elf/aes-aarch64.s.orig
+++ lib/accelerated/aarch64/elf/aes-aarch64.s
@@ -45,7 +45,7 @@
# 2 "lib/accelerated/aarch64/elf/aes-aarch64.s.tmp.S" 2
-.text
+.rodata
.arch armv8-a+crypto
.align 5
.Lrcon:
@@ -53,6 +53,7 @@
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
.long 0x1b,0x1b,0x1b,0x1b
+.text
.globl aes_v8_set_encrypt_key
.type aes_v8_set_encrypt_key,%function
.align 5
@@ -73,7 +74,8 @@ aes_v8_set_encrypt_key:
tst w1,#0x3f
b.ne .Lenc_key_abort
- adr x3,.Lrcon
+ adrp x3,.Lrcon
+ add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
@@ -0,0 +1,35 @@
Index: lib/accelerated/aarch64/elf/sha1-armv8.s
--- lib/accelerated/aarch64/elf/sha1-armv8.s.orig
+++ lib/accelerated/aarch64/elf/sha1-armv8.s
@@ -55,11 +55,8 @@ sha1_block_data_order:
- ldr x16,.L_gnutls_arm_cpuid_s
-
- adr x17,.L_gnutls_arm_cpuid_s
- add x16,x16,x17
- ldr w16,[x16]
+ adrp x16,_gnutls_arm_cpuid_s
+ ldr w16,[x16,#:lo12:_gnutls_arm_cpuid_s]
tst w16,#(1<<3)
b.ne .Lv8_entry
@@ -1125,7 +1122,8 @@ sha1_block_armv8:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
- adr x4,.Lconst
+ adrp x4,.Lconst
+ add x4,x4,:lo12:.Lconst
eor v1.16b,v1.16b,v1.16b
ld1 {v0.4s},[x0],#16
ld1 {v1.s}[0],[x0]
@@ -1248,6 +1246,7 @@ sha1_block_armv8:
ldr x29,[sp],#16
ret
.size sha1_block_armv8,.-sha1_block_armv8
+.rodata
.align 6
.Lconst:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
@@ -0,0 +1,63 @@
Index: lib/accelerated/aarch64/elf/sha256-armv8.s
--- lib/accelerated/aarch64/elf/sha256-armv8.s.orig
+++ lib/accelerated/aarch64/elf/sha256-armv8.s
@@ -58,11 +58,8 @@ sha256_block_data_order:
- ldr x16,.L_gnutls_arm_cpuid_s
-
- adr x17,.L_gnutls_arm_cpuid_s
- add x16,x16,x17
- ldr w16,[x16]
+ adrp x16,_gnutls_arm_cpuid_s
+ ldr w16,[x16,#:lo12:_gnutls_arm_cpuid_s]
tst w16,#(1<<4)
b.ne .Lv8_entry
tst w16,#(1<<0)
@@ -84,7 +81,8 @@ sha256_block_data_order:
ldp w24,w25,[x0,#4*4]
add x2,x1,x2,lsl#6
ldp w26,w27,[x0,#6*4]
- adr x30,.LK256
+ adrp x30,.LK256
+ add x30,x30,:lo12:.LK256
stp x0,x2,[x29,#96]
.Loop:
@@ -1032,6 +1030,7 @@ sha256_block_data_order:
ret
.size sha256_block_data_order,.-sha256_block_data_order
+.rodata
.align 6
.type .LK256,%object
.LK256:
@@ -1065,6 +1064,7 @@ sha256_block_data_order:
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
+.previous
.type sha256_block_armv8,%function
.align 6
@@ -1074,7 +1074,8 @@ sha256_block_armv8:
add x29,sp,#0
ld1 {v0.4s,v1.4s},[x0]
- adr x3,.LK256
+ adrp x3,.LK256
+ add x3,x3,:lo12:.LK256
.Loop_hw:
ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
@@ -1216,7 +1217,8 @@ sha256_block_neon:
mov x29, sp
sub sp,sp,#16*4
- adr x16,.LK256
+ adrp x16,.LK256
+ add x16,x16,:lo12:.LK256
add x2,x1,x2,lsl#6
ld1 {v0.16b},[x1], #16
@@ -0,0 +1,53 @@
Index: lib/accelerated/aarch64/elf/sha512-armv8.s
--- lib/accelerated/aarch64/elf/sha512-armv8.s.orig
+++ lib/accelerated/aarch64/elf/sha512-armv8.s
@@ -58,11 +58,8 @@ sha512_block_data_order:
- ldr x16,.L_gnutls_arm_cpuid_s
-
- adr x17,.L_gnutls_arm_cpuid_s
- add x16,x16,x17
- ldr w16,[x16]
+ adrp x16,_gnutls_arm_cpuid_s
+ ldr w16,[x16,#:lo12:_gnutls_arm_cpuid_s]
tst w16,#(1<<6)
b.ne .Lv8_entry
@@ -82,7 +79,8 @@ sha512_block_data_order:
ldp x24,x25,[x0,#4*8]
add x2,x1,x2,lsl#7
ldp x26,x27,[x0,#6*8]
- adr x30,.LK512
+ adrp x30,.LK512
+ add x30,x30,:lo12:.LK512
stp x0,x2,[x29,#96]
.Loop:
@@ -1030,6 +1028,7 @@ sha512_block_data_order:
ret
.size sha512_block_data_order,.-sha512_block_data_order
+.rodata
.align 6
.type .LK512,%object
.LK512:
@@ -1088,6 +1087,7 @@ sha512_block_data_order:
.align 2
.align 2
+.previous
.type sha512_block_armv8,%function
.align 6
sha512_block_armv8:
@@ -1099,7 +1099,8 @@ sha512_block_armv8:
ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0]
- adr x3,.LK512
+ adrp x3,.LK512
+ add x3,x3,:lo12:.LK512
rev64 v16.16b,v16.16b
rev64 v17.16b,v17.16b
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/aes-ssse3-x86_64.s
--- lib/accelerated/x86/elf/aes-ssse3-x86_64.s.orig
+++ lib/accelerated/x86/elf/aes-ssse3-x86_64.s
@@ -763,6 +763,7 @@ _vpaes_preheat:
+.rodata
.type _vpaes_consts,@object
.align 64
_vpaes_consts:
@@ -0,0 +1,20 @@
Index: lib/accelerated/x86/elf/aes-ssse3-x86.s
--- lib/accelerated/x86/elf/aes-ssse3-x86.s.orig
+++ lib/accelerated/x86/elf/aes-ssse3-x86.s
@@ -10,7 +10,7 @@
#
# *** This file is auto-generated ***
#
-.text
+.rodata
.align 64
.L_vpaes_consts:
.long 218628480,235210255,168496130,67568393
@@ -67,6 +67,7 @@
.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
.byte 118,101,114,115,105,116,121,41,0
+.previous
.align 64
.type _vpaes_preheat,@function
.align 16
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/aesni-gcm-x86_64.s
--- lib/accelerated/x86/elf/aesni-gcm-x86_64.s.orig
+++ lib/accelerated/x86/elf/aesni-gcm-x86_64.s
@@ -813,6 +813,7 @@ aesni_gcm_encrypt:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
+.rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/aesni-x86_64.s
--- lib/accelerated/x86/elf/aesni-x86_64.s.orig
+++ lib/accelerated/x86/elf/aesni-x86_64.s
@@ -4489,6 +4489,7 @@ __aesni_set_encrypt_key:
.cfi_endproc
.size aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key
+.rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/aesni-x86.s
--- lib/accelerated/x86/elf/aesni-x86.s.orig
+++ lib/accelerated/x86/elf/aesni-x86.s
@@ -3263,6 +3263,7 @@ aesni_set_decrypt_key:
.L134dec_key_ret:
ret
.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
+.rodata
.align 64
.Lkey_const:
.long 202313229,202313229,202313229,202313229
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/ghash-x86_64.s
--- lib/accelerated/x86/elf/ghash-x86_64.s.orig
+++ lib/accelerated/x86/elf/ghash-x86_64.s
@@ -1831,6 +1831,7 @@ gcm_ghash_avx:
.byte 0xf3,0xc3
.cfi_endproc
.size gcm_ghash_avx,.-gcm_ghash_avx
+.rodata
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
--- lib/accelerated/x86/elf/sha1-ssse3-x86_64.s.orig
+++ lib/accelerated/x86/elf/sha1-ssse3-x86_64.s
@@ -5472,6 +5472,7 @@ _avx2_shortcut:
.byte 0xf3,0xc3
.cfi_endproc
.size sha1_block_data_order_avx2,.-sha1_block_data_order_avx2
+.rodata
.align 64
K_XX_XX:
.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999
@@ -0,0 +1,19 @@
Index: lib/accelerated/x86/elf/sha256-ssse3-x86_64.s
--- lib/accelerated/x86/elf/sha256-ssse3-x86_64.s.orig
+++ lib/accelerated/x86/elf/sha256-ssse3-x86_64.s
@@ -1767,6 +1767,7 @@ sha256_block_data_order:
.byte 0xf3,0xc3
.cfi_endproc
.size sha256_block_data_order,.-sha256_block_data_order
+.rodata
.align 64
.type K256,@object
K256:
@@ -1810,6 +1811,7 @@ K256:
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908
.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.previous
.type sha256_block_data_order_shaext,@function
.align 64
sha256_block_data_order_shaext:
@@ -0,0 +1,19 @@
Index: lib/accelerated/x86/elf/sha256-ssse3-x86.s
--- lib/accelerated/x86/elf/sha256-ssse3-x86.s.orig
+++ lib/accelerated/x86/elf/sha256-ssse3-x86.s
@@ -466,6 +466,7 @@ sha256_block_data_order:
popl %ebx
popl %ebp
ret
+.rodata
.align 64
.L001K256:
.long 1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298
@@ -475,6 +476,7 @@ sha256_block_data_order:
.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97
.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103
.byte 62,0
+.previous
.align 16
.L008unrolled:
leal -96(%esp),%esp
@@ -0,0 +1,19 @@
Index: lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
--- lib/accelerated/x86/elf/sha512-ssse3-x86_64.s.orig
+++ lib/accelerated/x86/elf/sha512-ssse3-x86_64.s
@@ -1765,6 +1765,7 @@ sha512_block_data_order:
.byte 0xf3,0xc3
.cfi_endproc
.size sha512_block_data_order,.-sha512_block_data_order
+.rodata
.align 64
.type K512,@object
K512:
@@ -1852,6 +1853,7 @@ K512:
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.quad 0x0001020304050607,0x08090a0b0c0d0e0f
.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.previous
.type sha512_block_data_order_xop,@function
.align 64
sha512_block_data_order_xop:
@@ -0,0 +1,11 @@
Index: lib/accelerated/x86/elf/sha512-ssse3-x86.s
--- lib/accelerated/x86/elf/sha512-ssse3-x86.s.orig
+++ lib/accelerated/x86/elf/sha512-ssse3-x86.s
@@ -511,6 +511,7 @@ sha512_block_data_order:
popl %ebx
popl %ebp
ret
+.rodata
.align 64
.L001K512:
.long 3609767458,1116352408