graphics/ffmpeg: add missing IBT/BTI patches for 8.1.1 on amd64

Reported, tested and OK: kn@
This commit is contained in:
kirill
2026-06-13 11:17:07 +00:00
parent 537a539e13
commit db89873d81
6 changed files with 170 additions and 0 deletions
+1
View File
@@ -2,6 +2,7 @@ COMMENT= audio/video converter and streamer
# keep it synced with x11/mplayer
V= 8.1.1
REVISION= 0
DISTNAME= ffmpeg-${V}
CATEGORIES= graphics multimedia
SITES= https://ffmpeg.org/releases/
@@ -0,0 +1,19 @@
Index: libavcodec/x86/vp9itxfm_16bpp_avx512.asm
--- libavcodec/x86/vp9itxfm_16bpp_avx512.asm.orig
+++ libavcodec/x86/vp9itxfm_16bpp_avx512.asm
@@ -375,6 +375,7 @@ cglobal vp9_idct_16x16_internal_10, 0, 7, 22, dst, str
TRANSPOSE_4D 4, 5, 6, 7, 16
jmp tx2q
.pass2:
+ _CET_ENDBR
test eobd, eobd
jl .pass2_fast
call .main_part1
@@ -603,6 +604,7 @@ cglobal vp9_iadst_16x16_internal_10, 0, 7, 22, dst, st
WRAP_YMM IADST16_PASS1_END
jmp m(vp9_idct_16x16_internal_10).pass1_fast_end
.pass2:
+ _CET_ENDBR
test eobd, eobd
jl .pass2_fast
call .main_part1
@@ -0,0 +1,51 @@
Index: libavcodec/x86/vp9itxfm_avx2.asm
--- libavcodec/x86/vp9itxfm_avx2.asm.orig
+++ libavcodec/x86/vp9itxfm_avx2.asm
@@ -336,6 +336,7 @@ cglobal vp9_idct_4x4_internal, 0, 5, 6, dst, stride, c
pshufb m1, m3, m2
jmp tx2q
.pass2:
+ _CET_ENDBR
call .main
.pass2_end:
vpbroadcastd m2, [o(pw_2048)]
@@ -382,6 +383,7 @@ cglobal vp9_iadst_4x4_internal, 0, 5, 6, dst, stride,
call .main
jmp m(vp9_idct_4x4_internal).pass1_end
.pass2:
+ _CET_ENDBR
call .main
jmp m(vp9_idct_4x4_internal).pass2_end
ALIGN function_align
@@ -481,6 +483,7 @@ cglobal vp9_idct_8x8_internal, 0, 5, 8, dst, stride, c
vperm2i128 m3, m5, m3, 0x31
jmp tx2q
.pass2:
+ _CET_ENDBR
call .main
vpbroadcastd m4, [o(pw_1024)]
vpermq m1, m1, q2031
@@ -553,6 +556,7 @@ cglobal vp9_iadst_8x8_internal, 0, 5, 8, dst, stride,
vinserti128 m1, m4, xm1, 1
jmp tx2q
.pass2:
+ _CET_ENDBR
pshufd m4, m0, q1032
pshufd m5, m1, q1032
call .main
@@ -923,6 +927,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, 32*6, dst,
call .transpose_8x8
jmp tx2q
.pass2:
+ _CET_ENDBR
test eobd, eobd
jl .pass2_fast
call .main
@@ -1039,6 +1044,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, 32*6, dst,
mova xm0, [rsp+32*0]
jmp m(vp9_idct_16x16_internal).pass1_fast_end
.pass2:
+ _CET_ENDBR
test eobd, eobd
jl .pass2_fast
call .main
@@ -0,0 +1,19 @@
Index: libavcodec/x86/vp9itxfm_avx512.asm
--- libavcodec/x86/vp9itxfm_avx512.asm.orig
+++ libavcodec/x86/vp9itxfm_avx512.asm
@@ -524,6 +524,7 @@ cglobal vp9_idct_16x16_internal, 0, 5, 16, dst, stride
punpckldq m0, m4 ; 0-1
jmp tx2q
.pass2:
+ _CET_ENDBR
test eobd, eobd
jl .pass2_fast
call .main
@@ -771,6 +772,7 @@ cglobal vp9_iadst_16x16_internal, 0, 5, 16, dst, strid
vpermt2q m3, m5, m4
jmp tx2q
.pass2:
+ _CET_ENDBR
pshufd m1, m1, q1032
pshufd m3, m3, q1032
test eobd, eobd
@@ -0,0 +1,69 @@
Index: libavcodec/x86/vvc/mc.asm
--- libavcodec/x86/vvc/mc.asm.orig
+++ libavcodec/x86/vvc/mc.asm
@@ -41,7 +41,7 @@ SECTION_RODATA
%xdefine %%prefix mangle(private_prefix %+ _vvc_%1_%3_%4)
%%table:
%rep %0 - 4
- dd %%prefix %+ .w%5 - %%base
+ dd %%prefix %+ .w%5_ibt - %%base
%rotate 1
%endrep
%endmacro
@@ -75,6 +75,8 @@ SECTION .text
%if %3
INIT_XMM cpuname
+.w2_ibt:
+ _CET_ENDBR
.w2:
movd xm0, [src0q]
pinsrd xm0, [src0q + AVG_SRC_STRIDE], 1
@@ -84,6 +86,8 @@ INIT_XMM cpuname
AVG_SAVE_W2 %1
AVG_LOOP_END .w2
+.w4_ibt:
+ _CET_ENDBR
.w4:
movq xm0, [src0q]
pinsrq xm0, [src0q + AVG_SRC_STRIDE], 1
@@ -95,6 +99,8 @@ INIT_XMM cpuname
AVG_LOOP_END .w4
INIT_YMM cpuname
+.w8_ibt:
+ _CET_ENDBR
.w8:
movu xm0, [src0q]
movu xm1, [src1q]
@@ -105,21 +111,29 @@ INIT_YMM cpuname
AVG_LOOP_END .w8
+.w16_ibt:
+ _CET_ENDBR
.w16:
AVG_W16_FN %1, %2, 1
AVG_LOOP_END .w16
+.w32_ibt:
+ _CET_ENDBR
.w32:
AVG_W16_FN %1, %2, 2
AVG_LOOP_END .w32
+.w64_ibt:
+ _CET_ENDBR
.w64:
AVG_W16_FN %1, %2, 4
AVG_LOOP_END .w64
+.w128_ibt:
+ _CET_ENDBR
.w128:
AVG_W16_FN %1, %2, 8
@@ -0,0 +1,11 @@
Index: libswscale/x86/ops_int.asm
--- libswscale/x86/ops_int.asm.orig
+++ libswscale/x86/ops_int.asm
@@ -106,6 +106,7 @@ IF %1 > 3, mov out3q, [execq + SwsOpExec.out3]
%endif
align function_align
current_function %+ _return:
+ _CET_ENDBR
; op chain always returns back here
mov implq, [rsp + 8]