From 3b6309b4763c3adb4dffccd1d022d0c7377bed3d Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 27 Oct 2025 21:14:01 +0000 Subject: [PATCH] remove sse2neon Signed-off-by: lizzie --- .ci/license-header.sh | 2 +- ...-support-for-clang-cl-on-Windows-633.patch | 129 ------------------ CMakeLists.txt | 3 - externals/CMakeLists.txt | 7 - externals/cpmfile.json | 9 -- src/video_core/CMakeLists.txt | 4 - 6 files changed, 1 insertion(+), 153 deletions(-) delete mode 100644 .patch/sse2neon/0001-Add-support-for-clang-cl-on-Windows-633.patch diff --git a/.ci/license-header.sh b/.ci/license-header.sh index 874f29aa45..f438d59dac 100755 --- a/.ci/license-header.sh +++ b/.ci/license-header.sh @@ -4,7 +4,7 @@ # SPDX-License-Identifier: GPL-3.0-or-later # specify full path if dupes may exist -EXCLUDE_FILES="CPM.cmake CPMUtil.cmake GetSCMRev.cmake sse2neon.h renderdoc_app.h tools/cpm tools/shellcheck.sh tools/update-cpm.sh externals/stb externals/glad externals/getopt externals/gamemode externals/FidelityFX-FSR externals/demangle externals/bc_decoder" +EXCLUDE_FILES="CPM.cmake CPMUtil.cmake GetSCMRev.cmake renderdoc_app.h tools/cpm tools/shellcheck.sh tools/update-cpm.sh externals/stb externals/glad externals/getopt externals/gamemode externals/FidelityFX-FSR externals/demangle externals/bc_decoder" # license header constants, please change when needed :)))) YEAR=2025 diff --git a/.patch/sse2neon/0001-Add-support-for-clang-cl-on-Windows-633.patch b/.patch/sse2neon/0001-Add-support-for-clang-cl-on-Windows-633.patch deleted file mode 100644 index cf86707355..0000000000 --- a/.patch/sse2neon/0001-Add-support-for-clang-cl-on-Windows-633.patch +++ /dev/null @@ -1,129 +0,0 @@ -From d765ebed3598ddfd7167fc546474626ac5ef9498 Mon Sep 17 00:00:00 2001 -From: Anthony Roberts -Date: Fri, 2 Aug 2024 16:55:57 +0100 -Subject: [PATCH] Add support for clang-cl on Windows (#633) - -This commit adds support for clang-cl (clang, pretending to be MSVC) to -SSE2NEON on Windows ARM64 platforms. This change is part of some Blender -work, as using clang-cl provides a ~20-40% speedup compared to MSVC. - -Compiled with the following command line (via a VS2022 Native ARM64 Tools -CMD window): - msbuild sse2neon.vcxproj /p:Configuration=Release /p:CLToolExe=clang-cl.exe - /p:CLToolPath="C:\Program Files\LLVM\bin\" - -Known failures in test suite: - Test mm_cvttpd_epi32 - Test rdtsc - -Co-authored-by: Anthony Roberts ---- - sse2neon.h | 22 +++++++++++----------- - 1 file changed, 11 insertions(+), 11 deletions(-) - -diff --git a/sse2neon.h b/sse2neon.h -index 56254b5..76cf8e3 100644 ---- a/sse2neon.h -+++ b/sse2neon.h -@@ -180,7 +180,7 @@ - } - - /* Compiler barrier */ --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - #define SSE2NEON_BARRIER() _ReadWriteBarrier() - #else - #define SSE2NEON_BARRIER() \ -@@ -856,7 +856,7 @@ FORCE_INLINE uint64x2_t _sse2neon_vmull_p64(uint64x1_t _a, uint64x1_t _b) - { - poly64_t a = vget_lane_p64(vreinterpret_p64_u64(_a), 0); - poly64_t b = vget_lane_p64(vreinterpret_p64_u64(_b), 0); --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - __n64 a1 = {a}, b1 = {b}; - return vreinterpretq_u64_p128(vmull_p64(a1, b1)); - #else -@@ -1767,7 +1767,7 @@ FORCE_INLINE void _mm_free(void *addr) - FORCE_INLINE uint64_t _sse2neon_get_fpcr(void) - { - uint64_t value; --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - value = _ReadStatusReg(ARM64_FPCR); - #else - __asm__ __volatile__("mrs %0, FPCR" : "=r"(value)); /* read */ -@@ -1777,7 +1777,7 @@ FORCE_INLINE uint64_t _sse2neon_get_fpcr(void) - - FORCE_INLINE void _sse2neon_set_fpcr(uint64_t value) - { --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - _WriteStatusReg(ARM64_FPCR, value); - #else - __asm__ __volatile__("msr FPCR, %0" ::"r"(value)); /* write */ -@@ -2246,7 +2246,7 @@ FORCE_INLINE __m128 _mm_or_ps(__m128 a, __m128 b) - FORCE_INLINE void _mm_prefetch(char const *p, int i) - { - (void) i; --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - switch (i) { - case _MM_HINT_NTA: - __prefetch2(p, 1); -@@ -4817,7 +4817,7 @@ FORCE_INLINE __m128i _mm_packus_epi16(const __m128i a, const __m128i b) - // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_pause - FORCE_INLINE void _mm_pause(void) - { --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - __isb(_ARM64_BARRIER_SY); - #else - __asm__ __volatile__("isb\n"); -@@ -5713,7 +5713,7 @@ FORCE_INLINE __m128d _mm_undefined_pd(void) - #pragma GCC diagnostic ignored "-Wuninitialized" - #endif - __m128d a; --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - a = _mm_setzero_pd(); - #endif - return a; -@@ -8127,7 +8127,7 @@ FORCE_INLINE int _sse2neon_sido_negative(int res, int lb, int imm8, int bound) - - FORCE_INLINE int _sse2neon_clz(unsigned int x) - { --#ifdef _MSC_VER -+#if defined(_MSC_VER) && !defined(__clang__) - unsigned long cnt = 0; - if (_BitScanReverse(&cnt, x)) - return 31 - cnt; -@@ -8139,7 +8139,7 @@ FORCE_INLINE int _sse2neon_clz(unsigned int x) - - FORCE_INLINE int _sse2neon_ctz(unsigned int x) - { --#ifdef _MSC_VER -+#if defined(_MSC_VER) && !defined(__clang__) - unsigned long cnt = 0; - if (_BitScanForward(&cnt, x)) - return cnt; -@@ -9055,7 +9055,7 @@ FORCE_INLINE __m128i _mm_aeskeygenassist_si128(__m128i a, const int rcon) - // AESE does ShiftRows and SubBytes on A - uint8x16_t u8 = vaeseq_u8(vreinterpretq_u8_m128i(a), vdupq_n_u8(0)); - --#ifndef _MSC_VER -+#if !defined(_MSC_VER) || defined(__clang__) - uint8x16_t dest = { - // Undo ShiftRows step from AESE and extract X1 and X3 - u8[0x4], u8[0x1], u8[0xE], u8[0xB], // SubBytes(X1) -@@ -9242,7 +9242,7 @@ FORCE_INLINE uint64_t _rdtsc(void) - * bits wide and it is attributed with the flag 'cap_user_time_short' - * is true. - */ --#if defined(_MSC_VER) -+#if defined(_MSC_VER) && !defined(__clang__) - val = _ReadStatusReg(ARM64_SYSREG(3, 3, 14, 0, 2)); - #else - __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(val)); --- -2.48.1 - diff --git a/CMakeLists.txt b/CMakeLists.txt index d29898b819..dda6979911 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -613,9 +613,6 @@ find_package(VulkanUtilityLibraries) find_package(SimpleIni) find_package(SPIRV-Tools) find_package(sirit) -if (ARCHITECTURE_arm64) - find_package(sse2neon) -endif() if (ARCHITECTURE_x86 OR ARCHITECTURE_x86_64) find_package(xbyak) diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index 8cc04e99a2..096760925f 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -399,10 +399,3 @@ if (ANDROID) add_library(oboe::oboe ALIAS oboe) endif() - -# sse2neon -if (ARCHITECTURE_arm64 AND NOT TARGET sse2neon) - AddJsonPackage(sse2neon) - add_library(sse2neon INTERFACE) - target_include_directories(sse2neon INTERFACE ${sse2neon_SOURCE_DIR}) -endif() diff --git a/externals/cpmfile.json b/externals/cpmfile.json index e1eb15fad3..73cdf3e305 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -213,14 +213,5 @@ "key": "steamdeck", "bundled": true, "skip_updates": "true" - }, - "sse2neon": { - "repo": "DLTcollab/sse2neon", - "sha": "66267b52fd", - "hash": "3aed8676e1b8c428acb076464663e3968a721457b08710a7c5f8df2fbdaa5601053c1606169a55e987e7a58dd17e3cc3b7fbf953aa891c5ac5f8ce2941862e4b", - "download_only": "true", - "patches": [ - "0001-Add-support-for-clang-cl-on-Windows-633.patch" - ] } } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 444e0461f1..db99c2bcb9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -398,8 +398,4 @@ if (ANDROID AND ARCHITECTURE_arm64) target_link_libraries(video_core PRIVATE adrenotools) endif() -if (ARCHITECTURE_arm64) - target_link_libraries(video_core PRIVATE sse2neon) -endif() - create_target_directory_groups(video_core)