diff --git a/.patch/mbedtls/0002-aesni-fix.patch b/.patch/mbedtls/0002-aesni-fix.patch new file mode 100644 index 0000000000..dc5d3153b7 --- /dev/null +++ b/.patch/mbedtls/0002-aesni-fix.patch @@ -0,0 +1,13 @@ +diff --git a/library/aesni.h b/library/aesni.h +index 754c984c79..59e27afd3e 100644 +--- a/library/aesni.h ++++ b/library/aesni.h +@@ -35,7 +35,7 @@ + /* GCC-like compilers: currently, we only support intrinsics if the requisite + * target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2` + * or `clang -maes -mpclmul`). */ +-#if (defined(__GNUC__) || defined(__clang__)) && defined(__AES__) && defined(__PCLMUL__) ++#if defined(__GNUC__) || defined(__clang__) + #define MBEDTLS_AESNI_HAVE_INTRINSICS + #endif + /* For 32-bit, we only support intrinsics */ diff --git a/.patch/mbedtls/0003-aesni-fix.patch b/.patch/mbedtls/0003-aesni-fix.patch new file mode 100644 index 0000000000..c620b42554 --- /dev/null +++ b/.patch/mbedtls/0003-aesni-fix.patch @@ -0,0 +1,22 @@ +diff --git a/library/aesni.c b/library/aesni.c +index 2857068..3e104ab 100644 +--- a/library/aesni.c ++++ b/library/aesni.c +@@ -31,16 +31,14 @@ + #include + #endif + +-#if defined(MBEDTLS_ARCH_IS_X86) + #if defined(MBEDTLS_COMPILER_IS_GCC) + #pragma GCC push_options + #pragma GCC target ("pclmul,sse2,aes") + #define MBEDTLS_POP_TARGET_PRAGMA +-#elif defined(__clang__) && (__clang_major__ >= 5) ++#elif defined(__clang__) + #pragma clang attribute push (__attribute__((target("pclmul,sse2,aes"))), apply_to=function) + #define MBEDTLS_POP_TARGET_PRAGMA + #endif +-#endif + + #if !defined(MBEDTLS_AES_USE_HARDWARE_ONLY) + /* diff --git a/CMakeLists.txt b/CMakeLists.txt index e0466da3bd..fa35ca3d16 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -52,6 +52,10 @@ if (PLATFORM_SUN) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O3") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") endif() + if (CMAKE_BUILD_TYPE MATCHES "RelWithDebInfo") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -O2") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2") + endif() endif() # Needed for FFmpeg w/ VAAPI and DRM diff --git a/externals/cpmfile.json b/externals/cpmfile.json index 669357588a..9025ec2787 100644 --- a/externals/cpmfile.json +++ b/externals/cpmfile.json @@ -97,7 +97,11 @@ "version": "3", "git_version": "3.6.4", "artifact": "%TAG%.tar.bz2", - "skip_updates": true + "skip_updates": true, + "patches": [ + "0002-aesni-fix.patch", + "0003-aesni-fix.patch" + ] }, "enet": { "repo": "lsalzman/enet", diff --git a/src/core/crypto/aes_util.cpp b/src/core/crypto/aes_util.cpp index cd7e15a582..5c31eff3e1 100644 --- a/src/core/crypto/aes_util.cpp +++ b/src/core/crypto/aes_util.cpp @@ -1,7 +1,11 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "common/assert.h" #include "common/logging/log.h" @@ -71,37 +75,37 @@ void AESCipher::Transcode(const u8* src, std::size_t size, u8* des mbedtls_cipher_reset(context); + // Only ECB strictly requires block sized chunks. std::size_t written = 0; - if (mbedtls_cipher_get_cipher_mode(context) == MBEDTLS_MODE_XTS) { + if (mbedtls_cipher_get_cipher_mode(context) != MBEDTLS_MODE_ECB) { mbedtls_cipher_update(context, src, size, dest, &written); - if (written != size) { - LOG_WARNING(Crypto, "Not all data was decrypted requested={:016X}, actual={:016X}.", - size, written); - } - } else { - const auto block_size = mbedtls_cipher_get_block_size(context); - if (size < block_size) { - std::vector block(block_size); - std::memcpy(block.data(), src, size); - Transcode(block.data(), block.size(), block.data(), op); - std::memcpy(dest, block.data(), size); - return; - } + if (written != size) + LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", size, written); + return; + } + + // ECB path: operate in block sized chunks and mirror previous behavior. + const auto block_size = mbedtls_cipher_get_block_size(context); + if (size < block_size) { + std::vector block(block_size); + std::memcpy(block.data(), src, size); + Transcode(block.data(), block.size(), block.data(), op); + std::memcpy(dest, block.data(), size); + return; + } - for (std::size_t offset = 0; offset < size; offset += block_size) { - auto length = std::min(block_size, size - offset); - mbedtls_cipher_update(context, src + offset, length, dest + offset, &written); - if (written != length) { - if (length < block_size) { - std::vector block(block_size); - std::memcpy(block.data(), src + offset, length); - Transcode(block.data(), block.size(), block.data(), op); - std::memcpy(dest + offset, block.data(), length); - return; - } - LOG_WARNING(Crypto, "Not all data was decrypted requested={:016X}, actual={:016X}.", - length, written); + for (std::size_t offset = 0; offset < size; offset += block_size) { + const auto length = std::min(block_size, size - offset); + mbedtls_cipher_update(context, src + offset, length, dest + offset, &written); + if (written != length) { + if (length < block_size) { + std::vector block(block_size); + std::memcpy(block.data(), src + offset, length); + Transcode(block.data(), block.size(), block.data(), op); + std::memcpy(dest + offset, block.data(), length); + return; } + LOG_WARNING(Crypto, "Not all data was processed requested={:016X}, actual={:016X}.", length, written); } } } diff --git a/src/core/crypto/ctr_encryption_layer.cpp b/src/core/crypto/ctr_encryption_layer.cpp index b48c3f041f..6769754413 100644 --- a/src/core/crypto/ctr_encryption_layer.cpp +++ b/src/core/crypto/ctr_encryption_layer.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -15,26 +18,36 @@ std::size_t CTREncryptionLayer::Read(u8* data, std::size_t length, std::size_t o if (length == 0) return 0; - const auto sector_offset = offset & 0xF; - if (sector_offset == 0) { - UpdateIV(base_offset + offset); - std::vector raw = base->ReadBytes(length, offset); - cipher.Transcode(raw.data(), raw.size(), data, Op::Decrypt); - return length; + std::size_t total_read = 0; + // Handle an initial misaligned portion if needed. + if (auto const sector_offset = offset & 0xF; sector_offset != 0) { + const std::size_t aligned_off = offset - sector_offset; + std::array block{}; + if (auto const got = base->Read(block.data(), block.size(), aligned_off); got != 0) { + UpdateIV(base_offset + aligned_off); + cipher.Transcode(block.data(), got, block.data(), Op::Decrypt); + auto const to_copy = std::min(length, got > sector_offset ? got - sector_offset : 0); + if (to_copy > 0) { + std::memcpy(data, block.data() + sector_offset, to_copy); + data += to_copy; + offset += to_copy; + length -= to_copy; + total_read += to_copy; + } + } else { + return 0; + } } - - // offset does not fall on block boundary (0x10) - std::vector block = base->ReadBytes(0x10, offset - sector_offset); - UpdateIV(base_offset + offset - sector_offset); - cipher.Transcode(block.data(), block.size(), block.data(), Op::Decrypt); - std::size_t read = 0x10 - sector_offset; - - if (length + sector_offset < 0x10) { - std::memcpy(data, block.data() + sector_offset, std::min(length, read)); - return std::min(length, read); + if (length > 0) { + // Now aligned to 0x10 + UpdateIV(base_offset + offset); + const std::size_t got = base->Read(data, length, offset); + if (got > 0) { + cipher.Transcode(data, got, data, Op::Decrypt); + total_read += got; + } } - std::memcpy(data, block.data() + sector_offset, read); - return read + Read(data + read, length - read, offset + read); + return total_read; } void CTREncryptionLayer::SetIV(const IVData& iv_) { diff --git a/src/core/crypto/xts_encryption_layer.cpp b/src/core/crypto/xts_encryption_layer.cpp index 36cc501b90..240f930c57 100644 --- a/src/core/crypto/xts_encryption_layer.cpp +++ b/src/core/crypto/xts_encryption_layer.cpp @@ -5,12 +5,13 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include +#include #include #include "core/crypto/xts_encryption_layer.h" namespace Core::Crypto { -constexpr u64 XTS_SECTOR_SIZE = 0x4000; +constexpr std::size_t XTS_SECTOR_SIZE = 0x4000; XTSEncryptionLayer::XTSEncryptionLayer(FileSys::VirtualFile base_, Key256 key_) : EncryptionLayer(std::move(base_)), cipher(key_, Mode::XTS) {} @@ -19,41 +20,67 @@ std::size_t XTSEncryptionLayer::Read(u8* data, std::size_t length, std::size_t o if (length == 0) return 0; - const auto sector_offset = offset & 0x3FFF; - if (sector_offset == 0) { - if (length % XTS_SECTOR_SIZE == 0) { - std::vector raw = base->ReadBytes(length, offset); - cipher.XTSTranscode(raw.data(), raw.size(), data, offset / XTS_SECTOR_SIZE, + std::size_t total_read = 0; + // Handle initial unaligned part within a sector. + if (auto const sector_offset = offset % XTS_SECTOR_SIZE; sector_offset != 0) { + const std::size_t aligned_off = offset - sector_offset; + std::array block{}; + if (auto const got = base->Read(block.data(), XTS_SECTOR_SIZE, aligned_off); got > 0) { + if (got < XTS_SECTOR_SIZE) + std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got); + cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), aligned_off / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); - return raw.size(); - } - if (length > XTS_SECTOR_SIZE) { - const auto rem = length % XTS_SECTOR_SIZE; - const auto read = length - rem; - return Read(data, read, offset) + Read(data + read, rem, offset + read); + + auto const to_copy = std::min(length, got > sector_offset ? got - sector_offset : 0); + if (to_copy > 0) { + std::memcpy(data, block.data() + sector_offset, to_copy); + data += to_copy; + offset += to_copy; + length -= to_copy; + total_read += to_copy; + } + } else { + return 0; } - std::vector buffer = base->ReadBytes(XTS_SECTOR_SIZE, offset); - if (buffer.size() < XTS_SECTOR_SIZE) - buffer.resize(XTS_SECTOR_SIZE); - cipher.XTSTranscode(buffer.data(), buffer.size(), buffer.data(), offset / XTS_SECTOR_SIZE, - XTS_SECTOR_SIZE, Op::Decrypt); - std::memcpy(data, buffer.data(), (std::min)(buffer.size(), length)); - return (std::min)(buffer.size(), length); } - // offset does not fall on block boundary (0x4000) - std::vector block = base->ReadBytes(0x4000, offset - sector_offset); - if (block.size() < XTS_SECTOR_SIZE) - block.resize(XTS_SECTOR_SIZE); - cipher.XTSTranscode(block.data(), block.size(), block.data(), - (offset - sector_offset) / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); - const std::size_t read = XTS_SECTOR_SIZE - sector_offset; - - if (length + sector_offset < XTS_SECTOR_SIZE) { - std::memcpy(data, block.data() + sector_offset, std::min(length, read)); - return std::min(length, read); + if (length > 0) { + // Process aligned middle inplace, in sector sized multiples. + while (length >= XTS_SECTOR_SIZE) { + const std::size_t req = (length / XTS_SECTOR_SIZE) * XTS_SECTOR_SIZE; + const std::size_t got = base->Read(data, req, offset); + if (got == 0) { + return total_read; + } + const std::size_t got_rounded = got - (got % XTS_SECTOR_SIZE); + if (got_rounded > 0) { + cipher.XTSTranscode(data, got_rounded, data, offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); + data += got_rounded; + offset += got_rounded; + length -= got_rounded; + total_read += got_rounded; + } + // If we didn't get a full sector next, break to handle tail. + if (got_rounded != got) { + break; + } + } + // Handle tail within a sector, if any. + if (length > 0) { + std::array block{}; + const std::size_t got = base->Read(block.data(), XTS_SECTOR_SIZE, offset); + if (got > 0) { + if (got < XTS_SECTOR_SIZE) { + std::memset(block.data() + got, 0, XTS_SECTOR_SIZE - got); + } + cipher.XTSTranscode(block.data(), XTS_SECTOR_SIZE, block.data(), + offset / XTS_SECTOR_SIZE, XTS_SECTOR_SIZE, Op::Decrypt); + const std::size_t to_copy = std::min(length, got); + std::memcpy(data, block.data(), to_copy); + total_read += to_copy; + } + } } - std::memcpy(data, block.data() + sector_offset, read); - return read + Read(data + read, length - read, offset + read); + return total_read; } } // namespace Core::Crypto diff --git a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp index aaf7788801..1e11d70d8d 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_ctr_storage.cpp @@ -4,6 +4,7 @@ // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "common/alignment.h" #include "common/swap.h" #include "core/file_sys/fssystem/fssystem_aes_ctr_storage.h" @@ -83,32 +84,24 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) { std::memcpy(ctr.data(), m_iv.data(), IvSize); AddCounter(ctr.data(), IvSize, offset / BlockSize); - // Loop until all data is written. - size_t remaining = size; - s64 cur_offset = 0; - - // Get a pooled buffer. - std::vector pooled_buffer(BlockSize); - while (remaining > 0) { + // Loop until all data is written using a pooled buffer residing on the stack (blocksize = 0x10) + boost::container::static_vector pooled_buffer; + for (size_t remaining = size; remaining > 0; ) { // Determine data we're writing and where. - const size_t write_size = std::min(pooled_buffer.size(), remaining); - u8* write_buf = reinterpret_cast(pooled_buffer.data()); + auto const write_size = (std::min)(pooled_buffer.size(), remaining); + u8* write_buf = pooled_buffer.data(); - // Encrypt the data. + // Encrypt the data and then write it. m_cipher->SetIV(ctr); m_cipher->Transcode(buffer, write_size, write_buf, Core::Crypto::Op::Encrypt); + m_base_storage->Write(write_buf, write_size, offset); - // Write the encrypted data. - m_base_storage->Write(write_buf, write_size, offset + cur_offset); - - // Advance. - cur_offset += write_size; + // Advance next write chunk + offset += write_size; remaining -= write_size; - if (remaining > 0) { + if (remaining > 0) AddCounter(ctr.data(), IvSize, write_size / BlockSize); - } } - return size; } diff --git a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp index 9e7a104c89..6e07d44cde 100644 --- a/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp +++ b/src/core/file_sys/fssystem/fssystem_aes_xts_storage.cpp @@ -4,9 +4,13 @@ // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include +#include #include "common/alignment.h" #include "common/swap.h" #include "core/file_sys/fssystem/fssystem_aes_xts_storage.h" +#include "core/file_sys/fssystem/fssystem_nca_header.h" #include "core/file_sys/fssystem/fssystem_utility.h" namespace FileSys { @@ -41,18 +45,12 @@ AesXtsStorage::AesXtsStorage(VirtualFile base, const void* key1, const void* key size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { // Allow zero-size reads. - if (size == 0) { + if (size == 0) return size; - } - // Ensure buffer is valid. + // Ensure buffer is valid and we can only read at block aligned offsets. ASSERT(buffer != nullptr); - - // We can only read at block aligned offsets. - ASSERT(Common::IsAligned(offset, AesBlockSize)); - ASSERT(Common::IsAligned(size, AesBlockSize)); - - // Read the data. + ASSERT(Common::IsAligned(offset, AesBlockSize) && Common::IsAligned(size, AesBlockSize)); m_base_storage->Read(buffer, size, offset); // Setup the counter. @@ -60,25 +58,21 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { std::memcpy(ctr.data(), m_iv.data(), IvSize); AddCounter(ctr.data(), IvSize, offset / m_block_size); - // Handle any unaligned data before the start. + // Handle any unaligned data before the start; then read said data into a local pooled + // buffer that resides on the stack, do not use the global memory allocator this is a + // very tiny (512 bytes) buffer so should be fine to keep on the stack (Nca::XtsBlockSize wide buffer) size_t processed_size = 0; if ((offset % m_block_size) != 0) { + // Decrypt into our pooled stack buffer (max bound = NCA::XtsBlockSize) + boost::container::static_vector tmp_buf; // Determine the size of the pre-data read. - const size_t skip_size = - static_cast(offset - Common::AlignDown(offset, m_block_size)); - const size_t data_size = (std::min)(size, m_block_size - skip_size); - - // Decrypt into a pooled buffer. - { - std::vector tmp_buf(m_block_size, 0); - std::memcpy(tmp_buf.data() + skip_size, buffer, data_size); - - m_cipher->SetIV(ctr); - m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), - Core::Crypto::Op::Decrypt); - - std::memcpy(buffer, tmp_buf.data() + skip_size, data_size); - } + auto const skip_size = size_t(offset - Common::AlignDown(offset, m_block_size)); + auto const data_size = (std::min)(size, m_block_size - skip_size); + std::fill_n(tmp_buf.begin(), skip_size, u8{0}); + std::memcpy(tmp_buf.data() + skip_size, buffer, data_size); + m_cipher->SetIV(ctr); + m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(), Core::Crypto::Op::Decrypt); + std::memcpy(buffer, tmp_buf.data() + skip_size, data_size); AddCounter(ctr.data(), IvSize, 1); processed_size += data_size; @@ -86,20 +80,16 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const { } // Decrypt aligned chunks. - char* cur = reinterpret_cast(buffer) + processed_size; - size_t remaining = size - processed_size; - while (remaining > 0) { - const size_t cur_size = (std::min)(m_block_size, remaining); - + auto* cur = buffer + processed_size; + for (size_t remaining = size - processed_size; remaining > 0; ) { + auto const cur_size = (std::min)(m_block_size, remaining); m_cipher->SetIV(ctr); - m_cipher->Transcode(cur, cur_size, cur, Core::Crypto::Op::Decrypt); - + auto* char_cur = reinterpret_cast(cur); //same repr cur - diff signedness + m_cipher->Transcode(char_cur, cur_size, char_cur, Core::Crypto::Op::Decrypt); remaining -= cur_size; cur += cur_size; - AddCounter(ctr.data(), IvSize, 1); } - return size; }