diff --git a/src/video_core/host_shaders/astc_decoder.comp b/src/video_core/host_shaders/astc_decoder.comp index 6e4535d459..761b62582d 100644 --- a/src/video_core/host_shaders/astc_decoder.comp +++ b/src/video_core/host_shaders/astc_decoder.comp @@ -61,6 +61,13 @@ const uint encoding_values[22] = uint[]( (JUST_BITS | (5u << 8u)), (QUINT | (3u << 8u)), (TRIT | (4u << 8u)), (JUST_BITS | (6u << 8u)), (QUINT | (4u << 8u)), (TRIT | (5u << 8u)), (JUST_BITS | (7u << 8u)), (QUINT | (5u << 8u)), (TRIT | (6u << 8u)), (JUST_BITS | (8u << 8u))); + +// Precomputed weight tables +const uint WEIGHT_TABLE_1BIT[2] = uint[](0, 64); +const uint WEIGHT_TABLE_2BIT[4] = uint[](0, 21, 43, 64); +const uint WEIGHT_TABLE_3BIT[8] = uint[](0, 9, 18, 27, 37, 46, 55, 64); +const uint WEIGHT_TABLE_4BIT[16] = uint[](0, 4, 8, 12, 17, 21, 25, 29, 35, 39, 43, 47, 52, 56, 60, 64); +const uint WEIGHT_TABLE_5BIT[32] = uint[](0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64); // Input ASTC texture globals int total_bitsread = 0; @@ -730,18 +737,30 @@ uint UnquantizeTexelWeight(EncodingData val) { const uint encoding = Encoding(val); const uint bitlen = NumBits(val); const uint bitval = BitValue(val); + + if (encoding == JUST_BITS) { + switch (bitlen) { + case 1: return WEIGHT_TABLE_1BIT[bitval]; + case 2: return WEIGHT_TABLE_2BIT[bitval]; + case 3: return WEIGHT_TABLE_3BIT[bitval]; + case 4: return WEIGHT_TABLE_4BIT[bitval]; + case 5: return WEIGHT_TABLE_5BIT[bitval]; + default: return FastReplicateTo6(bitval, bitlen); + } + } + const uint A = ReplicateBitTo7((bitval & 1)); uint B = 0, C = 0, D = 0; uint result = 0; - const uint bitlen_0_results[5] = {0, 16, 32, 48, 64}; + switch (encoding) { - case JUST_BITS: - return FastReplicateTo6(bitval, bitlen); case TRIT: { D = QuintTritValue(val); switch (bitlen) { - case 0: - return bitlen_0_results[D * 2]; + case 0: { + const uint trit_base[3] = uint[](0, 32, 64); + return trit_base[D]; + } case 1: { C = 50; break; @@ -758,16 +777,16 @@ uint UnquantizeTexelWeight(EncodingData val) { B = (cb << 5) | cb; break; } - default: - break; } break; } case QUINT: { D = QuintTritValue(val); switch (bitlen) { - case 0: - return bitlen_0_results[D]; + case 0: { + const uint quint_base[5] = uint[](0, 16, 32, 48, 64); + return quint_base[D]; + } case 1: { C = 28; break; @@ -782,14 +801,17 @@ uint UnquantizeTexelWeight(EncodingData val) { break; } } - if (encoding != JUST_BITS && bitlen > 0) { + + if (bitlen > 0) { result = D * C + B; result ^= A; result = (A & 0x20) | (result >> 2); } + if (result > 32) { result += 1; } + return result; } @@ -1159,10 +1181,11 @@ void DecompressBlock(ivec3 coord) { } uint SwizzleOffset(uvec2 pos) { - const uint x = pos.x; - const uint y = pos.y; - return ((x % 64) / 32) * 256 + ((y % 8) / 2) * 64 + - ((x % 32) / 16) * 32 + (y % 2) * 16 + (x % 16); + return ((pos.x & 32u) << 3u) | + ((pos.y & 6u) << 5u) | + ((pos.x & 16u) << 1u) | + ((pos.y & 1u) << 4u) | + (pos.x & 15u); } void main() { diff --git a/src/video_core/renderer_vulkan/vk_compute_pass.cpp b/src/video_core/renderer_vulkan/vk_compute_pass.cpp index d97a35352a..411e33f06a 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pass.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pass.cpp @@ -623,7 +623,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .pNext = nullptr, .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_GENERAL, .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, @@ -638,9 +638,8 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map, }, }; cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, - VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, image_barrier); + VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, image_barrier); }); - scheduler.Finish(); } constexpr u32 BL3D_BINDING_SWIZZLE_TABLE = 0; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 82443d6599..f00997611a 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -70,10 +70,29 @@ TextureCache
::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManag
(std::max)((std::min)(device_local_memory - min_vacancy_critical, min_spacing_critical),
DEFAULT_CRITICAL_MEMORY));
minimum_memory = static_cast ::RefreshContents(Image& image, ImageId image_id) {
}
image.flags &= ~ImageFlagBits::CpuModified;
+ if( lowmemorydevice && image.info.format == PixelFormat::BC1_RGBA_UNORM && MapSizeBytes(image) >= 256_MiB ) {
+ return;
+ }
+
TrackImage(image, image_id);
if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) {
@@ -1465,17 +1488,13 @@ void TextureCache ::TickAsyncUnswizzle() {
task.initialized = true;
}
- // ToDo: Make these configurable
- static constexpr size_t CHUNK_SIZE = 48_MiB;
- static constexpr u32 SLICES_PER_BATCH = 48u;
-
// Read data
if (task.current_offset < task.total_size) {
const size_t remaining = task.total_size - task.current_offset;
- size_t copy_amount = std::min(CHUNK_SIZE, remaining);
+ size_t copy_amount = std::min(chunk_size, remaining);
- if (remaining > CHUNK_SIZE) {
+ if (remaining > chunk_size) {
copy_amount = (copy_amount / task.bytes_per_slice) * task.bytes_per_slice;
if (copy_amount == 0) copy_amount = task.bytes_per_slice;
}
@@ -1490,7 +1509,7 @@ void TextureCache ::TickAsyncUnswizzle() {
const u32 complete_slices = static_cast