Browse Source

fix with bigger batch sizes

lizzie/unity-build
lizzie 2 weeks ago
parent
commit
5146e41b80
  1. 4
      src/common/x64/cpu_detect.cpp
  2. 24
      src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp
  3. 26
      src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h
  4. 8
      src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp
  5. 7
      src/shader_recompiler/backend/glasm/emit_glasm_instructions.h
  6. 49
      src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp
  7. 8
      src/shader_recompiler/backend/glasm/glasm_emit_context.cpp
  8. 6
      src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp
  9. 2
      src/shader_recompiler/backend/glsl/emit_glsl_instructions.h
  10. 8
      src/shader_recompiler/backend/glsl/emit_glsl_special.cpp
  11. 10
      src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp
  12. 55
      src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp
  13. 35
      src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp
  14. 18
      src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp
  15. 27
      src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp
  16. 12
      src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp
  17. 11
      src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp
  18. 12
      src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp
  19. 16
      src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
  20. 4
      src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp
  21. 22
      src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp

4
src/common/x64/cpu_detect.cpp

@@ -77,7 +77,7 @@ CPUCaps::Manufacturer CPUCaps::ParseManufacturer(std::string_view brand_string)
}
// Detects the various CPU features
static CPUCaps Detect() {
static CPUCaps DetectCPUCapabilities() {
CPUCaps caps = {};
// Assumes the CPU supports the CPUID instruction. Those that don't would likely not support
@@ -208,7 +208,7 @@ static CPUCaps Detect() {
}
const CPUCaps& GetCPUCaps() {
static CPUCaps caps = Detect();
static CPUCaps caps = DetectCPUCapabilities();
return caps;
}

24
src/dynarmic/src/dynarmic/backend/arm64/emit_arm64_memory.cpp

@@ -31,7 +31,7 @@ using namespace oaknut::util;
namespace {
bool IsOrdered(IR::AccType acctype) {
[[nodiscard]] inline bool IsOrdered(IR::AccType acctype) {
return acctype == IR::AccType::ORDERED || acctype == IR::AccType::ORDEREDRW || acctype == IR::AccType::LIMITEDORDERED;
}
@@ -209,9 +209,9 @@ void CallbackOnlyEmitExclusiveWriteMemory(oaknut::CodeGenerator& code, EmitConte
ctx.reg_alloc.DefineAsRegister(inst, X0);
}
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
constexpr size_t page_table_const_bits = 12;
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
// This function may use Xscratch0 as a scratch register
// Trashes NZCV
@@ -242,28 +242,28 @@ void EmitDetectMisalignedVAddr(oaknut::CodeGenerator& code, EmitContext& ctx, oa
code.TST(Xaddr, align_mask);
code.B(NE, *fallback);
} else {
// If (addr & page_mask) > page_size - byte_size, use fallback.
code.AND(Xscratch0, Xaddr, page_mask);
code.CMP(Xscratch0, page_size - bitsize / 8);
// If (addr & page_table_const_mask) > page_table_const_size - byte_size, use fallback.
code.AND(Xscratch0, Xaddr, page_table_const_mask);
code.CMP(Xscratch0, page_table_const_size - bitsize / 8);
code.B(HI, *fallback);
}
}
// Outputs Xscratch0 = page_table[addr >> page_bits]
// Outputs Xscratch0 = page_table[addr >> page_table_const_bits]
// May use Xscratch1 as scratch register
// Address to read/write = [ret0 + ret1], ret0 is always Xscratch0 and ret1 is either Xaddr or Xscratch1
// Trashes NZCV
template<size_t bitsize>
std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::CodeGenerator& code, EmitContext& ctx, oaknut::XReg Xaddr, const SharedLabel& fallback) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
EmitDetectMisalignedVAddr<bitsize>(code, ctx, Xaddr, fallback);
if (ctx.conf.silently_mirror_page_table || unused_top_bits == 0) {
code.UBFX(Xscratch0, Xaddr, page_bits, valid_page_index_bits);
code.UBFX(Xscratch0, Xaddr, page_table_const_bits, valid_page_index_bits);
} else {
code.LSR(Xscratch0, Xaddr, page_bits);
code.LSR(Xscratch0, Xaddr, page_table_const_bits);
code.TST(Xscratch0, u64(~u64(0)) << valid_page_index_bits);
code.B(NE, *fallback);
}
@@ -283,7 +283,7 @@ std::pair<oaknut::XReg, oaknut::XReg> InlinePageTableEmitVAddrLookup(oaknut::Cod
if (ctx.conf.absolute_offset_page_table) {
return std::make_pair(Xscratch0, Xaddr);
}
code.AND(Xscratch1, Xaddr, page_mask);
code.AND(Xscratch1, Xaddr, page_table_const_mask);
return std::make_pair(Xscratch0, Xscratch1);
}

26
src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h

@@ -22,9 +22,9 @@ namespace {
using namespace Xbyak::util;
constexpr size_t page_bits = 12;
constexpr size_t page_size = 1 << page_bits;
constexpr size_t page_mask = (1 << page_bits) - 1;
constexpr size_t page_table_const_bits = 12;
constexpr size_t page_table_const_size = 1 << page_table_const_bits;
constexpr size_t page_table_const_mask = (1 << page_table_const_bits) - 1;
template<typename EmitContext>
void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr, Xbyak::Reg64 tmp) {
@@ -50,7 +50,7 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
code.test(vaddr, align_mask);
if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
const u32 page_align_mask = static_cast<u32>(page_table_const_size - 1) & ~align_mask;
SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
@@ -83,7 +83,7 @@ template<>
// TODO: This code assumes vaddr has been zext from 32-bits to 64-bits.
code.mov(tmp, vaddr.cvt32());
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.shl(tmp, int(ctx.conf.page_table_log2_stride));
code.mov(page, qword[r14 + tmp.cvt64()]);
if (ctx.conf.page_table_pointer_mask_bits == 0) {
@@ -96,13 +96,13 @@ template<>
return page + vaddr;
}
code.mov(tmp, vaddr.cvt32());
code.and_(tmp, static_cast<u32>(page_mask));
code.and_(tmp, static_cast<u32>(page_table_const_mask));
return page + tmp.cvt64();
}
template<>
[[maybe_unused]] Xbyak::RegExp EmitVAddrLookup<A64EmitContext>(BlockOfCode& code, A64EmitContext& ctx, size_t bitsize, Xbyak::Label& abort, Xbyak::Reg64 vaddr) {
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_bits;
const size_t valid_page_index_bits = ctx.conf.page_table_address_space_bits - page_table_const_bits;
const size_t unused_top_bits = 64 - ctx.conf.page_table_address_space_bits;
const Xbyak::Reg64 page = ctx.reg_alloc.ScratchGpr(code);
@@ -112,29 +112,29 @@ template<>
if (unused_top_bits == 0) {
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
} else if (ctx.conf.silently_mirror_page_table) {
if (valid_page_index_bits >= 32) {
if (code.HasHostFeature(HostFeature::BMI2)) {
const Xbyak::Reg64 bit_count = ctx.reg_alloc.ScratchGpr(code);
code.mov(bit_count, unused_top_bits);
code.bzhi(tmp, vaddr, bit_count);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
ctx.reg_alloc.Release(bit_count);
} else {
code.mov(tmp, vaddr);
code.shl(tmp, int(unused_top_bits));
code.shr(tmp, int(unused_top_bits + page_bits));
code.shr(tmp, int(unused_top_bits + page_table_const_bits));
}
} else {
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.and_(tmp, u32((1 << valid_page_index_bits) - 1));
}
} else {
ASSERT(valid_page_index_bits < 32);
code.mov(tmp, vaddr);
code.shr(tmp, int(page_bits));
code.shr(tmp, int(page_table_const_bits));
code.test(tmp, u32(-(1 << valid_page_index_bits)));
code.jnz(abort, code.T_NEAR);
}
@@ -151,7 +151,7 @@ template<>
return page + vaddr;
}
code.mov(tmp, vaddr);
code.and_(tmp, static_cast<u32>(page_mask));
code.and_(tmp, static_cast<u32>(page_table_const_mask));
return page + tmp;
}

8
src/shader_recompiler/backend/glasm/emit_glasm_context_get_set.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -45,11 +48,6 @@ void GetCbuf(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, ScalarU
}
}
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
std::string VertexIndex(EmitContext& ctx, ScalarU32 vertex) {
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
}

7
src/shader_recompiler/backend/glasm/emit_glasm_instructions.h

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
@@ -18,6 +18,11 @@ class Value;
namespace Shader::Backend::GLASM {
[[nodiscard]] inline bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl
|| stage == Stage::TessellationEval;
}
class EmitContext;
// Microinstruction emitters

49
src/shader_recompiler/backend/glasm/emit_glasm_not_implemented.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -7,94 +10,92 @@
namespace Shader::Backend::GLASM {
#define NotImplemented() throw NotImplementedException("GLASM instruction {}", __LINE__)
void EmitGetRegister(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetRegister(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetPred(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetPred(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetGotoVariable(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetGotoVariable(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetIndirectBranchVariable(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetZFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetSFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetCFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetOFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetZFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetSFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetCFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitSetOFlag(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetZeroFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetSignFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetCarryFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetOverflowFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetSparseFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
void EmitGetInBoundsFromOp(EmitContext& ctx) {
NotImplemented();
throw NotImplementedException("GLASM instruction {}", __LINE__);
}
} // namespace Shader::Backend::GLASM

8
src/shader_recompiler/backend/glasm/glasm_emit_context.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -21,11 +24,6 @@ std::string_view InterpDecorator(Interpolation interp) {
}
throw InvalidArgument("Invalid interpolation {}", interp);
}
bool IsInputArray(Stage stage) {
return stage == Stage::Geometry || stage == Stage::TessellationControl ||
stage == Stage::TessellationEval;
}
} // Anonymous namespace
EmitContext::EmitContext(IR::Program& program, Bindings& bindings, const Profile& profile_,

6
src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp

@@ -32,10 +32,6 @@ std::string InputVertexIndex(EmitContext& ctx, std::string_view vertex) {
return IsInputArray(ctx.stage) ? fmt::format("[{}]", vertex) : "";
}
std::string_view OutputVertexIndex(EmitContext& ctx) {
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
}
std::string ChooseCbuf(EmitContext& ctx, const IR::Value& binding, std::string_view index) {
if (binding.IsImmediate()) {
return fmt::format("{}_cbuf{}[{}]", ctx.stage_name, binding.U32(), index);
@@ -281,7 +277,7 @@ void EmitSetAttribute(EmitContext& ctx, IR::Attribute attr, std::string_view val
const u32 index{IR::GenericAttributeIndex(attr)};
const u32 attr_element{IR::GenericAttributeElement(attr)};
const GenericElementInfo& info{ctx.output_generics.at(index).at(attr_element)};
const auto output_decorator{OutputVertexIndex(ctx)};
const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
if (info.num_components == 1) {
ctx.Add("{}{}={};", info.name, output_decorator, value);
} else {

2
src/shader_recompiler/backend/glsl/emit_glsl_instructions.h

@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project

8
src/shader_recompiler/backend/glsl/emit_glsl_special.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -9,9 +12,6 @@
namespace Shader::Backend::GLSL {
namespace {
std::string_view OutputVertexIndex(EmitContext& ctx) {
return ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
}
void InitializeOutputVaryings(EmitContext& ctx) {
if (ctx.uses_geometry_passthrough) {
@@ -25,7 +25,7 @@ void InitializeOutputVaryings(EmitContext& ctx) {
continue;
}
const auto& info_array{ctx.output_generics.at(index)};
const auto output_decorator{OutputVertexIndex(ctx)};
const auto output_decorator = ctx.stage == Stage::TessellationControl ? "[gl_InvocationID]" : "";
size_t element{};
while (element < info_array.size()) {
const auto& info{info_array.at(element)};

10
src/shader_recompiler/backend/spirv/emit_spirv_image_atomic.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -17,14 +20,13 @@ Id Image(EmitContext& ctx, IR::TextureInstInfo info) {
}
}
std::pair<Id, Id> AtomicArgs(EmitContext& ctx) {
std::pair<Id, Id> AtomicImageArgs(EmitContext& ctx) {
const Id scope{ctx.Const(static_cast<u32>(spv::Scope::Device))};
const Id semantics{ctx.u32_zero_value};
return {scope, semantics};
}
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value,
Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords, Id value, Id (Sirit::Module::*atomic_func)(Id, Id, Id, Id, Id)) {
if (!index.IsImmediate() || index.U32() != 0) {
// TODO: handle layers
throw NotImplementedException("Image indexing");
@@ -32,7 +34,7 @@ Id ImageAtomicU32(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id c
const auto info{inst->Flags<IR::TextureInstInfo>()};
const Id image{Image(ctx, info)};
const Id pointer{ctx.OpImageTexelPointer(ctx.image_u32, image, coords, ctx.Const(0U))};
const auto [scope, semantics]{AtomicArgs(ctx)};
const auto [scope, semantics] = AtomicImageArgs(ctx);
return (ctx.*atomic_func)(ctx.U32[1], pointer, scope, semantics, value);
}
} // Anonymous namespace

55
src/shader_recompiler/frontend/maxwell/translate/impl/floating_point_conversion_floating_point.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -6,7 +9,7 @@
namespace Shader::Maxwell {
namespace {
enum class FloatFormat : u64 {
enum class FloatConversionFormat : u64 {
F16 = 1,
F32 = 2,
F64 = 3,
@@ -21,13 +24,13 @@ enum class RoundingOp : u64 {
Trunc = 11,
};
[[nodiscard]] u32 WidthSize(FloatFormat width) {
[[nodiscard]] u32 WidthSize(FloatConversionFormat width) {
switch (width) {
case FloatFormat::F16:
case FloatConversionFormat::F16:
return 16;
case FloatFormat::F32:
case FloatConversionFormat::F32:
return 32;
case FloatFormat::F64:
case FloatConversionFormat::F64:
return 64;
default:
throw NotImplementedException("Invalid width {}", width);
@@ -44,8 +47,8 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
BitField<50, 1, u64> sat;
BitField<39, 4, u64> rounding_op;
BitField<39, 2, FpRounding> rounding;
BitField<10, 2, FloatFormat> src_size;
BitField<8, 2, FloatFormat> dst_size;
BitField<10, 2, FloatConversionFormat> src_size;
BitField<8, 2, FloatConversionFormat> dst_size;
[[nodiscard]] RoundingOp RoundingOperation() const {
constexpr u64 rounding_mask = 0x0B;
@@ -59,7 +62,7 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
IR::F16F32F64 input{v.ir.FPAbsNeg(src_a, abs, f2f.neg != 0)};
const bool any_fp64{f2f.src_size == FloatFormat::F64 || f2f.dst_size == FloatFormat::F64};
const bool any_fp64{f2f.src_size == FloatConversionFormat::F64 || f2f.dst_size == FloatConversionFormat::F64};
IR::FpControl fp_control{
.no_contraction = false,
.rounding = IR::FpRounding::DontCare,
@@ -74,13 +77,13 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
case RoundingOp::Pass:
// Make sure NANs are handled properly
switch (f2f.src_size) {
case FloatFormat::F16:
case FloatConversionFormat::F16:
input = v.ir.FPAdd(input, v.ir.FPConvert(16, v.ir.Imm32(0.0f)), fp_control);
break;
case FloatFormat::F32:
case FloatConversionFormat::F32:
input = v.ir.FPAdd(input, v.ir.Imm32(0.0f), fp_control);
break;
case FloatFormat::F64:
case FloatConversionFormat::F64:
input = v.ir.FPAdd(input, v.ir.Imm64(0.0), fp_control);
break;
}
@@ -106,15 +109,15 @@ void F2F(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a, bool abs) {
}
switch (f2f.dst_size) {
case FloatFormat::F16: {
case FloatConversionFormat::F16: {
const IR::F16 imm{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(f2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(input, imm)));
break;
}
case FloatFormat::F32:
case FloatConversionFormat::F32:
v.F(f2f.dest_reg, input);
break;
case FloatFormat::F64:
case FloatConversionFormat::F64:
v.D(f2f.dest_reg, input);
break;
default:
@@ -127,21 +130,21 @@ void TranslatorVisitor::F2F_reg(u64 insn) {
union {
u64 insn;
BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size;
BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector;
} const f2f{insn};
IR::F16F32F64 src_a;
switch (f2f.src_size) {
case FloatFormat::F16: {
case FloatConversionFormat::F16: {
auto [lhs_a, rhs_a]{Extract(ir, GetReg20(insn), Swizzle::H1_H0)};
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
break;
}
case FloatFormat::F32:
case FloatConversionFormat::F32:
src_a = GetFloatReg20(insn);
break;
case FloatFormat::F64:
case FloatConversionFormat::F64:
src_a = GetDoubleReg20(insn);
break;
default:
@@ -154,21 +157,21 @@ void TranslatorVisitor::F2F_cbuf(u64 insn) {
union {
u64 insn;
BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size;
BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector;
} const f2f{insn};
IR::F16F32F64 src_a;
switch (f2f.src_size) {
case FloatFormat::F16: {
case FloatConversionFormat::F16: {
auto [lhs_a, rhs_a]{Extract(ir, GetCbuf(insn), Swizzle::H1_H0)};
src_a = f2f.selector != 0 ? rhs_a : lhs_a;
break;
}
case FloatFormat::F32:
case FloatConversionFormat::F32:
src_a = GetFloatCbuf(insn);
break;
case FloatFormat::F64:
case FloatConversionFormat::F64:
src_a = GetDoubleCbuf(insn);
break;
default:
@@ -181,7 +184,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
union {
u64 insn;
BitField<49, 1, u64> abs;
BitField<10, 2, FloatFormat> src_size;
BitField<10, 2, FloatConversionFormat> src_size;
BitField<41, 1, u64> selector;
BitField<20, 19, u64> imm;
BitField<56, 1, u64> imm_neg;
@@ -189,7 +192,7 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
IR::F16F32F64 src_a;
switch (f2f.src_size) {
case FloatFormat::F16: {
case FloatConversionFormat::F16: {
const u32 imm{static_cast<u32>(f2f.imm & 0x0000ffff)};
const IR::Value vector{ir.UnpackFloat2x16(ir.Imm32(imm | (imm << 16)))};
src_a = IR::F16{ir.CompositeExtract(vector, f2f.selector != 0 ? 0 : 1)};
@@ -198,10 +201,10 @@ void TranslatorVisitor::F2F_imm([[maybe_unused]] u64 insn) {
}
break;
}
case FloatFormat::F32:
case FloatConversionFormat::F32:
src_a = GetFloatImm20(insn);
break;
case FloatFormat::F64:
case FloatConversionFormat::F64:
src_a = GetDoubleImm20(insn);
break;
default:

35
src/shader_recompiler/frontend/maxwell/translate/impl/integer_add_three_input.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -7,48 +10,48 @@
namespace Shader::Maxwell {
namespace {
enum class Shift : u64 {
enum class IADD3Shift : u64 {
None,
Right,
Left,
};
enum class Half : u64 {
enum class IADD3Half : u64 {
All,
Lower,
Upper,
};
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, Half half) {
[[nodiscard]] IR::U32 IntegerHalf(IR::IREmitter& ir, const IR::U32& value, IADD3Half half) {
constexpr bool is_signed{false};
switch (half) {
case Half::All:
case IADD3Half::All:
return value;
case Half::Lower:
case IADD3Half::Lower:
return ir.BitFieldExtract(value, ir.Imm32(0), ir.Imm32(16), is_signed);
case Half::Upper:
case IADD3Half::Upper:
return ir.BitFieldExtract(value, ir.Imm32(16), ir.Imm32(16), is_signed);
}
throw NotImplementedException("Invalid half");
}
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, Shift shift) {
[[nodiscard]] IR::U32 IntegerShift(IR::IREmitter& ir, const IR::U32& value, IADD3Shift shift) {
switch (shift) {
case Shift::None:
case IADD3Shift::None:
return value;
case Shift::Right: {
case IADD3Shift::Right: {
// 33-bit RS IADD3 edge case
const IR::U1 edge_case{ir.GetCarryFromOp(value)};
const IR::U32 shifted{ir.ShiftRightLogical(value, ir.Imm32(16))};
return IR::U32{ir.Select(edge_case, ir.IAdd(shifted, ir.Imm32(0x10000)), shifted)};
}
case Shift::Left:
case IADD3Shift::Left:
return ir.ShiftLeftLogical(value, ir.Imm32(16));
}
throw NotImplementedException("Invalid shift");
}
void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 op_c,
Shift shift = Shift::None) {
IADD3Shift shift = IADD3Shift::None) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
@@ -71,7 +74,7 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
IR::U32 lhs_1{v.ir.IAdd(op_a, op_b)};
if (iadd3.x != 0) {
// TODO: How does RS behave when X is set?
if (shift == Shift::Right) {
if (shift == IADD3Shift::Right) {
throw NotImplementedException("IADD3 X+RS");
}
const IR::U32 carry{v.ir.Select(v.ir.GetCFlag(), v.ir.Imm32(1), v.ir.Imm32(0))};
@@ -98,10 +101,10 @@ void IADD3(TranslatorVisitor& v, u64 insn, IR::U32 op_a, IR::U32 op_b, IR::U32 o
void TranslatorVisitor::IADD3_reg(u64 insn) {
union {
u64 insn;
BitField<37, 2, Shift> shift;
BitField<35, 2, Half> half_a;
BitField<33, 2, Half> half_b;
BitField<31, 2, Half> half_c;
BitField<37, 2, IADD3Shift> shift;
BitField<35, 2, IADD3Half> half_a;
BitField<33, 2, IADD3Half> half_b;
BitField<31, 2, IADD3Half> half_c;
} const iadd3{insn};
const auto op_a{IntegerHalf(ir, GetReg8(insn), iadd3.half_a)};

18
src/shader_recompiler/frontend/maxwell/translate/impl/integer_floating_point_conversion.cpp

@@ -11,7 +11,7 @@
namespace Shader::Maxwell {
namespace {
enum class FloatFormat : u64 {
enum class IntegerToFloatFormat : u64 {
F16 = 1,
F32 = 2,
F64 = 3,
@@ -27,7 +27,7 @@ enum class IntFormat : u64 {
union EncodingIFPC {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 2, FloatFormat> float_format;
BitField<8, 2, IntegerToFloatFormat> float_format;
BitField<10, 2, IntFormat> int_format;
BitField<13, 1, u64> is_signed;
BitField<39, 2, FpRounding> fp_rounding;
@@ -41,13 +41,13 @@ bool Is64(u64 insn) {
return EncodingIFPC{insn}.int_format == IntFormat::U64;
}
int BitSize(FloatFormat format) {
int BitSize(IntegerToFloatFormat format) {
switch (format) {
case FloatFormat::F16:
case IntegerToFloatFormat::F16:
return 16;
case FloatFormat::F32:
case IntegerToFloatFormat::F32:
return 32;
case FloatFormat::F64:
case IntegerToFloatFormat::F64:
return 64;
}
throw NotImplementedException("Invalid float format {}", format);
@@ -119,15 +119,15 @@ void I2F(TranslatorVisitor& v, u64 insn, IR::U32U64 src) {
}
}
switch (i2f.float_format) {
case FloatFormat::F16: {
case IntegerToFloatFormat::F16: {
const IR::F16 zero{v.ir.FPConvert(16, v.ir.Imm32(0.0f))};
v.X(i2f.dest_reg, v.ir.PackFloat2x16(v.ir.CompositeConstruct(value, zero)));
break;
}
case FloatFormat::F32:
case IntegerToFloatFormat::F32:
v.F(i2f.dest_reg, value);
break;
case FloatFormat::F64: {
case IntegerToFloatFormat::F64: {
if (!IR::IsAligned(i2f.dest_reg, 2)) {
throw NotImplementedException("Unaligned destination {}", i2f.dest_reg.Value());
}

27
src/shader_recompiler/frontend/maxwell/translate/impl/integer_short_multiply_add.cpp

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -15,18 +18,18 @@ enum class SelectMode : u64 {
CBCC,
};
enum class Half : u64 {
enum class IMADHalf : u64 {
H0, // Least-significant bits (15:0)
H1, // Most-significant bits (31:16)
};
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, Half half, bool is_signed) {
const IR::U32 offset{v.ir.Imm32(half == Half::H1 ? 16 : 0)};
IR::U32 ExtractHalf(TranslatorVisitor& v, const IR::U32& src, IMADHalf half, bool is_signed) {
const IR::U32 offset{v.ir.Imm32(half == IMADHalf::H1 ? 16 : 0)};
return v.ir.BitFieldExtract(src, offset, v.ir.Imm32(16), is_signed);
}
void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c,
SelectMode select_mode, Half half_b, bool psl, bool mrg, bool x) {
SelectMode select_mode, IMADHalf half_b, bool psl, bool mrg, bool x) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
@@ -34,7 +37,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
BitField<47, 1, u64> cc;
BitField<48, 1, u64> is_a_signed;
BitField<49, 1, u64> is_b_signed;
BitField<53, 1, Half> half_a;
BitField<53, 1, IMADHalf> half_a;
} const xmad{insn};
if (x) {
@@ -53,9 +56,9 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
case SelectMode::Default:
return src_c;
case SelectMode::CLO:
return ExtractHalf(v, src_c, Half::H0, false);
return ExtractHalf(v, src_c, IMADHalf::H0, false);
case SelectMode::CHI:
return ExtractHalf(v, src_c, Half::H1, false);
return ExtractHalf(v, src_c, IMADHalf::H1, false);
case SelectMode::CBCC:
return v.ir.IAdd(v.ir.ShiftLeftLogical(src_b, v.ir.Imm32(16)), src_c);
case SelectMode::CSFU:
@@ -66,7 +69,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
IR::U32 result{v.ir.IAdd(product, op_c)};
if (mrg) {
// .MRG inserts src_b [15:0] into result's [31:16].
const IR::U32 lsb_b{ExtractHalf(v, src_b, Half::H0, false)};
const IR::U32 lsb_b{ExtractHalf(v, src_b, IMADHalf::H0, false)};
result = v.ir.BitFieldInsert(result, lsb_b, v.ir.Imm32(16), v.ir.Imm32(16));
}
if (xmad.cc) {
@@ -80,7 +83,7 @@ void XMAD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
void TranslatorVisitor::XMAD_reg(u64 insn) {
union {
u64 raw;
BitField<35, 1, Half> half_b;
BitField<35, 1, IMADHalf> half_b;
BitField<36, 1, u64> psl;
BitField<37, 1, u64> mrg;
BitField<38, 1, u64> x;
@@ -95,7 +98,7 @@ void TranslatorVisitor::XMAD_rc(u64 insn) {
union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<52, 1, IMADHalf> half_b;
BitField<54, 1, u64> x;
} const xmad{insn};
@@ -107,7 +110,7 @@ void TranslatorVisitor::XMAD_cr(u64 insn) {
union {
u64 raw;
BitField<50, 2, SelectMode> select_mode;
BitField<52, 1, Half> half_b;
BitField<52, 1, IMADHalf> half_b;
BitField<54, 1, u64> x;
BitField<55, 1, u64> psl;
BitField<56, 1, u64> mrg;
@@ -128,7 +131,7 @@ void TranslatorVisitor::XMAD_imm(u64 insn) {
} const xmad{insn};
XMAD(*this, insn, ir.Imm32(static_cast<u32>(xmad.src_b)), GetReg39(insn), xmad.select_mode,
Half::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
IMADHalf::H0, xmad.psl != 0, xmad.mrg != 0, xmad.x != 0);
}
} // namespace Shader::Maxwell

12
src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp

@@ -24,17 +24,17 @@ enum class SZ : u64 {
F32
};
enum class Shift : u64 {
enum class ISBERDShift : u64 {
Default,
U16,
B32,
};
IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) {
IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, ISBERDShift shift) {
switch (shift) {
case Shift::Default: return index;
case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1));
case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2));
case ISBERDShift::Default: return index;
case ISBERDShift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1));
case ISBERDShift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2));
default: UNREACHABLE();
}
}
@ -65,7 +65,7 @@ void TranslatorVisitor::ISBERD(u64 insn) {
BitField<32, 1, u64> o;
BitField<33, 2, ISBERDMode> mode;
BitField<36, 4, SZ> sz;
BitField<47, 2, Shift> shift;
BitField<47, 2, ISBERDShift> shift;
} const isberd{insn};
IR::U32 index{};

11
src/shader_recompiler/frontend/maxwell/translate/impl/load_store_memory.cpp

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -30,7 +33,7 @@ enum class StoreSize : u64 {
};
// See Table 27 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class LoadCache : u64 {
enum class XMEMLoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (cache in L2 and below, not L1)
CI, // ???
@ -38,7 +41,7 @@ enum class LoadCache : u64 {
};
// See Table 28 in https://docs.nvidia.com/cuda/parallel-thread-execution/index.html
enum class StoreCache : u64 {
enum class XMEMStoreCache : u64 {
WB, // Cache write-back all coherent levels
CG, // Cache at global level
CS, // Cache streaming, likely to be accessed once
@ -83,7 +86,7 @@ void TranslatorVisitor::LDG(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> dest_reg;
BitField<46, 2, LoadCache> cache;
BitField<46, 2, XMEMLoadCache> cache;
BitField<48, 3, LoadSize> size;
} const ldg{insn};
@ -137,7 +140,7 @@ void TranslatorVisitor::STG(u64 insn) {
union {
u64 raw;
BitField<0, 8, IR::Reg> data_reg;
BitField<46, 2, StoreCache> cache;
BitField<46, 2, XMEMStoreCache> cache;
BitField<48, 3, StoreSize> size;
} const stg{insn};

12
src/shader_recompiler/frontend/maxwell/translate/impl/surface_load_store.cpp

@ -64,14 +64,14 @@ enum class SurfaceLoadStoreClamp : u64 {
};
// https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#cache-operators
enum class LoadCache : u64 {
enum class SURFLoadCache : u64 {
CA, // Cache at all levels, likely to be accessed again
CG, // Cache at global level (L2 and below, not L1)
CI, // ???
CV, // Don't cache and fetch again (volatile)
};
enum class StoreCache : u64 {
enum class SURFStoreCache : u64 {
WB, // Cache write-back all coherent levels
CG, // Cache at global level (L2 and below, not L1)
CS, // Cache streaming, likely to be accessed once
@ -178,7 +178,7 @@ void TranslatorVisitor::SULD(u64 insn) {
BitField<52, 1, u64> d;
BitField<23, 1, u64> ba;
BitField<33, 3, SurfaceLoadStoreType> type;
BitField<24, 2, LoadCache> cache;
BitField<24, 2, SURFLoadCache> cache;
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
@ -191,7 +191,7 @@ void TranslatorVisitor::SULD(u64 insn) {
if (suld.clamp != SurfaceLoadStoreClamp::IGN) {
throw NotImplementedException("SurfaceLoadStoreClamp {}", suld.clamp.Value());
}
if (suld.cache != LoadCache::CA && suld.cache != LoadCache::CG) {
if (suld.cache != SURFLoadCache::CA && suld.cache != SURFLoadCache::CG) {
throw NotImplementedException("Cache {}", suld.cache.Value());
}
const bool is_typed{suld.d != 0};
@ -238,7 +238,7 @@ void TranslatorVisitor::SUST(u64 insn) {
BitField<52, 1, u64> d;
BitField<23, 1, u64> ba;
BitField<33, 3, SurfaceLoadStoreType> type;
BitField<24, 2, StoreCache> cache;
BitField<24, 2, SURFStoreCache> cache;
BitField<20, 3, SurfaceLoadStoreSize> size; // .D
BitField<20, 4, u64> swizzle; // .P
BitField<49, 2, SurfaceLoadStoreClamp> clamp;
@ -251,7 +251,7 @@ void TranslatorVisitor::SUST(u64 insn) {
if (sust.clamp != SurfaceLoadStoreClamp::IGN) {
throw NotImplementedException("SurfaceLoadStoreClamp {}", sust.clamp.Value());
}
if (sust.cache != StoreCache::WB && sust.cache != StoreCache::CG) {
if (sust.cache != SURFStoreCache::WB && sust.cache != SURFStoreCache::CG) {
throw NotImplementedException("Cache {}", sust.cache.Value());
}
const bool is_typed{sust.d != 0};

16
src/shader_recompiler/frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp

@ -134,7 +134,7 @@ IR::Value SampleTFS(TranslatorVisitor& v, u64 insn) {
}
}
unsigned Swizzle(u64 insn) {
unsigned FetchSwizzle(u64 insn) {
#define R 1
#define G 2
#define B 4
@ -173,7 +173,7 @@ unsigned Swizzle(u64 insn) {
}
}
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
IR::F32 FetchExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
const bool is_shadow{sample.Type() == IR::Type::F32};
if (is_shadow) {
const bool is_alpha{component == 3};
@ -183,7 +183,7 @@ IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned componen
}
}
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
IR::Reg FetchRegStoreComponent32(u64 insn, unsigned index) {
const EncodinTFS texs{insn};
switch (index) {
case 0:
@ -201,14 +201,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
}
void Store32TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)};
const unsigned swizzle{FetchSwizzle(insn)};
unsigned store_index{0};
for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) {
continue;
}
const IR::Reg dest{RegStoreComponent32(insn, store_index)};
v.F(dest, Extract(v, sample, component));
const IR::Reg dest{FetchRegStoreComponent32(insn, store_index)};
v.F(dest, FetchExtract(v, sample, component));
++store_index;
}
}
@ -218,14 +218,14 @@ IR::U32 PackTFS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
}
void Store16TFS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)};
const unsigned swizzle{FetchSwizzle(insn)};
unsigned store_index{0};
std::array<IR::F32, 4> swizzled;
for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) {
continue;
}
swizzled[store_index] = Extract(v, sample, component);
swizzled[store_index] = FetchExtract(v, sample, component);
++store_index;
}
const IR::F32 zero{v.ir.Imm32(0.0f)};

4
src/shader_recompiler/frontend/maxwell/translate/impl/texture_gather_swizzled.cpp

@ -43,7 +43,7 @@ void CheckAlignmentTGS(IR::Reg reg, size_t alignment) {
}
}
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
IR::Value MakeGatherOffset(TranslatorVisitor& v, IR::Reg reg) {
const IR::U32 value{v.X(reg)};
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(6), true),
v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(6), true));
@ -65,7 +65,7 @@ IR::Value SampleTGS(TranslatorVisitor& v, u64 insn) {
if (tld4s.aoffi != 0) {
CheckAlignmentTGS(reg_a, 2);
coords = v.ir.CompositeConstruct(v.F(reg_a), v.F(reg_a + 1));
IR::Value offset = MakeOffset(v, reg_b);
IR::Value offset = MakeGatherOffset(v, reg_b);
if (tld4s.dc != 0) {
CheckAlignmentTGS(reg_b, 2);
IR::F32 dref = v.F(reg_b + 1);

22
src/shader_recompiler/frontend/maxwell/translate/impl/texture_load_swizzled.cpp

@ -40,7 +40,7 @@ void CheckAlignmentTLS(IR::Reg reg, size_t alignment) {
}
}
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg reg) {
IR::Value MakeLoadOffset(TranslatorVisitor& v, IR::Reg reg) {
const IR::U32 value{v.X(reg)};
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4), true),
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4), true));
@ -74,7 +74,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
CheckAlignmentTLS(reg_a, 2);
texture_type = Shader::TextureType::Color2D;
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
offsets = MakeOffset(v, reg_b);
offsets = MakeLoadOffset(v, reg_b);
break;
case 5:
CheckAlignmentTLS(reg_a, 2);
@ -106,7 +106,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
texture_type = Shader::TextureType::Color2D;
coords = v.ir.CompositeConstruct(v.X(reg_a), v.X(reg_a + 1));
lod = v.X(reg_b);
offsets = MakeOffset(v, reg_b + 1);
offsets = MakeLoadOffset(v, reg_b + 1);
break;
default:
throw NotImplementedException("Illegal encoding {}", tlds.encoding.Value());
@ -119,7 +119,7 @@ IR::Value SampleTLS(TranslatorVisitor& v, u64 insn) {
return v.ir.ImageFetch(handle, coords, offsets, lod, multisample, info);
}
unsigned Swizzle(u64 insn) {
unsigned LoadSwizzle(u64 insn) {
#define R 1
#define G 2
#define B 4
@ -160,11 +160,11 @@ unsigned Swizzle(u64 insn) {
}
}
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
IR::F32 LoadExtract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
return IR::F32{v.ir.CompositeExtract(sample, component)};
}
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
IR::Reg LoadRegStoreComponent32(u64 insn, unsigned index) {
const EncodinTLS tlds{insn};
switch (index) {
case 0:
@ -182,14 +182,14 @@ IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
}
void Store32TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)};
const unsigned swizzle{LoadSwizzle(insn)};
unsigned store_index{0};
for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) {
continue;
}
const IR::Reg dest{RegStoreComponent32(insn, store_index)};
v.F(dest, Extract(v, sample, component));
const IR::Reg dest{LoadRegStoreComponent32(insn, store_index)};
v.F(dest, LoadExtract(v, sample, component));
++store_index;
}
}
@ -199,14 +199,14 @@ IR::U32 PackTLS(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
}
void Store16TLS(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
const unsigned swizzle{Swizzle(insn)};
const unsigned swizzle{LoadSwizzle(insn)};
unsigned store_index{0};
std::array<IR::F32, 4> swizzled;
for (unsigned component = 0; component < 4; ++component) {
if (((swizzle >> component) & 1) == 0) {
continue;
}
swizzled[store_index] = Extract(v, sample, component);
swizzled[store_index] = LoadExtract(v, sample, component);
++store_index;
}
const IR::F32 zero{v.ir.Imm32(0.0f)};

Loading…
Cancel
Save