committed by ameerj

20 changed files with 730 additions and 36 deletions
- 2    src/shader_recompiler/CMakeLists.txt
- 115  src/shader_recompiler/backend/spirv/emit_context.cpp
- 21   src/shader_recompiler/backend/spirv/emit_context.h
- 14   src/shader_recompiler/backend/spirv/emit_spirv.h
- 10   src/shader_recompiler/backend/spirv/emit_spirv_context_get_set.cpp
- 175  src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
- 4    src/shader_recompiler/environment.h
- 46   src/shader_recompiler/frontend/ir/ir_emitter.cpp
- 6    src/shader_recompiler/frontend/ir/ir_emitter.h
- 6    src/shader_recompiler/frontend/ir/microinstruction.cpp
- 18   src/shader_recompiler/frontend/ir/opcodes.inc
- 2    src/shader_recompiler/frontend/ir/program.h
- 2    src/shader_recompiler/frontend/maxwell/program.cpp
- 197  src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
- 16   src/shader_recompiler/frontend/maxwell/translate/impl/not_implemented.cpp
- 6    src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp
- 3    src/shader_recompiler/profile.h
- 47   src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
- 34   src/video_core/vulkan_common/vulkan_device.cpp
- 42   src/video_core/vulkan_common/vulkan_device.h
src/shader_recompiler/backend/spirv/emit_spirv_shared_memory.cpp
@@ -0,0 +1,175 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "shader_recompiler/backend/spirv/emit_spirv.h"

namespace Shader::Backend::SPIRV {
namespace {
Id Pointer(EmitContext& ctx, Id pointer_type, Id array, Id offset, u32 shift) {
    const Id shift_id{ctx.Constant(ctx.U32[1], shift)};
    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
    return ctx.OpAccessChain(pointer_type, array, ctx.u32_zero_value, index);
}

Id Word(EmitContext& ctx, Id offset) {
    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
    const Id index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
    const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
    return ctx.OpLoad(ctx.U32[1], pointer);
}

std::pair<Id, Id> ExtractArgs(EmitContext& ctx, Id offset, u32 mask, u32 count) {
    const Id shift{ctx.OpShiftLeftLogical(ctx.U32[1], offset, ctx.Constant(ctx.U32[1], 3U))};
    const Id bit{ctx.OpBitwiseAnd(ctx.U32[1], shift, ctx.Constant(ctx.U32[1], mask))};
    const Id count_id{ctx.Constant(ctx.U32[1], count)};
    return {bit, count_id};
}
} // Anonymous namespace
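
Note: when the profile lacks explicit workgroup layout support (presumably the VK_KHR_workgroup_memory_explicit_layout path, given the vulkan_device changes in this commit), shared memory is backed by a plain u32 array and sub-word loads are synthesized from a word load plus a bit-field extract. A minimal host-side sketch of the arithmetic these helpers emit (function and variable names here are hypothetical, illustrative only, not part of the commit):

#include <cstdint>
#include <vector>

// Model of the fallback path: shared memory as an array of 32-bit words.
uint32_t LoadSharedU8(const std::vector<uint32_t>& shared, uint32_t offset) {
    const uint32_t word = shared[offset >> 2]; // Word(): word index = offset / 4
    const uint32_t bit = (offset << 3) & 24;   // ExtractArgs(..., 24, 8): bit = 0, 8, 16 or 24
    return (word >> bit) & 0xffu;              // OpBitFieldUExtract(word, bit, 8)
}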

Id EmitLoadSharedU8(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{
            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
    } else {
        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
    }
}

Id EmitLoadSharedS8(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{
            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U8, pointer));
    } else {
        const auto [bit, count]{ExtractArgs(ctx, offset, 24, 8)};
        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
    }
}

Id EmitLoadSharedU16(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
        return ctx.OpUConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
    } else {
        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
        return ctx.OpBitFieldUExtract(ctx.U32[1], Word(ctx, offset), bit, count);
    }
}

Id EmitLoadSharedS16(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
        return ctx.OpSConvert(ctx.U32[1], ctx.OpLoad(ctx.U16, pointer));
    } else {
        const auto [bit, count]{ExtractArgs(ctx, offset, 16, 16)};
        return ctx.OpBitFieldSExtract(ctx.U32[1], Word(ctx, offset), bit, count);
    }
}

Id EmitLoadSharedU32(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2)};
        return ctx.OpLoad(ctx.U32[1], pointer);
    } else {
        return Word(ctx, offset);
    }
}

Id EmitLoadSharedU64(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
        return ctx.OpLoad(ctx.U32[2], pointer);
    } else {
        const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
        const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
        const Id next_index{ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], 1U))};
        const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, base_index)};
        const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_index)};
        return ctx.OpCompositeConstruct(ctx.U32[2], ctx.OpLoad(ctx.U32[1], lhs_pointer),
                                        ctx.OpLoad(ctx.U32[1], rhs_pointer));
    }
}

Id EmitLoadSharedU128(EmitContext& ctx, Id offset) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
        return ctx.OpLoad(ctx.U32[4], pointer);
    }
    const Id shift_id{ctx.Constant(ctx.U32[1], 2U)};
    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift_id)};
    std::array<Id, 4> values{};
    for (u32 i = 0; i < 4; ++i) {
        const Id index{i == 0 ? base_index
                              : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
        values[i] = ctx.OpLoad(ctx.U32[1], pointer);
    }
    return ctx.OpCompositeConstruct(ctx.U32[4], values);
}

void EmitWriteSharedU8(EmitContext& ctx, Id offset, Id value) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{
            ctx.OpAccessChain(ctx.shared_u8, ctx.shared_memory_u8, ctx.u32_zero_value, offset)};
        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U8, value));
    } else {
        ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u8_func, offset, value);
    }
}

void EmitWriteSharedU16(EmitContext& ctx, Id offset, Id value) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u16, ctx.shared_memory_u16, offset, 1)};
        ctx.OpStore(pointer, ctx.OpUConvert(ctx.U16, value));
    } else {
        ctx.OpFunctionCall(ctx.void_id, ctx.shared_store_u16_func, offset, value);
    }
}

void EmitWriteSharedU32(EmitContext& ctx, Id offset, Id value) {
    Id pointer{};
    if (ctx.profile.support_explicit_workgroup_layout) {
        pointer = Pointer(ctx, ctx.shared_u32, ctx.shared_memory_u32, offset, 2);
    } else {
        const Id shift{ctx.Constant(ctx.U32[1], 2U)};
        const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
        pointer = ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset);
    }
    ctx.OpStore(pointer, value);
}

void EmitWriteSharedU64(EmitContext& ctx, Id offset, Id value) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u32x2, ctx.shared_memory_u32x2, offset, 3)};
        ctx.OpStore(pointer, value);
        return;
    }
    const Id shift{ctx.Constant(ctx.U32[1], 2U)};
    const Id word_offset{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
    const Id next_offset{ctx.OpIAdd(ctx.U32[1], word_offset, ctx.Constant(ctx.U32[1], 1U))};
    const Id lhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, word_offset)};
    const Id rhs_pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, next_offset)};
    ctx.OpStore(lhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 0U));
    ctx.OpStore(rhs_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
}

void EmitWriteSharedU128(EmitContext& ctx, Id offset, Id value) {
    if (ctx.profile.support_explicit_workgroup_layout) {
        const Id pointer{Pointer(ctx, ctx.shared_u32x4, ctx.shared_memory_u32x4, offset, 4)};
        ctx.OpStore(pointer, value);
        return;
    }
    const Id shift{ctx.Constant(ctx.U32[1], 2U)};
    const Id base_index{ctx.OpShiftRightArithmetic(ctx.U32[1], offset, shift)};
    for (u32 i = 0; i < 4; ++i) {
        const Id index{i == 0 ? base_index
                              : ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], i))};
        const Id pointer{ctx.OpAccessChain(ctx.shared_u32, ctx.shared_memory_u32, index)};
        ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, i));
    }
}

} // namespace Shader::Backend::SPIRV
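
Note: on the fallback path, sub-word stores are routed through generated helper functions (ctx.shared_store_u8_func / ctx.shared_store_u16_func, presumably built in emit_context.cpp, which this commit also touches) rather than emitted inline. A host-side sketch of the read-modify-write such a helper presumably performs (hypothetical model, not the emitted SPIR-V):

#include <cstdint>
#include <vector>

// Assumed behavior of shared_store_u8_func: insert the low byte of value
// into the containing 32-bit word. Note the read-modify-write is not atomic,
// so concurrent writes to neighboring bytes of the same word could race.
void StoreSharedU8(std::vector<uint32_t>& shared, uint32_t offset, uint32_t value) {
    const uint32_t bit = (offset << 3) & 24;  // bit position inside the word
    uint32_t& word = shared[offset >> 2];     // containing word
    word = (word & ~(0xffu << bit)) | ((value & 0xffu) << bit);  // OpBitFieldInsert
}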

src/shader_recompiler/frontend/maxwell/translate/impl/load_store_local_shared.cpp
@@ -0,0 +1,197 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"

namespace Shader::Maxwell {
namespace {
enum class Size : u64 {
    U8,
    S8,
    U16,
    S16,
    B32,
    B64,
    B128,
};

IR::U32 Offset(TranslatorVisitor& v, u64 insn) {
    union {
        u64 raw;
        BitField<8, 8, IR::Reg> offset_reg;
        BitField<20, 24, u64> absolute_offset;
        BitField<20, 24, s64> relative_offset;
    } const encoding{insn};

    if (encoding.offset_reg == IR::Reg::RZ) {
        return v.ir.Imm32(static_cast<u32>(encoding.absolute_offset));
    } else {
        const s32 relative{static_cast<s32>(encoding.relative_offset.Value())};
        return v.ir.IAdd(v.X(encoding.offset_reg), v.ir.Imm32(relative));
    }
}
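
Note: the same 24-bit field (bits 20..43) is read two ways: as an unsigned absolute offset when the base register is RZ, and as a sign-extended relative displacement otherwise. A worked example (hypothetical encodings, assuming RZ is the zero register):

// Hypothetical decodings of the union above:
//   offset_reg == RZ, bits 20..43 = 0x000010
//     -> Offset() returns Imm32(0x10)             (absolute addressing)
//   offset_reg == R2, bits 20..43 = 0xfffffc
//     -> relative_offset sign-extends to -4
//     -> Offset() returns IAdd(X(R2), Imm32(-4))  (base + displacement)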

std::pair<int, bool> GetSize(u64 insn) {
    union {
        u64 raw;
        BitField<48, 3, Size> size;
    } const encoding{insn};

    switch (encoding.size) {
    case Size::U8:
        return {8, false};
    case Size::S8:
        return {8, true};
    case Size::U16:
        return {16, false};
    case Size::S16:
        return {16, true};
    case Size::B32:
        return {32, false};
    case Size::B64:
        return {64, false};
    case Size::B128:
        return {128, false};
    default:
        throw NotImplementedException("Invalid size {}", encoding.size.Value());
    }
}

IR::Reg Reg(u64 insn) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> reg;
    } const encoding{insn};

    return encoding.reg;
}

IR::U32 ByteOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(24));
}

IR::U32 ShortOffset(IR::IREmitter& ir, const IR::U32& offset) {
    return ir.BitwiseAnd(ir.ShiftLeftLogical(offset, ir.Imm32(3)), ir.Imm32(16));
}
} // Anonymous namespace
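
Note: ByteOffset and ShortOffset mirror the SPIR-V fallback arithmetic (ExtractArgs in emit_spirv_shared_memory.cpp): they compute the bit position of a byte or halfword within its containing 32-bit word. Worked numbers, for illustration:

//   offset = 5: ByteOffset  -> (5 << 3) & 24 = 8    (second byte of word 5 >> 2 == 1)
//   offset = 6: ShortOffset -> (6 << 3) & 16 = 16   (upper halfword of word 6 >> 2 == 1)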

void TranslatorVisitor::LDL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    switch (bit_size) {
    case 8: {
        const IR::U32 bit{ByteOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(8), is_signed));
        break;
    }
    case 16: {
        const IR::U32 bit{ShortOffset(ir, offset)};
        X(dest, ir.BitFieldExtract(ir.LoadLocal(word_offset), bit, ir.Imm32(16), is_signed));
        break;
    }
    case 32:
    case 64:
    case 128:
        if (!IR::IsAligned(dest, bit_size / 32)) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        X(dest, ir.LoadLocal(word_offset));
        for (int i = 1; i < bit_size / 32; ++i) {
            X(dest + i, ir.LoadLocal(ir.IAdd(word_offset, ir.Imm32(i))));
        }
        break;
    }
}

void TranslatorVisitor::LDS(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg dest{Reg(insn)};
    const auto [bit_size, is_signed]{GetSize(insn)};
    const IR::Value value{ir.LoadShared(bit_size, is_signed, offset)};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        X(dest, IR::U32{value});
        break;
    case 64:
    case 128:
        if (!IR::IsAligned(dest, bit_size / 32)) {
            throw NotImplementedException("Unaligned destination register {}", dest);
        }
        for (int element = 0; element < bit_size / 32; ++element) {
            X(dest + element, IR::U32{ir.CompositeExtract(value, element)});
        }
        break;
    }
}

void TranslatorVisitor::STL(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::U32 word_offset{ir.ShiftRightArithmetic(offset, ir.Imm32(2))};

    const IR::Reg reg{Reg(insn)};
    const IR::U32 src{X(reg)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8: {
        const IR::U32 bit{ByteOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(8))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 16: {
        const IR::U32 bit{ShortOffset(ir, offset)};
        const IR::U32 value{ir.BitFieldInsert(ir.LoadLocal(word_offset), src, bit, ir.Imm32(16))};
        ir.WriteLocal(word_offset, value);
        break;
    }
    case 32:
    case 64:
    case 128:
        if (!IR::IsAligned(reg, bit_size / 32)) {
            throw NotImplementedException("Unaligned source register");
        }
        ir.WriteLocal(word_offset, src);
        for (int i = 1; i < bit_size / 32; ++i) {
            ir.WriteLocal(ir.IAdd(word_offset, ir.Imm32(i)), X(reg + i));
        }
        break;
    }
}

void TranslatorVisitor::STS(u64 insn) {
    const IR::U32 offset{Offset(*this, insn)};
    const IR::Reg reg{Reg(insn)};
    const int bit_size{GetSize(insn).first};
    switch (bit_size) {
    case 8:
    case 16:
    case 32:
        ir.WriteShared(bit_size, offset, X(reg));
        break;
    case 64:
        if (!IR::IsAligned(reg, 2)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        ir.WriteShared(64, offset, ir.CompositeConstruct(X(reg), X(reg + 1)));
        break;
    case 128: {
        if (!IR::IsAligned(reg, 4)) {
            throw NotImplementedException("Unaligned source register {}", reg);
        }
        const IR::Value vector{ir.CompositeConstruct(X(reg), X(reg + 1), X(reg + 2), X(reg + 3))};
        ir.WriteShared(128, offset, vector);
        break;
    }
    }
}

} // namespace Shader::Maxwell