|
|
@ -6,14 +6,25 @@ |
|
|
|
|
|
|
|
|
#include <array> |
|
|
#include <array> |
|
|
#include <cstddef> |
|
|
#include <cstddef> |
|
|
|
|
|
#include <vector> |
|
|
|
|
|
#include "common/bit_field.h" |
|
|
#include "common/common_funcs.h" |
|
|
#include "common/common_funcs.h" |
|
|
#include "common/common_types.h" |
|
|
#include "common/common_types.h" |
|
|
|
|
|
#include "video_core/engines/engine_upload.h" |
|
|
#include "video_core/gpu.h" |
|
|
#include "video_core/gpu.h" |
|
|
|
|
|
|
|
|
|
|
|
namespace Core { |
|
|
|
|
|
class System; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
namespace Tegra { |
|
|
namespace Tegra { |
|
|
class MemoryManager; |
|
|
class MemoryManager; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
namespace VideoCore { |
|
|
|
|
|
class RasterizerInterface; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
namespace Tegra::Engines { |
|
|
namespace Tegra::Engines { |
|
|
|
|
|
|
|
|
#define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
|
|
#define KEPLER_COMPUTE_REG_INDEX(field_name) \ |
|
|
@ -21,7 +32,8 @@ namespace Tegra::Engines { |
|
|
|
|
|
|
|
|
class KeplerCompute final { |
|
|
class KeplerCompute final { |
|
|
public: |
|
|
public: |
|
|
explicit KeplerCompute(MemoryManager& memory_manager); |
|
|
|
|
|
|
|
|
explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer, |
|
|
|
|
|
MemoryManager& memory_manager); |
|
|
~KeplerCompute(); |
|
|
~KeplerCompute(); |
|
|
|
|
|
|
|
|
static constexpr std::size_t NumConstBuffers = 8; |
|
|
static constexpr std::size_t NumConstBuffers = 8; |
|
|
@ -31,30 +43,183 @@ public: |
|
|
|
|
|
|
|
|
union { |
|
|
union { |
|
|
struct { |
|
|
struct { |
|
|
INSERT_PADDING_WORDS(0xAF); |
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x60); |
|
|
|
|
|
|
|
|
|
|
|
Upload::Data upload; |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 1, u32> linear; |
|
|
|
|
|
}; |
|
|
|
|
|
} exec_upload; |
|
|
|
|
|
|
|
|
|
|
|
u32 data_upload; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x3F); |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 address; |
|
|
|
|
|
GPUVAddr Address() const { |
|
|
|
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address) << 8)); |
|
|
|
|
|
} |
|
|
|
|
|
} launch_desc_loc; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x1); |
|
|
|
|
|
|
|
|
u32 launch; |
|
|
u32 launch; |
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0xC48); |
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x4A7); |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 address_high; |
|
|
|
|
|
u32 address_low; |
|
|
|
|
|
u32 limit; |
|
|
|
|
|
GPUVAddr Address() const { |
|
|
|
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
|
|
|
|
|
address_low); |
|
|
|
|
|
} |
|
|
|
|
|
} tsc; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x3); |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 address_high; |
|
|
|
|
|
u32 address_low; |
|
|
|
|
|
u32 limit; |
|
|
|
|
|
GPUVAddr Address() const { |
|
|
|
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
|
|
|
|
|
address_low); |
|
|
|
|
|
} |
|
|
|
|
|
} tic; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x22); |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 address_high; |
|
|
|
|
|
u32 address_low; |
|
|
|
|
|
GPUVAddr Address() const { |
|
|
|
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | |
|
|
|
|
|
address_low); |
|
|
|
|
|
} |
|
|
|
|
|
} code_loc; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x3FE); |
|
|
|
|
|
|
|
|
|
|
|
u32 texture_const_buffer_index; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x374); |
|
|
}; |
|
|
}; |
|
|
std::array<u32, NUM_REGS> reg_array; |
|
|
std::array<u32, NUM_REGS> reg_array; |
|
|
}; |
|
|
}; |
|
|
} regs{}; |
|
|
} regs{}; |
|
|
|
|
|
|
|
|
|
|
|
struct LaunchParams { |
|
|
|
|
|
static constexpr std::size_t NUM_LAUNCH_PARAMETERS = 0x40; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x8); |
|
|
|
|
|
|
|
|
|
|
|
u32 program_start; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x2); |
|
|
|
|
|
|
|
|
|
|
|
BitField<30, 1, u32> linked_tsc; |
|
|
|
|
|
|
|
|
|
|
|
BitField<0, 31, u32> grid_dim_x; |
|
|
|
|
|
|
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 16, u32> grid_dim_y; |
|
|
|
|
|
BitField<16, 16, u32> grid_dim_z; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x3); |
|
|
|
|
|
|
|
|
|
|
|
BitField<0, 16, u32> shared_alloc; |
|
|
|
|
|
|
|
|
|
|
|
BitField<0, 31, u32> block_dim_x; |
|
|
|
|
|
|
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 16, u32> block_dim_y; |
|
|
|
|
|
BitField<16, 16, u32> block_dim_z; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 8, u32> const_buffer_enable_mask; |
|
|
|
|
|
BitField<29, 2, u32> cache_layout; |
|
|
|
|
|
} memory_config; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x8); |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 address_low; |
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 8, u32> address_high; |
|
|
|
|
|
BitField<15, 17, u32> size; |
|
|
|
|
|
}; |
|
|
|
|
|
GPUVAddr Address() const { |
|
|
|
|
|
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high.Value()) << 32) | |
|
|
|
|
|
address_low); |
|
|
|
|
|
} |
|
|
|
|
|
} const_buffer_config[8]; |
|
|
|
|
|
|
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 20, u32> local_pos_alloc; |
|
|
|
|
|
BitField<27, 5, u32> barrier_alloc; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
union { |
|
|
|
|
|
BitField<0, 20, u32> local_neg_alloc; |
|
|
|
|
|
BitField<24, 5, u32> gpr_alloc; |
|
|
|
|
|
}; |
|
|
|
|
|
|
|
|
|
|
|
INSERT_PADDING_WORDS(0x11); |
|
|
|
|
|
} launch_description; |
|
|
|
|
|
|
|
|
|
|
|
struct { |
|
|
|
|
|
u32 write_offset = 0; |
|
|
|
|
|
u32 copy_size = 0; |
|
|
|
|
|
std::vector<u8> inner_buffer; |
|
|
|
|
|
} state{}; |
|
|
|
|
|
|
|
|
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
|
|
static_assert(sizeof(Regs) == Regs::NUM_REGS * sizeof(u32), |
|
|
"KeplerCompute Regs has wrong size"); |
|
|
"KeplerCompute Regs has wrong size"); |
|
|
|
|
|
|
|
|
|
|
|
static_assert(sizeof(LaunchParams) == LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32), |
|
|
|
|
|
"KeplerCompute LaunchParams has wrong size"); |
|
|
|
|
|
|
|
|
/// Write the value to the register identified by method. |
|
|
/// Write the value to the register identified by method. |
|
|
void CallMethod(const GPU::MethodCall& method_call); |
|
|
void CallMethod(const GPU::MethodCall& method_call); |
|
|
|
|
|
|
|
|
private: |
|
|
private: |
|
|
|
|
|
Core::System& system; |
|
|
|
|
|
VideoCore::RasterizerInterface& rasterizer; |
|
|
MemoryManager& memory_manager; |
|
|
MemoryManager& memory_manager; |
|
|
|
|
|
Upload::State upload_state; |
|
|
|
|
|
|
|
|
|
|
|
void ProcessLaunch(); |
|
|
}; |
|
|
}; |
|
|
|
|
|
|
|
|
#define ASSERT_REG_POSITION(field_name, position) \ |
|
|
#define ASSERT_REG_POSITION(field_name, position) \ |
|
|
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
|
|
static_assert(offsetof(KeplerCompute::Regs, field_name) == position * 4, \ |
|
|
"Field " #field_name " has invalid position") |
|
|
"Field " #field_name " has invalid position") |
|
|
|
|
|
|
|
|
|
|
|
#define ASSERT_LAUNCH_PARAM_POSITION(field_name, position) \ |
|
|
|
|
|
static_assert(offsetof(KeplerCompute::LaunchParams, field_name) == position * 4, \ |
|
|
|
|
|
"Field " #field_name " has invalid position") |
|
|
|
|
|
|
|
|
|
|
|
ASSERT_REG_POSITION(upload, 0x60); |
|
|
|
|
|
ASSERT_REG_POSITION(exec_upload, 0x6C); |
|
|
|
|
|
ASSERT_REG_POSITION(data_upload, 0x6D); |
|
|
ASSERT_REG_POSITION(launch, 0xAF); |
|
|
ASSERT_REG_POSITION(launch, 0xAF); |
|
|
|
|
|
ASSERT_REG_POSITION(tsc, 0x557); |
|
|
|
|
|
ASSERT_REG_POSITION(tic, 0x55D); |
|
|
|
|
|
ASSERT_REG_POSITION(code_loc, 0x582); |
|
|
|
|
|
ASSERT_REG_POSITION(texture_const_buffer_index, 0x982); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(program_start, 0x8); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(grid_dim_x, 0xC); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(shared_alloc, 0x11); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(block_dim_x, 0x12); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(memory_config, 0x14); |
|
|
|
|
|
ASSERT_LAUNCH_PARAM_POSITION(const_buffer_config, 0x1D); |
|
|
|
|
|
|
|
|
#undef ASSERT_REG_POSITION |
|
|
#undef ASSERT_REG_POSITION |
|
|
|
|
|
|
|
|
|