Browse Source

GPU: Delay Fences.

nce_cpp
Fernando Sahmkow 6 years ago
parent
commit
fda21f5a93
  1. 1
      src/video_core/dma_pusher.cpp
  2. 10
      src/video_core/engines/maxwell_3d.cpp
  3. 4
      src/video_core/engines/maxwell_3d.h
  4. 4
      src/video_core/gpu.cpp
  5. 1
      src/video_core/gpu.h
  6. 2
      src/video_core/gpu_thread.cpp

1
src/video_core/dma_pusher.cpp

@ -34,6 +34,7 @@ void DmaPusher::DispatchCalls() {
}
gpu.FlushCommands();
gpu.SyncGuestHost();
gpu.OnCommandListEnd();
}
bool DmaPusher::Step() {

10
src/video_core/engines/maxwell_3d.cpp

@ -397,6 +397,14 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) {
}
}
/// Writes out every queued fence and then empties the queue.
/// Each entry in delay_fences is an (address, payload) pair; the payload is narrowed to 32 bits
/// and written through memory_manager at the paired address.
void Maxwell3D::ReleaseFences() {
    // Bind the pair's members by const reference instead of copying the pair twice per iteration.
    for (const auto& [addr, payload] : delay_fences) {
        memory_manager.Write<u32>(addr, static_cast<u32>(payload));
    }
    delay_fences.clear();
}
void Maxwell3D::ProcessQueryGet() {
// TODO(Subv): Support the other query units.
ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop,
@ -407,7 +415,7 @@ void Maxwell3D::ProcessQueryGet() {
rasterizer.FlushCommands();
rasterizer.SyncGuestHost();
const u64 result = regs.query.query_sequence;
StampQueryResult(result, regs.query.query_get.short_query == 0);
delay_fences.emplace_back(regs.query.QueryAddress(), result);
break;
}
case Regs::QueryOperation::Acquire:

4
src/video_core/engines/maxwell_3d.h

@ -1427,6 +1427,8 @@ public:
Tables tables{};
} dirty;
/// Writes every queued (address, payload) fence entry to memory and clears the queue.
void ReleaseFences();
private:
void InitializeRegisterDefaults();
@ -1467,6 +1469,8 @@ private:
std::array<u8, Regs::NUM_REGS> dirty_pointers{};
/// Fences queued by ProcessQueryGet as (GPU virtual address, payload) pairs; they are written
/// out and cleared by ReleaseFences.
std::vector<std::pair<GPUVAddr, u64>> delay_fences;
/// Retrieves information about a specific TIC entry from the TIC buffer.
Texture::TICEntry GetTICEntry(u32 tic_index) const;

4
src/video_core/gpu.cpp

@ -145,6 +145,10 @@ void GPU::FlushCommands() {
/// Forwards the call to SyncGuestHost() on the renderer's rasterizer.
void GPU::SyncGuestHost() {
renderer->Rasterizer().SyncGuestHost();
}
/// Calls ReleaseFences() on the Maxwell3D engine.
/// In the visible code this is invoked from DmaPusher::DispatchCalls, after FlushCommands() and
/// SyncGuestHost().
void GPU::OnCommandListEnd() {
maxwell_3d->ReleaseFences();
}
// Note that, traditionally, methods are treated as 4-byte addressable locations, and hence
// their numbers are written down multiplied by 4 in Docs. Here we do not multiply by 4.
// So the values you see in docs might be multiplied by 4.

1
src/video_core/gpu.h

@ -157,6 +157,7 @@ public:
void FlushCommands();
void SyncGuestHost();
/// Releases the fences queued on the Maxwell3D engine (calls Maxwell3D::ReleaseFences).
void OnCommandListEnd();
/// Returns a reference to the Maxwell3D GPU engine.
Engines::Maxwell3D& Maxwell3D();

2
src/video_core/gpu_thread.cpp

@ -78,7 +78,7 @@ void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
}
/// Flushes the region [addr, addr + size): as shown, it calls FlushRegion on the system
/// renderer's rasterizer and then pushes a FlushRegionCommand with the same address and size.
/// NOTE(review): this text comes from a diff view whose hunk header reports equal old and new
/// line counts, so one of the two statements below is probably the removed line rather than
/// live code. Confirm against the actual file.
void ThreadManager::FlushRegion(VAddr addr, u64 size) {
system.Renderer().Rasterizer().FlushRegion(addr, size);
PushCommand(FlushRegionCommand(addr, size));
}
void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {

Loading…
Cancel
Save