Browse Source
Merge pull request #2601 from FernandoS27/texture_cache
Implement a new Texture Cachepull/15/merge
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
63 changed files with 4196 additions and 3269 deletions
-
1CMakeModules/GenerateSCMRev.cmake
-
2src/common/CMakeLists.txt
-
6src/common/alignment.h
-
21src/common/binary_find.h
-
44src/common/bit_util.h
-
1src/common/common_funcs.h
-
16src/video_core/CMakeLists.txt
-
6src/video_core/engines/engine_upload.cpp
-
6src/video_core/engines/engine_upload.h
-
25src/video_core/engines/fermi_2d.cpp
-
53src/video_core/engines/fermi_2d.h
-
12src/video_core/engines/maxwell_3d.cpp
-
2src/video_core/engines/maxwell_dma.cpp
-
4src/video_core/engines/maxwell_dma.h
-
83src/video_core/engines/shader_bytecode.h
-
5src/video_core/memory_manager.cpp
-
7src/video_core/rasterizer_interface.h
-
75src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
-
68src/video_core/renderer_opengl/gl_framebuffer_cache.h
-
178src/video_core/renderer_opengl/gl_rasterizer.cpp
-
26src/video_core/renderer_opengl/gl_rasterizer.h
-
1362src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
-
572src/video_core/renderer_opengl/gl_rasterizer_cache.h
-
24src/video_core/renderer_opengl/gl_resource_manager.cpp
-
28src/video_core/renderer_opengl/gl_resource_manager.h
-
91src/video_core/renderer_opengl/gl_shader_cache.cpp
-
17src/video_core/renderer_opengl/gl_shader_cache.h
-
116src/video_core/renderer_opengl/gl_shader_decompiler.cpp
-
2src/video_core/renderer_opengl/gl_shader_decompiler.h
-
38src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
-
76src/video_core/renderer_opengl/gl_shader_disk_cache.h
-
5src/video_core/renderer_opengl/gl_stream_buffer.cpp
-
3src/video_core/renderer_opengl/gl_stream_buffer.h
-
614src/video_core/renderer_opengl/gl_texture_cache.cpp
-
143src/video_core/renderer_opengl/gl_texture_cache.h
-
1src/video_core/renderer_opengl/renderer_opengl.cpp
-
4src/video_core/renderer_opengl/utils.cpp
-
2src/video_core/renderer_opengl/utils.h
-
7src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
-
1src/video_core/shader/decode.cpp
-
120src/video_core/shader/decode/image.cpp
-
45src/video_core/shader/decode/texture.cpp
-
52src/video_core/shader/node.h
-
14src/video_core/shader/shader_ir.h
-
8src/video_core/surface.cpp
-
225src/video_core/surface.h
-
386src/video_core/texture_cache.cpp
-
586src/video_core/texture_cache.h
-
36src/video_core/texture_cache/copy_params.h
-
300src/video_core/texture_cache/surface_base.cpp
-
317src/video_core/texture_cache/surface_base.h
-
334src/video_core/texture_cache/surface_params.cpp
-
286src/video_core/texture_cache/surface_params.h
-
23src/video_core/texture_cache/surface_view.cpp
-
67src/video_core/texture_cache/surface_view.h
-
814src/video_core/texture_cache/texture_cache.h
-
14src/video_core/textures/convert.cpp
-
7src/video_core/textures/convert.h
-
42src/video_core/textures/decoders.cpp
-
4src/video_core/textures/decoders.h
-
31src/video_core/textures/texture.h
-
3src/yuzu/main.cpp
-
4src/yuzu_cmd/emu_window/emu_window_sdl2_gl.cpp
@ -0,0 +1,21 @@ |
|||
// Copyright 2019 yuzu emulator team
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <algorithm>
#include <functional> // for std::less<> (transparent comparator used as the default Compare)

namespace Common {

/// Binary-searches [first, last) for an element equivalent to value under comp.
/// Returns an iterator to the first such element, or last when no element matches.
/// Requirements match std::lower_bound: the range must be partitioned by comp
/// with respect to value.
template <class ForwardIt, class T, class Compare = std::less<>>
ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
    // Note: BOTH type T and the type after ForwardIt is dereferenced
    // must be implicitly convertible to BOTH Type1 and Type2, used in Compare.
    // This is stricter than lower_bound requirement (see above)

    first = std::lower_bound(first, last, value, comp);
    // lower_bound finds the first element NOT ordered before value; it is an exact
    // match only if value is also not ordered before it.
    return first != last && !comp(value, *first) ? first : last;
}

} // namespace Common
@ -0,0 +1,75 @@ |
|||
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#include <tuple>

#include "common/cityhash.h"
#include "common/scope_exit.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
#include "video_core/renderer_opengl/gl_state.h"

namespace OpenGL {

using Maxwell = Tegra::Engines::Maxwell3D::Regs;

FramebufferCacheOpenGL::FramebufferCacheOpenGL() = default;

FramebufferCacheOpenGL::~FramebufferCacheOpenGL() = default;

GLuint FramebufferCacheOpenGL::GetFramebuffer(const FramebufferCacheKey& key) {
    // Look up (or default-insert) the slot for this configuration in one operation.
    const auto [it, inserted] = cache.try_emplace(key);
    if (inserted) {
        // First time this configuration is seen: build the framebuffer object.
        it->second = CreateFramebuffer(key);
    }
    return it->second.handle;
}

OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheKey& key) {
    OGLFramebuffer framebuffer;
    framebuffer.Create();

    // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
    local_state.draw.draw_framebuffer = framebuffer.handle;
    local_state.ApplyFramebufferState();

    if (!key.is_single_buffer) {
        // Multiple render targets: attach every valid color view at its slot index,
        // then declare the full draw-buffer list.
        for (std::size_t slot = 0; slot < Maxwell::NumRenderTargets; ++slot) {
            if (key.colors[slot]) {
                key.colors[slot]->Attach(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(slot),
                                         GL_DRAW_FRAMEBUFFER);
            }
        }
        glDrawBuffers(key.colors_count, key.color_attachments.data());
    } else if (key.color_attachments[0] != GL_NONE && key.colors[0]) {
        // Single-buffer path with a valid color view attached.
        key.colors[0]->Attach(key.color_attachments[0], GL_DRAW_FRAMEBUFFER);
        glDrawBuffer(key.color_attachments[0]);
    } else {
        // Single-buffer path without a color target.
        glDrawBuffer(GL_NONE);
    }

    // Depth (or combined depth-stencil) attachment, when present.
    if (key.zeta) {
        const GLenum zeta_attachment =
            key.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
        key.zeta->Attach(zeta_attachment, GL_DRAW_FRAMEBUFFER);
    }

    return framebuffer;
}

std::size_t FramebufferCacheKey::Hash() const {
    static_assert(sizeof(*this) % sizeof(u64) == 0, "Unaligned struct");
    // The key is u64-aligned with no trailing padding words, so hashing the raw
    // object representation is well defined.
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}

bool FramebufferCacheKey::operator==(const FramebufferCacheKey& rhs) const {
    const auto Fields = [](const FramebufferCacheKey& key) {
        return std::tie(key.is_single_buffer, key.stencil_enable, key.colors_count,
                        key.color_attachments, key.colors, key.zeta);
    };
    return Fields(*this) == Fields(rhs);
}

} // namespace OpenGL
@ -0,0 +1,68 @@ |
|||
// Copyright 2019 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.

#pragma once

#include <array>
#include <cstddef>
#include <unordered_map>

#include <glad/glad.h>

#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"

namespace OpenGL {

/// Key identifying a unique framebuffer attachment configuration.
/// Aligned to u64 so the raw object bytes can be hashed as whole u64 words (see Hash()).
struct alignas(sizeof(u64)) FramebufferCacheKey {
    bool is_single_buffer = false; // when true, only slot 0 of color_attachments/colors is used
    bool stencil_enable = false;   // selects GL_DEPTH_STENCIL_ATTACHMENT over GL_DEPTH_ATTACHMENT
    u16 colors_count = 0;          // number of entries handed to glDrawBuffers

    // Draw-buffer enum per render target slot (GL_COLOR_ATTACHMENTn or GL_NONE)
    std::array<GLenum, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> color_attachments{};
    // Texture view attached at each color slot; empty views are skipped
    std::array<View, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors;
    // Depth or depth-stencil view; may be empty
    View zeta;

    /// Hashes the raw bytes of this key (the struct must stay densely packed).
    std::size_t Hash() const;

    bool operator==(const FramebufferCacheKey& rhs) const;

    bool operator!=(const FramebufferCacheKey& rhs) const {
        return !operator==(rhs);
    }
};

} // namespace OpenGL

namespace std {

/// Allows FramebufferCacheKey to be used as an unordered_map key.
template <>
struct hash<OpenGL::FramebufferCacheKey> {
    std::size_t operator()(const OpenGL::FramebufferCacheKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std

namespace OpenGL {

/// Caches OpenGL framebuffer objects keyed by attachment configuration so each
/// unique combination of render targets is only created once.
class FramebufferCacheOpenGL {
public:
    FramebufferCacheOpenGL();
    ~FramebufferCacheOpenGL();

    /// Returns the handle of a framebuffer matching the key, creating it on a cache miss.
    GLuint GetFramebuffer(const FramebufferCacheKey& key);

private:
    /// Builds a new framebuffer object and attaches the views described by the key.
    OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);

    OpenGLState local_state; // scratch GL state used to bind the framebuffer while attaching
    std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};

} // namespace OpenGL
|||
1362
src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
File diff suppressed because it is too large
View File
@ -1,572 +0,0 @@ |
|||
// Copyright 2018 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <memory> |
|||
#include <string> |
|||
#include <tuple> |
|||
#include <vector> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/bit_util.h" |
|||
#include "common/common_types.h" |
|||
#include "common/hash.h" |
|||
#include "common/math_util.h" |
|||
#include "video_core/engines/fermi_2d.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/rasterizer_cache.h" |
|||
#include "video_core/renderer_opengl/gl_resource_manager.h" |
|||
#include "video_core/renderer_opengl/gl_shader_gen.h" |
|||
#include "video_core/surface.h" |
|||
#include "video_core/textures/decoders.h" |
|||
#include "video_core/textures/texture.h" |
|||
|
|||
namespace OpenGL { |
|||
|
|||
class CachedSurface; |
|||
using Surface = std::shared_ptr<CachedSurface>; |
|||
using SurfaceSurfaceRect_Tuple = std::tuple<Surface, Surface, Common::Rectangle<u32>>; |
|||
|
|||
using SurfaceTarget = VideoCore::Surface::SurfaceTarget; |
|||
using SurfaceType = VideoCore::Surface::SurfaceType; |
|||
using PixelFormat = VideoCore::Surface::PixelFormat; |
|||
using ComponentType = VideoCore::Surface::ComponentType; |
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs; |
|||
|
|||
/// Describes a guest GPU surface (texture, render target, depth buffer or copy target)
/// and carries the size/layout arithmetic needed to mirror it as an OpenGL texture.
struct SurfaceParams {
    /// Origin of the surface; recorded for debugging and cleared when used as a reserve key.
    enum class SurfaceClass {
        Uploaded,     // created from a guest texture upload
        RenderTarget, // created as a color render target
        DepthBuffer,  // created as a depth/stencil buffer
        Copy,         // created as a Fermi2D copy destination
    };

    /// Returns a human-readable name for a surface target, used in log output.
    static std::string SurfaceTargetName(SurfaceTarget target) {
        switch (target) {
        case SurfaceTarget::Texture1D:
            return "Texture1D";
        case SurfaceTarget::Texture2D:
            return "Texture2D";
        case SurfaceTarget::Texture3D:
            return "Texture3D";
        case SurfaceTarget::Texture1DArray:
            return "Texture1DArray";
        case SurfaceTarget::Texture2DArray:
            return "Texture2DArray";
        case SurfaceTarget::TextureCubemap:
            return "TextureCubemap";
        case SurfaceTarget::TextureCubeArray:
            return "TextureCubeArray";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
            UNREACHABLE();
            return fmt::format("TextureUnknown({})", static_cast<u32>(target));
        }
    }

    /// Bits per pixel of this surface's pixel format.
    u32 GetFormatBpp() const {
        return VideoCore::Surface::GetFormatBpp(pixel_format);
    }

    /// Returns the rectangle corresponding to this surface
    Common::Rectangle<u32> GetRect(u32 mip_level = 0) const;

    /// Returns the total size of this surface in bytes, adjusted for compression
    std::size_t SizeInBytesRaw(bool ignore_tiled = false) const {
        const u32 compression_factor{GetCompressionFactor(pixel_format)};
        const u32 bytes_per_pixel{GetBytesPerPixel(pixel_format)};
        const size_t uncompressed_size{
            Tegra::Texture::CalculateSize((ignore_tiled ? false : is_tiled), bytes_per_pixel, width,
                                          height, depth, block_height, block_depth)};

        // Divide by compression_factor^2, as height and width are factored by this
        return uncompressed_size / (compression_factor * compression_factor);
    }

    /// Returns the size of this surface as an OpenGL texture in bytes
    std::size_t SizeInBytesGL() const {
        return SizeInBytesRaw(true);
    }

    /// Returns the size of this surface as a cube face in bytes
    std::size_t SizeInBytesCubeFace() const {
        return size_in_bytes / 6;
    }

    /// Returns the size of this surface as an OpenGL cube face in bytes
    std::size_t SizeInBytesCubeFaceGL() const {
        return size_in_bytes_gl / 6;
    }

    /// Returns the exact size of memory occupied by the texture in VRAM, including mipmaps.
    std::size_t MemorySize() const {
        std::size_t size = InnerMemorySize(false, is_layered);
        if (is_layered)
            return size * depth;
        return size;
    }

    /// Returns true if the parameters constitute a valid rasterizer surface.
    bool IsValid() const {
        return gpu_addr && host_ptr && height && width;
    }

    /// Returns the exact size of the memory occupied by a layer in a texture in VRAM, including
    /// mipmaps.
    std::size_t LayerMemorySize() const {
        return InnerMemorySize(false, true);
    }

    /// Returns the size of a layer of this surface in OpenGL.
    std::size_t LayerSizeGL(u32 mip_level) const {
        return InnerMipmapMemorySize(mip_level, true, is_layered, false);
    }

    /// Size of one mip level in the OpenGL representation; multiplied across layers when layered.
    std::size_t GetMipmapSizeGL(u32 mip_level, bool ignore_compressed = true) const {
        std::size_t size = InnerMipmapMemorySize(mip_level, true, is_layered, ignore_compressed);
        if (is_layered)
            return size * depth;
        return size;
    }

    /// Byte offset of a mip level in guest memory (sum of all preceding levels).
    std::size_t GetMipmapLevelOffset(u32 mip_level) const {
        std::size_t offset = 0;
        for (u32 i = 0; i < mip_level; i++)
            offset += InnerMipmapMemorySize(i, false, is_layered);
        return offset;
    }

    /// Byte offset of a mip level in the OpenGL representation.
    std::size_t GetMipmapLevelOffsetGL(u32 mip_level) const {
        std::size_t offset = 0;
        for (u32 i = 0; i < mip_level; i++)
            offset += InnerMipmapMemorySize(i, true, is_layered);
        return offset;
    }

    /// Size of a single mip level in guest memory, without the layer multiplier.
    std::size_t GetMipmapSingleSize(u32 mip_level) const {
        return InnerMipmapMemorySize(mip_level, false, is_layered);
    }

    /// Width of the given mip level, clamped to at least 1 texel.
    u32 MipWidth(u32 mip_level) const {
        return std::max(1U, width >> mip_level);
    }

    /// Mip width aligned up to a GOB row (64 bytes wide) for this format's bpp.
    u32 MipWidthGobAligned(u32 mip_level) const {
        return Common::AlignUp(std::max(1U, width >> mip_level), 64U * 8U / GetFormatBpp());
    }

    /// Height of the given mip level, clamped to at least 1 texel.
    u32 MipHeight(u32 mip_level) const {
        return std::max(1U, height >> mip_level);
    }

    /// Depth of the given mip level; layered surfaces keep their layer count across mips.
    u32 MipDepth(u32 mip_level) const {
        return is_layered ? depth : std::max(1U, depth >> mip_level);
    }

    // Auto block resizing algorithm from:
    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
    u32 MipBlockHeight(u32 mip_level) const {
        if (mip_level == 0)
            return block_height;
        u32 alt_height = MipHeight(mip_level);
        u32 h = GetDefaultBlockHeight(pixel_format);
        u32 blocks_in_y = (alt_height + h - 1) / h;
        // Halve the block height until at most 4x the GOB rows remain covered.
        u32 bh = 16;
        while (bh > 1 && blocks_in_y <= bh * 4) {
            bh >>= 1;
        }
        return bh;
    }

    /// Block depth for the given mip level, shrunk as the mip depth shrinks (see Mesa link above).
    u32 MipBlockDepth(u32 mip_level) const {
        if (mip_level == 0) {
            return block_depth;
        }

        if (is_layered) {
            return 1;
        }

        const u32 mip_depth = MipDepth(mip_level);
        u32 bd = 32;
        while (bd > 1 && mip_depth * 2 <= bd) {
            bd >>= 1;
        }

        if (bd == 32) {
            const u32 bh = MipBlockHeight(mip_level);
            if (bh >= 4) {
                return 16;
            }
        }

        return bd;
    }

    /// Largest power-of-two alignment that divides the mip row pitch in bytes.
    u32 RowAlign(u32 mip_level) const {
        const u32 m_width = MipWidth(mip_level);
        const u32 bytes_per_pixel = GetBytesPerPixel(pixel_format);
        const u32 l2 = Common::CountTrailingZeroes32(m_width * bytes_per_pixel);
        return (1U << l2);
    }

    /// Creates SurfaceParams from a texture configuration
    static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config,
                                          const GLShader::SamplerEntry& entry);

    /// Creates SurfaceParams from a framebuffer configuration
    static SurfaceParams CreateForFramebuffer(std::size_t index);

    /// Creates SurfaceParams for a depth buffer configuration
    static SurfaceParams CreateForDepthBuffer(
        u32 zeta_width, u32 zeta_height, GPUVAddr zeta_address, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceParams for a Fermi2D surface copy
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    /// Checks if surfaces are compatible for caching
    bool IsCompatibleSurface(const SurfaceParams& other) const {
        if (std::tie(pixel_format, type, width, height, target, depth, is_tiled) ==
            std::tie(other.pixel_format, other.type, other.width, other.height, other.target,
                     other.depth, other.is_tiled)) {
            // Tiling parameters only matter when both surfaces are tiled.
            if (!is_tiled)
                return true;
            return std::tie(block_height, block_depth, tile_width_spacing) ==
                   std::tie(other.block_height, other.block_depth, other.tile_width_spacing);
        }
        return false;
    }

    /// Initializes parameters for caching, should be called after everything has been initialized
    void InitCacheParameters(GPUVAddr gpu_addr);

    /// Short target name used when composing IdentityString() for debugging.
    std::string TargetName() const {
        switch (target) {
        case SurfaceTarget::Texture1D:
            return "1D";
        case SurfaceTarget::Texture2D:
            return "2D";
        case SurfaceTarget::Texture3D:
            return "3D";
        case SurfaceTarget::Texture1DArray:
            return "1DArray";
        case SurfaceTarget::Texture2DArray:
            return "2DArray";
        case SurfaceTarget::TextureCubemap:
            return "Cube";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
            UNREACHABLE();
            return fmt::format("TUK({})", static_cast<u32>(target));
        }
    }

    /// Short class name used when composing IdentityString() for debugging.
    std::string ClassName() const {
        switch (identity) {
        case SurfaceClass::Uploaded:
            return "UP";
        case SurfaceClass::RenderTarget:
            return "RT";
        case SurfaceClass::DepthBuffer:
            return "DB";
        case SurfaceClass::Copy:
            return "CP";
        default:
            LOG_CRITICAL(HW_GPU, "Unimplemented surface_class={}", static_cast<u32>(identity));
            UNREACHABLE();
            return fmt::format("CUK({})", static_cast<u32>(identity));
        }
    }

    /// Compact debug label, e.g. "RT_2D_T" for a tiled 2D render target.
    std::string IdentityString() const {
        return ClassName() + '_' + TargetName() + '_' + (is_tiled ? 'T' : 'L');
    }

    bool is_tiled;          // block-linear (tiled) vs pitch-linear guest layout
    u32 block_width;
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    PixelFormat pixel_format;
    ComponentType component_type;
    SurfaceType type;
    u32 width;
    u32 height;
    u32 depth;              // depth for 3D targets, layer count for layered targets
    u32 unaligned_height;
    u32 pitch;
    SurfaceTarget target;
    SurfaceClass identity;
    u32 max_mip_level;
    bool is_layered;
    bool is_array;
    bool srgb_conversion;
    // Parameters used for caching
    u8* host_ptr;
    GPUVAddr gpu_addr;
    std::size_t size_in_bytes;
    std::size_t size_in_bytes_gl;

    // Render target specific parameters, not used in caching
    struct {
        u32 index;
        u32 array_mode;
        u32 volume;
        u32 layer_stride;
        u32 base_layer;
    } rt;

private:
    // Size of a single mip level; force_gl selects the GL layout, layer_only drops the
    // layer multiplier, uncompressed ignores block compression.
    std::size_t InnerMipmapMemorySize(u32 mip_level, bool force_gl = false, bool layer_only = false,
                                      bool uncompressed = false) const;
    // Total size across all mip levels, with the same flag semantics as above.
    std::size_t InnerMemorySize(bool force_gl = false, bool layer_only = false,
                                bool uncompressed = false) const;
};
|||
|
|||
}; // namespace OpenGL |
|||
|
|||
/// Hashable variation of SurfaceParams, used for a key in the surface cache
struct SurfaceReserveKey : Common::HashableStruct<OpenGL::SurfaceParams> {
    /// Builds a reserve key from params, zeroing the fields that must not
    /// influence cache identity.
    static SurfaceReserveKey Create(const OpenGL::SurfaceParams& params) {
        SurfaceReserveKey res;
        res.state = params;
        res.state.identity = {}; // Ignore the origin of the texture
        res.state.gpu_addr = {}; // Ignore GPU vaddr in caching
        res.state.rt = {};       // Ignore rt config in caching
        return res;
    }
};
namespace std {
/// Allows SurfaceReserveKey to be used as an unordered_map key.
template <>
struct hash<SurfaceReserveKey> {
    std::size_t operator()(const SurfaceReserveKey& k) const {
        return k.Hash();
    }
};
} // namespace std
|||
|
|||
namespace OpenGL { |
|||
|
|||
class RasterizerOpenGL; |
|||
|
|||
// This is used to store temporary big buffers,
// instead of creating/destroying all the time
struct RasterizerTemporaryMemory {
    // Reusable scratch buffers for surface load/flush/upload operations
    std::vector<std::vector<u8>> gl_buffer;
};
|||
|
|||
/// A guest surface mirrored as an OpenGL texture, tracked by the rasterizer cache.
class CachedSurface final : public RasterizerCacheObject {
public:
    explicit CachedSurface(const SurfaceParams& params);

    VAddr GetCpuAddr() const override {
        return cpu_addr;
    }

    std::size_t GetSizeInBytes() const override {
        return cached_size_in_bytes;
    }

    /// Exact VRAM footprint of the surface, including mipmaps (see SurfaceParams::MemorySize).
    std::size_t GetMemorySize() const {
        return memory_size;
    }

    const OGLTexture& Texture() const {
        return texture;
    }

    /// Returns the texture, or a layered/non-layered alias view when the caller's
    /// array-ness does not match this surface's.
    const OGLTexture& Texture(bool as_array) {
        if (params.is_array == as_array) {
            return texture;
        } else {
            // Lazily create the alternate view only when first requested.
            EnsureTextureDiscrepantView();
            return discrepant_view;
        }
    }

    GLenum Target() const {
        return gl_target;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    // Read/Write data in Switch memory to/from gl_buffer
    void LoadGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);
    void FlushGLBuffer(RasterizerTemporaryMemory& res_cache_tmp_mem);

    // Upload data in gl_buffer to this surface's texture
    void UploadGLTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, GLuint read_fb_handle,
                         GLuint draw_fb_handle);

    /// Updates the stored component swizzle used when sampling this texture.
    void UpdateSwizzle(Tegra::Texture::SwizzleSource swizzle_x,
                       Tegra::Texture::SwizzleSource swizzle_y,
                       Tegra::Texture::SwizzleSource swizzle_z,
                       Tegra::Texture::SwizzleSource swizzle_w);

    /// Flags this surface as having been reinterpreted with a different format/layout.
    void MarkReinterpreted() {
        reinterpreted = true;
    }

    bool IsReinterpreted() const {
        return reinterpreted;
    }

    /// Requests (or cancels) a reload of the surface data from guest memory.
    void MarkForReload(bool reload) {
        must_reload = reload;
    }

    bool MustReload() const {
        return must_reload;
    }

    /// True when this surface originated from a guest texture upload.
    bool IsUploaded() const {
        return params.identity == SurfaceParams::SurfaceClass::Uploaded;
    }

private:
    // Uploads a single mip level of gl_buffer into the GL texture.
    void UploadGLMipmapTexture(RasterizerTemporaryMemory& res_cache_tmp_mem, u32 mip_map,
                               GLuint read_fb_handle, GLuint draw_fb_handle);

    // Creates discrepant_view (the array/non-array alias) if it does not exist yet.
    void EnsureTextureDiscrepantView();

    OGLTexture texture;
    OGLTexture discrepant_view; // alias view with opposite array-ness, created on demand
    SurfaceParams params{};
    GLenum gl_target{};
    GLenum gl_internal_format{};
    std::size_t cached_size_in_bytes{};
    std::array<GLenum, 4> swizzle{GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
    std::size_t memory_size;
    bool reinterpreted = false;
    bool must_reload = false;
    VAddr cpu_addr{};
};
|||
|
|||
/// OpenGL surface cache: resolves guest texture/framebuffer configurations to
/// CachedSurface objects, reusing and reinterpreting surfaces where possible.
class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
public:
    explicit RasterizerCacheOpenGL(RasterizerOpenGL& rasterizer);

    /// Get a surface based on the texture configuration
    Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                              const GLShader::SamplerEntry& entry);

    /// Get the depth surface based on the framebuffer configuration
    Surface GetDepthBufferSurface(bool preserve_contents);

    /// Get the color surface based on the framebuffer configuration and the specified render target
    Surface GetColorBufferSurface(std::size_t index, bool preserve_contents);

    /// Tries to find a framebuffer using on the provided CPU address
    Surface TryFindFramebufferSurface(const u8* host_ptr) const;

    /// Copies the contents of one surface to another
    void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                          const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                          const Common::Rectangle<u32>& src_rect,
                          const Common::Rectangle<u32>& dst_rect);

    // Hooks invoked around each draw call by the rasterizer.
    void SignalPreDrawCall();
    void SignalPostDrawCall();

protected:
    /// Flushes a surface's GL data back to guest memory using the shared scratch buffers.
    void FlushObjectInner(const Surface& object) override {
        object->FlushGLBuffer(temporal_memory);
    }

private:
    void LoadSurface(const Surface& surface);
    Surface GetSurface(const SurfaceParams& params, bool preserve_contents = true);

    /// Gets an uncached surface, creating it if need be
    Surface GetUncachedSurface(const SurfaceParams& params);

    /// Recreates a surface with new parameters
    Surface RecreateSurface(const Surface& old_surface, const SurfaceParams& new_params);

    /// Reserves a unique surface that can be reused later
    void ReserveSurface(const Surface& surface);

    /// Tries to get a reserved surface for the specified parameters
    Surface TryGetReservedSurface(const SurfaceParams& params);

    // Partialy reinterpret a surface based on a triggering_surface that collides with it.
    // returns true if the reinterpret was successful, false in case it was not.
    bool PartialReinterpretSurface(Surface triggering_surface, Surface intersect);

    /// Performs a slow but accurate surface copy, flushing to RAM and reinterpreting the data
    void AccurateCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastLayeredCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void FastCopySurface(const Surface& src_surface, const Surface& dst_surface);
    void CopySurface(const Surface& src_surface, const Surface& dst_surface,
                     const GLuint copy_pbo_handle, const GLenum src_attachment = 0,
                     const GLenum dst_attachment = 0, const std::size_t cubemap_face = 0);

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceReserveKey, Surface> surface_reserve;

    OGLFramebuffer read_framebuffer;
    OGLFramebuffer draw_framebuffer;

    // Set when a texture/framebuffer aliasing hazard is detected between draw calls.
    // NOTE(review): semantics inferred from the name ("texture exception"); confirm
    // against the .cpp before relying on this.
    bool texception = false;

    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
    /// using the new format.
    OGLBuffer copy_pbo;

    std::array<Surface, Maxwell::NumRenderTargets> last_color_buffers;
    std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers;
    Surface last_depth_buffer;

    RasterizerTemporaryMemory temporal_memory;

    using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>;
    using SurfaceInterval = typename SurfaceIntervalCache::interval_type;

    // Address interval covered by a surface, shrunk by one byte at each end —
    // presumably so exactly-adjacent surfaces do not register as collisions; confirm.
    static auto GetReinterpretInterval(const Surface& object) {
        return SurfaceInterval::right_open(object->GetCacheAddr() + 1,
                                           object->GetCacheAddr() + object->GetMemorySize() - 1);
    }

    // Reinterpreted surfaces are very fragile as the game may keep rendering into them.
    SurfaceIntervalCache reinterpreted_surfaces;

    /// Tracks a surface in the reinterpreted-interval map and flags it as reinterpreted.
    void RegisterReinterpretSurface(Surface reinterpret_surface) {
        auto interval = GetReinterpretInterval(reinterpret_surface);
        reinterpreted_surfaces.insert({interval, reinterpret_surface});
        reinterpret_surface->MarkReinterpreted();
    }

    /// Returns the first reinterpreted surface whose interval contains addr, or nullptr.
    Surface CollideOnReinterpretedSurface(CacheAddr addr) const {
        const SurfaceInterval interval{addr};
        for (auto& pair :
             boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) {
            return pair.second;
        }
        return nullptr;
    }

    void Register(const Surface& object) override {
        RasterizerCache<Surface>::Register(object);
    }

    /// Unregisters an object from the cache
    void Unregister(const Surface& object) override {
        // Drop its reinterpret tracking first so stale intervals cannot linger.
        if (object->IsReinterpreted()) {
            auto interval = GetReinterpretInterval(object);
            reinterpreted_surfaces.erase(interval);
        }
        RasterizerCache<Surface>::Unregister(object);
    }
};
|||
|
|||
} // namespace OpenGL |
|||
@ -0,0 +1,614 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/bit_util.h"
|
|||
#include "common/common_types.h"
|
|||
#include "common/microprofile.h"
|
|||
#include "common/scope_exit.h"
|
|||
#include "core/core.h"
|
|||
#include "video_core/morton.h"
|
|||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
|||
#include "video_core/renderer_opengl/gl_state.h"
|
|||
#include "video_core/renderer_opengl/gl_texture_cache.h"
|
|||
#include "video_core/renderer_opengl/utils.h"
|
|||
#include "video_core/texture_cache/surface_base.h"
|
|||
#include "video_core/texture_cache/texture_cache.h"
|
|||
#include "video_core/textures/convert.h"
|
|||
#include "video_core/textures/texture.h"
|
|||
|
|||
namespace OpenGL { |
|||
|
|||
using Tegra::Texture::SwizzleSource; |
|||
using VideoCore::MortonSwizzleMode; |
|||
|
|||
using VideoCore::Surface::ComponentType; |
|||
using VideoCore::Surface::PixelFormat; |
|||
using VideoCore::Surface::SurfaceCompression; |
|||
using VideoCore::Surface::SurfaceTarget; |
|||
using VideoCore::Surface::SurfaceType; |
|||
|
|||
MICROPROFILE_DEFINE(OpenGL_Texture_Upload, "OpenGL", "Texture Upload", MP_RGB(128, 192, 128)); |
|||
MICROPROFILE_DEFINE(OpenGL_Texture_Download, "OpenGL", "Texture Download", MP_RGB(128, 192, 128)); |
|||
|
|||
namespace { |
|||
|
|||
struct FormatTuple { |
|||
GLint internal_format; |
|||
GLenum format; |
|||
GLenum type; |
|||
ComponentType component_type; |
|||
bool compressed; |
|||
}; |
|||
|
|||
// Maps every guest PixelFormat (table order must match the PixelFormat enum in
// video_core/surface.h) to the OpenGL internal format, upload format/type, the
// expected component type, and whether the host format is block-compressed.
// ASTC formats are decoded on the CPU, hence the plain RGBA8/SRGB8 entries.
constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
    {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false}, // ABGR8S
    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // ABGR8UI
    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
     false}, // A2B10G10R10U
    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // R8U
    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false}, // R8UI
    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false}, // RGBA16F
    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RGBA16U
    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RGBA16UI
    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
     false}, // R11FG11FB10F
    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT1
    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT23
    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT45
    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXN2UNORM
    {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
    {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // BC7U
    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
     true}, // BC6H_UF16
    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
     true}, // BC6H_SF16
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
    {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false}, // RGBA32F
    {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false}, // RG32F
    {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false}, // R32F
    {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false}, // R16F
    {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // R16U
    {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false}, // R16S
    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
    {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false}, // R16I
    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false}, // RG16
    {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false}, // RG16F
    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
    {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false}, // RG16I
    {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false}, // RG16S
    {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false}, // RGB32F
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
     false}, // RGBA8_SRGB
    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // RG8U
    {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false}, // RG8S
    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RG32UI
    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // R32UI
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4
    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8_SRGB
    // Compressed sRGB formats
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT1_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT23_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // DXT45_SRGB
    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
     true}, // BC7U_SRGB
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8
    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB

    // Depth formats
    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
     false}, // Z16

    // DepthStencil formats
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
     false}, // Z24S8
    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
     false}, // S8Z24
    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
     ComponentType::Float, false}, // Z32FS8
}};
|||
|
|||
/// Looks up the host format description for a guest pixel format.
/// Asserts that the caller's component type agrees with the table entry.
const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
    const auto index = static_cast<std::size_t>(pixel_format);
    ASSERT(index < tex_format_tuples.size());
    const auto& tuple = tex_format_tuples[index];
    ASSERT(tuple.component_type == component_type);
    return tuple;
}
|||
|
|||
/// Translates a guest surface target into the equivalent OpenGL texture target.
GLenum GetTextureTarget(const SurfaceTarget& target) {
    switch (target) {
    case SurfaceTarget::TextureBuffer:
        return GL_TEXTURE_BUFFER;
    case SurfaceTarget::Texture1D:
        return GL_TEXTURE_1D;
    case SurfaceTarget::Texture2D:
        return GL_TEXTURE_2D;
    case SurfaceTarget::Texture3D:
        return GL_TEXTURE_3D;
    case SurfaceTarget::Texture1DArray:
        return GL_TEXTURE_1D_ARRAY;
    case SurfaceTarget::Texture2DArray:
        return GL_TEXTURE_2D_ARRAY;
    case SurfaceTarget::TextureCubemap:
        return GL_TEXTURE_CUBE_MAP;
    case SurfaceTarget::TextureCubeArray:
        return GL_TEXTURE_CUBE_MAP_ARRAY;
    }
    // All enumerators are handled above; reaching here means a corrupt value.
    UNREACHABLE();
    return {};
}
|||
|
|||
/// Translates a guest texture swizzle source into the equivalent GL swizzle value.
GLint GetSwizzleSource(SwizzleSource source) {
    switch (source) {
    case SwizzleSource::Zero:
        return GL_ZERO;
    case SwizzleSource::R:
        return GL_RED;
    case SwizzleSource::G:
        return GL_GREEN;
    case SwizzleSource::B:
        return GL_BLUE;
    case SwizzleSource::A:
        return GL_ALPHA;
    case SwizzleSource::OneInt:
    case SwizzleSource::OneFloat:
        // Both "one" variants map to the same GL constant.
        return GL_ONE;
    }
    UNREACHABLE();
    return GL_NONE;
}
|||
|
|||
/// Applies the sampler-independent default state used for every cached texture.
void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) {
    glTextureParameteri(texture, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
    glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
    glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
    glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
    glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1);
    if (params.num_levels == 1) {
        // NOTE(review): a huge LOD bias on single-level textures presumably forces
        // sampling to always hit the only level — confirm intent with the authors.
        glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f);
    }
}
|||
|
|||
/// Allocates immutable GL storage for a surface and applies the default state.
/// For GL_TEXTURE_BUFFER targets the backing buffer object is created in
/// texture_buffer and bound to the texture.
OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum internal_format,
                         OGLBuffer& texture_buffer) {
    OGLTexture texture;
    texture.Create(target);

    switch (params.target) {
    case SurfaceTarget::Texture1D:
        glTextureStorage1D(texture.handle, params.emulated_levels, internal_format, params.width);
        break;
    case SurfaceTarget::TextureBuffer:
        texture_buffer.Create();
        glNamedBufferStorage(texture_buffer.handle, params.width * params.GetBytesPerPixel(),
                             nullptr, GL_DYNAMIC_STORAGE_BIT);
        glTextureBuffer(texture.handle, internal_format, texture_buffer.handle);
        // Fix: this case previously fell through into the 2D path, invoking
        // glTextureStorage2D on a buffer texture (a GL error).
        break;
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::TextureCubemap:
        glTextureStorage2D(texture.handle, params.emulated_levels, internal_format, params.width,
                           params.height);
        break;
    case SurfaceTarget::Texture3D:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubeArray:
        glTextureStorage3D(texture.handle, params.emulated_levels, internal_format, params.width,
                           params.height, params.depth);
        break;
    default:
        UNREACHABLE();
    }

    ApplyTextureDefaults(params, texture.handle);

    return texture;
}
|||
|
|||
} // Anonymous namespace
|
|||
|
|||
// Creates the backing GL texture for a guest surface and a "main" proxy view
// covering every layer and level of the surface.
CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
    : VideoCommon::SurfaceBase<View>(gpu_addr, params) {
    // Cache the host format description so uploads/downloads don't re-query it.
    const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
    internal_format = tuple.internal_format;
    format = tuple.format;
    type = tuple.type;
    is_compressed = tuple.compressed;
    target = GetTextureTarget(params.target);
    texture = CreateTexture(params, target, internal_format, texture_buffer);
    DecorateSurfaceName();
    // The main view is a proxy (no separate GL view object); layered targets
    // expose all of their layers through it.
    main_view = CreateViewInner(
        ViewParams(params.target, 0, params.is_layered ? params.depth : 1, 0, params.num_levels),
        true);
}
|||
|
|||
CachedSurface::~CachedSurface() = default; |
|||
|
|||
/// Reads back every emulated mipmap level from the GL texture into the staging buffer.
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
    MICROPROFILE_SCOPE(OpenGL_Texture_Download);

    // Restore the pack row length when leaving, regardless of how we exit.
    SCOPE_EXIT({ glPixelStorei(GL_PACK_ROW_LENGTH, 0); });

    for (u32 level = 0; level < params.emulated_levels; ++level) {
        // GL_PACK_ALIGNMENT only accepts 1, 2, 4 or 8 — clamp the row alignment.
        glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
        glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));
        const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level);
        if (is_compressed) {
            glGetCompressedTextureImage(texture.handle, level,
                                        static_cast<GLsizei>(params.GetHostMipmapSize(level)),
                                        staging_buffer.data() + mip_offset);
        } else {
            glGetTextureImage(texture.handle, level, format, type,
                              static_cast<GLsizei>(params.GetHostMipmapSize(level)),
                              staging_buffer.data() + mip_offset);
        }
    }
}
|||
|
|||
void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { |
|||
MICROPROFILE_SCOPE(OpenGL_Texture_Upload); |
|||
SCOPE_EXIT({ glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); }); |
|||
for (u32 level = 0; level < params.emulated_levels; ++level) { |
|||
UploadTextureMipmap(level, staging_buffer); |
|||
} |
|||
} |
|||
|
|||
/// Uploads a single mipmap level, dispatching on compression and surface target.
void CachedSurface::UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer) {
    // GL_UNPACK_ALIGNMENT only accepts 1, 2, 4 or 8 — clamp the row alignment.
    glPixelStorei(GL_UNPACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level)));
    glPixelStorei(GL_UNPACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level)));

    auto compression_type = params.GetCompressionType();

    // Converted formats (e.g. CPU-decoded ASTC) use a different staging layout
    // than formats uploaded in their native encoding.
    const std::size_t mip_offset = compression_type == SurfaceCompression::Converted
                                       ? params.GetConvertedMipmapOffset(level)
                                       : params.GetHostMipmapLevelOffset(level);
    const u8* buffer{staging_buffer.data() + mip_offset};
    if (is_compressed) {
        const auto image_size{static_cast<GLsizei>(params.GetHostMipmapSize(level))};
        switch (params.target) {
        case SurfaceTarget::Texture2D:
            glCompressedTextureSubImage2D(texture.handle, level, 0, 0,
                                          static_cast<GLsizei>(params.GetMipWidth(level)),
                                          static_cast<GLsizei>(params.GetMipHeight(level)),
                                          internal_format, image_size, buffer);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glCompressedTextureSubImage3D(texture.handle, level, 0, 0, 0,
                                          static_cast<GLsizei>(params.GetMipWidth(level)),
                                          static_cast<GLsizei>(params.GetMipHeight(level)),
                                          static_cast<GLsizei>(params.GetMipDepth(level)),
                                          internal_format, image_size, buffer);
            break;
        case SurfaceTarget::TextureCubemap: {
            // Cubemaps are uploaded one face at a time, advancing through the
            // staging buffer by the per-layer host size.
            const std::size_t layer_size{params.GetHostLayerSize(level)};
            for (std::size_t face = 0; face < params.depth; ++face) {
                glCompressedTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
                                              static_cast<GLsizei>(params.GetMipWidth(level)),
                                              static_cast<GLsizei>(params.GetMipHeight(level)), 1,
                                              internal_format, static_cast<GLsizei>(layer_size),
                                              buffer);
                buffer += layer_size;
            }
            break;
        }
        default:
            UNREACHABLE();
        }
    } else {
        switch (params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(texture.handle, level, 0, params.GetMipWidth(level), format, type,
                                buffer);
            break;
        case SurfaceTarget::TextureBuffer:
            // Buffer textures have no mipmaps; data goes into the buffer object.
            ASSERT(level == 0);
            glNamedBufferSubData(texture_buffer.handle, 0,
                                 params.GetMipWidth(level) * params.GetBytesPerPixel(), buffer);
            break;
        case SurfaceTarget::Texture1DArray:
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(texture.handle, level, 0, 0, params.GetMipWidth(level),
                                params.GetMipHeight(level), format, type, buffer);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
            glTextureSubImage3D(
                texture.handle, level, 0, 0, 0, static_cast<GLsizei>(params.GetMipWidth(level)),
                static_cast<GLsizei>(params.GetMipHeight(level)),
                static_cast<GLsizei>(params.GetMipDepth(level)), format, type, buffer);
            break;
        case SurfaceTarget::TextureCubemap:
            // One face per iteration, as in the compressed path above.
            for (std::size_t face = 0; face < params.depth; ++face) {
                glTextureSubImage3D(texture.handle, level, 0, 0, static_cast<GLint>(face),
                                    params.GetMipWidth(level), params.GetMipHeight(level), 1,
                                    format, type, buffer);
                buffer += params.GetHostLayerSize(level);
            }
            break;
        default:
            UNREACHABLE();
        }
    }
}
|||
|
|||
/// Labels the GL texture with its GPU address for graphics debuggers.
void CachedSurface::DecorateSurfaceName() {
    LabelGLObject(GL_TEXTURE, texture.handle, GetGpuAddr(), params.TargetName());
}
|||
|
|||
/// Labels the GL texture view with its GPU address and a caller-supplied prefix.
void CachedSurfaceView::DecorateViewName(GPUVAddr gpu_addr, std::string prefix) {
    LabelGLObject(GL_TEXTURE, texture_view.handle, gpu_addr, prefix);
}
|||
|
|||
/// Creates a regular (non-proxy) view; called by the generic texture cache.
View CachedSurface::CreateView(const ViewParams& view_key) {
    return CreateViewInner(view_key, false);
}
|||
|
|||
/// Creates a view over this surface and caches it by its parameters.
/// Proxy views reuse the surface's own texture and get no debug label.
View CachedSurface::CreateViewInner(const ViewParams& view_key, const bool is_proxy) {
    auto view = std::make_shared<CachedSurfaceView>(*this, view_key, is_proxy);
    views[view_key] = view;
    if (!is_proxy) {
        // view_count only feeds the debug label, making each view name unique.
        view->DecorateViewName(gpu_addr, params.TargetName() + "V:" + std::to_string(view_count++));
    }
    return view;
}
|||
|
|||
// Proxy views alias the owner's texture directly; only real views allocate a
// GL texture view object.
CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& params,
                                     const bool is_proxy)
    : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
    target = GetTextureTarget(params.target);
    if (!is_proxy) {
        texture_view = CreateTextureView();
    }
    // Start from the identity swizzle so ApplySwizzle can cheaply detect changes.
    swizzle = EncodeSwizzle(SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A);
}
|||
|
|||
CachedSurfaceView::~CachedSurfaceView() = default; |
|||
|
|||
/// Attaches this view to the framebuffer bound at `target`
/// (GL_READ_FRAMEBUFFER or GL_DRAW_FRAMEBUFFER) as `attachment`.
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
    // Framebuffer attachments must reference exactly one layer and level.
    ASSERT(params.num_layers == 1 && params.num_levels == 1);

    const auto& owner_params = surface.GetSurfaceParams();

    switch (owner_params.target) {
    case SurfaceTarget::Texture1D:
        glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
                               params.base_level);
        break;
    case SurfaceTarget::Texture2D:
        glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
                               params.base_level);
        break;
    case SurfaceTarget::Texture1DArray:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::TextureCubeArray:
        // Layered targets attach a single layer of the owner texture; note the
        // owner texture is used directly, not the GL view object.
        glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
                                  params.base_layer);
        break;
    default:
        UNIMPLEMENTED();
    }
}
|||
|
|||
/// Updates the RGBA swizzle of the underlying texture.
/// No-ops when the requested swizzle matches the cached one.
void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_source,
                                     SwizzleSource z_source, SwizzleSource w_source) {
    const u32 encoded = EncodeSwizzle(x_source, y_source, z_source, w_source);
    if (encoded == swizzle) {
        return;
    }
    swizzle = encoded;
    const std::array<GLint, 4> gl_swizzle{GetSwizzleSource(x_source), GetSwizzleSource(y_source),
                                          GetSwizzleSource(z_source), GetSwizzleSource(w_source)};
    glTextureParameteriv(GetTexture(), GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data());
}
|||
|
|||
/// Creates a GL texture view over the owner surface's storage, restricted to
/// this view's level/layer range, and applies the default texture state.
OGLTextureView CachedSurfaceView::CreateTextureView() const {
    const auto& owner_params = surface.GetSurfaceParams();
    OGLTextureView texture_view;
    texture_view.Create();

    const GLuint handle{texture_view.handle};
    // The view reuses the owner's format; only the sub-range differs.
    const FormatTuple& tuple{
        GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};

    glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
                  params.num_levels, params.base_layer, params.num_layers);

    ApplyTextureDefaults(owner_params, handle);

    return texture_view;
}
|||
|
|||
// Sets up the OpenGL backend of the generic texture cache. The two framebuffers
// are reused by ImageBlit as blit source/destination attachment points.
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
                                       VideoCore::RasterizerInterface& rasterizer,
                                       const Device& device)
    : TextureCacheBase{system, rasterizer} {
    // NOTE(review): 'device' is not used in this constructor body — presumably
    // kept for interface parity with other backends; confirm.
    src_framebuffer.Create();
    dst_framebuffer.Create();
}
|||
|
|||
TextureCacheOpenGL::~TextureCacheOpenGL() = default; |
|||
|
|||
/// Backend surface factory, invoked by the generic texture cache.
Surface TextureCacheOpenGL::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) {
    return std::make_shared<CachedSurface>(gpu_addr, params);
}
|||
|
|||
/// Copies a subregion between two surfaces on the GPU with glCopyImageSubData.
/// Copies across surface types (e.g. color <-> depth) are not expressible this
/// way and are currently dropped.
void TextureCacheOpenGL::ImageCopy(Surface& src_surface, Surface& dst_surface,
                                   const VideoCommon::CopyParams& copy_params) {
    const auto& src_params = src_surface->GetSurfaceParams();
    const auto& dst_params = dst_surface->GetSurfaceParams();
    if (src_params.type != dst_params.type) {
        // A format-converting fallback is needed; log instead of failing silently
        // so dropped copies are visible during bring-up.
        LOG_WARNING(Render_OpenGL, "Skipping unimplemented copy between different surface types");
        return;
    }
    const auto src_handle = src_surface->GetTexture();
    const auto src_target = src_surface->GetTarget();
    const auto dst_handle = dst_surface->GetTexture();
    const auto dst_target = dst_surface->GetTarget();
    glCopyImageSubData(src_handle, src_target, copy_params.source_level, copy_params.source_x,
                       copy_params.source_y, copy_params.source_z, dst_handle, dst_target,
                       copy_params.dest_level, copy_params.dest_x, copy_params.dest_y,
                       copy_params.dest_z, copy_params.width, copy_params.height,
                       copy_params.depth);
}
|||
|
|||
/// Blits between two views using the cached src/dst framebuffers, handling
/// color, depth and depth-stencil attachments. Linear filtering is only legal
/// for color blits, matching glBlitFramebuffer's requirements.
void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
                                   const Tegra::Engines::Fermi2D::Config& copy_config) {
    const auto& src_params{src_view->GetSurfaceParams()};
    const auto& dst_params{dst_view->GetSurfaceParams()};

    // Save and restore the global GL state around the framebuffer rebinding.
    OpenGLState prev_state{OpenGLState::GetCurState()};
    SCOPE_EXIT({ prev_state.Apply(); });

    OpenGLState state;
    state.draw.read_framebuffer = src_framebuffer.handle;
    state.draw.draw_framebuffer = dst_framebuffer.handle;
    state.Apply();

    u32 buffers{};

    UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
    UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);

    // For each surface type, attach the views to the matching attachment point
    // and explicitly detach the other attachment points, since the framebuffers
    // are reused across blits.
    if (src_params.type == SurfaceType::ColorTexture) {
        src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        dst_view->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
                               0);

        buffers = GL_COLOR_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::Depth) {
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_ATTACHMENT, GL_READ_FRAMEBUFFER);
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);

        buffers = GL_DEPTH_BUFFER_BIT;
    } else if (src_params.type == SurfaceType::DepthStencil) {
        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        src_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_READ_FRAMEBUFFER);

        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
        dst_view->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);

        buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
    }

    const Common::Rectangle<u32>& src_rect = copy_config.src_rect;
    const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect;
    const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear;

    // Depth/stencil blits must use GL_NEAREST.
    glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left,
                      dst_rect.top, dst_rect.right, dst_rect.bottom, buffers,
                      is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST);
}
|||
|
|||
/// Copies one surface into another through a pixel buffer object, allowing
/// the GL driver to reinterpret the data between (uncompressed) formats.
/// Only level 0 is handled; compressed destinations are unimplemented.
void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface) {
    const auto& src_params = src_surface->GetSurfaceParams();
    const auto& dst_params = dst_surface->GetSurfaceParams();
    UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);

    const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
    const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);

    const std::size_t source_size = src_surface->GetHostSizeInBytes();
    const std::size_t dest_size = dst_surface->GetHostSizeInBytes();

    // The PBO must be able to hold whichever side is larger.
    const std::size_t buffer_size = std::max(source_size, dest_size);

    GLuint copy_pbo_handle = FetchPBO(buffer_size);

    // Pack the source texture into the PBO...
    glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);

    if (source_format.compressed) {
        glGetCompressedTextureImage(src_surface->GetTexture(), 0, static_cast<GLsizei>(source_size),
                                    nullptr);
    } else {
        glGetTextureImage(src_surface->GetTexture(), 0, source_format.format, source_format.type,
                          static_cast<GLsizei>(source_size), nullptr);
    }
    glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);

    // ...then unpack it into the destination texture.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo_handle);

    const GLsizei width = static_cast<GLsizei>(dst_params.width);
    const GLsizei height = static_cast<GLsizei>(dst_params.height);
    const GLsizei depth = static_cast<GLsizei>(dst_params.depth);
    if (dest_format.compressed) {
        LOG_CRITICAL(HW_GPU, "Compressed buffer copy is unimplemented!");
        UNREACHABLE();
    } else {
        switch (dst_params.target) {
        case SurfaceTarget::Texture1D:
            glTextureSubImage1D(dst_surface->GetTexture(), 0, 0, width, dest_format.format,
                                dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture2D:
            glTextureSubImage2D(dst_surface->GetTexture(), 0, 0, 0, width, height,
                                dest_format.format, dest_format.type, nullptr);
            break;
        case SurfaceTarget::Texture3D:
        case SurfaceTarget::Texture2DArray:
        case SurfaceTarget::TextureCubeArray:
        case SurfaceTarget::TextureCubemap:
            // Cubemaps upload all six faces at once here, so they share the 3D
            // path (the separate, identical cubemap case has been merged).
            glTextureSubImage3D(dst_surface->GetTexture(), 0, 0, 0, 0, width, height, depth,
                                dest_format.format, dest_format.type, nullptr);
            break;
        default:
            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                         static_cast<u32>(dst_params.target));
            UNREACHABLE();
        }
    }
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);

    // Make the writes visible to subsequent reads of the destination texture.
    glTextureBarrier();
}
|||
|
|||
/// Returns a cached pixel buffer object large enough for buffer_size bytes.
/// PBOs are bucketed by the ceiling power of two of the requested size so
/// they can be reused across copies.
GLuint TextureCacheOpenGL::FetchPBO(std::size_t buffer_size) {
    ASSERT_OR_EXECUTE(buffer_size > 0, { return 0; });
    const u32 log_size = Common::Log2Ceil64(static_cast<u64>(buffer_size));
    OGLBuffer& pbo = copy_pbo_cache[log_size];
    if (pbo.handle == 0) {
        // Lazily create the bucket's buffer at the rounded-up size.
        pbo.Create();
        pbo.MakeStreamCopy(static_cast<std::size_t>(1ULL << log_size));
    }
    return pbo.handle;
}
|||
|
|||
} // namespace OpenGL
|
|||
@ -0,0 +1,143 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <array> |
|||
#include <functional> |
|||
#include <memory> |
|||
#include <unordered_map> |
|||
#include <utility> |
|||
#include <vector> |
|||
|
|||
#include <glad/glad.h> |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/engines/shader_bytecode.h" |
|||
#include "video_core/renderer_opengl/gl_device.h" |
|||
#include "video_core/renderer_opengl/gl_resource_manager.h" |
|||
#include "video_core/texture_cache/texture_cache.h" |
|||
|
|||
namespace OpenGL { |
|||
|
|||
using VideoCommon::SurfaceParams; |
|||
using VideoCommon::ViewParams; |
|||
|
|||
class CachedSurfaceView; |
|||
class CachedSurface; |
|||
class TextureCacheOpenGL; |
|||
|
|||
using Surface = std::shared_ptr<CachedSurface>; |
|||
using View = std::shared_ptr<CachedSurfaceView>; |
|||
using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; |
|||
|
|||
/// OpenGL implementation of a cached guest surface: owns the GL texture (or
/// texture buffer) backing the guest data and produces views over it.
class CachedSurface final : public VideoCommon::SurfaceBase<View> {
    friend CachedSurfaceView;

public:
    explicit CachedSurface(GPUVAddr gpu_addr, const SurfaceParams& params);
    ~CachedSurface();

    /// Uploads every emulated mipmap level from the staging buffer.
    void UploadTexture(const std::vector<u8>& staging_buffer) override;
    /// Downloads every emulated mipmap level into the staging buffer.
    void DownloadTexture(std::vector<u8>& staging_buffer) override;

    /// Returns the GL texture target (e.g. GL_TEXTURE_2D) of this surface.
    GLenum GetTarget() const {
        return target;
    }

    /// Returns the GL handle of the backing texture.
    GLuint GetTexture() const {
        return texture.handle;
    }

protected:
    void DecorateSurfaceName();

    View CreateView(const ViewParams& view_key) override;
    View CreateViewInner(const ViewParams& view_key, bool is_proxy);

private:
    void UploadTextureMipmap(u32 level, const std::vector<u8>& staging_buffer);

    // Host format description, cached from the format table at construction.
    GLenum internal_format{};
    GLenum format{};
    GLenum type{};
    bool is_compressed{};
    GLenum target{};
    u32 view_count{}; // Only used to build unique debug labels for views

    OGLTexture texture;
    OGLBuffer texture_buffer; // Backing storage when target is GL_TEXTURE_BUFFER
};
|||
|
|||
class CachedSurfaceView final : public VideoCommon::ViewBase { |
|||
public: |
|||
explicit CachedSurfaceView(CachedSurface& surface, const ViewParams& params, bool is_proxy); |
|||
~CachedSurfaceView(); |
|||
|
|||
/// Attaches this texture view to the current bound GL_DRAW_FRAMEBUFFER |
|||
void Attach(GLenum attachment, GLenum target) const; |
|||
|
|||
GLuint GetTexture() const { |
|||
if (is_proxy) { |
|||
return surface.GetTexture(); |
|||
} |
|||
return texture_view.handle; |
|||
} |
|||
|
|||
const SurfaceParams& GetSurfaceParams() const { |
|||
return surface.GetSurfaceParams(); |
|||
} |
|||
|
|||
void ApplySwizzle(Tegra::Texture::SwizzleSource x_source, |
|||
Tegra::Texture::SwizzleSource y_source, |
|||
Tegra::Texture::SwizzleSource z_source, |
|||
Tegra::Texture::SwizzleSource w_source); |
|||
|
|||
void DecorateViewName(GPUVAddr gpu_addr, std::string prefix); |
|||
|
|||
private: |
|||
u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, |
|||
Tegra::Texture::SwizzleSource y_source, |
|||
Tegra::Texture::SwizzleSource z_source, |
|||
Tegra::Texture::SwizzleSource w_source) const { |
|||
return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | |
|||
(static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); |
|||
} |
|||
|
|||
OGLTextureView CreateTextureView() const; |
|||
|
|||
CachedSurface& surface; |
|||
GLenum target{}; |
|||
|
|||
OGLTextureView texture_view; |
|||
u32 swizzle; |
|||
bool is_proxy; |
|||
}; |
|||
|
|||
/// OpenGL backend of the generic texture cache: creates CachedSurface objects
/// and implements image copies, blits and PBO-based format reinterpretation.
class TextureCacheOpenGL final : public TextureCacheBase {
public:
    explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                                const Device& device);
    ~TextureCacheOpenGL();

protected:
    Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override;

    /// GPU-side subregion copy between same-type surfaces.
    void ImageCopy(Surface& src_surface, Surface& dst_surface,
                   const VideoCommon::CopyParams& copy_params) override;

    /// Framebuffer blit between two views (color, depth or depth-stencil).
    void ImageBlit(View& src_view, View& dst_view,
                   const Tegra::Engines::Fermi2D::Config& copy_config) override;

    /// Copy through a PBO, letting the driver reinterpret between formats.
    void BufferCopy(Surface& src_surface, Surface& dst_surface) override;

private:
    /// Returns a cached PBO large enough for buffer_size, bucketed by log2 size.
    GLuint FetchPBO(std::size_t buffer_size);

    OGLFramebuffer src_framebuffer;
    OGLFramebuffer dst_framebuffer;
    // Key is Log2Ceil of the buffer size; buffers are created lazily.
    std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
};
|||
|
|||
} // namespace OpenGL |
|||
@ -0,0 +1,120 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <algorithm>
|
|||
#include <vector>
|
|||
#include <fmt/format.h>
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/bit_field.h"
|
|||
#include "common/common_types.h"
|
|||
#include "common/logging/log.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/shader/node_helper.h"
|
|||
#include "video_core/shader/shader_ir.h"
|
|||
|
|||
namespace VideoCommon::Shader { |
|||
|
|||
using Tegra::Shader::Instruction; |
|||
using Tegra::Shader::OpCode; |
|||
|
|||
namespace { |
|||
/// Returns how many coordinate registers an image access of the given type reads.
std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) {
    switch (image_type) {
    case Tegra::Shader::ImageType::Texture1D:
    case Tegra::Shader::ImageType::TextureBuffer:
        return 1;
    case Tegra::Shader::ImageType::Texture1DArray:
    case Tegra::Shader::ImageType::Texture2D:
        return 2;
    case Tegra::Shader::ImageType::Texture2DArray:
    case Tegra::Shader::ImageType::Texture3D:
        return 3;
    }
    UNREACHABLE();
    return 1;
}
|||
} // Anonymous namespace
|
|||
|
|||
/// Decodes image (surface) instructions. Currently only SUST (surface store)
/// with an RGBA mask and the P data mode is supported.
u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
    const Instruction instr = {program_code[pc]};
    const auto opcode = OpCode::Decode(instr);

    switch (opcode->get().GetId()) {
    case OpCode::Id::SUST: {
        UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P);
        UNIMPLEMENTED_IF(instr.sust.image_type == Tegra::Shader::ImageType::TextureBuffer);
        UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore);
        UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store

        // Stored values come from consecutive registers starting at gpr0.
        std::vector<Node> values;
        constexpr std::size_t hardcoded_size{4};
        values.reserve(hardcoded_size);
        for (std::size_t i = 0; i < hardcoded_size; ++i) {
            values.push_back(GetRegister(instr.gpr0.Value() + i));
        }

        // Coordinates come from consecutive registers starting at gpr8.
        std::vector<Node> coords;
        const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)};
        coords.reserve(num_coords);
        for (std::size_t i = 0; i < num_coords; ++i) {
            coords.push_back(GetRegister(instr.gpr8.Value() + i));
        }

        const auto type{instr.sust.image_type};
        const auto& image{instr.sust.is_immediate ? GetImage(instr.image, type)
                                                  : GetBindlessImage(instr.gpr39, type)};
        // 'values' is dead after this point; move it instead of copying.
        MetaImage meta{image, std::move(values)};
        const Node store{Operation(OperationCode::ImageStore, meta, std::move(coords))};
        bb.push_back(store);
        break;
    }
    default:
        // Fix: the message previously said "conversion instruction" — a
        // copy-paste from the conversion decoder.
        UNIMPLEMENTED_MSG("Unhandled image instruction: {}", opcode->get().GetName());
    }

    return pc;
}
|||
|
|||
/// Returns the tracked Image entry for an immediate image operand, registering
/// a new one on first use.
const Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
    const auto offset{static_cast<std::size_t>(image.index.Value())};

    // Reuse the existing mapping when this image offset has been seen before.
    const auto it =
        std::find_if(used_images.begin(), used_images.end(),
                     [offset](const Image& entry) { return entry.GetOffset() == offset; });
    if (it != used_images.end()) {
        ASSERT(it->GetType() == type);
        return *it;
    }

    // First use of this image: create and register a new mapping.
    const std::size_t next_index{used_images.size()};
    return *used_images.emplace(Image{offset, next_index, type}).first;
}
|||
|
|||
/// Returns the tracked Image entry for a bindless image handle stored in a
/// constant buffer, registering a new one on first use.
/// The handle register is tracked back to a (cbuf index, immediate offset) pair
/// which is packed into a 64-bit key.
const Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg,
                                        Tegra::Shader::ImageType type) {
    const Node image_register{GetRegister(reg)};
    const Node base_image{
        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
    // std::get_if returns nullptr when the node is not the expected alternative;
    // dereferencing unchecked would be undefined behavior if tracking failed.
    const auto cbuf{std::get_if<CbufNode>(&*base_image)};
    ASSERT(cbuf != nullptr);
    const auto cbuf_offset_imm{std::get_if<ImmediateNode>(&*cbuf->GetOffset())};
    ASSERT(cbuf_offset_imm != nullptr);
    const auto cbuf_offset{cbuf_offset_imm->GetValue()};
    const auto cbuf_index{cbuf->GetIndex()};
    // Pack constant buffer index (high bits) and offset (low bits) into one key.
    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};

    // If this image has already been used, return the existing mapping.
    const auto itr{std::find_if(used_images.begin(), used_images.end(),
                                [=](const Image& entry) { return entry.GetOffset() == cbuf_key; })};
    if (itr != used_images.end()) {
        ASSERT(itr->GetType() == type);
        return *itr;
    }

    // Otherwise create a new mapping for this image.
    const std::size_t next_index{used_images.size()};
    const Image entry{cbuf_index, cbuf_offset, next_index, type};
    return *used_images.emplace(entry).first;
}
|||
|
|||
} // namespace VideoCommon::Shader
|
|||
@ -1,386 +0,0 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/alignment.h"
|
|||
#include "common/assert.h"
|
|||
#include "common/cityhash.h"
|
|||
#include "common/common_types.h"
|
|||
#include "core/core.h"
|
|||
#include "video_core/surface.h"
|
|||
#include "video_core/texture_cache.h"
|
|||
#include "video_core/textures/decoders.h"
|
|||
#include "video_core/textures/texture.h"
|
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using VideoCore::Surface::SurfaceTarget; |
|||
|
|||
using VideoCore::Surface::ComponentTypeFromDepthFormat; |
|||
using VideoCore::Surface::ComponentTypeFromRenderTarget; |
|||
using VideoCore::Surface::ComponentTypeFromTexture; |
|||
using VideoCore::Surface::PixelFormatFromDepthFormat; |
|||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
|||
using VideoCore::Surface::PixelFormatFromTextureFormat; |
|||
using VideoCore::Surface::SurfaceTargetFromTextureType; |
|||
|
|||
/// Converts a mip dimension to a size in blocks: uncompressed sizes pass
/// through, compressed sizes are divided by the tile size (rounded up) and
/// clamped to at least one block.
constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
    if (uncompressed) {
        return mip_size;
    }
    const u32 blocks{(mip_size + tile - 1) / tile};
    return blocks < 1 ? 1U : blocks;
}
|||
|
|||
/// Builds SurfaceParams from a TIC texture descriptor.
SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
                                              const Tegra::Texture::FullTextureInfo& config) {
    SurfaceParams params;
    params.is_tiled = config.tic.IsTiled();
    // Block dimensions only apply to tiled (block linear) textures.
    // These assignments previously ended in commas (comma operator) instead of
    // semicolons; the behavior was the same but it was a latent typo.
    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
    params.pixel_format =
        PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(), false);
    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
    params.type = GetFormatType(params.pixel_format);
    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
    // Align dimensions to the compression block size of the pixel format.
    params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
    params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
    params.depth = config.tic.Depth();
    if (params.target == SurfaceTarget::TextureCubemap ||
        params.target == SurfaceTarget::TextureCubeArray) {
        // Cube targets store six faces per cube in the depth dimension.
        params.depth *= 6;
    }
    params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
    params.unaligned_height = config.tic.Height();
    params.num_levels = config.tic.max_mip_level + 1;

    params.CalculateCachedValues();
    return params;
}
|||
|
|||
/// Builds SurfaceParams for a depth (zeta) buffer configuration.
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    // Clamp the shift exponents so block sizes stay within hardware limits.
    params.block_width = 1 << std::min(block_width, 5U);
    params.block_height = 1 << std::min(block_height, 5U);
    params.block_depth = 1 << std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    params.width = zeta_width;
    params.height = zeta_height;
    // pitch was previously left uninitialized here; Hash() and operator== read
    // every member, so an indeterminate value made cache keys unreliable.
    params.pitch = 0;
    params.unaligned_height = zeta_height;
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.num_levels = 1;

    params.CalculateCachedValues();
    return params;
}
|||
|
|||
/// Builds SurfaceParams from the render target at the given index of the
/// Maxwell3D register state.
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params;
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.block_width = 1 << config.memory_layout.block_width;
    params.block_height = 1 << config.memory_layout.block_height;
    params.block_depth = 1 << config.memory_layout.block_depth;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    if (params.is_tiled) {
        // pitch was previously left uninitialized on this path; Hash() and
        // operator== read every member, so it must be given a defined value.
        params.pitch = 0;
        params.width = config.width;
    } else {
        // For pitch-linear targets the register holds the pitch in bytes.
        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
        params.pitch = config.width;
        params.width = params.pitch / bpp;
    }
    params.height = config.height;
    params.depth = 1;
    params.unaligned_height = config.height;
    params.target = SurfaceTarget::Texture2D;
    params.num_levels = 1;

    params.CalculateCachedValues();
    return params;
}
|||
|
|||
/// Builds SurfaceParams from a Fermi2D copy-engine surface configuration.
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
    // Value-initialized, so members not assigned below (e.g. pitch) are zero.
    SurfaceParams params{};
    params.is_tiled = !config.linear;
    // These assignments previously ended in commas (comma operator) instead of
    // semicolons; the behavior was the same but it was a latent typo.
    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 32U) : 0;
    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 32U) : 0;
    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 32U) : 0;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
    params.height = config.height;
    params.unaligned_height = config.height;
    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.num_levels = 1;

    params.CalculateCachedValues();
    return params;
}
|||
|
|||
u32 SurfaceParams::GetMipWidth(u32 level) const {
    // Each mip level halves the width, clamped to at least one texel.
    const u32 shifted{width >> level};
    return shifted > 0 ? shifted : 1U;
}
|||
|
|||
u32 SurfaceParams::GetMipHeight(u32 level) const {
    // Each mip level halves the height, clamped to at least one texel.
    const u32 shifted{height >> level};
    return shifted > 0 ? shifted : 1U;
}
|||
|
|||
u32 SurfaceParams::GetMipDepth(u32 level) const {
    // Layered surfaces keep the same number of layers at every mip level.
    if (IsLayered()) {
        return depth;
    }
    return std::max(1U, depth >> level);
}
|||
|
|||
bool SurfaceParams::IsLayered() const { |
|||
switch (target) { |
|||
case SurfaceTarget::Texture1DArray: |
|||
case SurfaceTarget::Texture2DArray: |
|||
case SurfaceTarget::TextureCubeArray: |
|||
case SurfaceTarget::TextureCubemap: |
|||
return true; |
|||
default: |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
/// Returns the tiling block height (in GOBs) for a given mip level.
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    // Auto block resizing algorithm from:
    // https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
    if (level == 0) {
        return block_height;
    }
    // Locals renamed: the previous `height`/`block_height` locals shadowed the
    // members of the same name, which was error-prone.
    const u32 mip_height{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight(pixel_format)};
    const u32 blocks_in_y{(mip_height + default_block_height - 1) / default_block_height};
    // Halve the candidate block height while it is oversized for the row count.
    u32 mip_block_height = 16;
    while (mip_block_height > 1 && blocks_in_y <= mip_block_height * 4) {
        mip_block_height >>= 1;
    }
    return mip_block_height;
}
|||
|
|||
/// Returns the tiling block depth for a given mip level.
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
    // Braces added to the early returns to match the file's brace style; the
    // local renamed to avoid shadowing the `block_depth` member.
    if (level == 0) {
        return block_depth;
    }
    if (target != SurfaceTarget::Texture3D) {
        // Only 3D textures tile in the depth dimension.
        return 1;
    }

    const u32 mip_depth{GetMipDepth(level)};
    u32 mip_block_depth = 32;
    while (mip_block_depth > 1 && mip_depth * 2 <= mip_block_depth) {
        mip_block_depth >>= 1;
    }
    if (mip_block_depth == 32 && GetMipBlockHeight(level) >= 4) {
        return 16;
    }
    return mip_block_depth;
}
|||
|
|||
std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
    // Sum the guest-memory sizes of every mip level preceding the requested one.
    std::size_t accumulated = 0;
    for (u32 current = 0; current < level; ++current) {
        accumulated += GetInnerMipmapMemorySize(current, false, IsLayered(), false);
    }
    return accumulated;
}
|||
|
|||
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
    // Sum the host (linear) sizes of every mip level preceding the requested one.
    std::size_t accumulated = 0;
    for (u32 current = 0; current < level; ++current) {
        accumulated += GetInnerMipmapMemorySize(current, true, false, false);
    }
    return accumulated;
}
|||
|
|||
// Size in guest memory of a single layer, covering all mip levels.
std::size_t SurfaceParams::GetGuestLayerSize() const {
    return GetInnerMemorySize(false, true, false);
}
|||
|
|||
// Size in host (linear) memory of a single layer at the given mip level.
std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
    return GetInnerMipmapMemorySize(level, true, IsLayered(), false);
}
|||
|
|||
bool SurfaceParams::IsFamiliar(const SurfaceParams& view_params) const { |
|||
if (std::tie(is_tiled, tile_width_spacing, pixel_format, component_type, type) != |
|||
std::tie(view_params.is_tiled, view_params.tile_width_spacing, view_params.pixel_format, |
|||
view_params.component_type, view_params.type)) { |
|||
return false; |
|||
} |
|||
|
|||
const SurfaceTarget view_target{view_params.target}; |
|||
if (view_target == target) { |
|||
return true; |
|||
} |
|||
|
|||
switch (target) { |
|||
case SurfaceTarget::Texture1D: |
|||
case SurfaceTarget::Texture2D: |
|||
case SurfaceTarget::Texture3D: |
|||
return false; |
|||
case SurfaceTarget::Texture1DArray: |
|||
return view_target == SurfaceTarget::Texture1D; |
|||
case SurfaceTarget::Texture2DArray: |
|||
return view_target == SurfaceTarget::Texture2D; |
|||
case SurfaceTarget::TextureCubemap: |
|||
return view_target == SurfaceTarget::Texture2D || |
|||
view_target == SurfaceTarget::Texture2DArray; |
|||
case SurfaceTarget::TextureCubeArray: |
|||
return view_target == SurfaceTarget::Texture2D || |
|||
view_target == SurfaceTarget::Texture2DArray || |
|||
view_target == SurfaceTarget::TextureCubemap; |
|||
default: |
|||
UNIMPLEMENTED_MSG("Unimplemented texture family={}", static_cast<u32>(target)); |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
bool SurfaceParams::IsPixelFormatZeta() const { |
|||
return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && |
|||
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; |
|||
} |
|||
|
|||
void SurfaceParams::CalculateCachedValues() { |
|||
guest_size_in_bytes = GetInnerMemorySize(false, false, false); |
|||
|
|||
// ASTC is uncompressed in software, in emulated as RGBA8
|
|||
if (IsPixelFormatASTC(pixel_format)) { |
|||
host_size_in_bytes = width * height * depth * 4; |
|||
} else { |
|||
host_size_in_bytes = GetInnerMemorySize(true, false, false); |
|||
} |
|||
|
|||
switch (target) { |
|||
case SurfaceTarget::Texture1D: |
|||
case SurfaceTarget::Texture2D: |
|||
case SurfaceTarget::Texture3D: |
|||
num_layers = 1; |
|||
break; |
|||
case SurfaceTarget::Texture1DArray: |
|||
case SurfaceTarget::Texture2DArray: |
|||
case SurfaceTarget::TextureCubemap: |
|||
case SurfaceTarget::TextureCubeArray: |
|||
num_layers = depth; |
|||
break; |
|||
default: |
|||
UNREACHABLE(); |
|||
} |
|||
} |
|||
|
|||
/// Returns the size in bytes of one mip level.
/// as_host_size: compute the linear host layout instead of the guest layout.
/// layer_only: restrict the computation to a single layer (depth forced to 1).
/// uncompressed: skip conversion of dimensions to compression blocks.
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                                    bool uncompressed) const {
    // Host surfaces are always linear; guest surfaces honor the tiling flag.
    const bool tiled{as_host_size ? false : is_tiled};
    const u32 tile_x{GetDefaultBlockWidth(pixel_format)};
    const u32 tile_y{GetDefaultBlockHeight(pixel_format)};
    // Locals renamed: the previous `width`/`height`/`depth` locals shadowed the
    // members of the same name, which was error-prone.
    const u32 mip_width{GetMipmapSize(uncompressed, GetMipWidth(level), tile_x)};
    const u32 mip_height{GetMipmapSize(uncompressed, GetMipHeight(level), tile_y)};
    const u32 mip_depth{layer_only ? 1U : GetMipDepth(level)};
    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(pixel_format), mip_width,
                                         mip_height, mip_depth, GetMipBlockHeight(level),
                                         GetMipBlockDepth(level));
}
|||
|
|||
std::size_t SurfaceParams::GetInnerMemorySize(bool as_host_size, bool layer_only, |
|||
bool uncompressed) const { |
|||
std::size_t size = 0; |
|||
for (u32 level = 0; level < num_levels; ++level) { |
|||
size += GetInnerMipmapMemorySize(level, as_host_size, layer_only, uncompressed); |
|||
} |
|||
if (!as_host_size && is_tiled) { |
|||
size = Common::AlignUp(size, Tegra::Texture::GetGOBSize() * block_height * block_depth); |
|||
} |
|||
return size; |
|||
} |
|||
|
|||
// Builds a map from guest-memory byte offset (relative to the surface start)
// to the (layer, level) pair located at that offset. Used to resolve candidate
// views by address.
std::map<u64, std::pair<u32, u32>> SurfaceParams::CreateViewOffsetMap() const {
    std::map<u64, std::pair<u32, u32>> view_offset_map;
    switch (target) {
    case SurfaceTarget::Texture1D:
    case SurfaceTarget::Texture2D:
    case SurfaceTarget::Texture3D: {
        // Flat targets have a single layer; record one entry per mip level.
        constexpr u32 layer = 0;
        for (u32 level = 0; level < num_levels; ++level) {
            const std::size_t offset{GetGuestMipmapLevelOffset(level)};
            view_offset_map.insert({offset, {layer, level}});
        }
        break;
    }
    case SurfaceTarget::Texture1DArray:
    case SurfaceTarget::Texture2DArray:
    case SurfaceTarget::TextureCubemap:
    case SurfaceTarget::TextureCubeArray: {
        // Layered targets: record one entry per (layer, level) combination.
        const std::size_t layer_size{GetGuestLayerSize()};
        for (u32 level = 0; level < num_levels; ++level) {
            const std::size_t level_offset{GetGuestMipmapLevelOffset(level)};
            for (u32 layer = 0; layer < num_layers; ++layer) {
                const auto layer_offset{static_cast<std::size_t>(layer_size * layer)};
                const std::size_t offset{level_offset + layer_offset};
                view_offset_map.insert({offset, {layer, level}});
            }
        }
        break;
    }
    default:
        UNIMPLEMENTED_MSG("Unimplemented surface target {}", static_cast<u32>(target));
    }
    return view_offset_map;
}
|||
|
|||
bool SurfaceParams::IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const { |
|||
return IsDimensionValid(view_params, level) && IsDepthValid(view_params, level) && |
|||
IsInBounds(view_params, layer, level); |
|||
} |
|||
|
|||
bool SurfaceParams::IsDimensionValid(const SurfaceParams& view_params, u32 level) const { |
|||
return view_params.width == GetMipWidth(level) && view_params.height == GetMipHeight(level); |
|||
} |
|||
|
|||
bool SurfaceParams::IsDepthValid(const SurfaceParams& view_params, u32 level) const { |
|||
if (view_params.target != SurfaceTarget::Texture3D) { |
|||
return true; |
|||
} |
|||
return view_params.depth == GetMipDepth(level); |
|||
} |
|||
|
|||
// The view's layer and level ranges, starting at (layer, level), must not run
// past the end of this surface's layers and levels.
bool SurfaceParams::IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const {
    return layer + view_params.num_layers <= num_layers &&
           level + view_params.num_levels <= num_levels;
}
|||
|
|||
// Hashes the raw object representation with CityHash64.
// NOTE(review): this also hashes any padding bytes between members; logically
// equal objects with differing (uninitialized) padding would hash differently.
// Confirm all creation paths fully initialize the object.
std::size_t HasheableSurfaceParams::Hash() const {
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
|||
|
|||
// Member-wise equality over every hashed field; must stay in sync with the
// member list of HasheableSurfaceParams.
bool HasheableSurfaceParams::operator==(const HasheableSurfaceParams& rhs) const {
    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
                    height, depth, pitch, unaligned_height, num_levels, pixel_format,
                    component_type, type, target) ==
           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
                    rhs.unaligned_height, rhs.num_levels, rhs.pixel_format, rhs.component_type,
                    rhs.type, rhs.target);
}
|||
|
|||
// Hashes the raw object representation with CityHash64 (four u32 members).
std::size_t ViewKey::Hash() const {
    return static_cast<std::size_t>(
        Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this)));
}
|||
|
|||
// Member-wise equality over all four key fields.
bool ViewKey::operator==(const ViewKey& rhs) const {
    return std::tie(base_layer, num_layers, base_level, num_levels) ==
           std::tie(rhs.base_layer, rhs.num_layers, rhs.base_level, rhs.num_levels);
}
|||
|
|||
} // namespace VideoCommon
|
|||
@ -1,586 +0,0 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <list> |
|||
#include <memory> |
|||
#include <set> |
|||
#include <tuple> |
|||
#include <type_traits> |
|||
#include <unordered_map> |
|||
|
|||
#include <boost/icl/interval_map.hpp> |
|||
#include <boost/range/iterator_range.hpp> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "core/memory.h" |
|||
#include "video_core/engines/fermi_2d.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/gpu.h" |
|||
#include "video_core/rasterizer_interface.h" |
|||
#include "video_core/surface.h" |
|||
|
|||
namespace Core { |
|||
class System; |
|||
} |
|||
|
|||
namespace Tegra::Texture { |
|||
struct FullTextureInfo; |
|||
} |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
/// Raw surface parameters that participate in hashing and equality. Instances
/// are byte-hashed (see Hash()), so every member must be initialized by the
/// code that fills a derived SurfaceParams.
class HasheableSurfaceParams {
public:
    /// Hashes the raw object representation.
    std::size_t Hash() const;

    /// Member-wise equality over every field below.
    bool operator==(const HasheableSurfaceParams& rhs) const;

protected:
    // Avoid creation outside of a managed environment.
    HasheableSurfaceParams() = default;

    bool is_tiled;        // Block-linear (tiled) vs. pitch-linear layout
    u32 block_width;      // Tiling block dimensions; meaningful when tiled
    u32 block_height;
    u32 block_depth;
    u32 tile_width_spacing;
    u32 width;            // Surface extents in pixels
    u32 height;
    u32 depth;            // Depth or layer count, depending on target
    u32 pitch;            // Row pitch for pitch-linear surfaces
    u32 unaligned_height; // Height before compression alignment
    u32 num_levels;       // Number of mipmap levels
    VideoCore::Surface::PixelFormat pixel_format;
    VideoCore::Surface::ComponentType component_type;
    VideoCore::Surface::SurfaceType type;
    VideoCore::Surface::SurfaceTarget target;
};
|||
|
|||
/// Full surface description: the hashed parameters plus values derived from
/// them (sizes, layer count) and the queries the texture cache performs on
/// them. Construct only through the static Create* factories.
class SurfaceParams final : public HasheableSurfaceParams {
public:
    /// Creates SurfaceCachedParams from a texture configuration.
    static SurfaceParams CreateForTexture(Core::System& system,
                                          const Tegra::Texture::FullTextureInfo& config);

    /// Creates SurfaceCachedParams for a depth buffer configuration.
    static SurfaceParams CreateForDepthBuffer(
        Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
        u32 block_width, u32 block_height, u32 block_depth,
        Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type);

    /// Creates SurfaceCachedParams from a framebuffer configuration.
    static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index);

    /// Creates SurfaceCachedParams from a Fermi2D surface configuration.
    static SurfaceParams CreateForFermiCopySurface(
        const Tegra::Engines::Fermi2D::Regs::Surface& config);

    // Trivial accessors over the hashed parameters.

    bool IsTiled() const {
        return is_tiled;
    }

    u32 GetBlockWidth() const {
        return block_width;
    }

    u32 GetTileWidthSpacing() const {
        return tile_width_spacing;
    }

    u32 GetWidth() const {
        return width;
    }

    u32 GetHeight() const {
        return height;
    }

    u32 GetDepth() const {
        return depth;
    }

    u32 GetPitch() const {
        return pitch;
    }

    u32 GetNumLevels() const {
        return num_levels;
    }

    VideoCore::Surface::PixelFormat GetPixelFormat() const {
        return pixel_format;
    }

    VideoCore::Surface::ComponentType GetComponentType() const {
        return component_type;
    }

    VideoCore::Surface::SurfaceTarget GetTarget() const {
        return target;
    }

    VideoCore::Surface::SurfaceType GetType() const {
        return type;
    }

    // Accessors over values derived in CalculateCachedValues().

    std::size_t GetGuestSizeInBytes() const {
        return guest_size_in_bytes;
    }

    std::size_t GetHostSizeInBytes() const {
        return host_size_in_bytes;
    }

    u32 GetNumLayers() const {
        return num_layers;
    }

    /// Returns the width of a given mipmap level.
    u32 GetMipWidth(u32 level) const;

    /// Returns the height of a given mipmap level.
    u32 GetMipHeight(u32 level) const;

    /// Returns the depth of a given mipmap level.
    u32 GetMipDepth(u32 level) const;

    /// Returns true if these parameters are from a layered surface.
    bool IsLayered() const;

    /// Returns the block height of a given mipmap level.
    u32 GetMipBlockHeight(u32 level) const;

    /// Returns the block depth of a given mipmap level.
    u32 GetMipBlockDepth(u32 level) const;

    /// Returns the offset in bytes in guest memory of a given mipmap level.
    std::size_t GetGuestMipmapLevelOffset(u32 level) const;

    /// Returns the offset in bytes in host memory (linear) of a given mipmap level.
    std::size_t GetHostMipmapLevelOffset(u32 level) const;

    /// Returns the size of a layer in bytes in guest memory.
    std::size_t GetGuestLayerSize() const;

    /// Returns the size of a layer in bytes in host memory for a given mipmap level.
    std::size_t GetHostLayerSize(u32 level) const;

    /// Returns true if another surface can be familiar with this. This is a loosely defined term
    /// that reflects the possibility of these two surface parameters potentially being part of a
    /// bigger superset.
    bool IsFamiliar(const SurfaceParams& view_params) const;

    /// Returns true if the pixel format is a depth and/or stencil format.
    bool IsPixelFormatZeta() const;

    /// Creates a map that redirects an address difference to a layer and mipmap level.
    std::map<u64, std::pair<u32, u32>> CreateViewOffsetMap() const;

    /// Returns true if the passed surface view parameters is equal or a valid subset of this.
    bool IsViewValid(const SurfaceParams& view_params, u32 layer, u32 level) const;

private:
    /// Calculates values that can be deduced from HasheableSurfaceParams.
    void CalculateCachedValues();

    /// Returns the size of a given mipmap level.
    std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool layer_only,
                                         bool uncompressed) const;

    /// Returns the size of all mipmap levels and aligns as needed.
    std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const;

    /// Returns true if the passed view width and height match the size of this params in a given
    /// mipmap level.
    bool IsDimensionValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view depth match the size of this params in a given mipmap level.
    bool IsDepthValid(const SurfaceParams& view_params, u32 level) const;

    /// Returns true if the passed view layers and mipmap levels are in bounds.
    bool IsInBounds(const SurfaceParams& view_params, u32 layer, u32 level) const;

    // Cached values, filled by CalculateCachedValues(); not part of the hash.
    std::size_t guest_size_in_bytes;
    std::size_t host_size_in_bytes;
    u32 num_layers;
};
|||
|
|||
/// Identifies a view inside a surface: a contiguous range of layers and
/// mipmap levels. Used as the key of the per-surface view cache.
struct ViewKey {
    std::size_t Hash() const;

    bool operator==(const ViewKey& rhs) const;

    u32 base_layer{};
    u32 num_layers{};
    u32 base_level{};
    u32 num_levels{};
};
|||
|
|||
} // namespace VideoCommon |
|||
|
|||
namespace std {

// Allow SurfaceParams and ViewKey to be used as keys of unordered containers
// by delegating to their member Hash().
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

template <>
struct hash<VideoCommon::ViewKey> {
    std::size_t operator()(const VideoCommon::ViewKey& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
|||
|
|||
namespace VideoCommon { |
|||
|
|||
/// Base class for backend surfaces. Owns the surface parameters, the mapping
/// from guest offsets to (layer, level) pairs, and a cache of created views.
/// Tracks registration state (whether the surface is currently mapped in the
/// texture cache) and a modified flag.
template <typename TView, typename TExecutionContext>
class SurfaceBase {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);

public:
    /// Loads guest data into the staging buffer (implemented by the backend).
    virtual void LoadBuffer() = 0;

    /// Writes the surface contents back to guest memory.
    virtual TExecutionContext FlushBuffer(TExecutionContext exctx) = 0;

    /// Uploads the staging buffer to the host texture.
    virtual TExecutionContext UploadTexture(TExecutionContext exctx) = 0;

    /// Returns a view at view_addr matching view_params, or nullptr when no
    /// compatible view exists inside this surface.
    TView* TryGetView(VAddr view_addr, const SurfaceParams& view_params) {
        if (view_addr < cpu_addr || !params.IsFamiliar(view_params)) {
            // It can't be a view if it's in a prior address.
            return {};
        }

        const auto relative_offset{static_cast<u64>(view_addr - cpu_addr)};
        const auto it{view_offset_map.find(relative_offset)};
        if (it == view_offset_map.end()) {
            // Couldn't find an aligned view.
            return {};
        }
        const auto [layer, level] = it->second;

        if (!params.IsViewValid(view_params, layer, level)) {
            return {};
        }

        return GetView(layer, view_params.GetNumLayers(), level, view_params.GetNumLevels());
    }

    // The address accessors below require the surface to be registered.

    VAddr GetCpuAddr() const {
        ASSERT(is_registered);
        return cpu_addr;
    }

    u8* GetHostPtr() const {
        ASSERT(is_registered);
        return host_ptr;
    }

    CacheAddr GetCacheAddr() const {
        ASSERT(is_registered);
        return cache_addr;
    }

    std::size_t GetSizeInBytes() const {
        return params.GetGuestSizeInBytes();
    }

    void MarkAsModified(bool is_modified_) {
        is_modified = is_modified_;
    }

    const SurfaceParams& GetSurfaceParams() const {
        return params;
    }

    /// Like TryGetView, but a missing view is considered a logic error.
    TView* GetView(VAddr view_addr, const SurfaceParams& view_params) {
        TView* view{TryGetView(view_addr, view_params)};
        ASSERT(view != nullptr);
        return view;
    }

    /// Registers the surface at the given guest/host addresses.
    void Register(VAddr cpu_addr_, u8* host_ptr_) {
        ASSERT(!is_registered);
        is_registered = true;
        cpu_addr = cpu_addr_;
        host_ptr = host_ptr_;
        cache_addr = ToCacheAddr(host_ptr_);
    }

    /// Convenience overload that resolves the host pointer from guest memory.
    void Register(VAddr cpu_addr_) {
        Register(cpu_addr_, Memory::GetPointer(cpu_addr_));
    }

    void Unregister() {
        ASSERT(is_registered);
        is_registered = false;
    }

    bool IsRegistered() const {
        return is_registered;
    }

protected:
    explicit SurfaceBase(const SurfaceParams& params)
        : params{params}, view_offset_map{params.CreateViewOffsetMap()} {}

    ~SurfaceBase() = default;

    /// Creates a backend view for the given key (implemented by the backend).
    virtual std::unique_ptr<TView> CreateView(const ViewKey& view_key) = 0;

    bool IsModified() const {
        return is_modified;
    }

    const SurfaceParams params;

private:
    /// Returns the cached view for the key, creating it on a cache miss.
    TView* GetView(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels) {
        const ViewKey key{base_layer, num_layers, base_level, num_levels};
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view.get();
    }

    // Guest offset -> (layer, level), precomputed from params.
    const std::map<u64, std::pair<u32, u32>> view_offset_map;

    VAddr cpu_addr{};
    u8* host_ptr{};
    CacheAddr cache_addr{};
    bool is_modified{};
    bool is_registered{};
    std::unordered_map<ViewKey, std::unique_ptr<TView>> views;
};
|||
|
|||
/// Backend-agnostic texture cache. TSurface/TView are the backend's surface and
/// view types; TExecutionContext carries backend execution state through calls
/// that may modify it (the updated context is returned alongside each view).
template <typename TSurface, typename TView, typename TExecutionContext>
class TextureCache {
    static_assert(std::is_trivially_copyable_v<TExecutionContext>);
    using ResultType = std::tuple<TView*, TExecutionContext>;
    // Maps cache address intervals to the set of surfaces living in them.
    using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface*>>;
    using IntervalType = typename IntervalMap::interval_type;

public:
    /// Unregisters every surface intersecting [addr, addr + size).
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
        for (TSurface* surface : GetSurfacesInRegion(addr, size)) {
            if (!surface->IsRegistered()) {
                // Skip duplicates
                continue;
            }
            Unregister(surface);
        }
    }

    /// Returns a view for the texture described by a TIC entry, or a null view
    /// when its GPU address cannot be resolved to a CPU address.
    ResultType GetTextureSurface(TExecutionContext exctx,
                                 const Tegra::Texture::FullTextureInfo& config) {
        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(config.tic.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }
        const auto params{SurfaceParams::CreateForTexture(system, config)};
        return GetSurfaceView(exctx, *cpu_addr, params, true);
    }

    /// Returns a view for the currently bound depth (zeta) buffer, or a null
    /// view when zeta is disabled or unmapped.
    ResultType GetDepthBufferSurface(TExecutionContext exctx, bool preserve_contents) {
        const auto& regs{system.GPU().Maxwell3D().regs};
        if (!regs.zeta.Address() || !regs.zeta_enable) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto cpu_addr{memory_manager.GpuToCpuAddress(regs.zeta.Address())};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        const auto depth_params{SurfaceParams::CreateForDepthBuffer(
            system, regs.zeta_width, regs.zeta_height, regs.zeta.format,
            regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height,
            regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)};
        return GetSurfaceView(exctx, *cpu_addr, depth_params, preserve_contents);
    }

    /// Returns a view for render target `index`, or a null view when the
    /// target is disabled, has no format, or is unmapped.
    ResultType GetColorBufferSurface(TExecutionContext exctx, std::size_t index,
                                     bool preserve_contents) {
        ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);

        const auto& regs{system.GPU().Maxwell3D().regs};
        if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
            regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
            return {{}, exctx};
        }

        auto& memory_manager{system.GPU().MemoryManager()};
        const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
        // The base layer offset is applied here so the surface starts at the
        // first addressed layer.
        const auto cpu_addr{memory_manager.GpuToCpuAddress(
            config.Address() + config.base_layer * config.layer_stride * sizeof(u32))};
        if (!cpu_addr) {
            return {{}, exctx};
        }

        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
                              preserve_contents);
    }

    /// Returns a view for a Fermi 2D copy-engine surface. The address is
    /// expected to be mapped (asserted).
    ResultType GetFermiSurface(TExecutionContext exctx,
                               const Tegra::Engines::Fermi2D::Regs::Surface& config) {
        const auto cpu_addr{system.GPU().MemoryManager().GpuToCpuAddress(config.Address())};
        ASSERT(cpu_addr);
        return GetSurfaceView(exctx, *cpu_addr, SurfaceParams::CreateForFermiCopySurface(config),
                              true);
    }

    /// Finds a registered surface containing host_ptr, or nullptr.
    TSurface* TryFindFramebufferSurface(const u8* host_ptr) const {
        const auto it{registered_surfaces.find(ToCacheAddr(host_ptr))};
        return it != registered_surfaces.end() ? *it->second.begin() : nullptr;
    }

protected:
    TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
        : system{system}, rasterizer{rasterizer} {}

    ~TextureCache() = default;

    /// Backend hook: tries to build a view by recycling/copying `overlaps`
    /// without a full guest reload. Returns a null view on failure.
    virtual ResultType TryFastGetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                                             const SurfaceParams& params, bool preserve_contents,
                                             const std::vector<TSurface*>& overlaps) = 0;

    /// Backend hook: allocates a fresh surface for the given parameters.
    virtual std::unique_ptr<TSurface> CreateSurface(const SurfaceParams& params) = 0;

    /// Adds a surface to the interval map and bumps rasterizer page counts.
    void Register(TSurface* surface, VAddr cpu_addr, u8* host_ptr) {
        surface->Register(cpu_addr, host_ptr);
        registered_surfaces.add({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), 1);
    }

    /// Inverse of Register; the surface stays alive in the reserve.
    void Unregister(TSurface* surface) {
        registered_surfaces.subtract({GetSurfaceInterval(surface), {surface}});
        rasterizer.UpdatePagesCachedCount(surface->GetCpuAddr(), surface->GetSizeInBytes(), -1);
        surface->Unregister();
    }

    /// Returns a surface for `params`, reusing a reserved (currently
    /// unregistered) one when available to avoid allocation churn.
    TSurface* GetUncachedSurface(const SurfaceParams& params) {
        if (TSurface* surface = TryGetReservedSurface(params); surface)
            return surface;
        // No reserved surface available, create a new one and reserve it
        auto new_surface{CreateSurface(params)};
        TSurface* surface{new_surface.get()};
        ReserveSurface(params, std::move(new_surface));
        return surface;
    }

    Core::System& system;

private:
    /// Core lookup: resolves overlapping surfaces, trying in order an exact
    /// sub-view of a single overlap, the backend fast path, and finally a full
    /// reload from guest memory.
    ResultType GetSurfaceView(TExecutionContext exctx, VAddr cpu_addr, const SurfaceParams& params,
                              bool preserve_contents) {
        const auto host_ptr{Memory::GetPointer(cpu_addr)};
        const auto cache_addr{ToCacheAddr(host_ptr)};
        const auto overlaps{GetSurfacesInRegion(cache_addr, params.GetGuestSizeInBytes())};
        if (overlaps.empty()) {
            return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
        }

        if (overlaps.size() == 1) {
            if (TView* view = overlaps[0]->TryGetView(cpu_addr, params); view)
                return {view, exctx};
        }

        TView* fast_view;
        std::tie(fast_view, exctx) =
            TryFastGetSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents, overlaps);

        for (TSurface* surface : overlaps) {
            if (!fast_view) {
                // Flush even when we don't care about the contents, to preserve memory not written
                // by the new surface.
                exctx = surface->FlushBuffer(exctx);
            }
            Unregister(surface);
        }

        if (fast_view) {
            return {fast_view, exctx};
        }

        return LoadSurfaceView(exctx, cpu_addr, host_ptr, params, preserve_contents);
    }

    /// Creates and registers a surface, optionally uploading guest data.
    ResultType LoadSurfaceView(TExecutionContext exctx, VAddr cpu_addr, u8* host_ptr,
                               const SurfaceParams& params, bool preserve_contents) {
        TSurface* new_surface{GetUncachedSurface(params)};
        Register(new_surface, cpu_addr, host_ptr);
        if (preserve_contents) {
            exctx = LoadSurface(exctx, new_surface);
        }
        return {new_surface->GetView(cpu_addr, params), exctx};
    }

    /// Reads guest memory into the surface, uploads it and clears the dirty flag.
    TExecutionContext LoadSurface(TExecutionContext exctx, TSurface* surface) {
        surface->LoadBuffer();
        exctx = surface->UploadTexture(exctx);
        surface->MarkAsModified(false);
        return exctx;
    }

    /// Collects registered surfaces intersecting [cache_addr, +size). A surface
    /// spanning several map sub-intervals can appear more than once (callers
    /// such as InvalidateRegion skip the duplicates).
    std::vector<TSurface*> GetSurfacesInRegion(CacheAddr cache_addr, std::size_t size) const {
        if (size == 0) {
            return {};
        }
        const IntervalType interval{cache_addr, cache_addr + size};

        std::vector<TSurface*> surfaces;
        for (auto& pair : boost::make_iterator_range(registered_surfaces.equal_range(interval))) {
            surfaces.push_back(*pair.second.begin());
        }
        return surfaces;
    }

    /// Stores a surface in the reserve for later reuse.
    void ReserveSurface(const SurfaceParams& params, std::unique_ptr<TSurface> surface) {
        surface_reserve[params].push_back(std::move(surface));
    }

    /// Finds a reserved surface with matching params that is not currently
    /// registered, or nullptr.
    TSurface* TryGetReservedSurface(const SurfaceParams& params) {
        auto search{surface_reserve.find(params)};
        if (search == surface_reserve.end()) {
            return {};
        }
        for (auto& surface : search->second) {
            if (!surface->IsRegistered()) {
                return surface.get();
            }
        }
        return {};
    }

    /// Half-open cache-address interval occupied by the surface.
    IntervalType GetSurfaceInterval(TSurface* surface) const {
        return IntervalType::right_open(surface->GetCacheAddr(),
                                        surface->GetCacheAddr() + surface->GetSizeInBytes());
    }

    VideoCore::RasterizerInterface& rasterizer;

    IntervalMap registered_surfaces;

    /// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
    /// previously been used. This is to prevent surfaces from being constantly created and
    /// destroyed when used with different surface parameters.
    std::unordered_map<SurfaceParams, std::list<std::unique_ptr<TSurface>>> surface_reserve;
};
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,36 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include "common/common_types.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
struct CopyParams { |
|||
constexpr CopyParams(u32 source_x, u32 source_y, u32 source_z, u32 dest_x, u32 dest_y, |
|||
u32 dest_z, u32 source_level, u32 dest_level, u32 width, u32 height, |
|||
u32 depth) |
|||
: source_x{source_x}, source_y{source_y}, source_z{source_z}, dest_x{dest_x}, |
|||
dest_y{dest_y}, dest_z{dest_z}, source_level{source_level}, |
|||
dest_level{dest_level}, width{width}, height{height}, depth{depth} {} |
|||
|
|||
constexpr CopyParams(u32 width, u32 height, u32 depth, u32 level) |
|||
: source_x{}, source_y{}, source_z{}, dest_x{}, dest_y{}, dest_z{}, source_level{level}, |
|||
dest_level{level}, width{width}, height{height}, depth{depth} {} |
|||
|
|||
u32 source_x; |
|||
u32 source_y; |
|||
u32 source_z; |
|||
u32 dest_x; |
|||
u32 dest_y; |
|||
u32 dest_z; |
|||
u32 source_level; |
|||
u32 dest_level; |
|||
u32 width; |
|||
u32 height; |
|||
u32 depth; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,300 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include "common/assert.h"
|
|||
#include "common/common_types.h"
|
|||
#include "common/microprofile.h"
|
|||
#include "video_core/memory_manager.h"
|
|||
#include "video_core/texture_cache/surface_base.h"
|
|||
#include "video_core/texture_cache/surface_params.h"
|
|||
#include "video_core/textures/convert.h"
|
|||
|
|||
namespace VideoCommon { |
|||
|
|||
MICROPROFILE_DEFINE(GPU_Load_Texture, "GPU", "Texture Load", MP_RGB(128, 192, 128)); |
|||
MICROPROFILE_DEFINE(GPU_Flush_Texture, "GPU", "Texture Flush", MP_RGB(128, 192, 128)); |
|||
|
|||
using Tegra::Texture::ConvertFromGuestToHost; |
|||
using VideoCore::MortonSwizzleMode; |
|||
using VideoCore::Surface::SurfaceCompression; |
|||
|
|||
// Constructor and destructor are defaulted; defined out of line here.
StagingCache::StagingCache() = default;

StagingCache::~StagingCache() = default;
|||
|
|||
// Precomputes per-level guest sizes/offsets, the per-layer stride and the
// total guest memory footprint of the surface.
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
    : params{params}, mipmap_sizes(params.num_levels), mipmap_offsets(params.num_levels),
      gpu_addr{gpu_addr}, host_memory_size{params.GetHostSizeInBytes()} {
    std::size_t running_offset = 0;
    for (u32 mip = 0; mip < params.num_levels; ++mip) {
        mipmap_offsets[mip] = running_offset;
        mipmap_sizes[mip] = params.GetGuestMipmapSize(mip);
        running_offset += mipmap_sizes[mip];
    }
    // One layer spans all mipmap levels laid out back to back.
    layer_size = running_offset;
    if (!params.is_layered) {
        guest_memory_size = layer_size;
        return;
    }
    if (params.is_tiled) {
        // Tiled layers are padded to the block-linear alignment.
        layer_size =
            SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
    }
    guest_memory_size = layer_size * params.depth;
}
|||
|
|||
// Classifies whether `rhs` shares this surface's basic memory topology
// (bytes per pixel, tiling and buffer-ness), and if so whether the two agree
// on compression.
MatchTopologyResult SurfaceBaseImpl::MatchesTopology(const SurfaceParams& rhs) const {
    const bool same_layout = params.GetBytesPerPixel() == rhs.GetBytesPerPixel() &&
                             params.is_tiled == rhs.is_tiled &&
                             params.IsBuffer() == rhs.IsBuffer();
    if (!same_layout) {
        return MatchTopologyResult::None;
    }
    // Layouts agree; the only remaining distinction is compression.
    return params.IsCompressed() == rhs.IsCompressed() ? MatchTopologyResult::FullMatch
                                                       : MatchTopologyResult::CompressUnmatch;
}
|||
|
|||
// Compares this surface's structural layout against `rhs`.
// FullMatch: directly interchangeable. SemiMatch: same tiled configuration but
// the dimensions only line up after block alignment and pixel-format-aware
// width/height conversion.
MatchStructureResult SurfaceBaseImpl::MatchesStructure(const SurfaceParams& rhs) const {
    // Buffer surface check: only the row size in bytes matters.
    if (params.IsBuffer()) {
        const std::size_t wd1 = params.width * params.GetBytesPerPixel();
        const std::size_t wd2 = rhs.width * rhs.GetBytesPerPixel();
        if (wd1 == wd2) {
            return MatchStructureResult::FullMatch;
        }
        return MatchStructureResult::None;
    }

    // Linear surface check: dimensions and pitch must match exactly.
    if (!params.is_tiled) {
        if (std::tie(params.width, params.height, params.pitch) ==
            std::tie(rhs.width, rhs.height, rhs.pitch)) {
            return MatchStructureResult::FullMatch;
        }
        return MatchStructureResult::None;
    }

    // Tiled surface check: block configuration and level count must agree.
    if (std::tie(params.depth, params.block_width, params.block_height, params.block_depth,
                 params.tile_width_spacing, params.num_levels) ==
        std::tie(rhs.depth, rhs.block_width, rhs.block_height, rhs.block_depth,
                 rhs.tile_width_spacing, rhs.num_levels)) {
        if (std::tie(params.width, params.height) == std::tie(rhs.width, rhs.height)) {
            return MatchStructureResult::FullMatch;
        }
        // Retry with rhs dimensions converted to this surface's pixel format
        // and with widths aligned to their block sizes.
        const u32 ws = SurfaceParams::ConvertWidth(rhs.GetBlockAlignedWidth(), params.pixel_format,
                                                   rhs.pixel_format);
        const u32 hs =
            SurfaceParams::ConvertHeight(rhs.height, params.pixel_format, rhs.pixel_format);
        const u32 w1 = params.GetBlockAlignedWidth();
        if (std::tie(w1, params.height) == std::tie(ws, hs)) {
            return MatchStructureResult::SemiMatch;
        }
    }
    return MatchStructureResult::None;
}
|||
|
|||
std::optional<std::pair<u32, u32>> SurfaceBaseImpl::GetLayerMipmap( |
|||
const GPUVAddr candidate_gpu_addr) const { |
|||
if (gpu_addr == candidate_gpu_addr) { |
|||
return {{0, 0}}; |
|||
} |
|||
if (candidate_gpu_addr < gpu_addr) { |
|||
return {}; |
|||
} |
|||
const auto relative_address{static_cast<GPUVAddr>(candidate_gpu_addr - gpu_addr)}; |
|||
const auto layer{static_cast<u32>(relative_address / layer_size)}; |
|||
const GPUVAddr mipmap_address = relative_address - layer_size * layer; |
|||
const auto mipmap_it = |
|||
Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); |
|||
if (mipmap_it == mipmap_offsets.end()) { |
|||
return {}; |
|||
} |
|||
const auto level{static_cast<u32>(std::distance(mipmap_offsets.begin(), mipmap_it))}; |
|||
return std::make_pair(layer, level); |
|||
} |
|||
|
|||
// Produces one CopyParams per (layer, level) pair, clamping the copied extent
// to the intersection of this surface and `in_params`.
std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& in_params) const {
    const u32 layers{params.depth};
    const u32 mipmaps{params.num_levels};
    std::vector<CopyParams> result;
    result.reserve(static_cast<std::size_t>(layers) * static_cast<std::size_t>(mipmaps));

    for (u32 layer = 0; layer < layers; layer++) {
        for (u32 level = 0; level < mipmaps; level++) {
            const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level);
            const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level);
            // NOTE(review): `layer` is passed through CopyParams' depth slot
            // (4-arg ctor is width/height/depth/level); the consumer appears to
            // interpret depth as the target slice for layered copies — confirm.
            result.emplace_back(width, height, layer, level);
        }
    }
    return result;
}
|||
|
|||
std::vector<CopyParams> SurfaceBaseImpl::BreakDownNonLayered(const SurfaceParams& in_params) const { |
|||
const u32 mipmaps{params.num_levels}; |
|||
std::vector<CopyParams> result; |
|||
result.reserve(mipmaps); |
|||
|
|||
for (u32 level = 0; level < mipmaps; level++) { |
|||
const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); |
|||
const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); |
|||
const u32 depth{std::min(params.GetMipDepth(level), in_params.GetMipDepth(level))}; |
|||
result.emplace_back(width, height, depth, level); |
|||
} |
|||
return result; |
|||
} |
|||
|
|||
// Converts one mipmap level between guest block-linear (Morton) layout and
// host linear layout, in the direction selected by `mode`. `memory` points at
// guest-layout data, `buffer` at host-layout data.
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,
                                  u8* buffer, u32 level) {
    const u32 width{params.GetMipWidth(level)};
    const u32 height{params.GetMipHeight(level)};
    const u32 block_height{params.GetMipBlockHeight(level)};
    const u32 block_depth{params.GetMipBlockDepth(level)};

    std::size_t guest_offset{mipmap_offsets[level]};
    if (params.is_layered) {
        // Swizzle layer by layer: guest layers are layer_size apart, host
        // layers are the host layer size for this level apart.
        std::size_t host_offset{0};
        const std::size_t guest_stride = layer_size;
        const std::size_t host_stride = params.GetHostLayerSize(level);
        for (u32 layer = 0; layer < params.depth; ++layer) {
            MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth, 1,
                          params.tile_width_spacing, buffer + host_offset, memory + guest_offset);
            guest_offset += guest_stride;
            host_offset += host_stride;
        }
    } else {
        // Non-layered surfaces are swizzled with a single call covering the
        // whole level depth.
        MortonSwizzle(mode, params.pixel_format, width, block_height, height, block_depth,
                      params.GetMipDepth(level), params.tile_width_spacing, buffer,
                      memory + guest_offset);
    }
}
|||
|
|||
// Reads this surface's guest data into staging buffer 0, deswizzling tiled
// surfaces and converting pixel formats the host cannot consume directly.
void SurfaceBaseImpl::LoadBuffer(Tegra::MemoryManager& memory_manager,
                                 StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Load_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;
    is_continuous = memory_manager.IsBlockContinuous(gpu_addr, guest_memory_size);

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer (staging buffer 1) to gather the
        // scattered guest pages first.
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
        memory_manager.ReadBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {} on texture target {}",
                   params.block_width, static_cast<u32>(params.target));
        // Deswizzle each level from guest Morton order into the staging buffer.
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::MortonToLinear, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        ASSERT_MSG(params.num_levels == 1, "Linear mipmap loading is not implemented");
        const u32 bpp{params.GetBytesPerPixel()};
        const u32 block_width{params.GetDefaultBlockWidth()};
        const u32 block_height{params.GetDefaultBlockHeight()};
        const u32 width{(params.width + block_width - 1) / block_width};
        const u32 height{(params.height + block_height - 1) / block_height};
        const u32 copy_size{width * bpp};
        if (params.pitch == copy_size) {
            // Rows are tightly packed: copy the whole image in one shot.
            std::memcpy(staging_buffer.data(), host_ptr, params.GetHostSizeInBytes());
        } else {
            // Copy row by row, dropping the padding bytes of each guest pitch.
            const u8* start{host_ptr};
            u8* write_to{staging_buffer.data()};
            for (u32 h = height; h > 0; --h) {
                std::memcpy(write_to, start, copy_size);
                start += params.pitch;
                write_to += copy_size;
            }
        }
    }

    // Formats with no direct host equivalent need an extra conversion pass.
    auto compression_type = params.GetCompressionType();
    if (compression_type == SurfaceCompression::None ||
        compression_type == SurfaceCompression::Compressed)
        return;

    // Convert within the staging buffer, iterating from the last level down —
    // presumably so converted output does not clobber levels still pending
    // conversion; confirm against ConvertFromGuestToHost's in-place behavior.
    for (u32 level_up = params.num_levels; level_up > 0; --level_up) {
        const u32 level = level_up - 1;
        const std::size_t in_host_offset{params.GetHostMipmapLevelOffset(level)};
        const std::size_t out_host_offset = compression_type == SurfaceCompression::Rearranged
                                                ? in_host_offset
                                                : params.GetConvertedMipmapOffset(level);
        u8* in_buffer = staging_buffer.data() + in_host_offset;
        u8* out_buffer = staging_buffer.data() + out_host_offset;
        ConvertFromGuestToHost(in_buffer, out_buffer, params.pixel_format,
                               params.GetMipWidth(level), params.GetMipHeight(level),
                               params.GetMipDepth(level), true, true);
    }
}
|||
|
|||
// Writes the contents of staging buffer 0 back to guest memory, re-swizzling
// tiled surfaces into guest Morton order.
void SurfaceBaseImpl::FlushBuffer(Tegra::MemoryManager& memory_manager,
                                  StagingCache& staging_cache) {
    MICROPROFILE_SCOPE(GPU_Flush_Texture);
    auto& staging_buffer = staging_cache.GetBuffer(0);
    u8* host_ptr;

    // Handle continuity
    if (is_continuous) {
        // Use physical memory directly
        host_ptr = memory_manager.GetPointer(gpu_addr);
        if (!host_ptr) {
            return;
        }
    } else {
        // Use an extra temporary buffer; its contents are written back to
        // guest memory at the end of this function.
        auto& tmp_buffer = staging_cache.GetBuffer(1);
        tmp_buffer.resize(guest_memory_size);
        host_ptr = tmp_buffer.data();
    }

    if (params.is_tiled) {
        ASSERT_MSG(params.block_width == 0, "Block width is defined as {}", params.block_width);
        // Re-swizzle each level from the linear staging buffer into guest order.
        for (u32 level = 0; level < params.num_levels; ++level) {
            const std::size_t host_offset{params.GetHostMipmapLevelOffset(level)};
            SwizzleFunc(MortonSwizzleMode::LinearToMorton, host_ptr, params,
                        staging_buffer.data() + host_offset, level);
        }
    } else {
        // Linear flushes only support single-level 2D surfaces.
        ASSERT(params.target == SurfaceTarget::Texture2D);
        ASSERT(params.num_levels == 1);

        const u32 bpp{params.GetBytesPerPixel()};
        const u32 copy_size{params.width * bpp};
        if (params.pitch == copy_size) {
            std::memcpy(host_ptr, staging_buffer.data(), guest_memory_size);
        } else {
            // Copy row by row, honoring the destination pitch.
            u8* start{host_ptr};
            const u8* read_to{staging_buffer.data()};
            for (u32 h = params.height; h > 0; --h) {
                std::memcpy(start, read_to, copy_size);
                start += params.pitch;
                read_to += copy_size;
            }
        }
    }
    if (!is_continuous) {
        // Scattered pages: push the temporary buffer back to guest memory.
        memory_manager.WriteBlockUnsafe(gpu_addr, host_ptr, guest_memory_size);
    }
}
|||
|
|||
} // namespace VideoCommon
|
|||
@ -0,0 +1,317 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm> |
|||
#include <unordered_map> |
|||
#include <vector> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/binary_find.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/gpu.h" |
|||
#include "video_core/morton.h" |
|||
#include "video_core/texture_cache/copy_params.h" |
|||
#include "video_core/texture_cache/surface_params.h" |
|||
#include "video_core/texture_cache/surface_view.h" |
|||
|
|||
namespace Tegra { |
|||
class MemoryManager; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using VideoCore::MortonSwizzleMode; |
|||
using VideoCore::Surface::SurfaceTarget; |
|||
|
|||
/// Result of comparing two surfaces' structural layout (see MatchesStructure).
enum class MatchStructureResult : u32 {
    FullMatch = 0, ///< Identical structure; surfaces are directly interchangeable.
    SemiMatch = 1, ///< Compatible only after block-aligned width/height conversion.
    None = 2,      ///< Structures do not match.
};

/// Result of comparing two surfaces' memory topology (see MatchesTopology).
enum class MatchTopologyResult : u32 {
    FullMatch = 0,       ///< Same bpp/tiling/buffer kind and compression class.
    CompressUnmatch = 1, ///< Same layout but compressed vs. uncompressed differs.
    None = 2,            ///< Different basic layout.
};
|||
|
|||
class StagingCache { |
|||
public: |
|||
explicit StagingCache(); |
|||
~StagingCache(); |
|||
|
|||
std::vector<u8>& GetBuffer(std::size_t index) { |
|||
return staging_buffer[index]; |
|||
} |
|||
|
|||
const std::vector<u8>& GetBuffer(std::size_t index) const { |
|||
return staging_buffer[index]; |
|||
} |
|||
|
|||
void SetSize(std::size_t size) { |
|||
staging_buffer.resize(size); |
|||
} |
|||
|
|||
private: |
|||
std::vector<std::vector<u8>> staging_buffer; |
|||
}; |
|||
|
|||
class SurfaceBaseImpl { |
|||
public: |
|||
void LoadBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); |
|||
|
|||
void FlushBuffer(Tegra::MemoryManager& memory_manager, StagingCache& staging_cache); |
|||
|
|||
GPUVAddr GetGpuAddr() const { |
|||
return gpu_addr; |
|||
} |
|||
|
|||
bool Overlaps(const CacheAddr start, const CacheAddr end) const { |
|||
return (cache_addr < end) && (cache_addr_end > start); |
|||
} |
|||
|
|||
bool IsInside(const GPUVAddr other_start, const GPUVAddr other_end) { |
|||
const GPUVAddr gpu_addr_end = gpu_addr + guest_memory_size; |
|||
return (gpu_addr <= other_start && other_end <= gpu_addr_end); |
|||
} |
|||
|
|||
// Use only when recycling a surface |
|||
void SetGpuAddr(const GPUVAddr new_addr) { |
|||
gpu_addr = new_addr; |
|||
} |
|||
|
|||
VAddr GetCpuAddr() const { |
|||
return cpu_addr; |
|||
} |
|||
|
|||
void SetCpuAddr(const VAddr new_addr) { |
|||
cpu_addr = new_addr; |
|||
} |
|||
|
|||
CacheAddr GetCacheAddr() const { |
|||
return cache_addr; |
|||
} |
|||
|
|||
CacheAddr GetCacheAddrEnd() const { |
|||
return cache_addr_end; |
|||
} |
|||
|
|||
void SetCacheAddr(const CacheAddr new_addr) { |
|||
cache_addr = new_addr; |
|||
cache_addr_end = new_addr + guest_memory_size; |
|||
} |
|||
|
|||
const SurfaceParams& GetSurfaceParams() const { |
|||
return params; |
|||
} |
|||
|
|||
std::size_t GetSizeInBytes() const { |
|||
return guest_memory_size; |
|||
} |
|||
|
|||
std::size_t GetHostSizeInBytes() const { |
|||
return host_memory_size; |
|||
} |
|||
|
|||
std::size_t GetMipmapSize(const u32 level) const { |
|||
return mipmap_sizes[level]; |
|||
} |
|||
|
|||
void MarkAsContinuous(const bool is_continuous) { |
|||
this->is_continuous = is_continuous; |
|||
} |
|||
|
|||
bool IsContinuous() const { |
|||
return is_continuous; |
|||
} |
|||
|
|||
bool IsLinear() const { |
|||
return !params.is_tiled; |
|||
} |
|||
|
|||
bool MatchFormat(VideoCore::Surface::PixelFormat pixel_format) const { |
|||
return params.pixel_format == pixel_format; |
|||
} |
|||
|
|||
VideoCore::Surface::PixelFormat GetFormat() const { |
|||
return params.pixel_format; |
|||
} |
|||
|
|||
bool MatchTarget(VideoCore::Surface::SurfaceTarget target) const { |
|||
return params.target == target; |
|||
} |
|||
|
|||
MatchTopologyResult MatchesTopology(const SurfaceParams& rhs) const; |
|||
|
|||
MatchStructureResult MatchesStructure(const SurfaceParams& rhs) const; |
|||
|
|||
bool MatchesSubTexture(const SurfaceParams& rhs, const GPUVAddr other_gpu_addr) const { |
|||
return std::tie(gpu_addr, params.target, params.num_levels) == |
|||
std::tie(other_gpu_addr, rhs.target, rhs.num_levels) && |
|||
params.target == SurfaceTarget::Texture2D && params.num_levels == 1; |
|||
} |
|||
|
|||
std::optional<std::pair<u32, u32>> GetLayerMipmap(const GPUVAddr candidate_gpu_addr) const; |
|||
|
|||
std::vector<CopyParams> BreakDown(const SurfaceParams& in_params) const { |
|||
return params.is_layered ? BreakDownLayered(in_params) : BreakDownNonLayered(in_params); |
|||
} |
|||
|
|||
protected: |
|||
explicit SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params); |
|||
~SurfaceBaseImpl() = default; |
|||
|
|||
virtual void DecorateSurfaceName() = 0; |
|||
|
|||
const SurfaceParams params; |
|||
std::size_t layer_size; |
|||
std::size_t guest_memory_size; |
|||
const std::size_t host_memory_size; |
|||
GPUVAddr gpu_addr{}; |
|||
CacheAddr cache_addr{}; |
|||
CacheAddr cache_addr_end{}; |
|||
VAddr cpu_addr{}; |
|||
bool is_continuous{}; |
|||
|
|||
std::vector<std::size_t> mipmap_sizes; |
|||
std::vector<std::size_t> mipmap_offsets; |
|||
|
|||
private: |
|||
void SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, u8* buffer, |
|||
u32 level); |
|||
|
|||
std::vector<CopyParams> BreakDownLayered(const SurfaceParams& in_params) const; |
|||
|
|||
std::vector<CopyParams> BreakDownNonLayered(const SurfaceParams& in_params) const; |
|||
}; |
|||
|
|||
/// Adds view management and dirty-state tracking on top of SurfaceBaseImpl.
/// TView is the backend's view type; views are cached per ViewParams.
template <typename TView>
class SurfaceBase : public SurfaceBaseImpl {
public:
    /// Uploads the staging buffer contents into the host texture.
    virtual void UploadTexture(const std::vector<u8>& staging_buffer) = 0;

    /// Downloads the host texture into the staging buffer.
    virtual void DownloadTexture(std::vector<u8>& staging_buffer) = 0;

    /// Updates the dirty flag; render targets always remain flagged modified.
    void MarkAsModified(const bool is_modified_, const u64 tick) {
        is_modified = is_modified_ || is_target;
        modification_tick = tick;
    }

    void MarkAsRenderTarget(const bool is_target) {
        this->is_target = is_target;
    }

    void MarkAsPicked(const bool is_picked) {
        this->is_picked = is_picked;
    }

    bool IsModified() const {
        return is_modified;
    }

    bool IsProtected() const {
        // Only 3D Slices are to be protected
        return is_target && params.block_depth > 0;
    }

    bool IsRenderTarget() const {
        return is_target;
    }

    bool IsRegistered() const {
        return is_registered;
    }

    bool IsPicked() const {
        return is_picked;
    }

    void MarkAsRegistered(bool is_reg) {
        is_registered = is_reg;
    }

    u64 GetModificationTick() const {
        return modification_tick;
    }

    /// Returns a view covering the whole surface, collapsing to one layer when
    /// the requested overview target is not layered.
    TView EmplaceOverview(const SurfaceParams& overview_params) {
        const u32 num_layers{(params.is_layered && !overview_params.is_layered) ? 1 : params.depth};
        return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
    }

    /// Builds a view for a candidate whose size does not match one mipmap
    /// exactly: either several level-0 layers or several levels of one layer.
    /// Returns nullopt when the end address is not a valid boundary.
    std::optional<TView> EmplaceIrregularView(const SurfaceParams& view_params,
                                              const GPUVAddr view_addr,
                                              const std::size_t candidate_size, const u32 mipmap,
                                              const u32 layer) {
        const auto layer_mipmap{GetLayerMipmap(view_addr + candidate_size)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 end_layer{layer_mipmap->first};
        const u32 end_mipmap{layer_mipmap->second};
        if (layer != end_layer) {
            // Multi-layer views are only supported for level 0.
            if (mipmap == 0 && end_mipmap == 0) {
                return GetView(ViewParams(view_params.target, layer, end_layer - layer + 1, 0, 1));
            }
            return {};
        } else {
            return GetView(
                ViewParams(view_params.target, layer, 1, mipmap, end_mipmap - mipmap + 1));
        }
    }

    /// Tries to express [view_addr, view_addr + candidate_size) as a view of
    /// this surface. 3D targets and single-level non-layered surfaces are not
    /// handled here.
    std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
                                     const std::size_t candidate_size) {
        if (params.target == SurfaceTarget::Texture3D ||
            (params.num_levels == 1 && !params.is_layered) ||
            view_params.target == SurfaceTarget::Texture3D) {
            return {};
        }
        const auto layer_mipmap{GetLayerMipmap(view_addr)};
        if (!layer_mipmap) {
            return {};
        }
        const u32 layer{layer_mipmap->first};
        const u32 mipmap{layer_mipmap->second};
        if (GetMipmapSize(mipmap) != candidate_size) {
            // Size does not cover exactly one level; try a multi-layer/level view.
            return EmplaceIrregularView(view_params, view_addr, candidate_size, mipmap, layer);
        }
        return GetView(ViewParams(view_params.target, layer, 1, mipmap, 1));
    }

    TView GetMainView() const {
        return main_view;
    }

protected:
    explicit SurfaceBase(const GPUVAddr gpu_addr, const SurfaceParams& params)
        : SurfaceBaseImpl(gpu_addr, params) {}

    ~SurfaceBase() = default;

    /// Backend hook: creates the concrete view object for the given parameters.
    virtual TView CreateView(const ViewParams& view_key) = 0;

    TView main_view;
    std::unordered_map<ViewParams, TView> views;

private:
    /// Returns the cached view for `key`, creating it on first use.
    TView GetView(const ViewParams& key) {
        const auto [entry, is_cache_miss] = views.try_emplace(key);
        auto& view{entry->second};
        if (is_cache_miss) {
            view = CreateView(key);
        }
        return view;
    }

    bool is_modified{};
    bool is_target{};
    bool is_registered{};
    bool is_picked{};
    u64 modification_tick{};
};
|||
|
|||
} // namespace VideoCommon |
|||
@ -0,0 +1,334 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <map>
|
|||
|
|||
#include "common/alignment.h"
|
|||
#include "common/bit_util.h"
|
|||
#include "core/core.h"
|
|||
#include "video_core/engines/shader_bytecode.h"
|
|||
#include "video_core/surface.h"
|
|||
#include "video_core/texture_cache/surface_params.h"
|
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using VideoCore::Surface::ComponentTypeFromDepthFormat; |
|||
using VideoCore::Surface::ComponentTypeFromRenderTarget; |
|||
using VideoCore::Surface::ComponentTypeFromTexture; |
|||
using VideoCore::Surface::PixelFormat; |
|||
using VideoCore::Surface::PixelFormatFromDepthFormat; |
|||
using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
|||
using VideoCore::Surface::PixelFormatFromTextureFormat; |
|||
using VideoCore::Surface::SurfaceTarget; |
|||
using VideoCore::Surface::SurfaceTargetFromTextureType; |
|||
using VideoCore::Surface::SurfaceType; |
|||
|
|||
// Translates a shader texture type plus its array flag into the matching
// surface target.
SurfaceTarget TextureType2SurfaceTarget(Tegra::Shader::TextureType type, bool is_array) {
    switch (type) {
    case Tegra::Shader::TextureType::Texture1D:
        return is_array ? SurfaceTarget::Texture1DArray : SurfaceTarget::Texture1D;
    case Tegra::Shader::TextureType::Texture2D:
        return is_array ? SurfaceTarget::Texture2DArray : SurfaceTarget::Texture2D;
    case Tegra::Shader::TextureType::Texture3D:
        // There is no 3D array target.
        ASSERT(!is_array);
        return SurfaceTarget::Texture3D;
    case Tegra::Shader::TextureType::TextureCube:
        return is_array ? SurfaceTarget::TextureCubeArray : SurfaceTarget::TextureCubemap;
    default:
        UNREACHABLE();
        return SurfaceTarget::Texture2D;
    }
}
|||
|
|||
namespace { |
|||
constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) { |
|||
return uncompressed ? mip_size : std::max(1U, (mip_size + tile - 1) / tile); |
|||
} |
|||
} // Anonymous namespace
|
|||
|
|||
SurfaceParams SurfaceParams::CreateForTexture(Core::System& system,
                                              const Tegra::Texture::FullTextureInfo& config,
                                              const VideoCommon::Shader::Sampler& entry) {
    SurfaceParams params;
    params.is_tiled = config.tic.IsTiled();
    params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
    // Block dimensions only apply to tiled surfaces.
    params.block_width = params.is_tiled ? config.tic.BlockWidth() : 0;
    params.block_height = params.is_tiled ? config.tic.BlockHeight() : 0;
    params.block_depth = params.is_tiled ? config.tic.BlockDepth() : 0;
    params.tile_width_spacing = params.is_tiled ? (1 << config.tic.tile_width_spacing.Value()) : 1;
    params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
                                                       params.srgb_conversion);
    params.type = GetFormatType(params.pixel_format);
    // Shadow samplers require a depth format; remap color formats of the
    // matching bit width to their depth equivalents.
    if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
        switch (params.pixel_format) {
        case PixelFormat::R16U:
        case PixelFormat::R16F:
            params.pixel_format = PixelFormat::Z16;
            break;
        case PixelFormat::R32F:
            params.pixel_format = PixelFormat::Z32F;
            break;
        default:
            UNIMPLEMENTED_MSG("Unimplemented shadow convert format: {}",
                              static_cast<u32>(params.pixel_format));
        }
        params.type = GetFormatType(params.pixel_format);
    }
    params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
    // TODO: on 1DBuffer we should use the tic info.
    if (!config.tic.IsBuffer()) {
        params.target = TextureType2SurfaceTarget(entry.GetType(), entry.IsArray());
        params.width = config.tic.Width();
        params.height = config.tic.Height();
        params.depth = config.tic.Depth();
        params.pitch = params.is_tiled ? 0 : config.tic.Pitch();
        if (params.target == SurfaceTarget::TextureCubemap ||
            params.target == SurfaceTarget::TextureCubeArray) {
            // Cube maps store their six faces as depth layers.
            params.depth *= 6;
        }
        params.num_levels = config.tic.max_mip_level + 1;
        params.emulated_levels = std::min(params.num_levels, params.MaxPossibleMipmap());
        params.is_layered = params.IsLayered();
    } else {
        // Texture buffers are flat, single-level 1D views over memory.
        params.target = SurfaceTarget::TextureBuffer;
        params.width = config.tic.Width();
        params.pitch = params.width * params.GetBytesPerPixel();
        params.height = 1;
        params.depth = 1;
        params.num_levels = 1;
        params.emulated_levels = 1;
        params.is_layered = false;
    }
    return params;
}
|||
|
|||
SurfaceParams SurfaceParams::CreateForDepthBuffer(
    Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format,
    u32 block_width, u32 block_height, u32 block_depth,
    Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type) {
    SurfaceParams params;
    params.is_tiled = type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = false;
    // Clamp the block dimension shifts to a maximum of 5.
    params.block_width = std::min(block_width, 5U);
    params.block_height = std::min(block_height, 5U);
    params.block_depth = std::min(block_depth, 5U);
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromDepthFormat(format);
    params.component_type = ComponentTypeFromDepthFormat(format);
    params.type = GetFormatType(params.pixel_format);
    // Depth buffers are always single-level, non-layered 2D surfaces.
    params.target = SurfaceTarget::Texture2D;
    params.width = zeta_width;
    params.height = zeta_height;
    params.depth = 1;
    params.pitch = 0;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
|||
|
|||
SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::size_t index) {
    // Render target registers for the requested attachment slot.
    const auto& config{system.GPU().Maxwell3D().regs.rt[index]};
    SurfaceParams params;
    params.is_tiled =
        config.memory_layout.type == Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout::BlockLinear;
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    params.block_width = config.memory_layout.block_width;
    params.block_height = config.memory_layout.block_height;
    params.block_depth = config.memory_layout.block_depth;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    if (!params.is_tiled) {
        // Linear render targets program the byte pitch in the width register,
        // so derive the pixel width from it.
        const u32 bpp = GetFormatBpp(params.pixel_format) / CHAR_BIT;
        params.pitch = config.width;
        params.width = params.pitch / bpp;
    } else {
        params.pitch = 0;
        params.width = config.width;
    }
    params.height = config.height;
    params.depth = 1;
    params.target = SurfaceTarget::Texture2D;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = false;
    return params;
}
|||
|
|||
SurfaceParams SurfaceParams::CreateForFermiCopySurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
    SurfaceParams params{};
    params.is_tiled = !config.linear;
    params.srgb_conversion = config.format == Tegra::RenderTargetFormat::BGRA8_SRGB ||
                             config.format == Tegra::RenderTargetFormat::RGBA8_SRGB;
    // Block dimensions only apply to tiled surfaces; clamp the shifts to 5.
    params.block_width = params.is_tiled ? std::min(config.BlockWidth(), 5U) : 0;
    params.block_height = params.is_tiled ? std::min(config.BlockHeight(), 5U) : 0;
    params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0;
    params.tile_width_spacing = 1;
    params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
    params.component_type = ComponentTypeFromRenderTarget(config.format);
    params.type = GetFormatType(params.pixel_format);
    params.width = config.width;
    params.height = config.height;
    params.pitch = config.pitch;
    // TODO(Rodrigo): Try to guess the surface target from depth and layer parameters
    params.target = SurfaceTarget::Texture2D;
    params.depth = 1;
    params.num_levels = 1;
    params.emulated_levels = 1;
    params.is_layered = params.IsLayered();
    return params;
}
|||
|
|||
bool SurfaceParams::IsLayered() const { |
|||
switch (target) { |
|||
case SurfaceTarget::Texture1DArray: |
|||
case SurfaceTarget::Texture2DArray: |
|||
case SurfaceTarget::TextureCubemap: |
|||
case SurfaceTarget::TextureCubeArray: |
|||
return true; |
|||
default: |
|||
return false; |
|||
} |
|||
} |
|||
|
|||
// Auto block resizing algorithm from:
// https://cgit.freedesktop.org/mesa/mesa/tree/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
u32 SurfaceParams::GetMipBlockHeight(u32 level) const {
    // The base level keeps the block height programmed in the surface params.
    if (level == 0) {
        return this->block_height;
    }

    // Shrink the block height as the mip extent shrinks: count the rows of
    // compression blocks in this level, take ceil(log2) of that, then map the
    // result into the value range the hardware expects (the final - 3U turns
    // the clamped [3, 7] range into [0, 4]).
    const u32 height_new{GetMipHeight(level)};
    const u32 default_block_height{GetDefaultBlockHeight()};
    const u32 blocks_in_y{(height_new + default_block_height - 1) / default_block_height};
    const u32 block_height_new = Common::Log2Ceil32(blocks_in_y);
    return std::clamp(block_height_new, 3U, 7U) - 3U;
}
|||
|
|||
u32 SurfaceParams::GetMipBlockDepth(u32 level) const {
    // The base level keeps the block depth programmed in the surface params.
    if (level == 0) {
        return this->block_depth;
    }
    // Layered surfaces place layers along depth, which are not block-interleaved.
    if (is_layered) {
        return 0;
    }

    // Mirror of the nouveau auto block resizing heuristic (see the reference
    // above GetMipBlockHeight): derive the depth shift from the mip depth and
    // cap it at 5 — dropping to 4 when the height shift is already >= 2.
    const u32 depth_new{GetMipDepth(level)};
    const u32 block_depth_new = Common::Log2Ceil32(depth_new);
    if (block_depth_new > 4) {
        return 5 - (GetMipBlockHeight(level) >= 2);
    }
    return block_depth_new;
}
|||
|
|||
/// Byte offset of a mipmap level in guest memory: the sum of the guest sizes
/// of every preceding level.
std::size_t SurfaceParams::GetGuestMipmapLevelOffset(u32 level) const {
    std::size_t byte_offset = 0;
    for (u32 mip = 0; mip < level; ++mip) {
        byte_offset += GetInnerMipmapMemorySize(mip, false, false);
    }
    return byte_offset;
}
|||
|
|||
/// Byte offset of a mipmap level in host (linear) memory; host levels store
/// every layer contiguously, hence the GetNumLayers() factor.
std::size_t SurfaceParams::GetHostMipmapLevelOffset(u32 level) const {
    std::size_t byte_offset = 0;
    for (u32 mip = 0; mip < level; ++mip) {
        byte_offset += GetInnerMipmapMemorySize(mip, true, false) * GetNumLayers();
    }
    return byte_offset;
}
|||
|
|||
/// Byte offset of a mipmap level inside a converted (RGBA8-decoded) surface.
std::size_t SurfaceParams::GetConvertedMipmapOffset(u32 level) const {
    std::size_t byte_offset = 0;
    for (u32 mip = 0; mip < level; ++mip) {
        byte_offset += GetConvertedMipmapSize(mip);
    }
    return byte_offset;
}
|||
|
|||
/// Size in bytes of one mipmap level after conversion to RGBA8 (4 bytes per
/// pixel). Layered surfaces count every layer, 3D surfaces use the mip depth.
std::size_t SurfaceParams::GetConvertedMipmapSize(u32 level) const {
    constexpr std::size_t rgba8_bpp = 4ULL;
    const std::size_t mip_width = GetMipWidth(level);
    const std::size_t mip_height = GetMipHeight(level);
    const std::size_t mip_depth = is_layered ? depth : GetMipDepth(level);
    return mip_width * mip_height * mip_depth * rgba8_bpp;
}
|||
|
|||
std::size_t SurfaceParams::GetLayerSize(bool as_host_size, bool uncompressed) const {
    // Accumulate every mipmap level belonging to a single layer.
    std::size_t size = 0;
    for (u32 level = 0; level < num_levels; ++level) {
        size += GetInnerMipmapMemorySize(level, as_host_size, uncompressed);
    }
    // Tiled layered surfaces pad each layer to a GOB-block boundary;
    // block_height and block_depth are log2 shifts here.
    if (is_tiled && is_layered) {
        return Common::AlignBits(size,
                                 Tegra::Texture::GetGOBSizeShift() + block_height + block_depth);
    }
    return size;
}
|||
|
|||
std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size,
                                                    bool uncompressed) const {
    // Host storage is always linear, so tiling is ignored for host sizes.
    const bool tiled{as_host_size ? false : is_tiled};
    // With uncompressed == false, extents are measured in compression tiles.
    const u32 width{GetMipmapSize(uncompressed, GetMipWidth(level), GetDefaultBlockWidth())};
    const u32 height{GetMipmapSize(uncompressed, GetMipHeight(level), GetDefaultBlockHeight())};
    // Layered surfaces measure a single layer here; 3D ones use the mip depth.
    const u32 depth{is_layered ? 1U : GetMipDepth(level)};
    return Tegra::Texture::CalculateSize(tiled, GetBytesPerPixel(), width, height, depth,
                                         GetMipBlockHeight(level), GetMipBlockDepth(level));
}
|||
|
|||
bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
    // NOTE(review): srgb_conversion, emulated_levels and is_layered are left
    // out of the comparison — presumably because they are derived from the
    // compared fields (pixel_format, num_levels, target). Confirm before
    // relying on equality to distinguish those attributes.
    return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
                    height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
           std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
                    rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
                    rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
}
|||
|
|||
std::string SurfaceParams::TargetName() const {
    // Short human-readable tag for the surface target, used to label textures
    // in graphic debuggers.
    switch (target) {
    case SurfaceTarget::Texture1D:
        return "1D";
    case SurfaceTarget::TextureBuffer:
        return "TexBuffer";
    case SurfaceTarget::Texture2D:
        return "2D";
    case SurfaceTarget::Texture3D:
        return "3D";
    case SurfaceTarget::Texture1DArray:
        return "1DArray";
    case SurfaceTarget::Texture2DArray:
        return "2DArray";
    case SurfaceTarget::TextureCubemap:
        return "Cube";
    case SurfaceTarget::TextureCubeArray:
        return "CubeArray";
    default:
        LOG_CRITICAL(HW_GPU, "Unimplemented surface_target={}", static_cast<u32>(target));
        UNREACHABLE();
        // Fallback tag embedding the raw enum value for diagnosis.
        return fmt::format("TUK({})", static_cast<u32>(target));
    }
}
|||
|
|||
} // namespace VideoCommon
|
|||
@ -0,0 +1,286 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <map> |
|||
|
|||
#include "common/alignment.h" |
|||
#include "common/bit_util.h" |
|||
#include "common/cityhash.h" |
|||
#include "common/common_types.h" |
|||
#include "video_core/engines/fermi_2d.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/shader/shader_ir.h" |
|||
#include "video_core/surface.h" |
|||
#include "video_core/textures/decoders.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using VideoCore::Surface::SurfaceCompression; |
|||
|
|||
class SurfaceParams { |
|||
public: |
|||
/// Creates SurfaceCachedParams from a texture configuration. |
|||
static SurfaceParams CreateForTexture(Core::System& system, |
|||
const Tegra::Texture::FullTextureInfo& config, |
|||
const VideoCommon::Shader::Sampler& entry); |
|||
|
|||
/// Creates SurfaceCachedParams for a depth buffer configuration. |
|||
static SurfaceParams CreateForDepthBuffer( |
|||
Core::System& system, u32 zeta_width, u32 zeta_height, Tegra::DepthFormat format, |
|||
u32 block_width, u32 block_height, u32 block_depth, |
|||
Tegra::Engines::Maxwell3D::Regs::InvMemoryLayout type); |
|||
|
|||
/// Creates SurfaceCachedParams from a framebuffer configuration. |
|||
static SurfaceParams CreateForFramebuffer(Core::System& system, std::size_t index); |
|||
|
|||
/// Creates SurfaceCachedParams from a Fermi2D surface configuration. |
|||
static SurfaceParams CreateForFermiCopySurface( |
|||
const Tegra::Engines::Fermi2D::Regs::Surface& config); |
|||
|
|||
std::size_t Hash() const { |
|||
return static_cast<std::size_t>( |
|||
Common::CityHash64(reinterpret_cast<const char*>(this), sizeof(*this))); |
|||
} |
|||
|
|||
bool operator==(const SurfaceParams& rhs) const; |
|||
|
|||
bool operator!=(const SurfaceParams& rhs) const { |
|||
return !operator==(rhs); |
|||
} |
|||
|
|||
std::size_t GetGuestSizeInBytes() const { |
|||
return GetInnerMemorySize(false, false, false); |
|||
} |
|||
|
|||
std::size_t GetHostSizeInBytes() const { |
|||
std::size_t host_size_in_bytes; |
|||
if (GetCompressionType() == SurfaceCompression::Converted) { |
|||
constexpr std::size_t rgb8_bpp = 4ULL; |
|||
// ASTC is uncompressed in software, in emulated as RGBA8 |
|||
host_size_in_bytes = 0; |
|||
for (u32 level = 0; level < num_levels; ++level) { |
|||
host_size_in_bytes += GetConvertedMipmapSize(level); |
|||
} |
|||
} else { |
|||
host_size_in_bytes = GetInnerMemorySize(true, false, false); |
|||
} |
|||
return host_size_in_bytes; |
|||
} |
|||
|
|||
u32 GetBlockAlignedWidth() const { |
|||
return Common::AlignUp(width, 64 / GetBytesPerPixel()); |
|||
} |
|||
|
|||
/// Returns the width of a given mipmap level. |
|||
u32 GetMipWidth(u32 level) const { |
|||
return std::max(1U, width >> level); |
|||
} |
|||
|
|||
/// Returns the height of a given mipmap level. |
|||
u32 GetMipHeight(u32 level) const { |
|||
return std::max(1U, height >> level); |
|||
} |
|||
|
|||
/// Returns the depth of a given mipmap level. |
|||
u32 GetMipDepth(u32 level) const { |
|||
return is_layered ? depth : std::max(1U, depth >> level); |
|||
} |
|||
|
|||
/// Returns the block height of a given mipmap level. |
|||
u32 GetMipBlockHeight(u32 level) const; |
|||
|
|||
/// Returns the block depth of a given mipmap level. |
|||
u32 GetMipBlockDepth(u32 level) const; |
|||
|
|||
/// Returns the best possible row/pitch alignment for the surface. |
|||
u32 GetRowAlignment(u32 level) const { |
|||
const u32 bpp = |
|||
GetCompressionType() == SurfaceCompression::Converted ? 4 : GetBytesPerPixel(); |
|||
return 1U << Common::CountTrailingZeroes32(GetMipWidth(level) * bpp); |
|||
} |
|||
|
|||
/// Returns the offset in bytes in guest memory of a given mipmap level. |
|||
std::size_t GetGuestMipmapLevelOffset(u32 level) const; |
|||
|
|||
/// Returns the offset in bytes in host memory (linear) of a given mipmap level. |
|||
std::size_t GetHostMipmapLevelOffset(u32 level) const; |
|||
|
|||
/// Returns the offset in bytes in host memory (linear) of a given mipmap level |
|||
/// for a texture that is converted in host gpu. |
|||
std::size_t GetConvertedMipmapOffset(u32 level) const; |
|||
|
|||
/// Returns the size in bytes in guest memory of a given mipmap level. |
|||
std::size_t GetGuestMipmapSize(u32 level) const { |
|||
return GetInnerMipmapMemorySize(level, false, false); |
|||
} |
|||
|
|||
/// Returns the size in bytes in host memory (linear) of a given mipmap level. |
|||
std::size_t GetHostMipmapSize(u32 level) const { |
|||
return GetInnerMipmapMemorySize(level, true, false) * GetNumLayers(); |
|||
} |
|||
|
|||
std::size_t GetConvertedMipmapSize(u32 level) const; |
|||
|
|||
/// Returns the size of a layer in bytes in guest memory. |
|||
std::size_t GetGuestLayerSize() const { |
|||
return GetLayerSize(false, false); |
|||
} |
|||
|
|||
/// Returns the size of a layer in bytes in host memory for a given mipmap level. |
|||
std::size_t GetHostLayerSize(u32 level) const { |
|||
ASSERT(target != VideoCore::Surface::SurfaceTarget::Texture3D); |
|||
return GetInnerMipmapMemorySize(level, true, false); |
|||
} |
|||
|
|||
/// Returns the max possible mipmap that the texture can have in host gpu |
|||
u32 MaxPossibleMipmap() const { |
|||
const u32 max_mipmap_w = Common::Log2Ceil32(width) + 1U; |
|||
const u32 max_mipmap_h = Common::Log2Ceil32(height) + 1U; |
|||
const u32 max_mipmap = std::max(max_mipmap_w, max_mipmap_h); |
|||
if (target != VideoCore::Surface::SurfaceTarget::Texture3D) |
|||
return max_mipmap; |
|||
return std::max(max_mipmap, Common::Log2Ceil32(depth) + 1U); |
|||
} |
|||
|
|||
/// Returns if the guest surface is a compressed surface. |
|||
bool IsCompressed() const { |
|||
return GetDefaultBlockHeight() > 1 || GetDefaultBlockWidth() > 1; |
|||
} |
|||
|
|||
/// Returns the default block width. |
|||
u32 GetDefaultBlockWidth() const { |
|||
return VideoCore::Surface::GetDefaultBlockWidth(pixel_format); |
|||
} |
|||
|
|||
/// Returns the default block height. |
|||
u32 GetDefaultBlockHeight() const { |
|||
return VideoCore::Surface::GetDefaultBlockHeight(pixel_format); |
|||
} |
|||
|
|||
/// Returns the bits per pixel. |
|||
u32 GetBitsPerPixel() const { |
|||
return VideoCore::Surface::GetFormatBpp(pixel_format); |
|||
} |
|||
|
|||
/// Returns the bytes per pixel. |
|||
u32 GetBytesPerPixel() const { |
|||
return VideoCore::Surface::GetBytesPerPixel(pixel_format); |
|||
} |
|||
|
|||
/// Returns true if the pixel format is a depth and/or stencil format. |
|||
bool IsPixelFormatZeta() const { |
|||
return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && |
|||
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; |
|||
} |
|||
|
|||
/// Returns how the compression should be handled for this texture. |
|||
SurfaceCompression GetCompressionType() const { |
|||
return VideoCore::Surface::GetFormatCompressionType(pixel_format); |
|||
} |
|||
|
|||
/// Returns is the surface is a TextureBuffer type of surface. |
|||
bool IsBuffer() const { |
|||
return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; |
|||
} |
|||
|
|||
/// Returns the debug name of the texture for use in graphic debuggers. |
|||
std::string TargetName() const; |
|||
|
|||
// Helper used for out of class size calculations |
|||
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height, |
|||
const u32 block_depth) { |
|||
return Common::AlignBits(out_size, |
|||
Tegra::Texture::GetGOBSizeShift() + block_height + block_depth); |
|||
} |
|||
|
|||
/// Converts a width from a type of surface into another. This helps represent the |
|||
/// equivalent value between compressed/non-compressed textures. |
|||
static u32 ConvertWidth(u32 width, VideoCore::Surface::PixelFormat pixel_format_from, |
|||
VideoCore::Surface::PixelFormat pixel_format_to) { |
|||
const u32 bw1 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_from); |
|||
const u32 bw2 = VideoCore::Surface::GetDefaultBlockWidth(pixel_format_to); |
|||
return (width * bw2 + bw1 - 1) / bw1; |
|||
} |
|||
|
|||
/// Converts a height from a type of surface into another. This helps represent the |
|||
/// equivalent value between compressed/non-compressed textures. |
|||
static u32 ConvertHeight(u32 height, VideoCore::Surface::PixelFormat pixel_format_from, |
|||
VideoCore::Surface::PixelFormat pixel_format_to) { |
|||
const u32 bh1 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_from); |
|||
const u32 bh2 = VideoCore::Surface::GetDefaultBlockHeight(pixel_format_to); |
|||
return (height * bh2 + bh1 - 1) / bh1; |
|||
} |
|||
|
|||
// Finds the maximun possible width between 2 2D layers of different formats |
|||
static u32 IntersectWidth(const SurfaceParams& src_params, const SurfaceParams& dst_params, |
|||
const u32 src_level, const u32 dst_level) { |
|||
const u32 bw1 = src_params.GetDefaultBlockWidth(); |
|||
const u32 bw2 = dst_params.GetDefaultBlockWidth(); |
|||
const u32 t_src_width = (src_params.GetMipWidth(src_level) * bw2 + bw1 - 1) / bw1; |
|||
const u32 t_dst_width = (dst_params.GetMipWidth(dst_level) * bw1 + bw2 - 1) / bw2; |
|||
return std::min(t_src_width, t_dst_width); |
|||
} |
|||
|
|||
// Finds the maximun possible height between 2 2D layers of different formats |
|||
static u32 IntersectHeight(const SurfaceParams& src_params, const SurfaceParams& dst_params, |
|||
const u32 src_level, const u32 dst_level) { |
|||
const u32 bh1 = src_params.GetDefaultBlockHeight(); |
|||
const u32 bh2 = dst_params.GetDefaultBlockHeight(); |
|||
const u32 t_src_height = (src_params.GetMipHeight(src_level) * bh2 + bh1 - 1) / bh1; |
|||
const u32 t_dst_height = (dst_params.GetMipHeight(dst_level) * bh1 + bh2 - 1) / bh2; |
|||
return std::min(t_src_height, t_dst_height); |
|||
} |
|||
|
|||
bool is_tiled; |
|||
bool srgb_conversion; |
|||
bool is_layered; |
|||
u32 block_width; |
|||
u32 block_height; |
|||
u32 block_depth; |
|||
u32 tile_width_spacing; |
|||
u32 width; |
|||
u32 height; |
|||
u32 depth; |
|||
u32 pitch; |
|||
u32 num_levels; |
|||
u32 emulated_levels; |
|||
VideoCore::Surface::PixelFormat pixel_format; |
|||
VideoCore::Surface::ComponentType component_type; |
|||
VideoCore::Surface::SurfaceType type; |
|||
VideoCore::Surface::SurfaceTarget target; |
|||
|
|||
private: |
|||
/// Returns the size of a given mipmap level inside a layer. |
|||
std::size_t GetInnerMipmapMemorySize(u32 level, bool as_host_size, bool uncompressed) const; |
|||
|
|||
/// Returns the size of all mipmap levels and aligns as needed. |
|||
std::size_t GetInnerMemorySize(bool as_host_size, bool layer_only, bool uncompressed) const { |
|||
return GetLayerSize(as_host_size, uncompressed) * (layer_only ? 1U : depth); |
|||
} |
|||
|
|||
/// Returns the size of a layer |
|||
std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; |
|||
|
|||
std::size_t GetNumLayers() const { |
|||
return is_layered ? depth : 1; |
|||
} |
|||
|
|||
/// Returns true if these parameters are from a layered surface. |
|||
bool IsLayered() const; |
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
|
|||
namespace std {

/// Lets SurfaceParams be used directly as an unordered container key by
/// delegating to its own CityHash-based Hash().
template <>
struct hash<VideoCommon::SurfaceParams> {
    std::size_t operator()(const VideoCommon::SurfaceParams& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
|||
@ -0,0 +1,23 @@ |
|||
// Copyright 2019 yuzu Emulator Project
|
|||
// Licensed under GPLv2 or any later version
|
|||
// Refer to the license.txt file included.
|
|||
|
|||
#include <tuple>
|
|||
|
|||
#include "common/common_types.h"
|
|||
#include "video_core/texture_cache/surface_view.h"
|
|||
|
|||
namespace VideoCommon { |
|||
|
|||
std::size_t ViewParams::Hash() const { |
|||
return static_cast<std::size_t>(base_layer) ^ static_cast<std::size_t>(num_layers << 16) ^ |
|||
(static_cast<std::size_t>(base_level) << 24) ^ |
|||
(static_cast<std::size_t>(num_levels) << 32) ^ (static_cast<std::size_t>(target) << 36); |
|||
} |
|||
|
|||
/// Two view keys are equal when every field matches.
bool ViewParams::operator==(const ViewParams& rhs) const {
    return target == rhs.target && base_layer == rhs.base_layer &&
           num_layers == rhs.num_layers && base_level == rhs.base_level &&
           num_levels == rhs.num_levels;
}
|||
|
|||
} // namespace VideoCommon
|
|||
@ -0,0 +1,67 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <functional> |
|||
|
|||
#include "common/common_types.h" |
|||
#include "video_core/surface.h" |
|||
#include "video_core/texture_cache/surface_params.h" |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
/// Describes a sub-range (layers and mipmap levels) of a surface; used as the
/// lookup key for cached views.
struct ViewParams {
    ViewParams(VideoCore::Surface::SurfaceTarget target, u32 base_layer, u32 num_layers,
               u32 base_level, u32 num_levels)
        : target{target}, base_layer{base_layer}, num_layers{num_layers}, base_level{base_level},
          num_levels{num_levels} {}

    std::size_t Hash() const;

    bool operator==(const ViewParams& rhs) const;

    VideoCore::Surface::SurfaceTarget target{};
    u32 base_layer{};  // First layer covered by the view.
    u32 num_layers{};  // Number of layers covered by the view.
    u32 base_level{};  // First mipmap level covered by the view.
    u32 num_levels{};  // Number of mipmap levels covered by the view.

    /// Returns true when the target addresses multiple layers.
    bool IsLayered() const {
        switch (target) {
        case VideoCore::Surface::SurfaceTarget::Texture1DArray:
        case VideoCore::Surface::SurfaceTarget::Texture2DArray:
        case VideoCore::Surface::SurfaceTarget::TextureCubemap:
        case VideoCore::Surface::SurfaceTarget::TextureCubeArray:
            return true;
        default:
            return false;
        }
    }
};
|||
|
|||
/// Common base class for backend texture views; stores the immutable
/// parameters the view was created with.
class ViewBase {
public:
    ViewBase(const ViewParams& params) : params{params} {}

    const ViewParams& GetViewParams() const {
        return params;
    }

protected:
    ViewParams params;
};
|||
|
|||
} // namespace VideoCommon |
|||
|
|||
namespace std {

/// Lets ViewParams be used directly as an unordered container key by
/// delegating to its own Hash().
template <>
struct hash<VideoCommon::ViewParams> {
    std::size_t operator()(const VideoCommon::ViewParams& k) const noexcept {
        return k.Hash();
    }
};

} // namespace std
|||
@ -0,0 +1,814 @@ |
|||
// Copyright 2019 yuzu Emulator Project |
|||
// Licensed under GPLv2 or any later version |
|||
// Refer to the license.txt file included. |
|||
|
|||
#pragma once |
|||
|
|||
#include <algorithm> |
|||
#include <array> |
|||
#include <memory> |
|||
#include <mutex> |
|||
#include <set> |
|||
#include <tuple> |
|||
#include <unordered_map> |
|||
#include <vector> |
|||
|
|||
#include <boost/icl/interval_map.hpp> |
|||
#include <boost/range/iterator_range.hpp> |
|||
|
|||
#include "common/assert.h" |
|||
#include "common/common_types.h" |
|||
#include "common/math_util.h" |
|||
#include "core/core.h" |
|||
#include "core/memory.h" |
|||
#include "core/settings.h" |
|||
#include "video_core/engines/fermi_2d.h" |
|||
#include "video_core/engines/maxwell_3d.h" |
|||
#include "video_core/gpu.h" |
|||
#include "video_core/memory_manager.h" |
|||
#include "video_core/rasterizer_interface.h" |
|||
#include "video_core/surface.h" |
|||
#include "video_core/texture_cache/copy_params.h" |
|||
#include "video_core/texture_cache/surface_base.h" |
|||
#include "video_core/texture_cache/surface_params.h" |
|||
#include "video_core/texture_cache/surface_view.h" |
|||
|
|||
namespace Tegra::Texture { |
|||
struct FullTextureInfo; |
|||
} |
|||
|
|||
namespace VideoCore { |
|||
class RasterizerInterface; |
|||
} |
|||
|
|||
namespace VideoCommon { |
|||
|
|||
using VideoCore::Surface::PixelFormat; |
|||
|
|||
using VideoCore::Surface::SurfaceTarget; |
|||
using RenderTargetConfig = Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig; |
|||
|
|||
template <typename TSurface, typename TView> |
|||
class TextureCache { |
|||
using IntervalMap = boost::icl::interval_map<CacheAddr, std::set<TSurface>>; |
|||
using IntervalType = typename IntervalMap::interval_type; |
|||
|
|||
public: |
|||
    /// Unregisters every cached surface that touches the given host memory
    /// region, so it will be rebuilt (reloading guest data) on next use.
    void InvalidateRegion(CacheAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        for (const auto& surface : GetSurfacesInRegion(addr, size)) {
            Unregister(surface);
        }
    }
|||
|
|||
    /// `Guard` guarantees that render targets don't unregister themselves if
    /// they collide. Protection is currently only done on 3D slices.
    void GuardRenderTargets(bool new_guard) {
        guard_render_targets = new_guard;
    }
|||
|
|||
    /// While enabled, surfaces returned by GetTextureSurface are recorded in
    /// sampled_textures so TextureBarrier() can check for render-target use.
    void GuardSamplers(bool new_guard) {
        guard_samplers = new_guard;
    }
|||
|
|||
    /// Writes back to guest memory every surface in the given region.
    void FlushRegion(CacheAddr addr, std::size_t size) {
        std::lock_guard lock{mutex};

        auto surfaces = GetSurfacesInRegion(addr, size);
        if (surfaces.empty()) {
            return;
        }
        // Flush in modification order so later writes overwrite earlier ones
        // where surfaces overlap.
        std::sort(surfaces.begin(), surfaces.end(), [](const TSurface& a, const TSurface& b) {
            return a->GetModificationTick() < b->GetModificationTick();
        });
        for (const auto& surface : surfaces) {
            FlushSurface(surface);
        }
    }
|||
|
|||
    /// Returns a view matching the given TIC descriptor and shader sampler
    /// entry, creating the backing surface if it is not cached yet. Returns a
    /// default view when the descriptor has a null address.
    TView GetTextureSurface(const Tegra::Texture::FullTextureInfo& config,
                            const VideoCommon::Shader::Sampler& entry) {
        std::lock_guard lock{mutex};
        const auto gpu_addr{config.tic.Address()};
        if (!gpu_addr) {
            return {};
        }
        const auto params{SurfaceParams::CreateForTexture(system, config, entry)};
        const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
        // Record sampled surfaces so TextureBarrier() can detect sampling
        // from a bound render target.
        if (guard_samplers) {
            sampled_textures.push_back(surface);
        }
        return view;
    }
|||
|
|||
    /// Returns true when any surface sampled since tracking started is also a
    /// render target (a texture barrier is needed), then clears the tracking
    /// list.
    bool TextureBarrier() {
        const bool any_rt =
            std::any_of(sampled_textures.begin(), sampled_textures.end(),
                        [](const auto& surface) { return surface->IsRenderTarget(); });
        sampled_textures.clear();
        return any_rt;
    }
|||
|
|||
TView GetDepthBufferSurface(bool preserve_contents) { |
|||
std::lock_guard lock{mutex}; |
|||
auto& maxwell3d = system.GPU().Maxwell3D(); |
|||
|
|||
if (!maxwell3d.dirty_flags.zeta_buffer) { |
|||
return depth_buffer.view; |
|||
} |
|||
maxwell3d.dirty_flags.zeta_buffer = false; |
|||
|
|||
const auto& regs{maxwell3d.regs}; |
|||
const auto gpu_addr{regs.zeta.Address()}; |
|||
if (!gpu_addr || !regs.zeta_enable) { |
|||
SetEmptyDepthBuffer(); |
|||
return {}; |
|||
} |
|||
const auto depth_params{SurfaceParams::CreateForDepthBuffer( |
|||
system, regs.zeta_width, regs.zeta_height, regs.zeta.format, |
|||
regs.zeta.memory_layout.block_width, regs.zeta.memory_layout.block_height, |
|||
regs.zeta.memory_layout.block_depth, regs.zeta.memory_layout.type)}; |
|||
auto surface_view = GetSurface(gpu_addr, depth_params, preserve_contents, true); |
|||
if (depth_buffer.target) |
|||
depth_buffer.target->MarkAsRenderTarget(false); |
|||
depth_buffer.target = surface_view.first; |
|||
depth_buffer.view = surface_view.second; |
|||
if (depth_buffer.target) |
|||
depth_buffer.target->MarkAsRenderTarget(true); |
|||
return surface_view.second; |
|||
} |
|||
|
|||
TView GetColorBufferSurface(std::size_t index, bool preserve_contents) { |
|||
std::lock_guard lock{mutex}; |
|||
ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets); |
|||
auto& maxwell3d = system.GPU().Maxwell3D(); |
|||
if (!maxwell3d.dirty_flags.color_buffer[index]) { |
|||
return render_targets[index].view; |
|||
} |
|||
maxwell3d.dirty_flags.color_buffer.reset(index); |
|||
|
|||
const auto& regs{maxwell3d.regs}; |
|||
if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 || |
|||
regs.rt[index].format == Tegra::RenderTargetFormat::NONE) { |
|||
SetEmptyColorBuffer(index); |
|||
return {}; |
|||
} |
|||
|
|||
const auto& config{regs.rt[index]}; |
|||
const auto gpu_addr{config.Address()}; |
|||
if (!gpu_addr) { |
|||
SetEmptyColorBuffer(index); |
|||
return {}; |
|||
} |
|||
|
|||
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), |
|||
preserve_contents, true); |
|||
if (render_targets[index].target) |
|||
render_targets[index].target->MarkAsRenderTarget(false); |
|||
render_targets[index].target = surface_view.first; |
|||
render_targets[index].view = surface_view.second; |
|||
if (render_targets[index].target) |
|||
render_targets[index].target->MarkAsRenderTarget(true); |
|||
return surface_view.second; |
|||
} |
|||
|
|||
/// Flags the surface bound to color render target `index` as modified at the current tick.
/// No-op when the slot has no bound surface.
void MarkColorBufferInUse(std::size_t index) {
    auto& bound_target = render_targets[index].target;
    if (!bound_target) {
        return;
    }
    bound_target->MarkAsModified(true, Tick());
}
|||
|
|||
void MarkDepthBufferInUse() { |
|||
if (depth_buffer.target) { |
|||
depth_buffer.target->MarkAsModified(true, Tick()); |
|||
} |
|||
} |
|||
|
|||
/// Unbinds the depth buffer, dropping its render-target protection first.
void SetEmptyDepthBuffer() {
    auto& bound_target = depth_buffer.target;
    if (bound_target != nullptr) {
        bound_target->MarkAsRenderTarget(false);
        bound_target = nullptr;
        depth_buffer.view = nullptr;
    }
}
|||
|
|||
/// Unbinds color render target `index`, dropping its render-target protection first.
void SetEmptyColorBuffer(std::size_t index) {
    auto& info = render_targets[index];
    if (info.target == nullptr) {
        return;
    }
    info.target->MarkAsRenderTarget(false);
    info.target = nullptr;
    info.view = nullptr;
}
|||
|
|||
/// Performs a Fermi 2D engine blit between two surfaces, resolving (or creating) both the
/// source and destination in the cache and marking the destination as modified.
void DoFermiCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src_config,
                 const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                 const Tegra::Engines::Fermi2D::Config& copy_config) {
    std::lock_guard lock{mutex};
    // Resolve the destination first so it is registered before the blit runs.
    std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
    std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
    ImageBlit(src_surface.second, dst_surface.second, copy_config);
    dst_surface.first->MarkAsModified(true, Tick());
}
|||
|
|||
/// Looks up a surface whose cache address exactly matches the given host pointer.
/// @returns the matching surface, or null when the pointer is unmapped or unknown.
TSurface TryFindFramebufferSurface(const u8* host_ptr) {
    const CacheAddr cache_addr = ToCacheAddr(host_ptr);
    if (cache_addr == 0) {
        return {};
    }
    // Only the bucket containing the address needs to be scanned.
    for (const TSurface& candidate : registry[cache_addr >> registry_page_bits]) {
        if (candidate->GetCacheAddr() == cache_addr) {
            return candidate;
        }
    }
    return {};
}
|||
|
|||
/// Advances and returns the monotonic tick counter used to order surface modifications.
u64 Tick() {
    return ++ticks;
}
|||
|
|||
protected: |
|||
/// Initializes the cache: clears all render-target bindings, sizes the staging cache and
/// builds the depth<->color "sibling" format table used for non-render format conflicts.
TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
    : system{system}, rasterizer{rasterizer} {
    for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
        SetEmptyColorBuffer(i);
    }

    SetEmptyDepthBuffer();
    staging_cache.SetSize(2);

    // Pair up formats that may alias each other in both directions.
    const auto make_siblings = [this](PixelFormat a, PixelFormat b) {
        siblings_table[static_cast<std::size_t>(a)] = b;
        siblings_table[static_cast<std::size_t>(b)] = a;
    };
    // Formats without a pair resolve to Invalid.
    std::fill(siblings_table.begin(), siblings_table.end(), PixelFormat::Invalid);
    make_siblings(PixelFormat::Z16, PixelFormat::R16U);
    make_siblings(PixelFormat::Z32F, PixelFormat::R32F);
    make_siblings(PixelFormat::Z32FS8, PixelFormat::RG32F);

    sampled_textures.reserve(64);
}
|||
|
|||
// Protected non-virtual destructor: instances are destroyed through the derived backend type.
~TextureCache() = default;
|||
|
|||
/// Creates a backend-specific surface for the given GPU address and parameters.
virtual TSurface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) = 0;

/// Copies the subregion described by `copy_params` from one surface to another on the host.
virtual void ImageCopy(TSurface& src_surface, TSurface& dst_surface,
                       const CopyParams& copy_params) = 0;

/// Blits between two views using the Fermi 2D engine configuration.
virtual void ImageBlit(TView& src_view, TView& dst_view,
                       const Tegra::Engines::Fermi2D::Config& copy_config) = 0;

// Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
// and reading it from a separate buffer.
virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
|||
|
|||
/// Registers a surface in the cache: resolves its cache/CPU addresses, records whether its
/// backing GPU memory block is continuous, inserts it into the inner caches and bumps the
/// rasterizer's cached-page counters. Bails out (with a critical log) when the GPU address
/// has no mapped host pointer or CPU address.
void Register(TSurface surface) {
    const GPUVAddr gpu_addr = surface->GetGpuAddr();
    const CacheAddr cache_ptr = ToCacheAddr(system.GPU().MemoryManager().GetPointer(gpu_addr));
    const std::size_t size = surface->GetSizeInBytes();
    const std::optional<VAddr> cpu_addr =
        system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr);
    if (!cache_ptr || !cpu_addr) {
        LOG_CRITICAL(HW_GPU, "Failed to register surface with unmapped gpu_address 0x{:016x}",
                     gpu_addr);
        return;
    }
    const bool continuous = system.GPU().MemoryManager().IsBlockContinuous(gpu_addr, size);
    surface->MarkAsContinuous(continuous);
    surface->SetCacheAddr(cache_ptr);
    surface->SetCpuAddr(*cpu_addr);
    RegisterInnerCache(surface);
    surface->MarkAsRegistered(true);
    rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1);
}
|||
|
|||
/// Removes a surface from the inner caches and parks it in the reserve for later reuse.
/// Surfaces protected as render targets are kept while guard_render_targets is set.
void Unregister(TSurface surface) {
    if (guard_render_targets && surface->IsProtected()) {
        return;
    }
    const GPUVAddr gpu_addr = surface->GetGpuAddr();
    const CacheAddr cache_ptr = surface->GetCacheAddr();
    const std::size_t size = surface->GetSizeInBytes();
    const VAddr cpu_addr = surface->GetCpuAddr();
    rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
    UnregisterInnerCache(surface);
    surface->MarkAsRegistered(false);
    // Keep the surface around keyed by its parameters so it can be recycled cheaply.
    ReserveSurface(surface->GetSurfaceParams(), surface);
}
|||
|
|||
/// Returns an unregistered surface for the given address and parameters, preferring to reuse
/// a previously reserved surface with matching parameters over creating a new one.
TSurface GetUncachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
    if (const auto surface = TryGetReservedSurface(params); surface) {
        surface->SetGpuAddr(gpu_addr);
        return surface;
    }
    // No reserved surface available, create a new one. It will be added to the reserve later,
    // when it is unregistered (see Unregister/ReserveSurface), not here.
    auto new_surface{CreateSurface(gpu_addr, params)};
    return new_surface;
}
|||
|
|||
/// Resolves a cache surface/view pair for a Fermi 2D engine surface descriptor.
/// Fermi copies always preserve contents and never act as render targets.
std::pair<TSurface, TView> GetFermiSurface(
    const Tegra::Engines::Fermi2D::Regs::Surface& config) {
    return GetSurface(config.Address(), SurfaceParams::CreateForFermiCopySurface(config), true,
                      false);
}
|||
|
|||
Core::System& system; // Emulator instance; protected so backend subclasses can reach it.
|||
|
|||
private: |
|||
/// Strategies for dealing with surfaces that cannot be matched against the cache contents.
enum class RecycleStrategy : u32 {
    Ignore = 0, // Drop the overlaps; optionally reload the new surface from guest memory.
    Flush = 1,  // Flush the overlaps to guest memory first, then reload from it.
    BufferCopy = 3, // Rebuild via a raw buffer copy (note: value 2 is intentionally unused).
};
|||
|
|||
/**
 * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
 * @param overlaps, the overlapping surfaces registered in the cache.
 * @param params, the parameters of the new surface.
 * @param gpu_addr, the starting address of the new surface.
 * @param untopological, tells the recycler that the texture has no way to match the overlaps
 * due to topological reasons.
 **/
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                             const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
    // Accurate GPU emulation always round-trips through guest memory.
    if (Settings::values.use_accurate_gpu_emulation) {
        return RecycleStrategy::Flush;
    }
    // 3D Textures decision
    if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
        return RecycleStrategy::Flush;
    }
    for (auto s : overlaps) {
        const auto& s_params = s->GetSurfaceParams();
        if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
            return RecycleStrategy::Flush;
        }
    }
    // Untopological decision
    if (untopological == MatchTopologyResult::CompressUnmatch) {
        return RecycleStrategy::Flush;
    }
    if (untopological == MatchTopologyResult::FullMatch && !params.is_tiled) {
        return RecycleStrategy::Flush;
    }
    return RecycleStrategy::Ignore;
}
|||
|
|||
/**
 * `RecycleSurface` is the method we use to decide what to do with textures we can't resolve
 * in the cache. It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters
 * all the overlaps and loads the new texture. Flush flushes all the overlaps into memory and
 * loads the new surface from that data.
 * @param overlaps, the overlapping surfaces registered in the cache.
 * @param params, the parameters of the new surface.
 * @param gpu_addr, the starting address of the new surface.
 * @param preserve_contents, tells if the new surface should be loaded from memory or left
 * blank.
 * @param untopological, tells the recycler that the texture has no way to match the overlaps
 * due to topological reasons.
 **/
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                          const SurfaceParams& params, const GPUVAddr gpu_addr,
                                          const bool preserve_contents,
                                          const MatchTopologyResult untopological) {
    // When ignoring overlaps, guest data is only preloaded in accurate GPU mode.
    const bool do_load = preserve_contents && Settings::values.use_accurate_gpu_emulation;
    for (auto& surface : overlaps) {
        Unregister(surface);
    }
    switch (PickStrategy(overlaps, params, gpu_addr, untopological)) {
    case RecycleStrategy::Ignore: {
        return InitializeSurface(gpu_addr, params, do_load);
    }
    case RecycleStrategy::Flush: {
        // Flush oldest-first so newer data wins when regions overlap in guest memory.
        std::sort(overlaps.begin(), overlaps.end(),
                  [](const TSurface& a, const TSurface& b) -> bool {
                      return a->GetModificationTick() < b->GetModificationTick();
                  });
        for (auto& surface : overlaps) {
            FlushSurface(surface);
        }
        return InitializeSurface(gpu_addr, params, preserve_contents);
    }
    case RecycleStrategy::BufferCopy: {
        auto new_surface = GetUncachedSurface(gpu_addr, params);
        BufferCopy(overlaps[0], new_surface);
        return {new_surface, new_surface->GetMainView()};
    }
    default: {
        UNIMPLEMENTED_MSG("Unimplemented Texture Cache Recycling Strategy!");
        return InitializeSurface(gpu_addr, params, do_load);
    }
    }
}
|||
|
|||
/**
 * `RebuildSurface` takes a single surface and recreates it into another that
 * may differ in format, target or width alignment.
 * @param current_surface, the registered surface in the cache which we want to convert.
 * @param params, the new surface params which we'll use to recreate the surface.
 * @param is_render, whether the new surface is going to be used as a render target.
 **/
std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                          bool is_render) {
    const auto gpu_addr = current_surface->GetGpuAddr();
    const auto& cr_params = current_surface->GetSurfaceParams();
    TSurface new_surface;
    // For non-render conflicts between sibling formats, keep the current surface's format
    // so the data can be transferred without conversion.
    if (cr_params.pixel_format != params.pixel_format && !is_render &&
        GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
        SurfaceParams new_params = params;
        new_params.pixel_format = cr_params.pixel_format;
        new_params.component_type = cr_params.component_type;
        new_params.type = cr_params.type;
        new_surface = GetUncachedSurface(gpu_addr, new_params);
    } else {
        new_surface = GetUncachedSurface(gpu_addr, params);
    }
    const auto& final_params = new_surface->GetSurfaceParams();
    if (cr_params.type != final_params.type ||
        (cr_params.component_type != final_params.component_type)) {
        // Incompatible types cannot be image-copied; go through a raw buffer copy.
        BufferCopy(current_surface, new_surface);
    } else {
        // Transfer the old contents piecewise into the new layout.
        std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
        for (auto& brick : bricks) {
            ImageCopy(current_surface, new_surface, brick);
        }
    }
    Unregister(current_surface);
    Register(new_surface);
    new_surface->MarkAsModified(current_surface->IsModified(), Tick());
    return {new_surface, new_surface->GetMainView()};
}
|||
|
|||
/**
 * `ManageStructuralMatch` takes a single surface and checks with the new surface's
 * params if it's an exact match, we return the main view of the registered surface. If its
 * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
 * match but the targets don't, we create an overview View of the registered surface.
 * @param current_surface, the registered surface in the cache which we want to convert.
 * @param params, the new surface params which we want to check.
 * @param is_render, whether the surface is going to be used as a render target.
 **/
std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
                                                 const SurfaceParams& params, bool is_render) {
    const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
    const bool matches_target = current_surface->MatchTarget(params.target);
    const auto match_check = [&]() -> std::pair<TSurface, TView> {
        if (matches_target) {
            return {current_surface, current_surface->GetMainView()};
        }
        // Same format, different target: expose the surface through an overview view.
        return {current_surface, current_surface->EmplaceOverview(params)};
    };
    if (!is_mirage) {
        return match_check();
    }
    // Sibling formats can be used as-is outside of rendering; no rebuild needed.
    if (!is_render && GetSiblingFormat(current_surface->GetFormat()) == params.pixel_format) {
        return match_check();
    }
    return RebuildSurface(current_surface, params, is_render);
}
|||
|
|||
/**
 * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
 * matches the candidate in some way, we have no guarantees here. We try to see if the overlaps
 * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
 * for them, else we return nothing.
 * @param overlaps, the overlapping surfaces registered in the cache.
 * @param params, the parameters of the new surface.
 * @param gpu_addr, the starting address of the new surface.
 **/
std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
                                                                const SurfaceParams& params,
                                                                const GPUVAddr gpu_addr) {
    if (params.target == SurfaceTarget::Texture3D) {
        return {};
    }
    bool modified = false;
    TSurface new_surface = GetUncachedSurface(gpu_addr, params);
    u32 passed_tests = 0;
    for (auto& surface : overlaps) {
        const SurfaceParams& src_params = surface->GetSurfaceParams();
        if (src_params.is_layered || src_params.num_levels > 1) {
            // We send these cases to recycle as they are more complex to handle
            return {};
        }
        const std::size_t candidate_size = surface->GetSizeInBytes();
        auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
        if (!mipmap_layer) {
            continue;
        }
        const auto [layer, mipmap] = *mipmap_layer;
        if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
            continue;
        }
        modified |= surface->IsModified();
        // Now we got all the data set up
        const u32 width = SurfaceParams::IntersectWidth(src_params, params, 0, mipmap);
        const u32 height = SurfaceParams::IntersectHeight(src_params, params, 0, mipmap);
        const CopyParams copy_params(0, 0, 0, 0, 0, layer, 0, mipmap, width, height, 1);
        passed_tests++;
        ImageCopy(surface, new_surface, copy_params);
    }
    if (passed_tests == 0) {
        return {};
        // In Accurate GPU all tests should pass, else we recycle
    } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
        return {};
    }
    // Every matching overlap has been folded into the new surface; retire them.
    for (auto surface : overlaps) {
        Unregister(surface);
    }
    new_surface->MarkAsModified(modified, Tick());
    Register(new_surface);
    return {{new_surface, new_surface->GetMainView()}};
}
|||
|
|||
/**
 * `GetSurface` gets the starting address and parameters of a candidate surface and tries
 * to find a matching surface within the cache. This is done in 3 big steps. The first is to
 * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
 * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
 * memory else we move to step 3. Step 3 consists on figuring the relationship between the
 * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
 * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
 * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
 * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
 * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
 * @param gpu_addr, the starting address of the candidate surface.
 * @param params, the parameters of the candidate surface.
 * @param preserve_contents, tells if the new surface should be loaded from memory or left
 * blank.
 * @param is_render, whether the surface is going to be used as a render target.
 **/
std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
                                      bool preserve_contents, bool is_render) {
    const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
    const auto cache_addr{ToCacheAddr(host_ptr)};

    // Step 0: guarantee a valid surface
    if (!cache_addr) {
        // Return a 1x1 null surface if the address is unmapped.
        SurfaceParams new_params = params;
        new_params.width = 1;
        new_params.height = 1;
        new_params.depth = 1;
        new_params.block_height = 0;
        new_params.block_depth = 0;
        return InitializeSurface(gpu_addr, new_params, false);
    }

    // Step 1
    // Check Level 1 Cache for a fast structural match. If candidate surface
    // matches at certain level we are pretty much done.
    if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
        TSurface& current_surface = iter->second;
        const auto topological_result = current_surface->MatchesTopology(params);
        if (topological_result != MatchTopologyResult::FullMatch) {
            std::vector<TSurface> overlaps{current_surface};
            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                  topological_result);
        }
        const auto struct_result = current_surface->MatchesStructure(params);
        if (struct_result != MatchStructureResult::None &&
            (params.target != SurfaceTarget::Texture3D ||
             current_surface->MatchTarget(params.target))) {
            if (struct_result == MatchStructureResult::FullMatch) {
                return ManageStructuralMatch(current_surface, params, is_render);
            } else {
                // Partial structural match: recreate the surface with the new parameters.
                return RebuildSurface(current_surface, params, is_render);
            }
        }
    }

    // Step 2
    // Obtain all possible overlaps in the memory region
    const std::size_t candidate_size = params.GetGuestSizeInBytes();
    auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};

    // If none are found, we are done. we just load the surface and create it.
    if (overlaps.empty()) {
        return InitializeSurface(gpu_addr, params, preserve_contents);
    }

    // Step 3
    // Now we need to figure the relationship between the texture and its overlaps
    // we do a topological test to ensure we can find some relationship. If it fails
    // immediately recycle the texture
    for (const auto& surface : overlaps) {
        const auto topological_result = surface->MatchesTopology(params);
        if (topological_result != MatchTopologyResult::FullMatch) {
            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                  topological_result);
        }
    }

    // Split cases between 1 overlap or many.
    if (overlaps.size() == 1) {
        TSurface current_surface = overlaps[0];
        // First check if the surface is within the overlap. If not, it means
        // two things either the candidate surface is a supertexture of the overlap
        // or they don't match in any known way.
        if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
            if (current_surface->GetGpuAddr() == gpu_addr) {
                // Same base address: the overlap may be a mipmap/layer of the candidate.
                std::optional<std::pair<TSurface, TView>> view =
                    TryReconstructSurface(overlaps, params, gpu_addr);
                if (view) {
                    return *view;
                }
            }
            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                  MatchTopologyResult::FullMatch);
        }
        // Now we check if the candidate is a mipmap/layer of the overlap
        std::optional<TView> view =
            current_surface->EmplaceView(params, gpu_addr, candidate_size);
        if (view) {
            const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
            if (is_mirage) {
                // On a mirage view, we need to recreate the surface under this new view
                // and then obtain a view again.
                SurfaceParams new_params = current_surface->GetSurfaceParams();
                const u32 wh = SurfaceParams::ConvertWidth(
                    new_params.width, new_params.pixel_format, params.pixel_format);
                const u32 hh = SurfaceParams::ConvertHeight(
                    new_params.height, new_params.pixel_format, params.pixel_format);
                new_params.width = wh;
                new_params.height = hh;
                new_params.pixel_format = params.pixel_format;
                std::pair<TSurface, TView> pair =
                    RebuildSurface(current_surface, new_params, is_render);
                std::optional<TView> mirage_view =
                    pair.first->EmplaceView(params, gpu_addr, candidate_size);
                if (mirage_view)
                    return {pair.first, *mirage_view};
                return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                      MatchTopologyResult::FullMatch);
            }
            return {current_surface, *view};
        }
        // The next case is unsafe, so if we are in accurate GPU mode, just skip it
        if (Settings::values.use_accurate_gpu_emulation) {
            return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                                  MatchTopologyResult::FullMatch);
        }
        // This is the case the texture is a part of the parent.
        if (current_surface->MatchesSubTexture(params, gpu_addr)) {
            return RebuildSurface(current_surface, params, is_render);
        }
    } else {
        // If there are many overlaps, odds are they are subtextures of the candidate
        // surface. We try to construct a new surface based on the candidate parameters,
        // using the overlaps. If a single overlap fails, this will fail.
        std::optional<std::pair<TSurface, TView>> view =
            TryReconstructSurface(overlaps, params, gpu_addr);
        if (view) {
            return *view;
        }
    }
    // We failed all the tests, recycle the overlaps into a new texture.
    return RecycleSurface(overlaps, params, gpu_addr, preserve_contents,
                          MatchTopologyResult::FullMatch);
}
|||
|
|||
/// Creates (or reuses) a surface for the given address/params, registers it in the cache and
/// optionally loads its contents from guest memory.
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                             bool preserve_contents) {
    TSurface surface = GetUncachedSurface(gpu_addr, params);
    Register(surface);
    if (preserve_contents) {
        LoadSurface(surface);
    }
    return {surface, surface->GetMainView()};
}
|||
|
|||
/// Uploads a surface's contents from guest memory through the staging buffer, then clears its
/// modified flag (host and guest now agree).
void LoadSurface(const TSurface& surface) {
    staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
    surface->LoadBuffer(system.GPU().MemoryManager(), staging_cache);
    surface->UploadTexture(staging_cache.GetBuffer(0));
    surface->MarkAsModified(false, Tick());
}
|||
|
|||
/// Writes a modified surface back to guest memory through the staging buffer; no-op when the
/// surface is already in sync.
void FlushSurface(const TSurface& surface) {
    if (!surface->IsModified()) {
        return;
    }
    staging_cache.GetBuffer(0).resize(surface->GetHostSizeInBytes());
    surface->DownloadTexture(staging_cache.GetBuffer(0));
    surface->FlushBuffer(system.GPU().MemoryManager(), staging_cache);
    surface->MarkAsModified(false, Tick());
}
|||
|
|||
/// Inserts a surface into the L1 cache (keyed by start address) and into every registry
/// bucket its address range touches.
void RegisterInnerCache(TSurface& surface) {
    const CacheAddr cache_addr = surface->GetCacheAddr();
    const CacheAddr last_page = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
    l1_cache[cache_addr] = surface;
    for (CacheAddr page = cache_addr >> registry_page_bits; page <= last_page; ++page) {
        registry[page].push_back(surface);
    }
}
|||
|
|||
/// Removes a surface from the L1 cache and from every registry bucket it was inserted into.
void UnregisterInnerCache(TSurface& surface) {
    const CacheAddr cache_addr = surface->GetCacheAddr();
    const CacheAddr last_page = (surface->GetCacheAddrEnd() - 1) >> registry_page_bits;
    l1_cache.erase(cache_addr);
    for (CacheAddr page = cache_addr >> registry_page_bits; page <= last_page; ++page) {
        auto& bucket = registry[page];
        bucket.erase(std::find(bucket.begin(), bucket.end(), surface));
    }
}
|||
|
|||
/// Collects every registered surface overlapping [cache_addr, cache_addr + size).
/// The picked flag is used to deduplicate surfaces spanning multiple registry buckets and is
/// cleared again before returning.
std::vector<TSurface> GetSurfacesInRegion(const CacheAddr cache_addr, const std::size_t size) {
    if (size == 0) {
        return {};
    }
    const CacheAddr cache_addr_end = cache_addr + size;
    const CacheAddr first_page = cache_addr >> registry_page_bits;
    const CacheAddr last_page = (cache_addr_end - 1) >> registry_page_bits;
    std::vector<TSurface> found;
    for (CacheAddr page = first_page; page <= last_page; ++page) {
        for (auto& candidate : registry[page]) {
            if (candidate->IsPicked() || !candidate->Overlaps(cache_addr, cache_addr_end)) {
                continue;
            }
            candidate->MarkAsPicked(true);
            found.push_back(candidate);
        }
    }
    for (auto& candidate : found) {
        candidate->MarkAsPicked(false);
    }
    return found;
}
|||
|
|||
/// Stashes an unregistered surface keyed by its parameters so it can be reused later.
void ReserveSurface(const SurfaceParams& params, TSurface surface) {
    surface_reserve[params].push_back(std::move(surface));
}
|||
|
|||
/// Returns a previously reserved surface with matching parameters that is not currently
/// registered, or null when none is available.
TSurface TryGetReservedSurface(const SurfaceParams& params) {
    const auto search{surface_reserve.find(params)};
    if (search == surface_reserve.end()) {
        return {};
    }
    auto& pool = search->second;
    const auto it = std::find_if(pool.begin(), pool.end(),
                                 [](const TSurface& s) { return !s->IsRegistered(); });
    return it != pool.end() ? *it : TSurface{};
}
|||
|
|||
/// Returns the sibling format that can alias `format`, or PixelFormat::Invalid if none.
constexpr PixelFormat GetSiblingFormat(PixelFormat format) const {
    return siblings_table[static_cast<std::size_t>(format)];
}
|||
|
|||
/// Bookkeeping for one bound render target: the owning surface and the view attached to it.
struct FramebufferTargetInfo {
    TSurface target;
    TView view;
};
|||
|
|||
VideoCore::RasterizerInterface& rasterizer;

// Monotonic counter used to order surface modifications (see Tick()).
u64 ticks{};

// Guards the cache for protection conflicts.
bool guard_render_targets{};
bool guard_samplers{};

// The siblings table is for formats that can inter exchange with one another
// without causing issues. This is only valid when a conflict occurs on a non
// rendering use.
std::array<PixelFormat, static_cast<std::size_t>(PixelFormat::Max)> siblings_table;

// The internal Cache is different for the Texture Cache. It's based on buckets
// of 1MB. This fits better for the purpose of this cache as textures are normally
// large in size.
static constexpr u64 registry_page_bits{20};
static constexpr u64 registry_page_size{1 << registry_page_bits};
std::unordered_map<CacheAddr, std::vector<TSurface>> registry;

// The L1 Cache is used for fast texture lookup before checking the overlaps
// This avoids calculating size and other parameters.
std::unordered_map<CacheAddr, TSurface> l1_cache;

/// The surface reserve is a "backup" cache, this is where we put unique surfaces that have
/// previously been used. This is to prevent surfaces from being constantly created and
/// destroyed when used with different surface parameters.
std::unordered_map<SurfaceParams, std::vector<TSurface>> surface_reserve;
// Currently bound color render targets and depth buffer.
std::array<FramebufferTargetInfo, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
    render_targets;
FramebufferTargetInfo depth_buffer;

std::vector<TSurface> sampled_textures;

// Reusable CPU-side buffers for texture uploads/downloads (sized in the constructor).
StagingCache staging_cache;
std::recursive_mutex mutex;
|||
}; |
|||
|
|||
} // namespace VideoCommon |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue