|
|
|
@ -336,20 +336,22 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, u32 block_depth, u32 d |
|
|
|
constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT; |
|
|
|
constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format); |
|
|
|
|
|
|
|
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
|
|
|
|
// pixel values.
|
|
|
|
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
|
|
|
|
|
|
|
if (morton_to_gl) { |
|
|
|
// With the BCn formats (DXT and DXN), each 4x4 tile is swizzled instead of just individual
|
|
|
|
// pixel values.
|
|
|
|
const u32 tile_size{IsFormatBCn(format) ? 4U : 1U}; |
|
|
|
const std::vector<u8> data = Tegra::Texture::UnswizzleTexture( |
|
|
|
addr, tile_size, bytes_per_pixel, stride, height, depth, block_height, block_depth); |
|
|
|
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; |
|
|
|
memcpy(gl_buffer, data.data(), size_to_copy); |
|
|
|
} else { |
|
|
|
// TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
|
|
|
|
// check the configuration for this and perform more generic un/swizzle
|
|
|
|
LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!"); |
|
|
|
VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel, |
|
|
|
Memory::GetPointer(addr), gl_buffer, morton_to_gl); |
|
|
|
std::vector<u8> data(height * stride * bytes_per_pixel); |
|
|
|
Tegra::Texture::CopySwizzledData(stride / tile_size, height / tile_size, depth, |
|
|
|
bytes_per_pixel, bytes_per_pixel, data.data(), gl_buffer, |
|
|
|
false, block_height, block_depth); |
|
|
|
const std::size_t size_to_copy{std::min(gl_buffer_size, data.size())}; |
|
|
|
memcpy(Memory::GetPointer(addr), data.data(), size_to_copy); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
@ -430,17 +432,16 @@ static constexpr std::array<void (*)(u32, u32, u32, u32, u32, u8*, std::size_t, |
|
|
|
MortonCopy<false, PixelFormat::RGBA16UI>, |
|
|
|
MortonCopy<false, PixelFormat::R11FG11FB10F>, |
|
|
|
MortonCopy<false, PixelFormat::RGBA32UI>, |
|
|
|
// TODO(Subv): Swizzling DXT1/DXT23/DXT45/DXN1/DXN2/BC7U/BC6H_UF16/BC6H_SF16/ASTC_2D_4X4
|
|
|
|
// formats are not supported
|
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
nullptr, |
|
|
|
MortonCopy<false, PixelFormat::DXT1>, |
|
|
|
MortonCopy<false, PixelFormat::DXT23>, |
|
|
|
MortonCopy<false, PixelFormat::DXT45>, |
|
|
|
MortonCopy<false, PixelFormat::DXN1>, |
|
|
|
MortonCopy<false, PixelFormat::DXN2UNORM>, |
|
|
|
MortonCopy<false, PixelFormat::DXN2SNORM>, |
|
|
|
MortonCopy<false, PixelFormat::BC7U>, |
|
|
|
MortonCopy<false, PixelFormat::BC6H_UF16>, |
|
|
|
MortonCopy<false, PixelFormat::BC6H_SF16>, |
|
|
|
// TODO(Subv): Swizzling ASTC formats are not supported
|
|
|
|
nullptr, |
|
|
|
MortonCopy<false, PixelFormat::G8R8U>, |
|
|
|
MortonCopy<false, PixelFormat::G8R8S>, |
|
|
|
@ -754,7 +755,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params) |
|
|
|
SurfaceParams::SurfaceTargetName(params.target)); |
|
|
|
} |
|
|
|
|
|
|
|
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { |
|
|
|
static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bool reverse) { |
|
|
|
union S8Z24 { |
|
|
|
BitField<0, 24, u32> z24; |
|
|
|
BitField<24, 8, u32> s8; |
|
|
|
@ -767,16 +768,23 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) { |
|
|
|
}; |
|
|
|
static_assert(sizeof(Z24S8) == 4, "Z24S8 is incorrect size"); |
|
|
|
|
|
|
|
S8Z24 input_pixel{}; |
|
|
|
Z24S8 output_pixel{}; |
|
|
|
S8Z24 s8z24_pixel{}; |
|
|
|
Z24S8 z24s8_pixel{}; |
|
|
|
constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)}; |
|
|
|
for (std::size_t y = 0; y < height; ++y) { |
|
|
|
for (std::size_t x = 0; x < width; ++x) { |
|
|
|
const std::size_t offset{bpp * (y * width + x)}; |
|
|
|
std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24)); |
|
|
|
output_pixel.s8.Assign(input_pixel.s8); |
|
|
|
output_pixel.z24.Assign(input_pixel.z24); |
|
|
|
std::memcpy(&data[offset], &output_pixel, sizeof(Z24S8)); |
|
|
|
if (reverse) { |
|
|
|
std::memcpy(&z24s8_pixel, &data[offset], sizeof(Z24S8)); |
|
|
|
s8z24_pixel.s8.Assign(z24s8_pixel.s8); |
|
|
|
s8z24_pixel.z24.Assign(z24s8_pixel.z24); |
|
|
|
std::memcpy(&data[offset], &s8z24_pixel, sizeof(S8Z24)); |
|
|
|
} else { |
|
|
|
std::memcpy(&s8z24_pixel, &data[offset], sizeof(S8Z24)); |
|
|
|
z24s8_pixel.s8.Assign(s8z24_pixel.s8); |
|
|
|
z24s8_pixel.z24.Assign(s8z24_pixel.z24); |
|
|
|
std::memcpy(&data[offset], &z24s8_pixel, sizeof(Z24S8)); |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
@ -814,7 +822,7 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma |
|
|
|
} |
|
|
|
case PixelFormat::S8Z24: |
|
|
|
// Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
|
|
|
|
ConvertS8Z24ToZ24S8(data, width, height); |
|
|
|
ConvertS8Z24ToZ24S8(data, width, height, false); |
|
|
|
break; |
|
|
|
|
|
|
|
case PixelFormat::G8R8U: |
|
|
|
@ -825,6 +833,30 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/**
|
|
|
|
* Helper function to perform software conversion (as needed) when flushing a buffer from OpenGL to |
|
|
|
* Switch memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or |
|
|
|
* with typical desktop GPUs. |
|
|
|
*/ |
|
|
|
static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format, |
|
|
|
u32 width, u32 height) { |
|
|
|
switch (pixel_format) { |
|
|
|
case PixelFormat::G8R8U: |
|
|
|
case PixelFormat::G8R8S: |
|
|
|
case PixelFormat::ASTC_2D_4X4: |
|
|
|
case PixelFormat::ASTC_2D_8X8: { |
|
|
|
LOG_CRITICAL(HW_GPU, "Conversion of format {} after texture flushing is not implemented", |
|
|
|
static_cast<u32>(pixel_format)); |
|
|
|
UNREACHABLE(); |
|
|
|
break; |
|
|
|
} |
|
|
|
case PixelFormat::S8Z24: |
|
|
|
// Convert the Z24S8 depth format to S8Z24, as OpenGL does not support S8Z24.
|
|
|
|
ConvertS8Z24ToZ24S8(data, width, height, true); |
|
|
|
break; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192)); |
|
|
|
void CachedSurface::LoadGLBuffer() { |
|
|
|
ASSERT(params.type != SurfaceType::Fill); |
|
|
|
@ -864,11 +896,57 @@ void CachedSurface::LoadGLBuffer() { |
|
|
|
} |
|
|
|
|
|
|
|
ConvertFormatAsNeeded_LoadGLBuffer(gl_buffer, params.pixel_format, params.width, params.height); |
|
|
|
|
|
|
|
dirty = false; |
|
|
|
} |
|
|
|
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64)); |
|
|
|
void CachedSurface::FlushGLBuffer() { |
|
|
|
ASSERT_MSG(false, "Unimplemented"); |
|
|
|
MICROPROFILE_SCOPE(OpenGL_SurfaceFlush); |
|
|
|
const auto& rect{params.GetRect()}; |
|
|
|
// Load data from memory to the surface
|
|
|
|
const GLint x0 = static_cast<GLint>(rect.left); |
|
|
|
const GLint y0 = static_cast<GLint>(rect.bottom); |
|
|
|
const size_t buffer_offset = |
|
|
|
static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) * |
|
|
|
GetGLBytesPerPixel(params.pixel_format); |
|
|
|
const u32 bytes_per_pixel = GetGLBytesPerPixel(params.pixel_format); |
|
|
|
const u32 copy_size = params.width * params.height * bytes_per_pixel; |
|
|
|
gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size); |
|
|
|
const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type); |
|
|
|
// Ensure no bad interactions with GL_UNPACK_ALIGNMENT
|
|
|
|
ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0); |
|
|
|
glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width)); |
|
|
|
ASSERT(!tuple.compressed); |
|
|
|
ASSERT(x0 == 0 && y0 == 0); |
|
|
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); |
|
|
|
glGetTextureImage(texture.handle, 0, tuple.format, tuple.type, gl_buffer.size(), |
|
|
|
gl_buffer.data()); |
|
|
|
glPixelStorei(GL_PACK_ROW_LENGTH, 0); |
|
|
|
ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width, |
|
|
|
params.height); |
|
|
|
ASSERT(params.type != SurfaceType::Fill); |
|
|
|
const u8* const texture_src_data = Memory::GetPointer(params.addr); |
|
|
|
ASSERT(texture_src_data); |
|
|
|
if (params.is_tiled) { |
|
|
|
u32 depth = params.depth; |
|
|
|
u32 block_depth = params.block_depth; |
|
|
|
|
|
|
|
ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |
|
|
|
params.block_width, static_cast<u32>(params.target)); |
|
|
|
|
|
|
|
if (params.target == SurfaceParams::SurfaceTarget::Texture2D) { |
|
|
|
// TODO(Blinkhawk): Eliminate this condition once all texture types are implemented.
|
|
|
|
depth = 1U; |
|
|
|
block_depth = 1U; |
|
|
|
} |
|
|
|
gl_to_morton_fns[static_cast<size_t>(params.pixel_format)]( |
|
|
|
params.width, params.block_height, params.height, block_depth, depth, |
|
|
|
&gl_buffer[buffer_offset], copy_size, params.addr + buffer_offset); |
|
|
|
} else { |
|
|
|
Memory::WriteBlock(params.addr + buffer_offset, &gl_buffer[buffer_offset], |
|
|
|
gl_buffer.size() - buffer_offset); |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192)); |
|
|
|
|