Browse Source
Merge pull request #9194 from FernandoS27/yfc-fermi2d
Merge pull request #9194 from FernandoS27/yfc-fermi2d
YFC - Fermi2D: Rework blit engine and add a software blitter.
pull/15/merge
committed by
GitHub
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
21 changed files with 1832 additions and 31 deletions
-
4src/video_core/CMakeLists.txt
-
2src/video_core/control/channel_state.cpp
-
26src/video_core/engines/fermi_2d.cpp
-
9src/video_core/engines/fermi_2d.h
-
67src/video_core/engines/maxwell_dma.cpp
-
3src/video_core/engines/maxwell_dma.h
-
238src/video_core/engines/sw_blitter/blitter.cpp
-
27src/video_core/engines/sw_blitter/blitter.h
-
1234src/video_core/engines/sw_blitter/converter.cpp
-
36src/video_core/engines/sw_blitter/converter.h
-
136src/video_core/engines/sw_blitter/generate_converters.py
-
18src/video_core/gpu.h
-
3src/video_core/renderer_opengl/gl_rasterizer.cpp
-
1src/video_core/renderer_opengl/maxwell_to_gl.h
-
1src/video_core/renderer_vulkan/maxwell_to_vk.cpp
-
3src/video_core/renderer_vulkan/vk_rasterizer.cpp
-
12src/video_core/surface.cpp
-
4src/video_core/surface.h
-
2src/video_core/texture_cache/formatter.h
-
29src/video_core/texture_cache/texture_cache.h
-
8src/video_core/texture_cache/texture_cache_base.h
@ -0,0 +1,238 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
|
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
|
||||
|
#include <algorithm>
|
||||
|
#include <cmath>
|
||||
|
#include <vector>
|
||||
|
|
||||
|
#include "video_core/engines/sw_blitter/blitter.h"
|
||||
|
#include "video_core/engines/sw_blitter/converter.h"
|
||||
|
#include "video_core/memory_manager.h"
|
||||
|
#include "video_core/surface.h"
|
||||
|
#include "video_core/textures/decoders.h"
|
||||
|
|
||||
|
namespace Tegra { |
||||
|
class MemoryManager; |
||||
|
} |
||||
|
|
||||
|
using VideoCore::Surface::BytesPerBlock; |
||||
|
using VideoCore::Surface::PixelFormatFromRenderTargetFormat; |
||||
|
|
||||
|
namespace Tegra::Engines::Blitter { |
||||
|
|
||||
|
using namespace Texture; |
||||
|
|
||||
|
namespace { |
||||
|
|
||||
|
// Number of f32 channels stored per pixel in the intermediate representation
// that the scaling helpers below operate on.
constexpr size_t ir_components{4};
||||
|
|
||||
|
/// Scales a tightly packed image with nearest-neighbour sampling, copying raw texels.
///
/// @param input      Source texels, row-major, src_width texels per row, bpp bytes each.
/// @param output     Destination texels, row-major, dst_width texels per row, bpp bytes each.
/// @param src_width  Width of the source image in texels.
/// @param src_height Height of the source image in texels.
/// @param dst_width  Width of the destination image in texels.
/// @param dst_height Height of the destination image in texels.
/// @param bpp        Size of one texel in bytes.
void NearestNeighbor(std::span<const u8> input, std::span<u8> output, u32 src_width, u32 src_height,
                     u32 dst_width, u32 dst_height, size_t bpp) {
    // Nothing to sample from or write to; this also keeps the step computation
    // below from dividing by zero.
    if (src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0) {
        return;
    }
    // Source step per destination texel, in 32.32 fixed point.
    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
    size_t src_y = 0;
    for (u32 y = 0; y < dst_height; y++) {
        // Truncate the row coordinate BEFORE multiplying by the width; otherwise
        // the fractional part of src_y is scaled by src_width and shifts the column.
        const size_t src_row_base = (src_y >> 32) * src_width;
        const size_t dst_row_base = static_cast<size_t>(y) * dst_width;
        size_t src_x = 0;
        for (u32 x = 0; x < dst_width; x++) {
            const size_t read_from = (src_row_base + (src_x >> 32)) * bpp;
            const size_t write_to = (dst_row_base + x) * bpp;

            std::memcpy(&output[write_to], &input[read_from], bpp);
            src_x += dx_du;
        }
        src_y += dy_dv;
    }
}
||||
|
|
||||
|
/// Scales an image held in the f32 intermediate representation with
/// nearest-neighbour sampling. Every pixel occupies ir_components floats.
///
/// @param input      Source pixels, row-major, src_width pixels per row.
/// @param output     Destination pixels, row-major, dst_width pixels per row.
/// @param src_width  Width of the source image in pixels.
/// @param src_height Height of the source image in pixels.
/// @param dst_width  Width of the destination image in pixels.
/// @param dst_height Height of the destination image in pixels.
void NearestNeighborFast(std::span<const f32> input, std::span<f32> output, u32 src_width,
                         u32 src_height, u32 dst_width, u32 dst_height) {
    // Nothing to sample from or write to; this also keeps the step computation
    // below from dividing by zero.
    if (src_width == 0 || src_height == 0 || dst_width == 0 || dst_height == 0) {
        return;
    }
    // Source step per destination pixel, in 32.32 fixed point.
    const size_t dx_du = std::llround((static_cast<f64>(src_width) / dst_width) * (1ULL << 32));
    const size_t dy_dv = std::llround((static_cast<f64>(src_height) / dst_height) * (1ULL << 32));
    size_t src_y = 0;
    for (u32 y = 0; y < dst_height; y++) {
        // Truncate the row coordinate BEFORE multiplying by the width; otherwise
        // the fractional part of src_y is scaled by src_width and shifts the column.
        const size_t src_row_base = (src_y >> 32) * src_width;
        const size_t dst_row_base = static_cast<size_t>(y) * dst_width;
        size_t src_x = 0;
        for (u32 x = 0; x < dst_width; x++) {
            const size_t read_from = (src_row_base + (src_x >> 32)) * ir_components;
            const size_t write_to = (dst_row_base + x) * ir_components;

            std::memcpy(&output[write_to], &input[read_from], sizeof(f32) * ir_components);
            src_x += dx_du;
        }
        src_y += dy_dv;
    }
}
||||
|
|
||||
|
/// Scales an image held in the f32 intermediate representation with bilinear
/// filtering. Every pixel occupies ir_components floats.
///
/// The first and last destination pixels of each axis map onto the first and
/// last source pixels of that axis (the step is (src - 1) / (dst - 1)).
///
/// @param input      Source pixels, row-major, src_width pixels per row.
/// @param output     Destination pixels, row-major, dst_width pixels per row.
/// @param src_width  Width of the source image in pixels.
/// @param src_height Height of the source image in pixels.
/// @param dst_width  Width of the destination image in pixels.
/// @param dst_height Height of the destination image in pixels.
void Bilinear(std::span<const f32> input, std::span<f32> output, size_t src_width,
              size_t src_height, size_t dst_width, size_t dst_height) {
    // An empty source has no pixel to sample.
    if (src_width == 0 || src_height == 0) {
        return;
    }

    // Blends the four neighbouring pixels channel by channel.
    const auto bilinear_sample = [](std::span<const f32> x0_y0, std::span<const f32> x1_y0,
                                    std::span<const f32> x0_y1, std::span<const f32> x1_y1,
                                    f32 weight_x, f32 weight_y) {
        std::array<f32, ir_components> result{};
        for (size_t i = 0; i < ir_components; i++) {
            const f32 a = std::lerp(x0_y0[i], x1_y0[i], weight_x);
            const f32 b = std::lerp(x0_y1[i], x1_y1[i], weight_x);
            result[i] = std::lerp(a, b, weight_y);
        }
        return result;
    };

    // Returns the ir_components channels of the source pixel at (in_x, in_y).
    // The coordinates are whole numbers already, so the index is plain row-major.
    const auto read_src = [&](f32 in_x, f32 in_y) {
        const size_t read_from =
            (static_cast<size_t>(in_y) * src_width + static_cast<size_t>(in_x)) * ir_components;
        return std::span<const f32>(&input[read_from], ir_components);
    };

    const f32 dx_du =
        dst_width > 1 ? static_cast<f32>(src_width - 1) / static_cast<f32>(dst_width - 1) : 0.f;
    const f32 dy_dv =
        dst_height > 1 ? static_cast<f32>(src_height - 1) / static_cast<f32>(dst_height - 1) : 0.f;
    // Float rounding may push the last coordinate marginally past the final
    // row/column; clamp so ceil() cannot step outside the source image.
    const f32 max_x = static_cast<f32>(src_width - 1);
    const f32 max_y = static_cast<f32>(src_height - 1);

    for (u32 y = 0; y < dst_height; y++) {
        const f32 src_y = static_cast<f32>(y) * dy_dv;
        const f32 y_low = std::min(std::floor(src_y), max_y);
        const f32 y_high = std::min(std::ceil(src_y), max_y);
        const f32 weight_y = src_y - y_low;
        for (u32 x = 0; x < dst_width; x++) {
            const f32 src_x = static_cast<f32>(x) * dx_du;
            const f32 x_low = std::min(std::floor(src_x), max_x);
            const f32 x_high = std::min(std::ceil(src_x), max_x);
            const f32 weight_x = src_x - x_low;

            const auto x0_y0 = read_src(x_low, y_low);
            const auto x1_y0 = read_src(x_high, y_low);
            const auto x0_y1 = read_src(x_low, y_high);
            const auto x1_y1 = read_src(x_high, y_high);

            const auto result = bilinear_sample(x0_y0, x1_y0, x0_y1, x1_y1, weight_x, weight_y);

            const size_t write_to = (y * dst_width + x) * ir_components;

            std::memcpy(&output[write_to], result.data(), sizeof(f32) * ir_components);
        }
    }
}
||||
|
|
||||
|
} // namespace
|
||||
|
|
||||
|
// Private working state of the software blitter. The buffers live here so
// that their storage is kept between Blit() calls.
struct SoftwareBlitEngine::BlitEngineImpl {
    // Whole-surface bytes as read from / written to memory_manager.
    std::vector<u8> tmp_buffer{};
    // Tightly packed texels of the source rectangle.
    std::vector<u8> src_buffer{};
    // Tightly packed texels of the destination rectangle.
    std::vector<u8> dst_buffer{};
    // Source rectangle converted to f32, ir_components values per pixel.
    std::vector<f32> intermediate_src{};
    // Destination rectangle in f32, ir_components values per pixel.
    std::vector<f32> intermediate_dst{};
    // Supplies the per-format converters to and from the f32 representation.
    ConverterFactory converter_factory;
};
||||
|
|
||||
|
SoftwareBlitEngine::SoftwareBlitEngine(MemoryManager& memory_manager_) |
||||
|
: memory_manager{memory_manager_} { |
||||
|
impl = std::make_unique<BlitEngineImpl>(); |
||||
|
} |
||||
|
|
||||
|
SoftwareBlitEngine::~SoftwareBlitEngine() = default; |
||||
|
|
||||
|
/// Performs a Fermi2D blit on the CPU.
///
/// The source surface is read through the memory manager, the source rectangle
/// is extracted into a tightly packed buffer, scaled and/or converted to the
/// destination format when required, merged into the current contents of the
/// destination surface and written back.
///
/// @param src    Source surface description.
/// @param dst    Destination surface description.
/// @param config Blit parameters: source/destination rectangles and filter.
/// @return Always true as currently implemented.
bool SoftwareBlitEngine::Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst,
                              Fermi2D::Config& config) {
    // Size in bytes of a whole surface as it is laid out in memory.
    const auto get_surface_size = [](Fermi2D::Surface& surface, u32 bytes_per_pixel) {
        if (surface.linear == Fermi2D::MemoryLayout::BlockLinear) {
            return CalculateSize(true, bytes_per_pixel, surface.width, surface.height,
                                 surface.depth, surface.block_height, surface.block_depth);
        }
        // Widen before multiplying so the product cannot wrap in 32 bits.
        return static_cast<size_t>(surface.pitch) * surface.height;
    };

    // Copies a rectangle between a pitch-linear surface and a tightly packed
    // buffer. With unpack == false the rectangle at (x0, y0) of the surface
    // (input) is gathered into the packed buffer (output); with unpack == true
    // the packed buffer (input) is scattered back into the surface (output).
    const auto process_pitch_linear = [](bool unpack, std::span<const u8> input,
                                         std::span<u8> output, u32 extent_x, u32 extent_y,
                                         u32 pitch, u32 x0, u32 y0, size_t bpp) {
        const size_t base_offset = x0 * bpp;
        const size_t copy_size = extent_x * bpp;
        // y counts rows of the rectangle: the surface row is offset by y0,
        // while the packed buffer always starts at row 0.
        for (u32 y = 0; y < extent_y; y++) {
            const size_t first_offset = (static_cast<size_t>(y0) + y) * pitch + base_offset;
            const size_t second_offset = static_cast<size_t>(y) * extent_x * bpp;
            u8* write_to = unpack ? &output[first_offset] : &output[second_offset];
            const u8* read_from = unpack ? &input[second_offset] : &input[first_offset];
            std::memcpy(write_to, read_from, copy_size);
        }
    };

    const u32 src_extent_x = config.src_x1 - config.src_x0;
    const u32 src_extent_y = config.src_y1 - config.src_y0;

    const u32 dst_extent_x = config.dst_x1 - config.dst_x0;
    const u32 dst_extent_y = config.dst_y1 - config.dst_y0;
    const auto src_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format));
    const auto dst_bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(dst.format));
    const size_t src_size = get_surface_size(src, src_bytes_per_pixel);
    impl->tmp_buffer.resize(src_size);
    memory_manager.ReadBlock(src.Address(), impl->tmp_buffer.data(), src_size);

    // Sizes of the packed rectangles; widened first to avoid 32-bit wrap-around.
    const size_t src_copy_size =
        static_cast<size_t>(src_extent_x) * src_extent_y * src_bytes_per_pixel;

    const size_t dst_copy_size =
        static_cast<size_t>(dst_extent_x) * dst_extent_y * dst_bytes_per_pixel;

    impl->src_buffer.resize(src_copy_size);

    // A plain copy is only possible when neither the format nor the size changes.
    const bool no_passthrough =
        src.format != dst.format || src_extent_x != dst_extent_x || src_extent_y != dst_extent_y;

    // Same format, nearest filter: scale the raw texels directly.
    const auto conversion_phase_same_format = [&]() {
        NearestNeighbor(impl->src_buffer, impl->dst_buffer, src_extent_x, src_extent_y,
                        dst_extent_x, dst_extent_y, dst_bytes_per_pixel);
    };

    // General path: source format -> f32 intermediate -> scale -> destination format.
    const auto conversion_phase_ir = [&]() {
        auto* input_converter = impl->converter_factory.GetFormatConverter(src.format);
        impl->intermediate_src.resize((src_copy_size / src_bytes_per_pixel) * ir_components);
        impl->intermediate_dst.resize((dst_copy_size / dst_bytes_per_pixel) * ir_components);
        input_converter->ConvertTo(impl->src_buffer, impl->intermediate_src);

        if (config.filter != Fermi2D::Filter::Bilinear) {
            NearestNeighborFast(impl->intermediate_src, impl->intermediate_dst, src_extent_x,
                                src_extent_y, dst_extent_x, dst_extent_y);
        } else {
            Bilinear(impl->intermediate_src, impl->intermediate_dst, src_extent_x, src_extent_y,
                     dst_extent_x, dst_extent_y);
        }

        auto* output_converter = impl->converter_factory.GetFormatConverter(dst.format);
        output_converter->ConvertFrom(impl->intermediate_dst, impl->dst_buffer);
    };

    // Do the actual blit: extract the source rectangle into src_buffer.
    impl->dst_buffer.resize(dst_copy_size);
    if (src.linear == Fermi2D::MemoryLayout::BlockLinear) {
        UnswizzleSubrect(impl->src_buffer, impl->tmp_buffer, src_bytes_per_pixel, src.width,
                         src.height, src.depth, config.src_x0, config.src_y0, src_extent_x,
                         src_extent_y, src.block_height, src.block_depth,
                         src_extent_x * src_bytes_per_pixel);
    } else {
        process_pitch_linear(false, impl->tmp_buffer, impl->src_buffer, src_extent_x, src_extent_y,
                             src.pitch, config.src_x0, config.src_y0, src_bytes_per_pixel);
    }

    // Conversion phase
    if (no_passthrough) {
        if (src.format != dst.format || config.filter == Fermi2D::Filter::Bilinear) {
            conversion_phase_ir();
        } else {
            conversion_phase_same_format();
        }
    } else {
        // Identical format and extent: the packed source already is the result.
        impl->dst_buffer.swap(impl->src_buffer);
    }

    // Read the current destination surface so texels outside the rectangle
    // are preserved when the whole surface is written back.
    const size_t dst_size = get_surface_size(dst, dst_bytes_per_pixel);
    impl->tmp_buffer.resize(dst_size);
    memory_manager.ReadBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);

    if (dst.linear == Fermi2D::MemoryLayout::BlockLinear) {
        SwizzleSubrect(impl->tmp_buffer, impl->dst_buffer, dst_bytes_per_pixel, dst.width,
                       dst.height, dst.depth, config.dst_x0, config.dst_y0, dst_extent_x,
                       dst_extent_y, dst.block_height, dst.block_depth,
                       dst_extent_x * dst_bytes_per_pixel);
    } else {
        process_pitch_linear(true, impl->dst_buffer, impl->tmp_buffer, dst_extent_x, dst_extent_y,
                             dst.pitch, config.dst_x0, config.dst_y0,
                             static_cast<size_t>(dst_bytes_per_pixel));
    }
    memory_manager.WriteBlock(dst.Address(), impl->tmp_buffer.data(), dst_size);
    return true;
}
||||
|
|
||||
|
} // namespace Tegra::Engines::Blitter
|
||||
@ -0,0 +1,27 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include "video_core/engines/fermi_2d.h" |
||||
|
|
||||
|
namespace Tegra { |
||||
|
class MemoryManager; |
||||
|
} |
||||
|
|
||||
|
namespace Tegra::Engines::Blitter { |
||||
|
|
||||
|
class SoftwareBlitEngine { |
||||
|
public: |
||||
|
explicit SoftwareBlitEngine(MemoryManager& memory_manager_); |
||||
|
~SoftwareBlitEngine(); |
||||
|
|
||||
|
bool Blit(Fermi2D::Surface& src, Fermi2D::Surface& dst, Fermi2D::Config& copy_config); |
||||
|
|
||||
|
private: |
||||
|
MemoryManager& memory_manager; |
||||
|
struct BlitEngineImpl; |
||||
|
std::unique_ptr<BlitEngineImpl> impl; |
||||
|
}; |
||||
|
|
||||
|
} // namespace Tegra::Engines::Blitter |
||||
1234
src/video_core/engines/sw_blitter/converter.cpp
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,36 @@ |
|||||
|
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
||||
|
// SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
#pragma once |
||||
|
|
||||
|
#include <memory> |
||||
|
#include <span> |
||||
|
|
||||
|
#include "common/common_types.h" |
||||
|
|
||||
|
#include "video_core/gpu.h" |
||||
|
|
||||
|
namespace Tegra::Engines::Blitter { |
||||
|
|
||||
|
class Converter { |
||||
|
public: |
||||
|
virtual void ConvertTo(std::span<const u8> input, std::span<f32> output) = 0; |
||||
|
virtual void ConvertFrom(std::span<const f32> input, std::span<u8> output) = 0; |
||||
|
virtual ~Converter() = default; |
||||
|
}; |
||||
|
|
||||
|
class ConverterFactory { |
||||
|
public: |
||||
|
ConverterFactory(); |
||||
|
~ConverterFactory(); |
||||
|
|
||||
|
Converter* GetFormatConverter(RenderTargetFormat format); |
||||
|
|
||||
|
private: |
||||
|
Converter* BuildConverter(RenderTargetFormat format); |
||||
|
|
||||
|
struct ConverterFactoryImpl; |
||||
|
std::unique_ptr<ConverterFactoryImpl> impl; |
||||
|
}; |
||||
|
|
||||
|
} // namespace Tegra::Engines::Blitter |
||||
@ -0,0 +1,136 @@ |
|||||
|
# SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project |
||||
|
# SPDX-License-Identifier: GPL-3.0-or-later |
||||
|
|
||||
|
import re |
||||
|
|
||||
|
class Format:
    """Render target format parsed from a name such as ``A2B10G10R10_UNORM``.

    The part before the first underscore lists the components as a letter
    followed by a bit count; the part after it is the component type.
    """

    def __init__(self, string_value):
        self.name = string_value
        parts = string_value.split('_')
        self.component_type = parts[1]
        # Each match is one component: a single character followed by its bit size.
        components = re.findall(r"\w\d+", parts[0])
        self.num_components = len(components)
        self.sizes = [int(component[1:]) for component in components]
        self.swizzle = [component[0] for component in components]

    @staticmethod
    def _braced(items):
        """Render items as a C++ brace initializer: ``{ a, b, c }``."""
        return "{ " + ", ".join(items) + " }"

    def build_component_type_array(self):
        """Initializer repeating the component type once per component."""
        entry = "ComponentType::" + self.component_type
        return self._braced(entry for _ in range(self.num_components))

    def build_component_sizes_array(self):
        """Initializer listing the bit size of every component."""
        return self._braced(str(self.sizes[index]) for index in range(self.num_components))

    def build_component_swizzle_array(self):
        """Initializer listing the swizzle of every component; ``X`` maps to ``None``."""
        def swizzle_name(letter):
            return "Swizzle::" + ("None" if letter == "X" else letter)

        return self._braced(
            swizzle_name(self.swizzle[index]) for index in range(self.num_components))

    def print_declaration(self):
        """Print the C++ traits struct describing this format."""
        print(f"struct {self.name}Traits {{")
        print(f" static constexpr size_t num_components = {self.num_components};")
        print(" static constexpr std::array<ComponentType, num_components> component_types = "
              f"{self.build_component_type_array()};")
        print(" static constexpr std::array<size_t, num_components> component_sizes = "
              f"{self.build_component_sizes_array()};")
        print(" static constexpr std::array<Swizzle, num_components> component_swizzle = "
              f"{self.build_component_swizzle_array()};")
        print("};\n")

    def print_case(self):
        """Print the C++ switch case that builds and caches this format's converter."""
        print(f"case RenderTargetFormat::{self.name}:")
        print(" return impl->converters_cache")
        print(f" .emplace(format, std::make_unique<ConverterImpl<{self.name}Traits>>())")
        print(" .first->second.get();")
        print(" break;")
|
|
||||
|
txt = """ |
||||
|
R32G32B32A32_FLOAT |
||||
|
R32G32B32A32_SINT |
||||
|
R32G32B32A32_UINT |
||||
|
R32G32B32X32_FLOAT |
||||
|
R32G32B32X32_SINT |
||||
|
R32G32B32X32_UINT |
||||
|
R16G16B16A16_UNORM |
||||
|
R16G16B16A16_SNORM |
||||
|
R16G16B16A16_SINT |
||||
|
R16G16B16A16_UINT |
||||
|
R16G16B16A16_FLOAT |
||||
|
R32G32_FLOAT |
||||
|
R32G32_SINT |
||||
|
R32G32_UINT |
||||
|
R16G16B16X16_FLOAT |
||||
|
A8R8G8B8_UNORM |
||||
|
A8R8G8B8_SRGB |
||||
|
A2B10G10R10_UNORM |
||||
|
A2B10G10R10_UINT |
||||
|
A2R10G10B10_UNORM |
||||
|
A8B8G8R8_UNORM |
||||
|
A8B8G8R8_SRGB |
||||
|
A8B8G8R8_SNORM |
||||
|
A8B8G8R8_SINT |
||||
|
A8B8G8R8_UINT |
||||
|
R16G16_UNORM |
||||
|
R16G16_SNORM |
||||
|
R16G16_SINT |
||||
|
R16G16_UINT |
||||
|
R16G16_FLOAT |
||||
|
B10G11R11_FLOAT |
||||
|
R32_SINT |
||||
|
R32_UINT |
||||
|
R32_FLOAT |
||||
|
X8R8G8B8_UNORM |
||||
|
X8R8G8B8_SRGB |
||||
|
R5G6B5_UNORM |
||||
|
A1R5G5B5_UNORM |
||||
|
R8G8_UNORM |
||||
|
R8G8_SNORM |
||||
|
R8G8_SINT |
||||
|
R8G8_UINT |
||||
|
R16_UNORM |
||||
|
R16_SNORM |
||||
|
R16_SINT |
||||
|
R16_UINT |
||||
|
R16_FLOAT |
||||
|
R8_UNORM |
||||
|
R8_SNORM |
||||
|
R8_SINT |
||||
|
R8_UINT |
||||
|
X1R5G5B5_UNORM |
||||
|
X8B8G8R8_UNORM |
||||
|
X8B8G8R8_SRGB |
||||
|
""" |
||||
|
|
||||
|
# One format name per whitespace-separated token of the list above.
x = txt.split()
y = [Format(a) for a in x]
formats = list(y)

# Emit every traits struct first, then every switch case.
for format in formats:
    format.print_declaration()

for format in formats:
    format.print_case()
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue