|
|
@ -13,9 +13,146 @@ |
|
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
|
|
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
|
|
#include "video_core/vulkan_common/vulkan_device.h"
|
|
|
#include "video_core/vulkan_common/vulkan_device.h"
|
|
|
|
|
|
|
|
|
#define A_CPU
|
|
|
|
|
|
#include <ffx_a.h>
|
|
|
|
|
|
#include <ffx_fsr1.h>
|
|
|
|
|
|
|
|
|
// Reimplementations of the constant generating functions in ffx_fsr1.h
|
|
|
|
|
|
// GCC generated a lot of warnings when using the official header.
|
|
|
|
|
|
// Converts a 32-bit float into 16-bit half-float bits, returned in the low 16 bits of a u32.
//
// The mantissa is truncated (never rounded). The result depends on the biased float exponent:
//   - below 103:  flushed to a signed zero,
//   - 103..112:   encoded as a half denormal,
//   - 113..142:   encoded as a normal half (exponent re-biased by 112),
//   - 143 and up: saturated to 0x7bff, the largest finite half; this includes infinities
//                 and NaNs, which therefore never produce a half infinity/NaN.
// The sign bit is always carried over into bit 15.
static u32 AU1_AH1_AF1(f32 f) {
    const u32 bits = std::bit_cast<u32>(f);
    const u32 sign = (bits >> 16) & 0x8000u;
    const u32 exponent = (bits >> 23) & 0xffu;
    const u32 mantissa = bits & 0x7fffffu;

    if (exponent < 103) {
        // Too small for even the smallest half denormal.
        return sign;
    }
    if (exponent < 113) {
        // Half denormal: the implicit leading one becomes an explicit mantissa bit and the
        // float mantissa is shifted down to sit below it.
        const u32 leading_bit = 1u << (exponent - 103);
        return (sign | leading_bit) + (mantissa >> (126 - exponent));
    }
    if (exponent < 143) {
        // Normal half: 5-bit exponent in bits 10..14, top 10 mantissa bits below it.
        return (sign | ((exponent - 112) << 10)) + (mantissa >> 13);
    }
    // Out of range for a half: clamp to the largest finite magnitude.
    return sign | 0x7bffu;
}
|
|
|
|
|
|
|
|
|
|
|
// Packs two floats into one u32 as a pair of half-floats.
// a[0] is converted with AU1_AH1_AF1 and placed in the low 16 bits, a[1] is converted the same
// way and placed in the high 16 bits.
static u32 AU1_AH2_AF2(f32 a[2]) {
    const u32 low_half = AU1_AH1_AF1(a[0]);
    const u32 high_half = AU1_AH1_AF1(a[1]);
    return low_half + (high_half << 16);
}
|
|
|
|
|
|
|
|
|
|
|
// Fills the four 4-dword constant vectors for the EASU pass.
// Every value is a float stored as its raw 32-bit pattern, except con3[2] and con3[3], which are
// written as integer zero.
//
//   con0 = { viewportX / outputX, viewportY / outputY,
//            0.5 * viewportX / outputX - 0.5, 0.5 * viewportY / outputY - 0.5 }
//   con1 = {  1 / inputX, 1 / inputY, 1 / inputX, -1 / inputY }
//   con2 = { -1 / inputX, 2 / inputY, 1 / inputX,  2 / inputY }
//   con3 = {  0 / inputX, 4 / inputY, 0, 0 }
//
// NOTE(review): con1[2..3], con2 and con3 are presumably the texel offsets of the shader's
// gather taps as described in AMD's ffx_fsr1.h - confirm against that header.
static void FsrEasuCon(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
                       f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
                       f32 inputSizeInPixelsX, f32 inputSizeInPixelsY, f32 outputSizeInPixelsX,
                       f32 outputSizeInPixelsY) {
    const auto as_bits = [](f32 value) { return std::bit_cast<u32>(value); };

    // Output pixel -> input viewport pixel mapping.
    const f32 scale_x = inputViewportInPixelsX / outputSizeInPixelsX;
    const f32 scale_y = inputViewportInPixelsY / outputSizeInPixelsY;
    const f32 bias_x = 0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f;
    const f32 bias_y = 0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f;
    con0[0] = as_bits(scale_x);
    con0[1] = as_bits(scale_y);
    con0[2] = as_bits(bias_x);
    con0[3] = as_bits(bias_y);

    // Multiples of one input texel, in normalized image coordinates.
    const f32 texel_x = 1.0f / inputSizeInPixelsX;
    const f32 texel_y = 1.0f / inputSizeInPixelsY;
    const f32 neg_texel_x = -1.0f / inputSizeInPixelsX;
    const f32 neg_texel_y = -1.0f / inputSizeInPixelsY;
    const f32 two_texels_y = 2.0f / inputSizeInPixelsY;
    const f32 zero_texels_x = 0.0f / inputSizeInPixelsX;
    const f32 four_texels_y = 4.0f / inputSizeInPixelsY;

    con1[0] = as_bits(texel_x);
    con1[1] = as_bits(texel_y);
    con1[2] = as_bits(texel_x);
    con1[3] = as_bits(neg_texel_y);

    con2[0] = as_bits(neg_texel_x);
    con2[1] = as_bits(two_texels_y);
    con2[2] = as_bits(texel_x);
    con2[3] = as_bits(two_texels_y);

    con3[0] = as_bits(zero_texels_x);
    con3[1] = as_bits(four_texels_y);
    con3[2] = 0;
    con3[3] = 0;
}
|
|
|
|
|
|
|
|
|
|
|
// Same as FsrEasuCon, but for an input viewport that does not start at the image origin.
// All constants are first produced by FsrEasuCon; con0[2] and con0[3] are then rewritten so
// that inputOffsetInPixelsX / inputOffsetInPixelsY are added to the base bias terms.
static void FsrEasuConOffset(u32 con0[4], u32 con1[4], u32 con2[4], u32 con3[4],
                             f32 inputViewportInPixelsX, f32 inputViewportInPixelsY,
                             f32 inputSizeInPixelsX, f32 inputSizeInPixelsY,
                             f32 outputSizeInPixelsX, f32 outputSizeInPixelsY,
                             f32 inputOffsetInPixelsX, f32 inputOffsetInPixelsY) {
    FsrEasuCon(con0, con1, con2, con3, inputViewportInPixelsX, inputViewportInPixelsY,
               inputSizeInPixelsX, inputSizeInPixelsY, outputSizeInPixelsX, outputSizeInPixelsY);

    // Recompute the bias terms and shift them by the viewport offset.
    const f32 bias_x = 0.5f * inputViewportInPixelsX / outputSizeInPixelsX - 0.5f;
    const f32 bias_y = 0.5f * inputViewportInPixelsY / outputSizeInPixelsY - 0.5f;
    con0[2] = std::bit_cast<u32>(bias_x + inputOffsetInPixelsX);
    con0[3] = std::bit_cast<u32>(bias_y + inputOffsetInPixelsY);
}
|
|
|
|
|
|
|
|
|
|
|
// Fills the 4-dword constant vector for the RCAS pass.
// The sharpness argument is mapped through 2^(-sharpness) before being stored, so 0 yields 1.0
// and every additional unit halves the stored value.
//   con[0] = mapped value as raw float bits
//   con[1] = the same value as two packed half-floats (low and high 16 bits)
//   con[2] = con[3] = 0
static void FsrRcasCon(u32* con, f32 sharpness) {
    const f32 mapped = std::exp2f(-sharpness);
    f32 half_pair[2]{mapped, mapped};

    con[0] = std::bit_cast<u32>(mapped);
    con[1] = AU1_AH2_AF2(half_pair);
    con[2] = 0;
    con[3] = 0;
}
|
|
|
|
|
|
|
|
namespace Vulkan { |
|
|
namespace Vulkan { |
|
|
|
|
|
|
|
|
@ -62,15 +199,15 @@ VkImageView FSR::Draw(VKScheduler& scheduler, size_t image_index, VkImageView im |
|
|
|
|
|
|
|
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); |
|
|
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); |
|
|
|
|
|
|
|
|
std::array<AU1, 4 * 4> push_constants; |
|
|
|
|
|
|
|
|
std::array<u32, 4 * 4> push_constants; |
|
|
FsrEasuConOffset( |
|
|
FsrEasuConOffset( |
|
|
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, |
|
|
push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, |
|
|
push_constants.data() + 12, |
|
|
push_constants.data() + 12, |
|
|
|
|
|
|
|
|
static_cast<AF1>(crop_rect.GetWidth()), static_cast<AF1>(crop_rect.GetHeight()), |
|
|
|
|
|
static_cast<AF1>(input_image_extent.width), static_cast<AF1>(input_image_extent.height), |
|
|
|
|
|
static_cast<AF1>(output_size.width), static_cast<AF1>(output_size.height), |
|
|
|
|
|
static_cast<AF1>(crop_rect.left), static_cast<AF1>(crop_rect.top)); |
|
|
|
|
|
|
|
|
static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), |
|
|
|
|
|
static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height), |
|
|
|
|
|
static_cast<f32>(output_size.width), static_cast<f32>(output_size.height), |
|
|
|
|
|
static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); |
|
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); |
|
|
cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); |
|
|
|
|
|
|
|
|
{ |
|
|
{ |
|
|
|