Browse Source

[memory, vk] TEST: Tiled GPU optimization try #1

test-revert-gpu-optim
CamilleLaVey 1 month ago
parent
commit
ee5565077c
  1. 54
      src/common/settings.h
  2. 10
      src/common/settings_enums.h
  3. 37
      src/shader_recompiler/backend/spirv/emit_spirv.cpp
  4. 8
      src/shader_recompiler/profile.h
  5. 17
      src/video_core/renderer_vulkan/pipeline_helper.h
  6. 14
      src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
  7. 73
      src/video_core/renderer_vulkan/vk_render_pass_cache.cpp
  8. 7
      src/video_core/renderer_vulkan/vk_render_pass_cache.h
  9. 75
      src/video_core/renderer_vulkan/vk_texture_cache.cpp
  10. 65
      src/video_core/vulkan_common/vulkan_device.cpp
  11. 22
      src/video_core/vulkan_common/vulkan_memory_allocator.cpp

54
src/common/settings.h

@ -560,6 +560,60 @@ struct Values {
false,
&sample_shading};
#ifdef ANDROID
// Shader Float Controls (Android only) - Eden Veil / Extensions
//
// Master toggle: force enable VK_KHR_shader_float_controls even if the driver has known issues.
// Allows fine-tuning float behavior to match Switch/Maxwell or to optimize performance.
// Each of the four behavior settings below passes the address of this setting as its last
// constructor argument.
SwitchableSetting<bool> shader_float_controls_force_enable{linkage,
false,
"shader_float_controls_force_enable",
Category::RendererExtensions,
Specialization::Paired};
// Individual float behavior controls (visible only when force_enable is true).
// Several can be active simultaneously EXCEPT FTZ and DenormPreserve, which are mutually
// exclusive. These declarations are independent booleans and do not enforce that rule
// themselves; the consumers of the values have to resolve a conflict.
//
// Recommended configurations:
// Switch-native: FTZ=ON, RTE=ON, SignedZero=ON (matches Maxwell behavior)
// Performance: FTZ=ON only (fastest)
// Accuracy: DenormPreserve=ON, RTE=ON, SignedZero=ON (slowest, highest precision)
//
// NOTE(review): the positional `true, false` arguments are presumably the "save" and
// "runtime modifiable" flags of the Setting constructor - confirm against its declaration.

// FTZ: flush FP32 denormals to zero.
SwitchableSetting<bool> shader_float_ftz{linkage,
false,
"shader_float_ftz",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
// DenormPreserve: keep FP32 denormals (mutually exclusive with FTZ).
SwitchableSetting<bool> shader_float_denorm_preserve{linkage,
false,
"shader_float_denorm_preserve",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
// RTE: round-to-even rounding mode for FP32.
SwitchableSetting<bool> shader_float_rte{linkage,
false,
"shader_float_rte",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
// Preserve signed zero, infinity and NaN for FP32.
SwitchableSetting<bool> shader_float_signed_zero_inf_nan{linkage,
false,
"shader_float_signed_zero_inf_nan",
Category::RendererExtensions,
Specialization::Default,
true,
false,
&shader_float_controls_force_enable};
#endif
Setting<bool> renderer_debug{linkage, false, "debug", Category::RendererDebug};
Setting<bool> renderer_shader_feedback{linkage, false, "shader_feedback",
Category::RendererDebug};

10
src/common/settings_enums.h

@ -152,6 +152,16 @@ ENUM(SpirvOptimizeMode, Never, OnLoad, Always);
ENUM(GpuOverclock, Low, Medium, High)
ENUM(TemperatureUnits, Celsius, Fahrenheit)
// Shader Float Controls behavior modes.
// Each enumerator names one policy for how floating-point denormals and special values
// (signed zero, infinity, NaN) are handled in shaders.
// Abbreviations: FTZ = flush denormals to zero, RTE = round to even.
// NOTE(review): no use of this enum is visible in this change; the Android settings are exposed
// as individual booleans instead - confirm whether the enum is still needed.
ENUM(ShaderFloatBehavior,
DriverDefault, // Let the driver choose (safest, may not match Switch behavior)
SwitchNative, // Emulate Switch/Maxwell behavior (FTZ + RTE + SignedZero)
FlushToZero, // FTZ only - flush denorms to zero (fastest, some precision loss)
PreserveDenorms, // Preserve denorms (slowest, highest precision)
RoundToEven, // RTE rounding mode (IEEE 754 compliant)
SignedZeroInfNan); // Preserve signed zero, inf and NaN (accuracy for edge cases)
template <typename Type>
inline std::string_view CanonicalizeEnum(Type id) {
const auto group = EnumMetadata<Type>::Canonicalizations();

37
src/shader_recompiler/backend/spirv/emit_spirv.cpp

@ -341,19 +341,35 @@ void DefineEntryPoint(const IR::Program& program, EmitContext& ctx, Id main) {
void SetupDenormControl(const Profile& profile, const IR::Program& program, EmitContext& ctx,
Id main_func) {
const Info& info{program.info};
// User-forced behavior overrides (Android Eden Veil/Extensions)
// When force flags are active, they take precedence over shader-declared behavior
const bool force_flush = profile.force_fp32_denorm_flush;
const bool force_preserve = profile.force_fp32_denorm_preserve;
if (force_flush && force_preserve) {
LOG_WARNING(Shader_SPIRV, "Both FTZ and Preserve forced simultaneously - FTZ takes precedence");
}
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm flush and preserve on the same shader");
} else if (info.uses_fp32_denorms_flush) {
} else if (force_flush || info.uses_fp32_denorms_flush) {
if (profile.support_fp32_denorm_flush) {
ctx.AddCapability(spv::Capability::DenormFlushToZero);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormFlushToZero, 32U);
if (force_flush) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormFlushToZero FORCED by user setting");
}
} else {
// Drivers will most likely flush denorms by default, no need to warn
}
} else if (info.uses_fp32_denorms_preserve) {
} else if (force_preserve || info.uses_fp32_denorms_preserve) {
if (profile.support_fp32_denorm_preserve) {
ctx.AddCapability(spv::Capability::DenormPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::DenormPreserve, 32U);
if (force_preserve) {
LOG_DEBUG(Shader_SPIRV, "Fp32 DenormPreserve FORCED by user setting");
}
} else {
LOG_DEBUG(Shader_SPIRV, "Fp32 denorm preserve used in shader without host support");
}
@ -386,13 +402,24 @@ void SetupSignedNanCapabilities(const Profile& profile, const IR::Program& progr
if (profile.has_broken_fp16_float_controls && program.info.uses_fp16) {
return;
}
// User-forced behavior (Android Eden Veil/Extensions)
const bool force_signed_zero_inf_nan = profile.force_fp32_signed_zero_inf_nan;
if (program.info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
}
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan || profile.support_fp32_signed_zero_nan_preserve) {
if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
if (force_signed_zero_inf_nan) {
LOG_DEBUG(Shader_SPIRV, "Fp32 SignedZeroInfNanPreserve FORCED by user setting");
}
} else if (force_signed_zero_inf_nan) {
LOG_WARNING(Shader_SPIRV, "SignedZeroInfNanPreserve forced but driver doesn't support it");
}
}
if (program.info.uses_fp64 && profile.support_fp64_signed_zero_nan_preserve) {
ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);

8
src/shader_recompiler/profile.h

@ -28,6 +28,14 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
// User-forced float behavior overrides (Android Eden Veil/Extensions)
// When shader_float_controls_force_enable is true, these override shader-declared behavior
bool force_fp32_denorm_flush{}; // Force FTZ for all FP32 ops
bool force_fp32_denorm_preserve{}; // Force denorm preservation for all FP32 ops
bool force_fp32_rte_rounding{}; // Force Round-To-Even for all FP32 ops
bool force_fp32_signed_zero_inf_nan{}; // Force signed zero/inf/nan preservation
bool support_explicit_workgroup_layout{};
bool support_vote{};
bool support_viewport_index_layer_non_geometry{};

17
src/video_core/renderer_vulkan/pipeline_helper.h

@ -24,8 +24,21 @@ public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
/// Decides whether this layout may be bound through VK_KHR_push_descriptor.
///
/// @return false when the extension is unsupported, when the layout holds more descriptors
///         than the device's push descriptor limit, or when running on the Qualcomm
///         proprietary driver with more than 8 descriptors; true otherwise.
bool CanUsePushDescriptor() const noexcept {
    if (!device->IsKhrPushDescriptorSupported()) {
        return false;
    }
    if (num_descriptors > device->MaxPushDescriptors()) {
        return false;
    }
    // Qualcomm has a slow push descriptor implementation - use a conservative threshold and
    // prefer descriptor pools for complex shaders (>8 descriptors).
    // The previous unconditional `return supported && within_limit;` was left in front of
    // these checks, which made this threshold unreachable; it is removed here.
    const bool is_qualcomm = device->GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
    return !(is_qualcomm && num_descriptors > 8);
}
// TODO(crueter): utilize layout binding flags

14
src/video_core/renderer_vulkan/vk_pipeline_cache.cpp

@ -341,6 +341,20 @@ PipelineCache::PipelineCache(Tegra::MaxwellDeviceMemoryManager& device_memory_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
#ifdef ANDROID
// User-forced float behavior overrides (Eden Veil/Extensions).
// Each override is honored only while the master toggle shader_float_controls_force_enable is
// on. The individual values persist when the master toggle is switched off, so reading them
// unconditionally would keep forcing a behavior the user has disabled.
.force_fp32_denorm_flush = Settings::values.shader_float_controls_force_enable.GetValue() &&
    Settings::values.shader_float_ftz.GetValue(),
.force_fp32_denorm_preserve = Settings::values.shader_float_controls_force_enable.GetValue() &&
    Settings::values.shader_float_denorm_preserve.GetValue(),
.force_fp32_rte_rounding = Settings::values.shader_float_controls_force_enable.GetValue() &&
    Settings::values.shader_float_rte.GetValue(),
.force_fp32_signed_zero_inf_nan = Settings::values.shader_float_controls_force_enable.GetValue() &&
    Settings::values.shader_float_signed_zero_inf_nan.GetValue(),
#else
// The settings only exist on Android; never force anything elsewhere.
.force_fp32_denorm_flush = false,
.force_fp32_denorm_preserve = false,
.force_fp32_rte_rounding = false,
.force_fp32_signed_zero_inf_nan = false,
#endif
.support_explicit_workgroup_layout = device.IsKhrWorkgroupMemoryExplicitLayoutSupported(),
.support_vote = device.IsSubgroupFeatureSupported(VK_SUBGROUP_FEATURE_VOTE_BIT),
.support_viewport_index_layer_non_geometry =

73
src/video_core/renderer_vulkan/vk_render_pass_cache.cpp

@ -8,6 +8,7 @@
#include <boost/container/static_vector.hpp>
#include "common/logging/log.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/surface.h"
@ -19,6 +20,23 @@ namespace {
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
// Reports whether a Vulkan driver ID is one this file treats as a tile-based deferred
// rendering (TBDR) GPU. Callers use the result to decide whether attachment load/store
// hints are applied.
//
// Driver IDs treated as TBDR in Eden:
// - Qualcomm proprietary (Adreno / Snapdragon): most Android flagship/midrange devices
// - ARM proprietary (Mali): Android devices (Samsung Exynos, MediaTek, etc.)
// - Imagination proprietary (PowerVR): older iOS devices, some Android tablets
// - Samsung proprietary (Xclipse): Galaxy S22+ (AMD RDNA2-based, but treated as TBDR here)
// - Broadcom proprietary (VideoCore): Raspberry Pi
//
// Every other driver ID yields false.
[[nodiscard]] constexpr bool IsTBDRGPU(VkDriverId driver_id) {
    switch (driver_id) {
    case VK_DRIVER_ID_QUALCOMM_PROPRIETARY:
    case VK_DRIVER_ID_ARM_PROPRIETARY:
    case VK_DRIVER_ID_IMAGINATION_PROPRIETARY:
    case VK_DRIVER_ID_SAMSUNG_PROPRIETARY:
    case VK_DRIVER_ID_BROADCOM_PROPRIETARY:
        return true;
    default:
        return false;
    }
}
constexpr SurfaceType GetSurfaceType(PixelFormat format) {
switch (format) {
// Depth formats
@ -44,23 +62,51 @@ using VideoCore::Surface::SurfaceType;
}
VkAttachmentDescription AttachmentDescription(const Device& device, PixelFormat format,
VkSampleCountFlagBits samples) {
VkSampleCountFlagBits samples,
bool tbdr_will_clear,
bool tbdr_discard_after) {
using MaxwellToVK::SurfaceFormat;
const SurfaceType surface_type = GetSurfaceType(format);
const bool has_stencil = surface_type == SurfaceType::DepthStencil ||
surface_type == SurfaceType::Stencil;
// TBDR optimization: Apply hints only on tile-based GPUs
// Desktop GPUs (NVIDIA/AMD/Intel) ignore these hints and use standard behavior
const bool is_tbdr = IsTBDRGPU(device.GetDriverID());
// On TBDR: Use DONT_CARE if clear is guaranteed (avoids loading from main memory)
// On Desktop: Always LOAD to preserve existing content (safer default)
VkAttachmentLoadOp load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
if (is_tbdr && tbdr_will_clear) {
load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
}
// On TBDR: Use DONT_CARE if content won't be read (avoids storing to main memory)
// On Desktop: Always STORE (safer default)
VkAttachmentStoreOp store_op = VK_ATTACHMENT_STORE_OP_STORE;
if (is_tbdr && tbdr_discard_after) {
store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
}
// Stencil operations follow same logic
VkAttachmentLoadOp stencil_load_op = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
VkAttachmentStoreOp stencil_store_op = VK_ATTACHMENT_STORE_OP_DONT_CARE;
if (has_stencil) {
stencil_load_op = (is_tbdr && tbdr_will_clear) ? VK_ATTACHMENT_LOAD_OP_DONT_CARE
: VK_ATTACHMENT_LOAD_OP_LOAD;
stencil_store_op = (is_tbdr && tbdr_discard_after) ? VK_ATTACHMENT_STORE_OP_DONT_CARE
: VK_ATTACHMENT_STORE_OP_STORE;
}
return {
.flags = {},
.format = SurfaceFormat(device, FormatType::Optimal, true, format).format,
.samples = samples,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.stencilLoadOp = has_stencil ? VK_ATTACHMENT_LOAD_OP_LOAD
: VK_ATTACHMENT_LOAD_OP_DONT_CARE,
.stencilStoreOp = has_stencil ? VK_ATTACHMENT_STORE_OP_STORE
: VK_ATTACHMENT_STORE_OP_DONT_CARE,
.loadOp = load_op,
.storeOp = store_op,
.stencilLoadOp = stencil_load_op,
.stencilStoreOp = stencil_store_op,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
};
@ -75,6 +121,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
if (!is_new) {
return *pair->second;
}
const bool is_tbdr = IsTBDRGPU(device->GetDriverID());
if (is_tbdr && (key.tbdr_will_clear || key.tbdr_discard_after)) {
LOG_DEBUG(Render_Vulkan, "Creating TBDR-optimized render pass (driver={}, clear={}, discard={})",
static_cast<u32>(device->GetDriverID()), key.tbdr_will_clear, key.tbdr_discard_after);
}
boost::container::static_vector<VkAttachmentDescription, 9> descriptions;
std::array<VkAttachmentReference, 8> references{};
u32 num_attachments{};
@ -87,7 +140,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
if (is_valid) {
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
descriptions.push_back(AttachmentDescription(*device, format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after));
num_attachments = static_cast<u32>(index + 1);
++num_colors;
}
@ -99,7 +153,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.attachment = num_colors,
.layout = VK_IMAGE_LAYOUT_GENERAL,
};
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples,
key.tbdr_will_clear, key.tbdr_discard_after));
}
const VkSubpassDescription subpass{
.flags = 0,

7
src/video_core/renderer_vulkan/vk_render_pass_cache.h

@ -17,6 +17,11 @@ struct RenderPassKey {
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
VideoCore::Surface::PixelFormat depth_format;
VkSampleCountFlagBits samples;
// TBDR optimization hints - only affect tile-based GPUs (Qualcomm, ARM, Imagination)
// These flags indicate the expected usage pattern to optimize load/store operations
bool tbdr_will_clear{false}; // Attachment will be cleared with vkCmdClearAttachments
bool tbdr_discard_after{false}; // Attachment won't be read after render pass
};
} // namespace Vulkan
@ -27,6 +32,8 @@ struct hash<Vulkan::RenderPassKey> {
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
value ^= (static_cast<size_t>(key.tbdr_will_clear) << 56);
value ^= (static_cast<size_t>(key.tbdr_discard_after) << 57);
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
}

75
src/video_core/renderer_vulkan/vk_texture_cache.cpp

@ -160,6 +160,45 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
/// Emergency MSAA fallback for HDR formats.
///
/// Takes an image description by value and returns it unchanged unless all of the following
/// hold: the image is multisampled, its host format is B10G11R11_UFLOAT_PACK32 or
/// E5B9G9R9_UFLOAT_PACK32, and the device reports no shaderStorageImageMultisample support
/// (required for msaa_copy_pass). In that case the returned copy has num_samples forced to 1
/// and an error is logged.
[[nodiscard]] ImageInfo AdjustMSAAForHDRFormats(const Device& device, ImageInfo info) {
    // Single-sampled images never need the fallback.
    if (info.num_samples <= 1) {
        return info;
    }

    // Resolve the host format and test it against the HDR formats that commonly fail with MSAA.
    const auto host_format =
        MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, false, info.format).format;
    const bool is_packed_hdr = host_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
                               host_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;

    // Nothing to adjust for other formats, or when the driver can do multisampled storage
    // images. Device support is only queried for the HDR formats.
    if (!is_packed_hdr || device.IsStorageImageMultisampleSupported()) {
        return info;
    }

    LOG_ERROR(Render_Vulkan,
              "EMERGENCY MSAA FALLBACK: Driver doesn't support shaderStorageImageMultisample. "
              "Degrading HDR format {} from {}x MSAA to 1x (non-MSAA) to prevent texture corruption. "
              "This will cause visual quality loss but prevents black textures.",
              host_format, info.num_samples);

    // Only the sample count changes, NOT the dimensions: ImageInfo dimensions are already in
    // "logical" (full resolution) space, and MakeImageCreateInfo derives the physical GPU
    // dimensions from num_samples.
    info.num_samples = 1;
    return info;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
const ImageInfo& info, std::span<const VkFormat> view_formats) {
if (info.type == ImageType::Buffer) {
@ -1510,10 +1549,20 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info,
runtime->ViewFormats(info.format))),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
runtime{&runtime_} {
// CRITICAL: Adjust MSAA for HDR formats if driver doesn't support shaderStorageImageMultisample
// This prevents texture corruption by degrading to non-MSAA when msaa_copy_pass would fail
const ImageInfo adjusted_info = AdjustMSAAForHDRFormats(runtime_.device, info_);
// Update our stored info with adjusted values (may have num_samples=1 now)
info = adjusted_info;
// Create image with adjusted info
original_image = MakeImage(runtime_.device, runtime_.memory_allocator, adjusted_info,
runtime->ViewFormats(adjusted_info.format));
aspect_mask = ImageAspectMask(adjusted_info.format);
if (IsPixelFormatASTC(adjusted_info.format) && !runtime->device.IsOptimalAstcSupported()) {
switch (Settings::values.accelerate_astc.GetValue()) {
case Settings::AstcDecodeMode::Gpu:
if (Settings::values.astc_recompression.GetValue() ==
@ -1549,24 +1598,6 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
MakeStorageView(device, level, *original_image, VK_FORMAT_A8B8G8R8_UNORM_PACK32);
}
}
// Proactive warning for problematic HDR format + MSAA combinations on Android
// These combinations commonly cause texture flickering/black screens across multiple game engines
// Note: MSAA is native Switch rendering technique, cannot be disabled by emulator
if (info.num_samples > 1) {
const auto vk_format = MaxwellToVK::SurfaceFormat(runtime->device, FormatType::Optimal,
false, info.format).format;
const bool is_hdr_format = vk_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32;
if (is_hdr_format) {
LOG_WARNING(Render_Vulkan,
"Creating MSAA image ({}x samples) with HDR format {} (Maxwell: {}). "
"Driver support may be limited on Android (Qualcomm < 800, Mali pre-maintenance5). "
"Format fallback to RGBA16F should prevent issues.",
info.num_samples, vk_format, info.format);
}
}
}
Image::Image(const VideoCommon::NullImageParams& params) : VideoCommon::ImageBase{params} {}

65
src/video_core/vulkan_common/vulkan_device.cpp

@ -540,9 +540,74 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
//RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
#ifdef ANDROID
// Shader Float Controls handling for Qualcomm Adreno
// Default: DISABLED due to historical issues with binning precision causing visual glitches
const bool force_enable = Settings::values.shader_float_controls_force_enable.GetValue();
if (force_enable) {
// User explicitly enabled float controls - log detected capabilities and user config
LOG_INFO(Render_Vulkan, "Shader Float Controls FORCE ENABLED by user (Eden Veil/Extensions)");
// Log driver capabilities
const auto& fc = float_control;
LOG_INFO(Render_Vulkan, "Driver Float Controls Capabilities:");
LOG_INFO(Render_Vulkan, " - Denorm Flush FP32: {}", fc.shaderDenormFlushToZeroFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Denorm Preserve FP32: {}", fc.shaderDenormPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - RTE Rounding FP32: {}", fc.shaderRoundingModeRTEFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan FP32: {}", fc.shaderSignedZeroInfNanPreserveFloat32 ? "YES" : "NO");
LOG_INFO(Render_Vulkan, " - Independence: {}",
fc.denormBehaviorIndependence == VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL ? "ALL" : "LIMITED");
// Log user selections
bool ftz = Settings::values.shader_float_ftz.GetValue();
bool preserve = Settings::values.shader_float_denorm_preserve.GetValue();
const bool rte = Settings::values.shader_float_rte.GetValue();
const bool signed_zero = Settings::values.shader_float_signed_zero_inf_nan.GetValue();
// Validate mutually exclusive options.
// SetupDenormControl in the SPIR-V backend tests the forced-flush flag before the
// forced-preserve flag, so FTZ is what actually gets emitted when both are set. Report that
// outcome here so the log matches the generated shaders; the locals only feed the log lines
// and the Switch-native comparison below.
if (ftz && preserve) {
    LOG_WARNING(Render_Vulkan,
                "CONFLICT: FTZ and DenormPreserve are mutually exclusive!");
    LOG_WARNING(Render_Vulkan,
                " -> FTZ will take precedence (matches shader backend behavior)");
    preserve = false; // Keep the summary below in line with what the shaders emit
}
LOG_INFO(Render_Vulkan, "User Float Behavior Selection:");
LOG_INFO(Render_Vulkan, " - Flush To Zero (FTZ): {}", ftz ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Denorm Preserve: {}", preserve ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Round To Even (RTE): {}", rte ? "ENABLED" : "disabled");
LOG_INFO(Render_Vulkan, " - Signed Zero/Inf/Nan: {}", signed_zero ? "ENABLED" : "disabled");
// Analyze configuration vs Switch native behavior
const bool matches_switch = ftz && !preserve && rte && signed_zero;
if (matches_switch) {
LOG_INFO(Render_Vulkan, "Configuration MATCHES Switch/Maxwell native behavior (FTZ+RTE+SignedZero)");
} else if (!ftz && !preserve && !rte && !signed_zero) {
LOG_WARNING(Render_Vulkan, "No float behaviors selected - using driver default (may cause glitches)");
} else {
LOG_INFO(Render_Vulkan, "Configuration is CUSTOM - testing mode active");
}
// Extension stays enabled
LOG_INFO(Render_Vulkan, "VK_KHR_shader_float_controls: ENABLED");
} else {
// Default behavior - disable float controls
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls on Qualcomm (historical binning precision issues)");
LOG_INFO(Render_Vulkan,
"To enable: Eden Veil -> Extensions -> Shader Float Controls (Force Enable)");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
}
#else
// Non-Android: keep original behavior
LOG_WARNING(Render_Vulkan,
"Disabling shader float controls and 64-bit integer features on Qualcomm proprietary drivers");
RemoveExtension(extensions.shader_float_controls, VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
#endif
// Int64 atomics - genuinely broken, always disable
RemoveExtensionFeature(extensions.shader_atomic_int64, features.shader_atomic_int64,
VK_KHR_SHADER_ATOMIC_INT64_EXTENSION_NAME);
features.shader_atomic_int64.shaderBufferInt64Atomics = false;

22
src/video_core/vulkan_common/vulkan_memory_allocator.cpp

@ -226,11 +226,24 @@ namespace Vulkan {
vk::Buffer
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
{
// Qualcomm uses unified memory architecture - prefer DEVICE_LOCAL + HOST_VISIBLE
// for zero-copy access without staging buffers
const bool is_qualcomm = device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
const bool prefer_unified = is_qualcomm && (usage == MemoryUsage::Upload ||
usage == MemoryUsage::Download ||
usage == MemoryUsage::Stream);
VkMemoryPropertyFlags preferred_flags = MemoryUsagePreferredVmaFlags(usage);
if (prefer_unified) {
// Request DEVICE_LOCAL + HOST_VISIBLE for zero-copy on unified memory architectures
preferred_flags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
}
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.preferredFlags = preferred_flags,
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
@ -245,6 +258,13 @@ namespace Vulkan {
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
if (is_qualcomm && prefer_unified) {
const bool got_unified = (property_flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(property_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
LOG_DEBUG(Render_Vulkan, "Qualcomm buffer allocation: usage={}, unified={}, flags=0x{:X}",
static_cast<u32>(usage), got_unified, property_flags);
}
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;

Loading…
Cancel
Save