@@ -7,6 +7,7 @@
 #include "common/alignment.h"
 #include "common/settings.h"
+#include "video_core/control/channel_state.h"
 #include "video_core/dirty_flags.h"
 #include "video_core/engines/kepler_compute.h"
 #include "video_core/texture_cache/image_view_base.h"
@@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;
 
 template <class P>
-TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
-                              Tegra::Engines::Maxwell3D& maxwell3d_,
-                              Tegra::Engines::KeplerCompute& kepler_compute_,
-                              Tegra::MemoryManager& gpu_memory_)
-    : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
-      kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
+TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
+    : runtime{runtime_}, rasterizer{rasterizer_} {
     // Configure null sampler
     TSCEntry sampler_descriptor{};
     sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
@@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
     sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
     sampler_descriptor.cubemap_anisotropy.Assign(1);
 
+    // Setup channels
+    current_channel_id = UNSET_CHANNEL;
+    state = nullptr;
+    maxwell3d = nullptr;
+    kepler_compute = nullptr;
+    gpu_memory = nullptr;
+
     // Make sure the first index is reserved for the null resources
     // This way the null resource becomes a compile time constant
     void(slot_images.insert(NullImageParams{}));
@@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
             const auto copies = FullDownloadCopies(image.info);
             image.DownloadMemory(map, copies);
             runtime.Finish();
-            SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+            SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
         }
         if (True(image.flags & ImageFlagBits::Tracked)) {
            UntrackImage(image, image_id);
@@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
 template <class P>
 template <bool has_blacklists>
 void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
+    FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
+                                   views);
 }
 
 template <class P>
 void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
-    FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
+    FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
 }
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
-    if (index > graphics_sampler_table.Limit()) {
+    if (index > state->graphics_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
-    SamplerId& id = graphics_sampler_ids[index];
+    const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
+    SamplerId& id = state->graphics_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
 
 template <class P>
 typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
-    if (index > compute_sampler_table.Limit()) {
+    if (index > state->compute_sampler_table.Limit()) {
         LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
         return &slot_samplers[NULL_SAMPLER_ID];
     }
-    const auto [descriptor, is_new] = compute_sampler_table.Read(index);
-    SamplerId& id = compute_sampler_ids[index];
+    const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
+    SamplerId& id = state->compute_sampler_ids[index];
     if (is_new) {
         id = FindSampler(descriptor);
     }
@@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
 template <class P>
 void TextureCache<P>::SynchronizeGraphicsDescriptors() {
     using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
-    const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
-    const u32 tic_limit = maxwell3d.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
-    if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
-        graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
+    const u32 tic_limit = maxwell3d->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
+    if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
+        state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
-        graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
+        state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
     }
 }
 
 template <class P>
 void TextureCache<P>::SynchronizeComputeDescriptors() {
-    const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
-    const u32 tic_limit = kepler_compute.regs.tic.limit;
-    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
-    const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
-    if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
-        compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
+    const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
+    const u32 tic_limit = kepler_compute->regs.tic.limit;
+    const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
+    const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
+    if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
+        state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
     }
-    if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
-        compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
+    if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
+        state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
    }
 }
 
 template <class P>
 bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     u32 scale_rating = 0;
     bool rescaled = false;
     std::array<ImageId, NUM_RT> tmp_color_images{};
@@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
 template <class P>
 void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     using namespace VideoCommon::Dirty;
-    auto& flags = maxwell3d.dirty.flags;
+    auto& flags = maxwell3d->dirty.flags;
     if (!flags[Dirty::RenderTargets]) {
         for (size_t index = 0; index < NUM_RT; ++index) {
             ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
@@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
     PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
 
     for (size_t index = 0; index < NUM_RT; ++index) {
-        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
+        render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
     }
     u32 up_scale = 1;
     u32 down_shift = 0;
@@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
         down_shift = Settings::values.resolution_info.down_shift;
     }
     render_targets.size = Extent2D{
-        (maxwell3d.regs.render_area.width * up_scale) >> down_shift,
-        (maxwell3d.regs.render_area.height * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.width * up_scale) >> down_shift,
+        (maxwell3d->regs.render_area.height * up_scale) >> down_shift,
     };
 
     flags[Dirty::DepthBiasGlobal] = true;
@@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
     }
 }
 
@@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     for (const ImageId image_id : download_ids) {
         const ImageBase& image = slot_images[image_id];
         const auto copies = FullDownloadCopies(image.info);
-        SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
+        SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
         download_map.offset += image.unswizzled_size_bytes;
         download_span = download_span.subspan(image.unswizzled_size_bytes);
     }
@@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     const GPUVAddr gpu_addr = image.gpu_addr;
 
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
-        gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
+        gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
     } else if (True(image.flags & ImageFlagBits::Converted)) {
         std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
-        auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
+        auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
         ConvertImage(unswizzled_data, image.info, mapped_span, copies);
         image.UploadMemory(staging, copies);
     } else {
-        const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
+        const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
         image.UploadMemory(staging, copies);
     }
 }
 
 template <class P>
 ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
-    if (!IsValidEntry(gpu_memory, config)) {
+    if (!IsValidEntry(*gpu_memory, config)) {
         return NULL_IMAGE_VIEW_ID;
     }
-    const auto [pair, is_new] = image_views.try_emplace(config);
+    const auto [pair, is_new] = state->image_views.try_emplace(config);
     ImageViewId& image_view_id = pair->second;
     if (is_new) {
         image_view_id = CreateImageView(config);
@@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
 template <class P>
 ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                    RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
         if (!cpu_addr) {
             return ImageId{};
         }
@@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
         image.scale_tick = frame_tick + 1;
     }
     const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
     image.image_view_ids.clear();
     image.image_view_infos.clear();
     if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
+        std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
+    state->graphics_image_table.Invalidate();
+    state->compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
@@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
 template <class P>
 ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
                                      RelaxedOptions options) {
-    std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+    std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
     if (!cpu_addr) {
         const auto size = CalculateGuestSizeInBytes(info);
-        cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
+        cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
         if (!cpu_addr) {
             const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
             virtual_invalid_space += Common::AlignUp(size, 32);
@@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
     const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
     Image& new_image = slot_images[new_image_id];
 
-    if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
+    if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
         new_image.flags |= ImageFlagBits::Sparse;
     }
 
@@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
         return NULL_SAMPLER_ID;
     }
-    const auto [pair, is_new] = samplers.try_emplace(config);
+    const auto [pair, is_new] = state->samplers.try_emplace(config);
     if (is_new) {
         pair->second = slot_samplers.insert(runtime, config);
     }
@@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (index >= regs.rt_control.count) {
         return ImageViewId{};
     }
@@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
 
 template <class P>
 ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     if (!regs.zeta_enable) {
         return ImageViewId{};
     }
@@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
     static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
     boost::container::small_vector<ImageId, 8> images;
     ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
-        const auto it = gpu_page_table.find(page);
-        if (it == gpu_page_table.end()) {
+        const auto it = state->gpu_page_table.find(page);
+        if (it == state->gpu_page_table.end()) {
             if constexpr (BOOL_BREAK) {
                 return false;
             } else {
@@ -1403,9 +1408,9 @@ template <typename Func>
 void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
     using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
     static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
-    const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
+    const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
     for (const auto& [gpu_addr, size] : segments) {
-        std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
+        std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
         ASSERT(cpu_addr);
         if constexpr (RETURNS_BOOL) {
             if (func(gpu_addr, *cpu_addr, size)) {
@@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
     image.lru_index = lru_cache.Insert(image_id, frame_tick);
 
     ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
+                   [this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         auto map_id =
             slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
@@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
         }
         image_ids.erase(vector_it);
     };
-    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
-                   [this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
+    ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
+        clear_page_table(page, state->gpu_page_table);
+    });
     if (False(image.flags & ImageFlagBits::Sparse)) {
         const auto map_id = image.map_view_id;
         ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
@@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
 
     // Mark render targets as dirty
-    auto& dirty = maxwell3d.dirty.flags;
+    auto& dirty = maxwell3d->dirty.flags;
     dirty[Dirty::RenderTargets] = true;
     dirty[Dirty::ZetaBuffer] = true;
     for (size_t rt = 0; rt < NUM_RT; ++rt) {
@@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
     if (alloc_images.empty()) {
         image_allocs_table.erase(alloc_it);
     }
-    if constexpr (ENABLE_VALIDATION) {
-        std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
-        std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
+    for (auto& this_state : channel_storage) {
+        if constexpr (ENABLE_VALIDATION) {
+            std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
+            std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
+        }
+        this_state.graphics_image_table.Invalidate();
+        this_state.compute_image_table.Invalidate();
     }
-    graphics_image_table.Invalidate();
-    compute_image_table.Invalidate();
     has_deleted_images = true;
 }
 
 template <class P>
 void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
-    auto it = image_views.begin();
-    while (it != image_views.end()) {
+    auto it = state->image_views.begin();
+    while (it != state->image_views.end()) {
         const auto found = std::ranges::find(removed_views, it->second);
         if (found != removed_views.end()) {
-            it = image_views.erase(it);
+            it = state->image_views.erase(it);
         } else {
             ++it;
         }
@@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
     const ImageViewBase& image_view = slot_image_views[id];
     const ImageBase& image = slot_images[image_view.image_id];
     const Extent3D size = image_view.size;
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = maxwell3d->regs;
     const auto& scissor = regs.scissor_test[0];
     if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
         // Images with multiple resources can't be cleared in a single call
@@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
            scissor.max_y >= size.height;
 }
 
+template <class P>
+TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
+    : maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
+      gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
+      graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
+                                                                               gpu_memory} {}
+
+template <class P>
+void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
+    ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
+    auto new_id = [this, &channel]() {
+        if (!free_channel_ids.empty()) {
+            auto id = free_channel_ids.front();
+            free_channel_ids.pop_front();
+            new (&channel_storage[id]) ChannelInfo(channel);
+            return id;
+        }
+        channel_storage.emplace_back(channel);
+        return channel_storage.size() - 1;
+    }();
+    channel_map.emplace(channel.bind_id, new_id);
+    if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
+/// Bind a channel for execution.
+template <class P>
+void TextureCache<P>::BindToChannel(s32 id) {
+    auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    current_channel_id = it->second;
+    state = &channel_storage[current_channel_id];
+    maxwell3d = &state->maxwell3d;
+    kepler_compute = &state->kepler_compute;
+    gpu_memory = &state->gpu_memory;
+}
+
+/// Erase channel's state.
+template <class P>
+void TextureCache<P>::EraseChannel(s32 id) {
+    const auto it = channel_map.find(id);
+    ASSERT(it != channel_map.end() && id >= 0);
+    const auto this_id = it->second;
+    free_channel_ids.push_back(this_id);
+    channel_map.erase(it);
+    if (this_id == current_channel_id) {
+        current_channel_id = UNSET_CHANNEL;
+        state = nullptr;
+        maxwell3d = nullptr;
+        kepler_compute = nullptr;
+        gpu_memory = nullptr;
+    } else if (current_channel_id != UNSET_CHANNEL) {
+        state = &channel_storage[current_channel_id];
+    }
+}
+
 } // namespace VideoCommon
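
A minimal usage sketch of the channel lifecycle introduced above, assuming a constructed TextureCache<P> named texture_cache and two live Tegra::Control::ChannelState objects channel_a and channel_b with distinct bind_ids (these names are illustrative stand-ins; in practice the GPU side owns the channels and drives these calls):

    // Register two channels; each allocates or recycles a ChannelInfo slot.
    // Note CreateChannel does not bind: it only refreshes the cached `state`
    // pointer, since channel_storage may have reallocated on emplace_back.
    texture_cache.CreateChannel(channel_a);
    texture_cache.CreateChannel(channel_b);

    // Binding selects which per-channel descriptor tables, image-view map and
    // GPU page table the cache sees through state/maxwell3d/kepler_compute/gpu_memory.
    texture_cache.BindToChannel(channel_a.bind_id);
    // ... issue work for channel_a ...
    texture_cache.BindToChannel(channel_b.bind_id);
    // ... issue work for channel_b ...

    // Erasing an unbound channel frees its slot while channel_b stays bound;
    // erasing the currently bound channel also nulls the engine/memory pointers.
    texture_cache.EraseChannel(channel_a.bind_id);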