You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
456 lines
15 KiB
456 lines
15 KiB
// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
|
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
|
|
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
|
|
#include <cstring>
|
|
#include <string_view>
|
|
#include <vector>
|
|
|
|
#include "common/assert.h"
|
|
#include "common/logging.h"
|
|
#include "common/scope_exit.h"
|
|
#include "common/settings.h"
|
|
#include "core/memory.h"
|
|
#include "video_core/host1x/ffmpeg.h"
|
|
#include "video_core/memory_manager.h"
|
|
|
|
extern "C" {
|
|
#ifdef LIBVA_FOUND
|
|
// for querying VAAPI driver information
|
|
#include <libavutil/hwcontext_vaapi.h>
|
|
#endif
|
|
|
|
#include <libavutil/hwcontext.h>
|
|
}
|
|
|
|
namespace FFmpeg {
|
|
|
|
namespace {
|
|
|
|
constexpr AVPixelFormat PREFERRED_GPU_FORMAT = AV_PIX_FMT_NV12;
|
|
constexpr AVPixelFormat PREFERRED_CPU_FORMAT = AV_PIX_FMT_YUV420P;
|
|
constexpr std::array PREFERRED_GPU_DECODERS = {
|
|
#if defined(_WIN32)
|
|
AV_HWDEVICE_TYPE_CUDA,
|
|
AV_HWDEVICE_TYPE_D3D11VA,
|
|
AV_HWDEVICE_TYPE_DXVA2,
|
|
AV_HWDEVICE_TYPE_D3D12VA,
|
|
#elif defined(__FreeBSD__)
|
|
AV_HWDEVICE_TYPE_VAAPI,
|
|
AV_HWDEVICE_TYPE_VDPAU,
|
|
AV_HWDEVICE_TYPE_DRM,
|
|
#elif defined(__APPLE__)
|
|
AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
|
|
#elif defined(ANDROID)
|
|
AV_HWDEVICE_TYPE_MEDIACODEC,
|
|
#elif defined(__unix__)
|
|
AV_HWDEVICE_TYPE_CUDA,
|
|
AV_HWDEVICE_TYPE_VAAPI,
|
|
AV_HWDEVICE_TYPE_VDPAU,
|
|
#endif
|
|
AV_HWDEVICE_TYPE_VULKAN,
|
|
};
|
|
|
|
// get_format callback installed on hardware-enabled codec contexts.
//
// If the context currently holds a non-hwaccel pixel format, the codec's hardware configs are
// scanned and the pixel format of any config that uses a device context of one of the preferred
// device types is adopted. The resulting format is returned if the decoder offers it in
// `pix_fmts` (a list terminated by AV_PIX_FMT_NONE); otherwise the hardware device reference is
// dropped and the CPU format is selected.
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
    const AVPixFmtDescriptor* const descriptor = av_pix_fmt_desc_get(codec_context->pix_fmt);
    const bool holds_software_format =
        descriptor && (descriptor->flags & AV_PIX_FMT_FLAG_HWACCEL) == 0;

    if (holds_software_format) {
        int index = 0;
        while (const AVCodecHWConfig* hw_config =
                   avcodec_get_hw_config(codec_context->codec, index++)) {
            if (!(hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) {
                continue;
            }
            for (const AVHWDeviceType candidate : PREFERRED_GPU_DECODERS) {
                if (hw_config->device_type == candidate) {
                    codec_context->pix_fmt = hw_config->pix_fmt;
                }
            }
        }
    }

    // Accept the format only when the decoder actually lists it.
    const AVPixelFormat wanted = codec_context->pix_fmt;
    const AVPixelFormat* offered = pix_fmts;
    while (*offered != AV_PIX_FMT_NONE) {
        if (*offered == wanted) {
            return wanted;
        }
        ++offered;
    }

    LOG_INFO(HW_GPU, "Could not find supported GPU pixel format, falling back to CPU decoder");
    av_buffer_unref(&codec_context->hw_device_ctx);
    codec_context->pix_fmt = PREFERRED_CPU_FORMAT;
    return codec_context->pix_fmt;
}
|
|
|
|
std::string AVError(int errnum) {
|
|
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
|
|
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
|
|
return errbuf;
|
|
}
|
|
|
|
#ifdef ANDROID
|
|
// Match a 3- or 4-byte annex-B start code at `i`. Returns its length, or 0.
|
|
size_t MatchStartCode(std::span<const u8> data, size_t i) {
|
|
const size_t n = data.size();
|
|
if (i + 3 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 0 && data[i + 3] == 1) {
|
|
return 4;
|
|
}
|
|
if (i + 2 < n && data[i] == 0 && data[i + 1] == 0 && data[i + 2] == 1) {
|
|
return 3;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Pull SPS (NAL type 7) + PPS (NAL type 8) out of an annex-B frame into an
|
|
// extradata buffer, each prefixed with a 4-byte start code. Eden synthesizes
|
|
// these inline into the very first frame; h264_mediacodec wants them at open.
|
|
std::vector<u8> ExtractH264AnnexBExtradata(std::span<const u8> packet) {
|
|
std::vector<u8> extradata;
|
|
const size_t size = packet.size();
|
|
size_t i = 0;
|
|
while (i < size) {
|
|
const size_t sc = MatchStartCode(packet, i);
|
|
if (sc == 0) {
|
|
++i;
|
|
continue;
|
|
}
|
|
const size_t nal_start = i + sc;
|
|
if (nal_start >= size) {
|
|
break;
|
|
}
|
|
const u8 nal_type = packet[nal_start] & 0x1F;
|
|
|
|
size_t j = nal_start + 1;
|
|
while (j < size && MatchStartCode(packet, j) == 0) {
|
|
++j;
|
|
}
|
|
|
|
if (nal_type == 7 || nal_type == 8) {
|
|
constexpr u8 start[4] = {0, 0, 0, 1};
|
|
extradata.insert(extradata.end(), start, start + sizeof(start));
|
|
extradata.insert(extradata.end(), packet.begin() + nal_start, packet.begin() + j);
|
|
} else if (nal_type == 1 || nal_type == 5) {
|
|
break;
|
|
}
|
|
i = j;
|
|
}
|
|
return extradata;
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
Packet::Packet(std::span<const u8> data) {
|
|
m_packet = av_packet_alloc();
|
|
m_packet->data = const_cast<u8*>(data.data());
|
|
m_packet->size = static_cast<s32>(data.size());
|
|
}
|
|
|
|
// Frees the AVPacket allocated by the constructor.
Packet::~Packet() {
    av_packet_free(&m_packet);
}
|
|
|
|
Frame::Frame() {
|
|
m_frame = av_frame_alloc();
|
|
}
|
|
|
|
// Frees the AVFrame allocated by the constructor.
Frame::~Frame() {
    av_frame_free(&m_frame);
}
|
|
|
|
// Selects the FFmpeg decoder for the given NVDEC codec.
//
// On Android with GPU decoding enabled, the dedicated MediaCodec decoder is looked up by name
// first. In every other case, or when that lookup fails, the default decoder registered for the
// codec ID is used. m_codec ends up null when no decoder is found (e.g. for an unknown codec).
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
    // The fallback lookup below tests m_codec, so it must hold a defined value even when the
    // Android-only branch is compiled out or skipped.
    m_codec = nullptr;

    const AVCodecID av_codec = [&] {
        switch (codec) {
        case Tegra::Host1x::NvdecCommon::VideoCodec::H264:
            return AV_CODEC_ID_H264;
        case Tegra::Host1x::NvdecCommon::VideoCodec::VP8:
            return AV_CODEC_ID_VP8;
        case Tegra::Host1x::NvdecCommon::VideoCodec::VP9:
            return AV_CODEC_ID_VP9;
        default:
            UNIMPLEMENTED_MSG("Unknown codec {}", codec);
            return AV_CODEC_ID_NONE;
        }
    }();

#ifdef ANDROID
    // FFmpeg exposes MediaCodec via dedicated decoders rather than as a
    // hw_config on the regular ones.
    if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
        const char* mc_name = nullptr;
        switch (av_codec) {
        case AV_CODEC_ID_H264:
            mc_name = "h264_mediacodec";
            break;
        case AV_CODEC_ID_VP8:
            mc_name = "vp8_mediacodec";
            break;
        case AV_CODEC_ID_VP9:
            mc_name = "vp9_mediacodec";
            break;
        default:
            break;
        }
        if (mc_name) {
            m_codec = avcodec_find_decoder_by_name(mc_name);
        }
    }
#endif

    // Fall back to the default decoder for this codec ID.
    if (!m_codec) {
        m_codec = avcodec_find_decoder(av_codec);
    }
}
|
|
|
|
// Reports whether the selected decoder can decode through a hardware device context of `type`.
//
// On success the pixel format of the matching hardware config is written to `out_pix_fmt` and
// true is returned. `out_pix_fmt` is left untouched when false is returned.
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
    // No decoder was found for the codec; there is nothing to query (and m_codec->name below
    // would dereference null).
    if (!m_codec) {
        LOG_DEBUG(HW_GPU, "No decoder available, cannot query hardware configs");
        return false;
    }

    for (int i = 0;; i++) {
        const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
        if (!config) {
            // Ran out of configs without finding a match.
            LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
                      av_hwdevice_get_type_name(type));
            break;
        }

        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX &&
            config->device_type == type) {
            LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
            *out_pix_fmt = config->pix_fmt;
            return true;
        }
    }

    return false;
}
|
|
|
|
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
|
|
std::vector<AVHWDeviceType> types;
|
|
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
|
|
while (true) {
|
|
current_device_type = av_hwdevice_iterate_types(current_device_type);
|
|
if (current_device_type == AV_HWDEVICE_TYPE_NONE)
|
|
return types;
|
|
types.push_back(current_device_type);
|
|
}
|
|
}
|
|
|
|
// Drops this object's reference to the hardware device buffer.
HardwareContext::~HardwareContext() {
    av_buffer_unref(&m_gpu_decoder);
}
|
|
|
|
// Walks the preferred device types in order and wires the first usable one into
// `decoder_context`. A type is usable when FFmpeg lists it, a device of that type can be
// created, and `decoder` supports decoding on it. Returns false when no type qualifies.
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context,
                                           const Decoder& decoder) {
    const std::vector<AVHWDeviceType> available = GetSupportedDeviceTypes();

    for (const AVHWDeviceType candidate : PREFERRED_GPU_DECODERS) {
        const bool listed = std::ranges::find(available, candidate) != available.end();
        if (!listed) {
            LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(candidate));
            continue;
        }

        if (!InitializeWithType(candidate)) {
            continue;
        }

        AVPixelFormat hw_pix_fmt{};
        if (!decoder.SupportsDecodingOnDevice(&hw_pix_fmt, candidate)) {
            continue;
        }

        decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
        return true;
    }

    return false;
}
|
|
|
|
// Creates a hardware device context of the given type, replacing any device held so far.
//
// Returns false when the device cannot be created. When built with libva, a VAAPI device whose
// vendor string identifies a VDPAU backend is also rejected; in that case the device is released
// again before returning.
bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
    av_buffer_unref(&m_gpu_decoder);

    if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0);
        ret < 0) {
        LOG_INFO(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
                 AVError(ret));
        return false;
    }

#ifdef LIBVA_FOUND
    if (type == AV_HWDEVICE_TYPE_VAAPI) {
        // We need to determine if this is an impersonated VAAPI driver.
        auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
        auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
        const char* vendor_name = vaQueryVendorString(vactx->display);
        if (!vendor_name) {
            // The query can fail and yield a null pointer; neither strstr nor the log formatter
            // may be handed one. Treat the driver as an unidentified, non-impersonated one.
            LOG_DEBUG(HW_GPU, "VAAPI driver did not report a vendor string");
        } else if (std::strstr(vendor_name, "VDPAU backend")) {
            // VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
            LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
            // Do not keep a reference to a device we just rejected.
            av_buffer_unref(&m_gpu_decoder);
            return false;
        } else {
            // According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
            // Log the driver name just in case.
            LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
        }
    }
#endif

    return true;
}
|
|
|
|
// Allocates a codec context for `decoder` and configures it for low-latency decoding:
// the "tune=zerolatency" private option, automatic thread count with frame threading disabled,
// and the LOW_DELAY / FAST flags.
DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
    m_codec_context = avcodec_alloc_context3(m_decoder.GetCodec());
    AVCodecContext& context = *m_codec_context;

    av_opt_set(context.priv_data, "tune", "zerolatency", 0);

    context.thread_count = 0;
    context.thread_type &= ~FF_THREAD_FRAME;

    // Forwarded into MediaCodec as KEY_LOW_LATENCY on Android.
    context.flags |= AV_CODEC_FLAG_LOW_DELAY;
    context.flags2 |= AV_CODEC_FLAG2_FAST;
}
|
|
|
|
// Releases the hardware device reference held by the codec context, then frees the context.
DecoderContext::~DecoderContext() {
    AVBufferRef** const device_ref = &m_codec_context->hw_device_ctx;
    av_buffer_unref(device_ref);
    avcodec_free_context(&m_codec_context);
}
|
|
|
|
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) {
|
|
m_codec_context->hw_device_ctx = av_buffer_ref(context.GetBufferRef());
|
|
m_codec_context->get_format = GetGpuFormat;
|
|
m_codec_context->pix_fmt = hw_pix_fmt;
|
|
}
|
|
|
|
bool DecoderContext::OpenContext(const Decoder& decoder, std::span<const u8> extradata) {
|
|
if (!extradata.empty()) {
|
|
av_freep(&m_codec_context->extradata);
|
|
m_codec_context->extradata = static_cast<u8*>(
|
|
av_mallocz(extradata.size() + AV_INPUT_BUFFER_PADDING_SIZE));
|
|
if (!m_codec_context->extradata) {
|
|
LOG_ERROR(HW_GPU, "Failed to allocate extradata");
|
|
return false;
|
|
}
|
|
std::memcpy(m_codec_context->extradata, extradata.data(), extradata.size());
|
|
m_codec_context->extradata_size = static_cast<int>(extradata.size());
|
|
}
|
|
|
|
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
|
|
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
|
|
return false;
|
|
}
|
|
|
|
if (!m_codec_context->hw_device_ctx) {
|
|
LOG_INFO(HW_GPU, "Using FFmpeg CPU decoder");
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool DecoderContext::SendPacket(const Packet& packet) {
|
|
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) {
|
|
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
|
|
auto ReceiveImpl = [&](AVFrame* frame) -> int {
|
|
const int ret = avcodec_receive_frame(m_codec_context, frame);
|
|
if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) {
|
|
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
|
|
}
|
|
return ret;
|
|
};
|
|
|
|
std::shared_ptr<Frame> intermediate_frame = std::make_shared<Frame>();
|
|
if (ReceiveImpl(intermediate_frame->GetFrame()) < 0) {
|
|
return {};
|
|
}
|
|
|
|
m_final_frame = std::make_shared<Frame>();
|
|
if (m_codec_context->hw_device_ctx) {
|
|
#ifdef __ANDROID__
|
|
// c2.mtk.vp9.decoder, c2.mtk.vp89.decoder will be fine if we don't
|
|
// re-encode stuff twice :>
|
|
m_final_frame = std::move(intermediate_frame);
|
|
#else
|
|
m_final_frame->SetFormat(PREFERRED_GPU_FORMAT);
|
|
if (const int ret = av_hwframe_transfer_data(m_final_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) {
|
|
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
|
|
return {};
|
|
}
|
|
#endif
|
|
} else {
|
|
m_final_frame = std::move(intermediate_frame);
|
|
}
|
|
|
|
return std::move(m_final_frame);
|
|
}
|
|
|
|
void DecodeApi::Reset() {
|
|
m_hardware_context.reset();
|
|
m_decoder_context.reset();
|
|
m_decoder.reset();
|
|
m_opened = false;
|
|
m_needs_h264_extradata = false;
|
|
m_next_pts = 0;
|
|
while (!m_pending_offsets.empty()) {
|
|
m_pending_offsets.pop();
|
|
}
|
|
}
|
|
|
|
// Prepares decoding for `codec`: resets any previous state, creates the decoder and its context,
// optionally sets up hardware decoding, and opens the codec.
//
// On Android, when the selected decoder is h264_mediacodec, opening is postponed (and true is
// returned) because the SPS/PPS extradata is only extracted from the first packet in SendPacket.
// Returns false, after resetting, when the codec cannot be opened.
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
    av_log_set_level(AV_LOG_DEBUG);

    this->Reset();
    m_decoder.emplace(codec);
    m_decoder_context.emplace(*m_decoder);

    // Enable GPU decoding if requested. The result is not checked here.
    const bool gpu_requested =
        Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu;
    if (gpu_requested) {
        m_hardware_context.emplace();
        m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
    }

#ifdef ANDROID
    // h264_mediacodec needs SPS/PPS in extradata at open. We pull them from
    // the first frame's bitstream in SendPacket.
    const auto* const selected_codec = m_decoder->GetCodec();
    m_needs_h264_extradata =
        selected_codec != nullptr && std::string_view{selected_codec->name} == "h264_mediacodec";
    if (m_needs_h264_extradata) {
        return true;
    }
#endif

    const bool opened = m_decoder_context->OpenContext(*m_decoder);
    if (!opened) {
        this->Reset();
        return false;
    }

    m_opened = true;
    return true;
}
|
|
|
|
bool DecodeApi::SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
|
|
std::optional<FrameDimensions> dimensions) {
|
|
if (!m_opened) {
|
|
std::vector<u8> extradata;
|
|
#ifdef ANDROID
|
|
if (m_needs_h264_extradata) {
|
|
extradata = ExtractH264AnnexBExtradata(packet_data);
|
|
if (extradata.empty()) {
|
|
return true;
|
|
}
|
|
if (dimensions) {
|
|
auto* ctx = m_decoder_context->GetCodecContext();
|
|
ctx->width = dimensions->width;
|
|
ctx->height = dimensions->height;
|
|
ctx->coded_width = dimensions->width;
|
|
ctx->coded_height = dimensions->height;
|
|
}
|
|
}
|
|
#endif
|
|
if (!m_decoder_context->OpenContext(*m_decoder, extradata)) {
|
|
this->Reset();
|
|
return false;
|
|
}
|
|
m_opened = true;
|
|
}
|
|
m_pending_offsets.push(offsets);
|
|
FFmpeg::Packet packet(packet_data);
|
|
packet.GetPacket()->pts = m_next_pts;
|
|
packet.GetPacket()->dts = m_next_pts;
|
|
++m_next_pts;
|
|
return m_decoder_context->SendPacket(packet);
|
|
}
|
|
|
|
std::optional<DecodeApi::DecodedFrame> DecodeApi::ReceiveFrame() {
|
|
auto frame = m_decoder_context->ReceiveFrame();
|
|
if (!frame) {
|
|
return std::nullopt;
|
|
}
|
|
FrameOffsets offsets{};
|
|
if (!m_pending_offsets.empty()) {
|
|
offsets = m_pending_offsets.front();
|
|
m_pending_offsets.pop();
|
|
}
|
|
return DecodedFrame{std::move(frame), offsets};
|
|
}
|
|
|
|
}
|