You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

456 lines
15 KiB

// SPDX-FileCopyrightText: Copyright 2026 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <cstring>
#include <string_view>
#include <vector>
#include "common/assert.h"
#include "common/logging.h"
#include "common/scope_exit.h"
#include "common/settings.h"
#include "core/memory.h"
#include "video_core/host1x/ffmpeg.h"
#include "video_core/memory_manager.h"
extern "C" {
#ifdef LIBVA_FOUND
// for querying VAAPI driver information
#include <libavutil/hwcontext_vaapi.h>
#endif
#include <libavutil/hwcontext.h>
}
namespace FFmpeg {
namespace {
// Software pixel format that hardware-decoded frames are transferred into
// (set on the destination frame before av_hwframe_transfer_data).
constexpr AVPixelFormat PREFERRED_GPU_FORMAT = AV_PIX_FMT_NV12;
// Pixel format selected when GetGpuFormat falls back to the CPU decoder.
constexpr AVPixelFormat PREFERRED_CPU_FORMAT = AV_PIX_FMT_YUV420P;
// Hardware device types to try, per platform. HardwareContext::InitializeForDecoder
// walks this list in order and stops at the first type that both initializes
// and is supported by the decoder, so earlier entries take priority.
constexpr std::array PREFERRED_GPU_DECODERS = {
#if defined(_WIN32)
AV_HWDEVICE_TYPE_CUDA,
AV_HWDEVICE_TYPE_D3D11VA,
AV_HWDEVICE_TYPE_DXVA2,
AV_HWDEVICE_TYPE_D3D12VA,
#elif defined(__FreeBSD__)
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
AV_HWDEVICE_TYPE_DRM,
#elif defined(__APPLE__)
AV_HWDEVICE_TYPE_VIDEOTOOLBOX,
#elif defined(ANDROID)
AV_HWDEVICE_TYPE_MEDIACODEC,
#elif defined(__unix__)
AV_HWDEVICE_TYPE_CUDA,
AV_HWDEVICE_TYPE_VAAPI,
AV_HWDEVICE_TYPE_VDPAU,
#endif
// Listed last on every platform.
AV_HWDEVICE_TYPE_VULKAN,
};
// get_format callback installed by DecoderContext::InitializeHardwareDecoder.
//
// If the context's current pixel format is a known, non-hardware format, the
// codec's hardware configurations are scanned and the pixel format of every
// configuration that works through a device context and targets one of
// PREFERRED_GPU_DECODERS is written to the context (later configurations
// overwrite earlier ones). The resulting format is returned if FFmpeg offers it
// in `pix_fmts`; otherwise the hardware device reference is dropped and the
// CPU format is selected instead.
AVPixelFormat GetGpuFormat(AVCodecContext* codec_context, const AVPixelFormat* pix_fmts) {
    const auto* const current_desc = av_pix_fmt_desc_get(codec_context->pix_fmt);
    const bool is_software_format =
        current_desc && !(current_desc->flags & AV_PIX_FMT_FLAG_HWACCEL);

    if (is_software_format) {
        int index = 0;
        // avcodec_get_hw_config returns null once the index runs past the last entry.
        while (const AVCodecHWConfig* hw_config =
                   avcodec_get_hw_config(codec_context->codec, index++)) {
            if (!(hw_config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) {
                continue;
            }
            for (const auto wanted_type : PREFERRED_GPU_DECODERS) {
                if (hw_config->device_type == wanted_type) {
                    codec_context->pix_fmt = hw_config->pix_fmt;
                    break;
                }
            }
        }
    }

    // The offered list is terminated by AV_PIX_FMT_NONE.
    const AVPixelFormat* offered = pix_fmts;
    while (*offered != AV_PIX_FMT_NONE) {
        if (*offered == codec_context->pix_fmt) {
            return codec_context->pix_fmt;
        }
        ++offered;
    }

    LOG_INFO(HW_GPU, "Could not find supported GPU pixel format, falling back to CPU decoder");
    av_buffer_unref(&codec_context->hw_device_ctx);
    codec_context->pix_fmt = PREFERRED_CPU_FORMAT;
    return codec_context->pix_fmt;
}
std::string AVError(int errnum) {
char errbuf[AV_ERROR_MAX_STRING_SIZE] = {};
av_make_error_string(errbuf, sizeof(errbuf) - 1, errnum);
return errbuf;
}
#ifdef ANDROID
// Tests whether an annex-B start code begins at offset `i` of `data`.
// Returns 4 for 00 00 00 01, 3 for 00 00 01, and 0 when neither fits at `i`.
size_t MatchStartCode(std::span<const u8> data, size_t i) {
    const size_t total = data.size();
    // Both variants need at least three bytes and begin with two zero bytes.
    if (i + 2 >= total || data[i] != 0 || data[i + 1] != 0) {
        return 0;
    }
    const u8 third = data[i + 2];
    if (third == 1) {
        return 3;
    }
    if (third == 0 && i + 3 < total && data[i + 3] == 1) {
        return 4;
    }
    return 0;
}
// Collects the SPS (NAL type 7) and PPS (NAL type 8) units of an annex-B
// packet into one buffer, each re-emitted behind a 4-byte start code.
// Eden synthesizes these inline into the very first frame; h264_mediacodec
// wants them at open. Scanning stops at the first slice NAL (type 1 or 5).
// Returns an empty vector when the packet carries no SPS/PPS before that point.
std::vector<u8> ExtractH264AnnexBExtradata(std::span<const u8> packet) {
    static constexpr u8 kStartCode[4] = {0, 0, 0, 1};

    std::vector<u8> parameter_sets;
    const size_t total = packet.size();
    size_t cursor = 0;
    while (cursor < total) {
        const size_t code_length = MatchStartCode(packet, cursor);
        if (code_length == 0) {
            ++cursor;
            continue;
        }
        const size_t payload_begin = cursor + code_length;
        if (payload_begin >= total) {
            // Start code at the very end with no NAL header behind it.
            break;
        }

        // The NAL unit runs up to the next start code or the end of the packet.
        size_t payload_end = payload_begin + 1;
        for (; payload_end < total; ++payload_end) {
            if (MatchStartCode(packet, payload_end) != 0) {
                break;
            }
        }

        switch (packet[payload_begin] & 0x1F) {
        case 7:
        case 8: {
            parameter_sets.insert(parameter_sets.end(), std::begin(kStartCode),
                                  std::end(kStartCode));
            const auto unit = packet.subspan(payload_begin, payload_end - payload_begin);
            parameter_sets.insert(parameter_sets.end(), unit.begin(), unit.end());
            break;
        }
        case 1:
        case 5:
            // Slice data reached: no further parameter sets are collected.
            return parameter_sets;
        default:
            break;
        }
        cursor = payload_end;
    }
    return parameter_sets;
}
#endif
}
Packet::Packet(std::span<const u8> data) {
m_packet = av_packet_alloc();
m_packet->data = const_cast<u8*>(data.data());
m_packet->size = static_cast<s32>(data.size());
}
Packet::~Packet() {
av_packet_free(&m_packet);
}
Frame::Frame() {
m_frame = av_frame_alloc();
}
Frame::~Frame() {
av_frame_free(&m_frame);
}
// Picks the FFmpeg decoder for the given guest codec. Unknown codecs map to
// AV_CODEC_ID_NONE. On Android with GPU decoding enabled, the dedicated
// MediaCodec decoder is tried first; the generic decoder lookup is used
// whenever no decoder has been selected by then.
Decoder::Decoder(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
    using VideoCodec = Tegra::Host1x::NvdecCommon::VideoCodec;

    AVCodecID codec_id = AV_CODEC_ID_NONE;
    switch (codec) {
    case VideoCodec::H264:
        codec_id = AV_CODEC_ID_H264;
        break;
    case VideoCodec::VP8:
        codec_id = AV_CODEC_ID_VP8;
        break;
    case VideoCodec::VP9:
        codec_id = AV_CODEC_ID_VP9;
        break;
    default:
        UNIMPLEMENTED_MSG("Unknown codec {}", codec);
        break;
    }
#ifdef ANDROID
    // FFmpeg exposes MediaCodec via dedicated decoders rather than as a
    // hw_config on the regular ones.
    if (Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu) {
        const char* mediacodec_name = nullptr;
        if (codec_id == AV_CODEC_ID_H264) {
            mediacodec_name = "h264_mediacodec";
        } else if (codec_id == AV_CODEC_ID_VP8) {
            mediacodec_name = "vp8_mediacodec";
        } else if (codec_id == AV_CODEC_ID_VP9) {
            mediacodec_name = "vp9_mediacodec";
        }
        if (mediacodec_name != nullptr) {
            m_codec = avcodec_find_decoder_by_name(mediacodec_name);
        }
    }
#endif
    if (m_codec == nullptr) {
        m_codec = avcodec_find_decoder(codec_id);
    }
}
// Reports whether this decoder can decode on a hardware device of `type`
// through a device context.
//
// @param out_pix_fmt Receives the hardware pixel format of the matching
//                    configuration; left untouched when false is returned.
// @param type        Hardware device type to look for.
// @return true if a matching hardware configuration exists.
bool Decoder::SupportsDecodingOnDevice(AVPixelFormat* out_pix_fmt, AVHWDeviceType type) const {
    // The constructor can leave m_codec null (unknown codec, or decoder missing
    // from the FFmpeg build). Querying hardware configs or reading the name
    // would dereference it.
    if (!m_codec) {
        LOG_DEBUG(HW_GPU, "No decoder available, cannot use device type {}",
                  av_hwdevice_get_type_name(type));
        return false;
    }
    for (int i = 0;; i++) {
        const AVCodecHWConfig* config = avcodec_get_hw_config(m_codec, i);
        if (!config) {
            // Ran out of configurations without a match.
            LOG_DEBUG(HW_GPU, "{} decoder does not support device type {}", m_codec->name,
                      av_hwdevice_get_type_name(type));
            break;
        }
        if ((config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) &&
            config->device_type == type) {
            LOG_INFO(HW_GPU, "Using {} GPU decoder", av_hwdevice_get_type_name(type));
            *out_pix_fmt = config->pix_fmt;
            return true;
        }
    }
    return false;
}
std::vector<AVHWDeviceType> HardwareContext::GetSupportedDeviceTypes() {
std::vector<AVHWDeviceType> types;
AVHWDeviceType current_device_type = AV_HWDEVICE_TYPE_NONE;
while (true) {
current_device_type = av_hwdevice_iterate_types(current_device_type);
if (current_device_type == AV_HWDEVICE_TYPE_NONE)
return types;
types.push_back(current_device_type);
}
}
// Drops this context's reference to the hardware device buffer.
HardwareContext::~HardwareContext() {
av_buffer_unref(&m_gpu_decoder);
}
// Tries each entry of PREFERRED_GPU_DECODERS in order and wires the first one
// that FFmpeg enumerates, that initializes, and that `decoder` supports into
// `decoder_context`.
// Returns true when a hardware decoder was set up, false if no candidate worked.
bool HardwareContext::InitializeForDecoder(DecoderContext& decoder_context, const Decoder& decoder) {
    const auto available_types = GetSupportedDeviceTypes();
    for (const auto candidate : PREFERRED_GPU_DECODERS) {
        const bool is_available =
            std::ranges::find(available_types, candidate) != available_types.end();
        if (!is_available) {
            LOG_DEBUG(HW_GPU, "{} explicitly unsupported", av_hwdevice_get_type_name(candidate));
            continue;
        }
        if (!InitializeWithType(candidate)) {
            continue;
        }
        AVPixelFormat hw_pix_fmt{};
        if (!decoder.SupportsDecodingOnDevice(&hw_pix_fmt, candidate)) {
            continue;
        }
        decoder_context.InitializeHardwareDecoder(*this, hw_pix_fmt);
        return true;
    }
    return false;
}
// Creates a hardware device context of `type`, replacing any device this
// object currently holds.
//
// @return true if the device was created and is considered usable; false if
//         creation failed or, with LIBVA_FOUND, the VAAPI driver turned out to
//         be a VDPAU-backed impersonation. No device is held after a false return.
bool HardwareContext::InitializeWithType(AVHWDeviceType type) {
    av_buffer_unref(&m_gpu_decoder);
    if (const int ret = av_hwdevice_ctx_create(&m_gpu_decoder, type, nullptr, nullptr, 0); ret < 0) {
        LOG_INFO(HW_GPU, "av_hwdevice_ctx_create({}) failed: {}", av_hwdevice_get_type_name(type),
                 AVError(ret));
        return false;
    }
#ifdef LIBVA_FOUND
    if (type == AV_HWDEVICE_TYPE_VAAPI) {
        // We need to determine if this is an impersonated VAAPI driver.
        auto* hwctx = reinterpret_cast<AVHWDeviceContext*>(m_gpu_decoder->data);
        auto* vactx = static_cast<AVVAAPIDeviceContext*>(hwctx->hwctx);
        const char* vendor_name = vaQueryVendorString(vactx->display);
        if (!vendor_name) {
            // The query may yield no string; neither strstr nor the formatter
            // accepts a null pointer, so report it and keep the device.
            LOG_DEBUG(HW_GPU, "VAAPI driver did not report a vendor string");
        } else if (strstr(vendor_name, "VDPAU backend")) {
            // VDPAU impersonated VAAPI impls are super buggy, we need to skip them.
            LOG_DEBUG(HW_GPU, "Skipping VDPAU impersonated VAAPI driver");
            // Do not keep a rejected device alive.
            av_buffer_unref(&m_gpu_decoder);
            return false;
        } else {
            // According to some user testing, certain VAAPI drivers (Intel?) could be buggy.
            // Log the driver name just in case.
            LOG_DEBUG(HW_GPU, "Using VAAPI driver: {}", vendor_name);
        }
    }
#endif
    return true;
}
// Allocates a codec context for `decoder` and applies the decoding options
// used by this file: "zerolatency" tuning, automatic thread count without
// frame threading, and the low-delay / fast flags.
DecoderContext::DecoderContext(const Decoder& decoder) : m_decoder{decoder} {
    auto* const context = avcodec_alloc_context3(m_decoder.GetCodec());
    m_codec_context = context;

    av_opt_set(context->priv_data, "tune", "zerolatency", 0);

    context->thread_count = 0;
    context->thread_type &= ~FF_THREAD_FRAME;

    // Forwarded into MediaCodec as KEY_LOW_LATENCY on Android.
    context->flags |= AV_CODEC_FLAG_LOW_DELAY;
    context->flags2 |= AV_CODEC_FLAG2_FAST;
}
// Releases the hardware device reference held by the codec context, then
// frees the codec context itself.
DecoderContext::~DecoderContext() {
av_buffer_unref(&m_codec_context->hw_device_ctx);
avcodec_free_context(&m_codec_context);
}
// Switches the codec context to hardware decoding: takes a new reference to
// the device held by `context`, installs GetGpuFormat as the format-selection
// callback and records `hw_pix_fmt` as the context's pixel format.
void DecoderContext::InitializeHardwareDecoder(const HardwareContext& context, AVPixelFormat hw_pix_fmt) {
    auto* const device_ref = av_buffer_ref(context.GetBufferRef());
    m_codec_context->hw_device_ctx = device_ref;
    m_codec_context->get_format = GetGpuFormat;
    m_codec_context->pix_fmt = hw_pix_fmt;
}
bool DecoderContext::OpenContext(const Decoder& decoder, std::span<const u8> extradata) {
if (!extradata.empty()) {
av_freep(&m_codec_context->extradata);
m_codec_context->extradata = static_cast<u8*>(
av_mallocz(extradata.size() + AV_INPUT_BUFFER_PADDING_SIZE));
if (!m_codec_context->extradata) {
LOG_ERROR(HW_GPU, "Failed to allocate extradata");
return false;
}
std::memcpy(m_codec_context->extradata, extradata.data(), extradata.size());
m_codec_context->extradata_size = static_cast<int>(extradata.size());
}
if (const int ret = avcodec_open2(m_codec_context, decoder.GetCodec(), nullptr); ret < 0) {
LOG_ERROR(HW_GPU, "avcodec_open2 error: {}", AVError(ret));
return false;
}
if (!m_codec_context->hw_device_ctx) {
LOG_INFO(HW_GPU, "Using FFmpeg CPU decoder");
}
return true;
}
bool DecoderContext::SendPacket(const Packet& packet) {
if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) {
LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret));
return false;
}
return true;
}
std::shared_ptr<Frame> DecoderContext::ReceiveFrame() {
auto ReceiveImpl = [&](AVFrame* frame) -> int {
const int ret = avcodec_receive_frame(m_codec_context, frame);
if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) {
LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret));
}
return ret;
};
std::shared_ptr<Frame> intermediate_frame = std::make_shared<Frame>();
if (ReceiveImpl(intermediate_frame->GetFrame()) < 0) {
return {};
}
m_final_frame = std::make_shared<Frame>();
if (m_codec_context->hw_device_ctx) {
#ifdef __ANDROID__
// c2.mtk.vp9.decoder, c2.mtk.vp89.decoder will be fine if we don't
// re-encode stuff twice :>
m_final_frame = std::move(intermediate_frame);
#else
m_final_frame->SetFormat(PREFERRED_GPU_FORMAT);
if (const int ret = av_hwframe_transfer_data(m_final_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) {
LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret));
return {};
}
#endif
} else {
m_final_frame = std::move(intermediate_frame);
}
return std::move(m_final_frame);
}
// Returns the API to its unopened state: destroys the hardware context, the
// decoder context and the decoder (in that order), clears the bookkeeping
// flags, restarts the timestamp counter and discards all queued frame offsets.
void DecodeApi::Reset() {
    m_hardware_context.reset();
    m_decoder_context.reset();
    m_decoder.reset();

    m_opened = false;
    m_needs_h264_extradata = false;
    m_next_pts = 0;

    // Drain the queue one entry at a time.
    for (; !m_pending_offsets.empty(); m_pending_offsets.pop()) {
    }
}
// Prepares decoding for `codec`: resets previous state, creates the decoder
// and its context, attempts hardware setup when GPU decoding is selected in
// the settings, and opens the context.
// On Android with the h264_mediacodec decoder, opening is postponed until
// SendPacket can supply SPS/PPS, and true is returned immediately.
// Returns false (after resetting) if the context could not be opened.
bool DecodeApi::Initialize(Tegra::Host1x::NvdecCommon::VideoCodec codec) {
    av_log_set_level(AV_LOG_DEBUG);
    this->Reset();

    m_decoder.emplace(codec);
    m_decoder_context.emplace(*m_decoder);

    // Enable GPU decoding if requested.
    const bool gpu_requested =
        Settings::values.nvdec_emulation.GetValue() == Settings::NvdecEmulation::Gpu;
    if (gpu_requested) {
        m_hardware_context.emplace();
        m_hardware_context->InitializeForDecoder(*m_decoder_context, *m_decoder);
    }
#ifdef ANDROID
    // h264_mediacodec needs SPS/PPS in extradata at open. We pull them from
    // the first frame's bitstream in SendPacket.
    const auto* const selected_codec = m_decoder->GetCodec();
    m_needs_h264_extradata = selected_codec != nullptr &&
                             std::string_view{selected_codec->name} == "h264_mediacodec";
    if (m_needs_h264_extradata) {
        return true;
    }
#endif
    if (m_decoder_context->OpenContext(*m_decoder)) {
        m_opened = true;
        return true;
    }
    this->Reset();
    return false;
}
// Feeds one packet to the decoder, opening the codec context first if that
// has not happened yet.
// On Android with deferred H.264 opening, SPS/PPS are extracted from the
// packet to serve as extradata; a packet without them is skipped and true is
// returned. `dimensions`, when provided, seed the context's width/height.
// Each submitted packet gets the next sequential pts/dts, and `offsets` is
// queued for the matching ReceiveFrame call.
// Returns false if opening or sending fails (opening failure also resets).
bool DecodeApi::SendPacket(std::span<const u8> packet_data, const FrameOffsets& offsets,
                           std::optional<FrameDimensions> dimensions) {
    if (!m_opened) {
        std::vector<u8> open_extradata;
#ifdef ANDROID
        if (m_needs_h264_extradata) {
            open_extradata = ExtractH264AnnexBExtradata(packet_data);
            if (open_extradata.empty()) {
                // Nothing to open the decoder with yet; drop this packet.
                return true;
            }
            if (dimensions) {
                auto* const codec_ctx = m_decoder_context->GetCodecContext();
                codec_ctx->width = dimensions->width;
                codec_ctx->height = dimensions->height;
                codec_ctx->coded_width = dimensions->width;
                codec_ctx->coded_height = dimensions->height;
            }
        }
#endif
        const bool opened_now = m_decoder_context->OpenContext(*m_decoder, open_extradata);
        if (!opened_now) {
            this->Reset();
            return false;
        }
        m_opened = true;
    }

    m_pending_offsets.push(offsets);

    FFmpeg::Packet packet(packet_data);
    auto* const raw_packet = packet.GetPacket();
    raw_packet->pts = m_next_pts;
    raw_packet->dts = m_next_pts;
    ++m_next_pts;

    return m_decoder_context->SendPacket(packet);
}
std::optional<DecodeApi::DecodedFrame> DecodeApi::ReceiveFrame() {
auto frame = m_decoder_context->ReceiveFrame();
if (!frame) {
return std::nullopt;
}
FrameOffsets offsets{};
if (!m_pending_offsets.empty()) {
offsets = m_pending_offsets.front();
m_pending_offsets.pop();
}
return DecodedFrame{std::move(frame), offsets};
}
}