From b89f5278acdc80e7713773e3b1c4d517a80ed405 Mon Sep 17 00:00:00 2001
From: Steveice10 <1269164+Steveice10@users.noreply.github.com>
Date: Tue, 16 May 2023 02:28:21 -0700
Subject: [PATCH] Revive: dumping/ffmpeg_backend: Various fixes (#6528)

* dumping/ffmpeg_backend: Add FPS filter

So that the recorded video can be at 60FPS (which is supported by most encoders) while still maintaining correct speed.

* dumping/ffmpeg_backend: Add HW context support

Required for some HW acceled encoders. Not tested as my devices don't seem to require this.

* CMake: Copy avfilter dll for MSVC

* CMakeLists: Require FFmpeg 4.0

* ffmpeg: Fix dumper compile error on MSVC.

* ffmpeg: Address review comments.

---------

Co-authored-by: zhupengfei <zhupf321@gmail.com>
---
 CMakeLists.txt                         |   6 +-
 CMakeModules/CopyCitraFFmpegDeps.cmake |   2 +
 src/core/CMakeLists.txt                |   2 +-
 src/core/dumping/ffmpeg_backend.cpp    | 295 ++++++++++++++++++++++---
 src/core/dumping/ffmpeg_backend.h      |  42 +++-
 5 files changed, 298 insertions(+), 49 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a6192abed..5353e3b72 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -255,12 +255,12 @@ if (ENABLE_FFMPEG)
     endif()
 
     if (ENABLE_FFMPEG_VIDEO_DUMPER)
-        find_package(FFmpeg REQUIRED COMPONENTS avcodec avformat avutil swscale swresample)
+        find_package(FFmpeg REQUIRED COMPONENTS avcodec avfilter avformat avutil swresample)
     else()
         find_package(FFmpeg REQUIRED COMPONENTS avcodec)
     endif()
-    if ("${FFmpeg_avcodec_VERSION}" VERSION_LESS "57.48.101")
-        message(FATAL_ERROR "Found version for libavcodec is too low. The required version is at least 57.48.101 (included in FFmpeg 3.1 and later).")
+    if ("${FFmpeg_avcodec_VERSION}" VERSION_LESS "58.4.100")
+        message(FATAL_ERROR "Found version for libavcodec is too low. The required version is at least 58.4.100 (included in FFmpeg 4.0 and later).")
     endif()
 endif()
 
diff --git a/CMakeModules/CopyCitraFFmpegDeps.cmake b/CMakeModules/CopyCitraFFmpegDeps.cmake
index be514f696..532f478ce 100644
--- a/CMakeModules/CopyCitraFFmpegDeps.cmake
+++ b/CMakeModules/CopyCitraFFmpegDeps.cmake
@@ -3,8 +3,10 @@ function(copy_citra_FFmpeg_deps target_dir)
     set(DLL_DEST "${CMAKE_BINARY_DIR}/bin/$<CONFIG>/")
     windows_copy_files(${target_dir} ${FFMPEG_DIR}/bin ${DLL_DEST}
         avcodec*.dll
+        avfilter*.dll
         avformat*.dll
         avutil*.dll
+        postproc*.dll
         swresample*.dll
         swscale*.dll
     )
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 7863e8ba7..1ee217a6b 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -505,7 +505,7 @@ if ("x86_64" IN_LIST ARCHITECTURE OR "arm64" IN_LIST ARCHITECTURE)
 endif()
 
 if (ENABLE_FFMPEG_VIDEO_DUMPER)
-    target_link_libraries(citra_core PUBLIC FFmpeg::avcodec FFmpeg::avformat FFmpeg::swscale FFmpeg::swresample FFmpeg::avutil)
+    target_link_libraries(citra_core PUBLIC FFmpeg::avcodec FFmpeg::avfilter FFmpeg::avformat FFmpeg::swresample FFmpeg::avutil)
 endif()
 
 if (CITRA_USE_PRECOMPILED_HEADERS)
diff --git a/src/core/dumping/ffmpeg_backend.cpp b/src/core/dumping/ffmpeg_backend.cpp
index ee9f36130..635db9ca8 100644
--- a/src/core/dumping/ffmpeg_backend.cpp
+++ b/src/core/dumping/ffmpeg_backend.cpp
@@ -7,6 +7,7 @@
 #include "common/file_util.h"
 #include "common/logging/log.h"
 #include "common/param_package.h"
+#include "common/scope_exit.h"
 #include "common/settings.h"
 #include "common/string_util.h"
 #include "core/dumping/ffmpeg_backend.h"
@@ -15,6 +16,9 @@
 #include "video_core/video_core.h"
 
 extern "C" {
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+#include <libavutil/hwcontext.h>
 #include <libavutil/pixdesc.h>
 }
 
@@ -102,6 +106,43 @@ FFmpegVideoStream::~FFmpegVideoStream() {
     Free();
 }
 
+// This is modified from libavcodec/decode.c
+// The original version was broken
+static AVPixelFormat GetPixelFormat(AVCodecContext* avctx, const AVPixelFormat* fmt) {
+    // Choose a software pixel format if any, preferring those in the front of the list
+    for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) {
+        const AVPixFmtDescriptor* desc = av_pix_fmt_desc_get(fmt[i]);
+        if (!(desc->flags & AV_PIX_FMT_FLAG_HWACCEL)) {
+            return fmt[i];
+        }
+    }
+
+    // Finally, traverse the list in order and choose the first entry
+    // with no external dependencies (if there is no hardware configuration
+    // information available then this just picks the first entry).
+    for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) {
+        const AVCodecHWConfig* config;
+        for (int j = 0;; j++) {
+            config = avcodec_get_hw_config(avctx->codec, j);
+            if (!config || config->pix_fmt == fmt[i]) {
+                break;
+            }
+        }
+        if (!config) {
+            // No specific config available, so the decoder must be able
+            // to handle this format without any additional setup.
+            return fmt[i];
+        }
+        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+            // Usable with only internal setup.
+            return fmt[i];
+        }
+    }
+
+    // Nothing is usable, give up.
+    return AV_PIX_FMT_NONE;
+}
+
 bool FFmpegVideoStream::Init(FFmpegMuxer& muxer, const Layout::FramebufferLayout& layout_) {
     InitializeFFmpegLibraries();
 
@@ -125,15 +166,28 @@ bool FFmpegVideoStream::Init(FFmpegMuxer& muxer, const Layout::FramebufferLayout
     codec_context->bit_rate = Settings::values.video_bitrate;
     codec_context->width = layout.width;
     codec_context->height = layout.height;
-    // TODO(xperia64): While these numbers from core timing work fine, certain video codecs do not
-    // support the strange resulting timebase (280071/16756991); Addressing this issue would require
-    // resampling the video
-    // List of codecs known broken by this change: mpeg1, mpeg2, mpeg4, libxvid
-    // See https://github.com/citra-emu/citra/pull/5273#issuecomment-643023325 for more information
-    codec_context->time_base.num = static_cast<int>(GPU::frame_ticks);
-    codec_context->time_base.den = static_cast<int>(BASE_CLOCK_RATE_ARM11);
+    // Use 60fps here, since the video is already filtered (resampled)
+    codec_context->time_base.num = 1;
+    codec_context->time_base.den = 60;
     codec_context->gop_size = 12;
-    codec_context->pix_fmt = codec->pix_fmts ? codec->pix_fmts[0] : AV_PIX_FMT_YUV420P;
+
+    // Get pixel format for codec
+    if (codec->pix_fmts) {
+        sw_pixel_format = GetPixelFormat(codec_context.get(), codec->pix_fmts);
+    } else {
+        sw_pixel_format = AV_PIX_FMT_YUV420P;
+    }
+    if (sw_pixel_format == AV_PIX_FMT_NONE) {
+        // This encoder requires HW context configuration.
+        if (!InitHWContext(codec)) {
+            LOG_ERROR(Render, "Failed to initialize HW context");
+            return false;
+        }
+    } else {
+        requires_hw_frames = false;
+        codec_context->pix_fmt = sw_pixel_format;
+    }
+
     if (format_context->oformat->flags & AVFMT_GLOBALHEADER)
         codec_context->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 
@@ -160,31 +214,28 @@ bool FFmpegVideoStream::Init(FFmpegMuxer& muxer, const Layout::FramebufferLayout
 
     // Allocate frames
     current_frame.reset(av_frame_alloc());
-    scaled_frame.reset(av_frame_alloc());
-    scaled_frame->format = codec_context->pix_fmt;
-    scaled_frame->width = layout.width;
-    scaled_frame->height = layout.height;
-    if (av_frame_get_buffer(scaled_frame.get(), 0) < 0) {
-        LOG_ERROR(Render, "Could not allocate frame buffer");
-        return false;
+    filtered_frame.reset(av_frame_alloc());
+
+    if (requires_hw_frames) {
+        hw_frame.reset(av_frame_alloc());
+        if (av_hwframe_get_buffer(codec_context->hw_frames_ctx, hw_frame.get(), 0) < 0) {
+            LOG_ERROR(Render, "Could not allocate buffer for HW frame");
+            return false;
+        }
     }
 
-    // Create SWS Context
-    auto* context = sws_getCachedContext(
-        sws_context.get(), layout.width, layout.height, pixel_format, layout.width, layout.height,
-        codec_context->pix_fmt, SWS_BICUBIC, nullptr, nullptr, nullptr);
-    if (context != sws_context.get())
-        sws_context.reset(context);
-
-    return true;
+    return InitFilters();
 }
 
 void FFmpegVideoStream::Free() {
     FFmpegStream::Free();
 
     current_frame.reset();
-    scaled_frame.reset();
-    sws_context.reset();
+    filtered_frame.reset();
+    hw_frame.reset();
+    filter_graph.reset();
+    source_context = nullptr;
+    sink_context = nullptr;
 }
 
 void FFmpegVideoStream::ProcessFrame(VideoFrame& frame) {
@@ -198,20 +249,194 @@ void FFmpegVideoStream::ProcessFrame(VideoFrame& frame) {
     current_frame->format = pixel_format;
     current_frame->width = layout.width;
     current_frame->height = layout.height;
+    current_frame->pts = frame_count++;
 
-    // Scale the frame
-    if (av_frame_make_writable(scaled_frame.get()) < 0) {
-        LOG_ERROR(Render, "Video frame dropped: Could not prepare frame");
+    // Filter the frame
+    if (av_buffersrc_add_frame(source_context, current_frame.get()) < 0) {
+        LOG_ERROR(Render, "Video frame dropped: Could not add frame to filter graph");
         return;
     }
-    if (sws_context) {
-        sws_scale(sws_context.get(), current_frame->data, current_frame->linesize, 0, layout.height,
-                  scaled_frame->data, scaled_frame->linesize);
-    }
-    scaled_frame->pts = frame_count++;
+    while (true) {
+        const int error = av_buffersink_get_frame(sink_context, filtered_frame.get());
+        if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
+            return;
+        }
+        if (error < 0) {
+            LOG_ERROR(Render, "Video frame dropped: Could not receive frame from filter graph");
+            return;
+        } else {
+            if (requires_hw_frames) {
+                if (av_hwframe_transfer_data(hw_frame.get(), filtered_frame.get(), 0) < 0) {
+                    LOG_ERROR(Render, "Video frame dropped: Could not upload to HW frame");
+                    av_frame_unref(filtered_frame.get());
+                    return;
+                }
+                // av_hwframe_transfer_data() copies pixel data only; carry the timestamp over.
+                hw_frame->pts = filtered_frame->pts;
+                SendFrame(hw_frame.get());
+            } else {
+                SendFrame(filtered_frame.get());
+            }
 
-    // Encode frame
-    SendFrame(scaled_frame.get());
+            av_frame_unref(filtered_frame.get());
+        }
+    }
+}
+
+bool FFmpegVideoStream::InitHWContext(const AVCodec* codec) {
+    for (std::size_t i = 0; codec->pix_fmts[i] != AV_PIX_FMT_NONE; ++i) {
+        const AVCodecHWConfig* config;
+        for (int j = 0;; ++j) {
+            config = avcodec_get_hw_config(codec, j);
+            if (!config || config->pix_fmt == codec->pix_fmts[i]) {
+                break;
+            }
+        }
+        // If we are at this point, there should not be any possible HW format that does not
+        // need configuration.
+        ASSERT_MSG(config, "HW pixel format that does not need config should have been selected");
+
+        if (!(config->methods & (AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX |
+                                 AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX))) {
+            // Maybe this format requires ad-hoc configuration, unsupported.
+            continue;
+        }
+
+        codec_context->pix_fmt = codec->pix_fmts[i];
+
+        // Create HW device context
+        AVBufferRef* hw_device_context = nullptr;
+        SCOPE_EXIT({ av_buffer_unref(&hw_device_context); });
+
+        // TODO: Provide the argument here somehow.
+        // This is necessary for some devices like CUDA where you must supply the GPU name.
+        // This is not necessary for VAAPI, etc.
+        if (av_hwdevice_ctx_create(&hw_device_context, config->device_type, nullptr, nullptr, 0) <
+            0) {
+            LOG_ERROR(Render, "Failed to create HW device context");
+            continue;
+        }
+        // Drop any reference left by an earlier, failed iteration before storing the new one.
+        av_buffer_unref(&codec_context->hw_device_ctx);
+        codec_context->hw_device_ctx = av_buffer_ref(hw_device_context);
+
+        // Get the SW format
+        AVHWFramesConstraints* constraints =
+            av_hwdevice_get_hwframe_constraints(hw_device_context, nullptr);
+        SCOPE_EXIT({ av_hwframe_constraints_free(&constraints); });
+
+        if (constraints) {
+            sw_pixel_format = constraints->valid_sw_formats ? constraints->valid_sw_formats[0]
+                                                            : AV_PIX_FMT_YUV420P;
+        } else {
+            LOG_WARNING(Render, "Could not query HW device constraints");
+            sw_pixel_format = AV_PIX_FMT_YUV420P;
+        }
+
+        // For encoders that only need the HW device
+        if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
+            requires_hw_frames = false;
+            return true;
+        }
+
+        requires_hw_frames = true;
+
+        // Create HW frames context
+        AVBufferRef* hw_frames_context_ref = nullptr;
+        SCOPE_EXIT({ av_buffer_unref(&hw_frames_context_ref); });
+
+        if (!(hw_frames_context_ref = av_hwframe_ctx_alloc(hw_device_context))) {
+            LOG_ERROR(Render, "Failed to create HW frames context");
+            continue;
+        }
+
+        AVHWFramesContext* hw_frames_context =
+            reinterpret_cast<AVHWFramesContext*>(hw_frames_context_ref->data);
+        hw_frames_context->format = codec->pix_fmts[i];
+        hw_frames_context->sw_format = sw_pixel_format;
+        hw_frames_context->width = codec_context->width;
+        hw_frames_context->height = codec_context->height;
+        hw_frames_context->initial_pool_size = 20; // value from FFmpeg's example
+
+        if (av_hwframe_ctx_init(hw_frames_context_ref) < 0) {
+            LOG_ERROR(Render, "Failed to initialize HW frames context");
+            continue;
+        }
+
+        codec_context->hw_frames_ctx = av_buffer_ref(hw_frames_context_ref);
+        return true;
+    }
+
+    LOG_ERROR(Render, "Failed to find a usable HW pixel format");
+    return false;
+}
+
+bool FFmpegVideoStream::InitFilters() {
+    filter_graph.reset(avfilter_graph_alloc());
+
+    const AVFilter* source = avfilter_get_by_name("buffer");
+    const AVFilter* sink = avfilter_get_by_name("buffersink");
+    if (!source || !sink) {
+        LOG_ERROR(Render, "Could not find buffer source or sink");
+        return false;
+    }
+
+    // Configure buffer source
+    static constexpr AVRational src_time_base{static_cast<int>(GPU::frame_ticks),
+                                              static_cast<int>(BASE_CLOCK_RATE_ARM11)};
+    const std::string in_args = fmt::format(
+        "video_size={}x{}:pix_fmt={}:time_base={}/{}:pixel_aspect=1", codec_context->width,
+        codec_context->height, pixel_format, src_time_base.num, src_time_base.den);
+    if (avfilter_graph_create_filter(&source_context, source, "in", in_args.c_str(), nullptr,
+                                     filter_graph.get()) < 0) {
+        LOG_ERROR(Render, "Could not create buffer source");
+        return false;
+    }
+
+    // Configure buffer sink
+    if (avfilter_graph_create_filter(&sink_context, sink, "out", nullptr, nullptr,
+                                     filter_graph.get()) < 0) {
+        LOG_ERROR(Render, "Could not create buffer sink");
+        return false;
+    }
+    const AVPixelFormat pix_fmts[] = {sw_pixel_format, AV_PIX_FMT_NONE};
+    if (av_opt_set_int_list(sink_context, "pix_fmts", pix_fmts, AV_PIX_FMT_NONE,
+                            AV_OPT_SEARCH_CHILDREN) < 0) {
+        LOG_ERROR(Render, "Could not set output pixel format");
+        return false;
+    }
+
+    // Initialize filter graph
+    // `outputs` as in outputs of the 'previous' graphs
+    AVFilterInOut* outputs = avfilter_inout_alloc();
+    outputs->name = av_strdup("in");
+    outputs->filter_ctx = source_context;
+    outputs->pad_idx = 0;
+    outputs->next = nullptr;
+
+    // `inputs` as in inputs to the 'next' graphs
+    AVFilterInOut* inputs = avfilter_inout_alloc();
+    inputs->name = av_strdup("out");
+    inputs->filter_ctx = sink_context;
+    inputs->pad_idx = 0;
+    inputs->next = nullptr;
+
+    SCOPE_EXIT({
+        avfilter_inout_free(&outputs);
+        avfilter_inout_free(&inputs);
+    });
+
+    if (avfilter_graph_parse_ptr(filter_graph.get(), filter_graph_desc.data(), &inputs, &outputs,
+                                 nullptr) < 0) {
+        LOG_ERROR(Render, "Could not parse or create filter graph");
+        return false;
+    }
+    if (avfilter_graph_config(filter_graph.get(), nullptr) < 0) {
+        LOG_ERROR(Render, "Could not configure filter graph");
+        return false;
+    }
+
+    return true;
 }
 
 FFmpegAudioStream::~FFmpegAudioStream() {
diff --git a/src/core/dumping/ffmpeg_backend.h b/src/core/dumping/ffmpeg_backend.h
index 579191cb1..f7ba53ebb 100644
--- a/src/core/dumping/ffmpeg_backend.h
+++ b/src/core/dumping/ffmpeg_backend.h
@@ -19,10 +19,10 @@
 
 extern "C" {
 #include <libavcodec/avcodec.h>
+#include <libavfilter/avfilter.h>
 #include <libavformat/avformat.h>
 #include <libavutil/opt.h>
 #include <libswresample/swresample.h>
-#include <libswscale/swscale.h>
 }
 
 namespace VideoDumper {
@@ -69,7 +69,7 @@ protected:
 
 /**
  * A FFmpegStream used for video data.
- * Rescales, encodes and writes a frame.
+ * Filters (scales), encodes and writes a frame.
  */
 class FFmpegVideoStream : public FFmpegStream {
 public:
@@ -80,21 +80,43 @@ public:
     void ProcessFrame(VideoFrame& frame);
 
 private:
-    struct SwsContextDeleter {
-        void operator()(SwsContext* sws_context) const {
-            sws_freeContext(sws_context);
-        }
-    };
+    /// Configures the HW device context (and HW frames context, if the encoder needs one).
+    /// @return true once a usable HW pixel format has been set up, false otherwise.
+    bool InitHWContext(const AVCodec* codec);
+    /// Builds the filter graph that turns input frames into 60 FPS frames in sw_pixel_format.
+    /// @return true on success, false if any filter could not be created or configured.
+    bool InitFilters();
 
     u64 frame_count{};
 
     std::unique_ptr<AVFrame, AVFrameDeleter> current_frame{};
-    std::unique_ptr<AVFrame, AVFrameDeleter> scaled_frame{};
-    std::unique_ptr<SwsContext, SwsContextDeleter> sws_context{};
+    std::unique_ptr<AVFrame, AVFrameDeleter> filtered_frame{};
+    std::unique_ptr<AVFrame, AVFrameDeleter> hw_frame{};
     Layout::FramebufferLayout layout;
 
-    /// The pixel format the frames are stored in
+    /// The pixel format the input frames are stored in
     static constexpr AVPixelFormat pixel_format = AVPixelFormat::AV_PIX_FMT_BGRA;
+
+    // Software pixel format. For normal encoders, this is the format they accept. For HW-acceled
+    // encoders, this is the format the HW frames context accepts.
+    AVPixelFormat sw_pixel_format = AV_PIX_FMT_NONE;
+
+    /// Whether the encoder we are using requires HW frames to be supplied.
+    bool requires_hw_frames = false;
+
+    // Filter related
+    struct AVFilterGraphDeleter {
+        void operator()(AVFilterGraph* filter_graph) const {
+            avfilter_graph_free(&filter_graph);
+        }
+    };
+    std::unique_ptr<AVFilterGraph, AVFilterGraphDeleter> filter_graph{};
+    // Owned by filter_graph and released together with it, so they are never freed directly
+    AVFilterContext* source_context = nullptr;
+    AVFilterContext* sink_context = nullptr;
+
+    /// The filter graph to use. This graph means 'change FPS to 60, convert format if needed'
+    static constexpr std::string_view filter_graph_desc = "fps=60";
 };
 
 /**