query_cache: Abstract OpenGL implementation
Abstract the current OpenGL implementation into the VideoCommon namespace and reimplement it on top of that. Doing this avoids repeating code and logic in the Vulkan implementation.
This commit is contained in:
parent
73d2d3342d
commit
c31382ced5
|
@ -37,6 +37,7 @@ add_library(video_core STATIC
|
|||
memory_manager.h
|
||||
morton.cpp
|
||||
morton.h
|
||||
query_cache.h
|
||||
rasterizer_accelerated.cpp
|
||||
rasterizer_accelerated.h
|
||||
rasterizer_cache.cpp
|
||||
|
|
323
src/video_core/query_cache.h
Normal file
323
src/video_core/query_cache.h
Normal file
|
@ -0,0 +1,323 @@
|
|||
// Copyright 2020 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <memory>
#include <optional>
#include <unordered_map>
#include <utility>
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <class QueryCache, class HostCounter>
|
||||
class CounterStreamBase {
|
||||
public:
|
||||
explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
|
||||
: cache{cache}, type{type} {}
|
||||
|
||||
/// Updates the state of the stream, enabling or disabling as needed.
|
||||
void Update(bool enabled) {
|
||||
if (enabled) {
|
||||
Enable();
|
||||
} else {
|
||||
Disable();
|
||||
}
|
||||
}
|
||||
|
||||
/// Resets the stream to zero. It doesn't disable the query after resetting.
|
||||
void Reset() {
|
||||
if (current) {
|
||||
current->EndQuery();
|
||||
|
||||
// Immediately start a new query to avoid disabling its state.
|
||||
current = cache.Counter(nullptr, type);
|
||||
}
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
/// Returns the current counter slicing as needed.
|
||||
std::shared_ptr<HostCounter> Current() {
|
||||
if (!current) {
|
||||
return nullptr;
|
||||
}
|
||||
current->EndQuery();
|
||||
last = std::move(current);
|
||||
current = cache.Counter(last, type);
|
||||
return last;
|
||||
}
|
||||
|
||||
/// Returns true when the counter stream is enabled.
|
||||
bool IsEnabled() const {
|
||||
return static_cast<bool>(current);
|
||||
}
|
||||
|
||||
private:
|
||||
/// Enables the stream.
|
||||
void Enable() {
|
||||
if (current) {
|
||||
return;
|
||||
}
|
||||
current = cache.Counter(last, type);
|
||||
}
|
||||
|
||||
// Disables the stream.
|
||||
void Disable() {
|
||||
if (current) {
|
||||
current->EndQuery();
|
||||
}
|
||||
last = std::exchange(current, nullptr);
|
||||
}
|
||||
|
||||
QueryCache& cache;
|
||||
const VideoCore::QueryType type;
|
||||
|
||||
std::shared_ptr<HostCounter> current;
|
||||
std::shared_ptr<HostCounter> last;
|
||||
};
|
||||
|
||||
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
|
||||
class QueryCacheBase {
|
||||
public:
|
||||
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{
|
||||
static_cast<QueryCache&>(*this),
|
||||
VideoCore::QueryType::SamplesPassed}}} {}
|
||||
|
||||
void InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||
FlushAndRemoveRegion(addr, size);
|
||||
}
|
||||
|
||||
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
FlushAndRemoveRegion(addr, size);
|
||||
}
|
||||
|
||||
/**
|
||||
* Records a query in GPU mapped memory, potentially marked with a timestamp.
|
||||
* @param gpu_addr GPU address to flush to when the mapped memory is read.
|
||||
* @param type Query type, e.g. SamplesPassed.
|
||||
* @param timestamp Timestamp, when empty the flushed query is assumed to be short.
|
||||
*/
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
|
||||
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
|
||||
if (!query) {
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||
|
||||
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
|
||||
}
|
||||
|
||||
query->BindCounter(Stream(type).Current(), timestamp);
|
||||
}
|
||||
|
||||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||
void UpdateCounters() {
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
||||
}
|
||||
|
||||
/// Resets a counter to zero. It doesn't disable the query after resetting.
|
||||
void ResetCounter(VideoCore::QueryType type) {
|
||||
Stream(type).Reset();
|
||||
}
|
||||
|
||||
/// Returns a new host counter.
|
||||
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type) {
|
||||
return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
|
||||
type);
|
||||
}
|
||||
|
||||
/// Returns the counter stream of the specified type.
|
||||
CounterStream& Stream(VideoCore::QueryType type) {
|
||||
return streams[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
private:
|
||||
/// Flushes a memory range to guest memory and removes it from the cache.
|
||||
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
|
||||
const u64 addr_begin = static_cast<u64>(addr);
|
||||
const u64 addr_end = addr_begin + static_cast<u64>(size);
|
||||
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
|
||||
const u64 cache_begin = query.CacheAddr();
|
||||
const u64 cache_end = cache_begin + query.SizeInBytes();
|
||||
return cache_begin < addr_end && addr_begin < cache_end;
|
||||
};
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto& it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
continue;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
for (auto& query : contents) {
|
||||
if (!in_range(query)) {
|
||||
continue;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
|
||||
query.Flush();
|
||||
}
|
||||
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
|
||||
std::end(contents));
|
||||
}
|
||||
}
|
||||
|
||||
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
|
||||
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
|
||||
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
|
||||
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
|
||||
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
|
||||
host_ptr);
|
||||
}
|
||||
|
||||
/// Tries to a get a cached query. Returns nullptr on failure.
|
||||
CachedQuery* TryGet(CacheAddr addr) {
|
||||
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
||||
const auto it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
const auto found = std::find_if(std::begin(contents), std::end(contents),
|
||||
[addr](auto& query) { return query.CacheAddr() == addr; });
|
||||
return found != std::end(contents) ? &*found : nullptr;
|
||||
}
|
||||
|
||||
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||
static constexpr int PAGE_SHIFT = 12;
|
||||
|
||||
Core::System& system;
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||
|
||||
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||
};
|
||||
|
||||
template <class QueryCache, class HostCounter>
|
||||
class HostCounterBase {
|
||||
public:
|
||||
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency)
|
||||
: dependency{std::move(dependency)} {}
|
||||
|
||||
/// Returns the current value of the query.
|
||||
u64 Query() {
|
||||
if (result) {
|
||||
return *result;
|
||||
}
|
||||
|
||||
u64 value = BlockingQuery();
|
||||
if (dependency) {
|
||||
value += dependency->Query();
|
||||
}
|
||||
|
||||
return *(result = value);
|
||||
}
|
||||
|
||||
/// Returns true when flushing this query will potentially wait.
|
||||
bool WaitPending() const noexcept {
|
||||
return result.has_value();
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Returns the value of query from the backend API blocking as needed.
|
||||
virtual u64 BlockingQuery() const = 0;
|
||||
|
||||
private:
|
||||
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
|
||||
std::optional<u64> result; ///< Filled with the already returned value.
|
||||
};
|
||||
|
||||
template <class HostCounter>
|
||||
class CachedQueryBase {
|
||||
public:
|
||||
explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
|
||||
: cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||
|
||||
CachedQueryBase(CachedQueryBase&& rhs) noexcept
|
||||
: cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr}, counter{std::move(rhs.counter)},
|
||||
timestamp{rhs.timestamp} {}
|
||||
|
||||
CachedQueryBase(const CachedQueryBase&) = delete;
|
||||
|
||||
CachedQueryBase& operator=(CachedQueryBase&& rhs) noexcept {
|
||||
cpu_addr = rhs.cpu_addr;
|
||||
host_ptr = rhs.host_ptr;
|
||||
counter = std::move(rhs.counter);
|
||||
timestamp = rhs.timestamp;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// Flushes the query to guest memory.
|
||||
virtual void Flush() {
|
||||
// When counter is nullptr it means that it's just been reseted. We are supposed to write a
|
||||
// zero in these cases.
|
||||
const u64 value = counter ? counter->Query() : 0;
|
||||
std::memcpy(host_ptr, &value, sizeof(u64));
|
||||
|
||||
if (timestamp) {
|
||||
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
|
||||
}
|
||||
}
|
||||
|
||||
/// Binds a counter to this query.
|
||||
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
|
||||
if (counter) {
|
||||
// If there's an old counter set it means the query is being rewritten by the game.
|
||||
// To avoid losing the data forever, flush here.
|
||||
Flush();
|
||||
}
|
||||
counter = std::move(counter_);
|
||||
timestamp = timestamp_;
|
||||
}
|
||||
|
||||
VAddr CpuAddr() const noexcept {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
CacheAddr CacheAddr() const noexcept {
|
||||
return ToCacheAddr(host_ptr);
|
||||
}
|
||||
|
||||
u64 SizeInBytes() const noexcept {
|
||||
return SizeInBytes(timestamp.has_value());
|
||||
}
|
||||
|
||||
static u64 SizeInBytes(bool with_timestamp) {
|
||||
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// Returns true when querying the counter may potentially block.
|
||||
bool WaitPending() const noexcept {
|
||||
return counter && counter->WaitPending();
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
|
||||
static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
|
||||
static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
|
||||
|
||||
VAddr cpu_addr; ///< Guest CPU address.
|
||||
u8* host_ptr; ///< Writable host pointer.
|
||||
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -20,211 +20,49 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
using VideoCore::QueryType;
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||
constexpr int PAGE_SHIFT = 12;
|
||||
|
||||
constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp
|
||||
constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp
|
||||
constexpr std::ptrdiff_t TIMESTAMP_OFFSET = 8;
|
||||
|
||||
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||
|
||||
constexpr GLenum GetTarget(QueryType type) {
|
||||
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||
return QueryTargets[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
CounterStream::CounterStream(QueryCache& cache, QueryType type)
|
||||
: cache{cache}, type{type}, target{GetTarget(type)} {}
|
||||
|
||||
CounterStream::~CounterStream() = default;
|
||||
|
||||
void CounterStream::Update(bool enabled, bool any_command_queued) {
|
||||
if (enabled) {
|
||||
Enable();
|
||||
} else {
|
||||
Disable(any_command_queued);
|
||||
}
|
||||
}
|
||||
|
||||
void CounterStream::Reset(bool any_command_queued) {
|
||||
if (current) {
|
||||
EndQuery(any_command_queued);
|
||||
|
||||
// Immediately start a new query to avoid disabling its state.
|
||||
current = cache.GetHostCounter(nullptr, type);
|
||||
}
|
||||
last = nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<HostCounter> CounterStream::GetCurrent(bool any_command_queued) {
|
||||
if (!current) {
|
||||
return nullptr;
|
||||
}
|
||||
EndQuery(any_command_queued);
|
||||
last = std::move(current);
|
||||
current = cache.GetHostCounter(last, type);
|
||||
return last;
|
||||
}
|
||||
|
||||
void CounterStream::Enable() {
|
||||
if (current) {
|
||||
return;
|
||||
}
|
||||
current = cache.GetHostCounter(last, type);
|
||||
}
|
||||
|
||||
void CounterStream::Disable(bool any_command_queued) {
|
||||
if (current) {
|
||||
EndQuery(any_command_queued);
|
||||
}
|
||||
last = std::exchange(current, nullptr);
|
||||
}
|
||||
|
||||
void CounterStream::EndQuery(bool any_command_queued) {
|
||||
if (!any_command_queued) {
|
||||
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||
// for this. Insert to the OpenGL command stream a flush.
|
||||
glFlush();
|
||||
}
|
||||
glEndQuery(target);
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& rasterizer)
|
||||
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{*this,
|
||||
QueryType::SamplesPassed}}} {}
|
||||
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
|
||||
: VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
|
||||
HostCounter>{system, static_cast<VideoCore::RasterizerInterface&>(
|
||||
gl_rasterizer)},
|
||||
gl_rasterizer{gl_rasterizer} {}
|
||||
|
||||
QueryCache::~QueryCache() = default;
|
||||
|
||||
void QueryCache::InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||
const u64 addr_begin = static_cast<u64>(addr);
|
||||
const u64 addr_end = addr_begin + static_cast<u64>(size);
|
||||
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
|
||||
const u64 cache_begin = query.GetCacheAddr();
|
||||
const u64 cache_end = cache_begin + query.GetSizeInBytes();
|
||||
return cache_begin < addr_end && addr_begin < cache_end;
|
||||
};
|
||||
|
||||
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||
const auto& it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
continue;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
for (auto& query : contents) {
|
||||
if (!in_range(query)) {
|
||||
continue;
|
||||
}
|
||||
rasterizer.UpdatePagesCachedCount(query.GetCpuAddr(), query.GetSizeInBytes(), -1);
|
||||
Flush(query);
|
||||
}
|
||||
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
|
||||
std::end(contents));
|
||||
}
|
||||
}
|
||||
|
||||
void QueryCache::FlushRegion(CacheAddr addr, std::size_t size) {
|
||||
// We can handle flushes in the same way as invalidations.
|
||||
InvalidateRegion(addr, size);
|
||||
}
|
||||
|
||||
void QueryCache::Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) {
|
||||
auto& memory_manager = system.GPU().MemoryManager();
|
||||
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||
|
||||
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
|
||||
if (!query) {
|
||||
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||
|
||||
query = &Register(CachedQuery(type, *cpu_addr, host_ptr));
|
||||
}
|
||||
|
||||
query->SetCounter(GetStream(type).GetCurrent(rasterizer.AnyCommandQueued()), timestamp);
|
||||
}
|
||||
|
||||
void QueryCache::UpdateCounters() {
|
||||
auto& samples_passed = GetStream(QueryType::SamplesPassed);
|
||||
|
||||
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||
samples_passed.Update(regs.samplecnt_enable, rasterizer.AnyCommandQueued());
|
||||
}
|
||||
|
||||
void QueryCache::ResetCounter(QueryType type) {
|
||||
GetStream(type).Reset(rasterizer.AnyCommandQueued());
|
||||
}
|
||||
|
||||
void QueryCache::Reserve(QueryType type, OGLQuery&& query) {
|
||||
reserved_queries[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||
}
|
||||
|
||||
std::shared_ptr<HostCounter> QueryCache::GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||
QueryType type) {
|
||||
auto& reserve = reserved_queries[static_cast<std::size_t>(type)];
|
||||
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
|
||||
auto& reserve = queries_reserve[static_cast<std::size_t>(type)];
|
||||
OGLQuery query;
|
||||
if (reserve.empty()) {
|
||||
query.Create(GetTarget(type));
|
||||
} else {
|
||||
return query;
|
||||
}
|
||||
|
||||
query = std::move(reserve.back());
|
||||
reserve.pop_back();
|
||||
}
|
||||
|
||||
return std::make_shared<HostCounter>(*this, std::move(dependency), type, std::move(query));
|
||||
return query;
|
||||
}
|
||||
|
||||
CachedQuery& QueryCache::Register(CachedQuery&& cached_query) {
|
||||
const u64 page = static_cast<u64>(cached_query.GetCacheAddr()) >> PAGE_SHIFT;
|
||||
auto& stored_ref = cached_queries[page].emplace_back(std::move(cached_query));
|
||||
rasterizer.UpdatePagesCachedCount(stored_ref.GetCpuAddr(), stored_ref.GetSizeInBytes(), 1);
|
||||
return stored_ref;
|
||||
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
|
||||
queries_reserve[static_cast<std::size_t>(type)].push_back(std::move(query));
|
||||
}
|
||||
|
||||
CachedQuery* QueryCache::TryGet(CacheAddr addr) {
|
||||
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
||||
const auto it = cached_queries.find(page);
|
||||
if (it == std::end(cached_queries)) {
|
||||
return nullptr;
|
||||
}
|
||||
auto& contents = it->second;
|
||||
const auto found =
|
||||
std::find_if(std::begin(contents), std::end(contents),
|
||||
[addr](const auto& query) { return query.GetCacheAddr() == addr; });
|
||||
return found != std::end(contents) ? &*found : nullptr;
|
||||
bool QueryCache::AnyCommandQueued() const noexcept {
|
||||
return gl_rasterizer.AnyCommandQueued();
|
||||
}
|
||||
|
||||
void QueryCache::Flush(CachedQuery& cached_query) {
|
||||
auto& stream = GetStream(cached_query.GetType());
|
||||
|
||||
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||
// To avoid this disable and re-enable keeping the dependency stream.
|
||||
// But we only have to do this if we have pending waits to be done.
|
||||
const bool slice_counter = stream.IsEnabled() && cached_query.WaitPending();
|
||||
const bool any_command_queued = rasterizer.AnyCommandQueued();
|
||||
if (slice_counter) {
|
||||
stream.Update(false, any_command_queued);
|
||||
}
|
||||
|
||||
cached_query.Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true, any_command_queued);
|
||||
}
|
||||
}
|
||||
|
||||
CounterStream& QueryCache::GetStream(QueryType type) {
|
||||
return streams[static_cast<std::size_t>(type)];
|
||||
}
|
||||
|
||||
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency, QueryType type,
|
||||
OGLQuery&& query_)
|
||||
: cache{cache}, type{type}, dependency{std::move(dependency)}, query{std::move(query_)} {
|
||||
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type)
|
||||
: VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
|
||||
type{type}, query{cache.AllocateQuery(type)} {
|
||||
glBeginQuery(GetTarget(type), query.handle);
|
||||
}
|
||||
|
||||
|
@ -232,81 +70,50 @@ HostCounter::~HostCounter() {
|
|||
cache.Reserve(type, std::move(query));
|
||||
}
|
||||
|
||||
u64 HostCounter::Query() {
|
||||
if (result) {
|
||||
return *result;
|
||||
void HostCounter::EndQuery() {
|
||||
if (!cache.AnyCommandQueued()) {
|
||||
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||
// for this. Insert to the OpenGL command stream a flush.
|
||||
glFlush();
|
||||
}
|
||||
|
||||
u64 value;
|
||||
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
|
||||
if (dependency) {
|
||||
value += dependency->Query();
|
||||
}
|
||||
|
||||
return *(result = value);
|
||||
glEndQuery(GetTarget(type));
|
||||
}
|
||||
|
||||
bool HostCounter::WaitPending() const noexcept {
|
||||
return result.has_value();
|
||||
u64 HostCounter::BlockingQuery() const {
|
||||
GLint64 value;
|
||||
glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
|
||||
return static_cast<u64>(value);
|
||||
}
|
||||
|
||||
CachedQuery::CachedQuery(QueryType type, VAddr cpu_addr, u8* host_ptr)
|
||||
: type{type}, cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||
CachedQuery::CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr)
|
||||
: VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache}, type{type} {}
|
||||
|
||||
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
|
||||
: type{rhs.type}, cpu_addr{rhs.cpu_addr}, host_ptr{rhs.host_ptr},
|
||||
counter{std::move(rhs.counter)}, timestamp{rhs.timestamp} {}
|
||||
|
||||
CachedQuery::~CachedQuery() = default;
|
||||
: VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache}, type{rhs.type} {}
|
||||
|
||||
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
|
||||
VideoCommon::CachedQueryBase<HostCounter>::operator=(std::move(rhs));
|
||||
cache = rhs.cache;
|
||||
type = rhs.type;
|
||||
cpu_addr = rhs.cpu_addr;
|
||||
host_ptr = rhs.host_ptr;
|
||||
counter = std::move(rhs.counter);
|
||||
timestamp = rhs.timestamp;
|
||||
return *this;
|
||||
}
|
||||
|
||||
void CachedQuery::Flush() {
|
||||
// When counter is nullptr it means that it's just been reseted. We are supposed to write a zero
|
||||
// in these cases.
|
||||
const u64 value = counter ? counter->Query() : 0;
|
||||
std::memcpy(host_ptr, &value, sizeof(u64));
|
||||
|
||||
if (timestamp) {
|
||||
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
|
||||
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||
// To avoid this disable and re-enable keeping the dependency stream.
|
||||
// But we only have to do this if we have pending waits to be done.
|
||||
auto& stream = cache->Stream(type);
|
||||
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||
if (slice_counter) {
|
||||
stream.Update(false);
|
||||
}
|
||||
}
|
||||
|
||||
void CachedQuery::SetCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
|
||||
if (counter) {
|
||||
// If there's an old counter set it means the query is being rewritten by the game.
|
||||
// To avoid losing the data forever, flush here.
|
||||
Flush();
|
||||
VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||
|
||||
if (slice_counter) {
|
||||
stream.Update(true);
|
||||
}
|
||||
counter = std::move(counter_);
|
||||
timestamp = timestamp_;
|
||||
}
|
||||
|
||||
bool CachedQuery::WaitPending() const noexcept {
|
||||
return counter && counter->WaitPending();
|
||||
}
|
||||
|
||||
QueryType CachedQuery::GetType() const noexcept {
|
||||
return type;
|
||||
}
|
||||
|
||||
VAddr CachedQuery::GetCpuAddr() const noexcept {
|
||||
return cpu_addr;
|
||||
}
|
||||
|
||||
CacheAddr CachedQuery::GetCacheAddr() const noexcept {
|
||||
return ToCacheAddr(host_ptr);
|
||||
}
|
||||
|
||||
u64 CachedQuery::GetSizeInBytes() const noexcept {
|
||||
return timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
|
||||
}
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include <glad/glad.h>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/query_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||
|
||||
|
@ -24,134 +25,57 @@ namespace OpenGL {
|
|||
|
||||
class CachedQuery;
|
||||
class HostCounter;
|
||||
class RasterizerOpenGL;
|
||||
class QueryCache;
|
||||
class RasterizerOpenGL;
|
||||
|
||||
class CounterStream final {
|
||||
public:
|
||||
explicit CounterStream(QueryCache& cache, VideoCore::QueryType type);
|
||||
~CounterStream();
|
||||
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||
|
||||
void Update(bool enabled, bool any_command_queued);
|
||||
|
||||
void Reset(bool any_command_queued);
|
||||
|
||||
std::shared_ptr<HostCounter> GetCurrent(bool any_command_queued);
|
||||
|
||||
bool IsEnabled() const {
|
||||
return current != nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
void Enable();
|
||||
|
||||
void Disable(bool any_command_queued);
|
||||
|
||||
void EndQuery(bool any_command_queued);
|
||||
|
||||
QueryCache& cache;
|
||||
|
||||
std::shared_ptr<HostCounter> current;
|
||||
std::shared_ptr<HostCounter> last;
|
||||
VideoCore::QueryType type;
|
||||
GLenum target;
|
||||
};
|
||||
|
||||
class QueryCache final {
|
||||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
|
||||
~QueryCache();
|
||||
|
||||
void InvalidateRegion(CacheAddr addr, std::size_t size);
|
||||
|
||||
void FlushRegion(CacheAddr addr, std::size_t size);
|
||||
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp);
|
||||
|
||||
void UpdateCounters();
|
||||
|
||||
void ResetCounter(VideoCore::QueryType type);
|
||||
OGLQuery AllocateQuery(VideoCore::QueryType type);
|
||||
|
||||
void Reserve(VideoCore::QueryType type, OGLQuery&& query);
|
||||
|
||||
std::shared_ptr<HostCounter> GetHostCounter(std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type);
|
||||
bool AnyCommandQueued() const noexcept;
|
||||
|
||||
private:
|
||||
CachedQuery& Register(CachedQuery&& cached_query);
|
||||
|
||||
CachedQuery* TryGet(CacheAddr addr);
|
||||
|
||||
void Flush(CachedQuery& cached_query);
|
||||
|
||||
CounterStream& GetStream(VideoCore::QueryType type);
|
||||
|
||||
Core::System& system;
|
||||
RasterizerOpenGL& rasterizer;
|
||||
|
||||
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||
|
||||
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> reserved_queries;
|
||||
RasterizerOpenGL& gl_rasterizer;
|
||||
std::array<std::vector<OGLQuery>, VideoCore::NumQueryTypes> queries_reserve;
|
||||
};
|
||||
|
||||
class HostCounter final {
|
||||
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
|
||||
public:
|
||||
explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
|
||||
VideoCore::QueryType type, OGLQuery&& query);
|
||||
VideoCore::QueryType type);
|
||||
~HostCounter();
|
||||
|
||||
/// Returns the current value of the query.
|
||||
u64 Query();
|
||||
|
||||
/// Returns true when querying this counter will potentially wait for OpenGL.
|
||||
bool WaitPending() const noexcept;
|
||||
void EndQuery();
|
||||
|
||||
private:
|
||||
u64 BlockingQuery() const override;
|
||||
|
||||
QueryCache& cache;
|
||||
VideoCore::QueryType type;
|
||||
|
||||
std::shared_ptr<HostCounter> dependency; ///< Counter queued before this one.
|
||||
OGLQuery query; ///< OpenGL query.
|
||||
std::optional<u64> result; ///< Added values of the counter.
|
||||
OGLQuery query;
|
||||
};
|
||||
|
||||
class CachedQuery final {
|
||||
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
|
||||
public:
|
||||
explicit CachedQuery(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr);
|
||||
CachedQuery(CachedQuery&&) noexcept;
|
||||
CachedQuery(const CachedQuery&) = delete;
|
||||
~CachedQuery();
|
||||
explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
|
||||
u8* host_ptr);
|
||||
CachedQuery(CachedQuery&& rhs) noexcept;
|
||||
|
||||
CachedQuery& operator=(CachedQuery&&) noexcept;
|
||||
CachedQuery& operator=(CachedQuery&& rhs) noexcept;
|
||||
|
||||
/// Writes the counter value to host memory.
|
||||
void Flush();
|
||||
|
||||
/// Updates the counter this cached query registered in guest memory will write when requested.
|
||||
void SetCounter(std::shared_ptr<HostCounter> counter, std::optional<u64> timestamp);
|
||||
|
||||
/// Returns true when a flushing this query will potentially wait for OpenGL.
|
||||
bool WaitPending() const noexcept;
|
||||
|
||||
/// Returns the query type.
|
||||
VideoCore::QueryType GetType() const noexcept;
|
||||
|
||||
/// Returns the guest CPU address for this query.
|
||||
VAddr GetCpuAddr() const noexcept;
|
||||
|
||||
/// Returns the cache address for this query.
|
||||
CacheAddr GetCacheAddr() const noexcept;
|
||||
|
||||
/// Returns the number of cached bytes.
|
||||
u64 GetSizeInBytes() const noexcept;
|
||||
void Flush() override;
|
||||
|
||||
private:
|
||||
VideoCore::QueryType type; ///< Abstracted query type (e.g. samples passed).
|
||||
VAddr cpu_addr; ///< Guest CPU address.
|
||||
u8* host_ptr; ///< Writable host pointer.
|
||||
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
|
||||
QueryCache* cache;
|
||||
VideoCore::QueryType type;
|
||||
};
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
Loading…
Reference in a new issue