mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-22 14:31:39 +00:00
video_core: Bloodborne stabilization pt1 (#543)
* shader_recompiler: Writelane elimination pass + null image fix
* spirv: Implement image derivatives
* texture_cache: Reduce page bit size
* clang format
* slot_vector: Back to debug assert
* vk_graphics_pipeline: Handle null tsharp
* spirv: Revert some change
* vk_instance: Support primitive restart on list topology
* page_manager: Adjust windows exception handler
* clang format
* Remove subres tracking
* Will be done separately
This commit is contained in:
parent
1ca13870eb
commit
b52741b714
|
@ -28,9 +28,13 @@ struct SlotId {
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
class SlotVector {
|
class SlotVector {
|
||||||
constexpr static std::size_t InitialCapacity = 1024;
|
constexpr static std::size_t InitialCapacity = 2048;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
/// Default constructor: calls Reserve(InitialCapacity) up front so the container
/// starts out with its initial capacity already set up.
SlotVector() {
    Reserve(InitialCapacity);
}
|
||||||
|
|
||||||
~SlotVector() noexcept {
|
~SlotVector() noexcept {
|
||||||
std::size_t index = 0;
|
std::size_t index = 0;
|
||||||
for (u64 bits : stored_bitset) {
|
for (u64 bits : stored_bitset) {
|
||||||
|
@ -67,19 +71,6 @@ public:
|
||||||
return SlotId{index};
|
return SlotId{index};
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
|
||||||
[[nodiscard]] SlotId swap_and_insert(SlotId existing_id, Args&&... args) noexcept {
|
|
||||||
const u32 index = FreeValueIndex();
|
|
||||||
T& existing_value = values[existing_id.index].object;
|
|
||||||
|
|
||||||
new (&values[index].object) T(std::move(existing_value));
|
|
||||||
existing_value.~T();
|
|
||||||
new (&values[existing_id.index].object) T(std::forward<Args>(args)...);
|
|
||||||
SetStorageBit(index);
|
|
||||||
|
|
||||||
return SlotId{index};
|
|
||||||
}
|
|
||||||
|
|
||||||
void erase(SlotId id) noexcept {
|
void erase(SlotId id) noexcept {
|
||||||
values[id.index].object.~T();
|
values[id.index].object.~T();
|
||||||
free_list.push_back(id.index);
|
free_list.push_back(id.index);
|
||||||
|
@ -151,7 +142,8 @@ private:
|
||||||
|
|
||||||
const std::size_t old_free_size = free_list.size();
|
const std::size_t old_free_size = free_list.size();
|
||||||
free_list.resize(old_free_size + (new_capacity - values_capacity));
|
free_list.resize(old_free_size + (new_capacity - values_capacity));
|
||||||
std::iota(free_list.begin() + old_free_size, free_list.end(),
|
const std::size_t new_free_size = free_list.size();
|
||||||
|
std::iota(free_list.rbegin(), free_list.rbegin() + new_free_size - old_free_size,
|
||||||
static_cast<u32>(values_capacity));
|
static_cast<u32>(values_capacity));
|
||||||
|
|
||||||
delete[] values;
|
delete[] values;
|
||||||
|
|
|
@ -1123,7 +1123,6 @@ int PS4_SYSV_ABI posix_pthread_join(ScePthread thread, void** res) {
|
||||||
}
|
}
|
||||||
|
|
||||||
int PS4_SYSV_ABI scePthreadDetach(ScePthread thread) {
|
int PS4_SYSV_ABI scePthreadDetach(ScePthread thread) {
|
||||||
LOG_INFO(Kernel_Pthread, "thread create name = {}", thread->name);
|
|
||||||
thread->is_detached = true;
|
thread->is_detached = true;
|
||||||
return ORBIS_OK;
|
return ORBIS_OK;
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,12 @@ struct ImageOperands {
|
||||||
static_cast<u32>(new_mask));
|
static_cast<u32>(new_mask));
|
||||||
operands.push_back(value);
|
operands.push_back(value);
|
||||||
}
|
}
|
||||||
|
/// Two-operand overload: ORs new_mask into the accumulated image-operand mask and
/// appends value1 followed by value2 to the operand list.
void Add(spv::ImageOperandsMask new_mask, Id value1, Id value2) {
    const u32 merged_bits = static_cast<u32>(mask) | static_cast<u32>(new_mask);
    mask = static_cast<spv::ImageOperandsMask>(merged_bits);
    operands.push_back(value1);
    operands.push_back(value2);
}
|
||||||
|
|
||||||
void AddOffset(EmitContext& ctx, const IR::Value& offset,
|
void AddOffset(EmitContext& ctx, const IR::Value& offset,
|
||||||
bool can_use_runtime_offsets = false) {
|
bool can_use_runtime_offsets = false) {
|
||||||
|
@ -53,6 +59,15 @@ struct ImageOperands {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Adds explicit gradient operands built from a packed derivatives vector.
/// Components 0-1 of `derivatives` become the first Grad operand and components 2-3
/// the second one. Nothing is added when `derivatives` is not a valid id.
void AddDerivatives(EmitContext& ctx, Id derivatives) {
    if (Sirit::ValidId(derivatives)) {
        const Id grad_x{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 0, 1)};
        const Id grad_y{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 2, 3)};
        Add(spv::ImageOperandsMask::Grad, grad_x, grad_y);
    }
}
|
||||||
|
|
||||||
spv::ImageOperandsMask mask{};
|
spv::ImageOperandsMask mask{};
|
||||||
boost::container::static_vector<Id, 4> operands;
|
boost::container::static_vector<Id, 4> operands;
|
||||||
};
|
};
|
||||||
|
@ -117,7 +132,7 @@ Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
|
const u32 comp = inst->Flags<IR::TextureInstInfo>().gather_comp.Value();
|
||||||
ImageOperands operands;
|
ImageOperands operands;
|
||||||
operands.AddOffset(ctx, offset);
|
operands.AddOffset(ctx, offset, true);
|
||||||
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
|
return ctx.OpImageGather(ctx.F32[4], sampled_image, coords, ctx.ConstU32(comp), operands.mask,
|
||||||
operands.operands);
|
operands.operands);
|
||||||
}
|
}
|
||||||
|
@ -129,7 +144,7 @@ Id EmitImageGatherDref(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
ImageOperands operands;
|
ImageOperands operands;
|
||||||
operands.AddOffset(ctx, offset);
|
operands.AddOffset(ctx, offset, true);
|
||||||
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
|
return ctx.OpImageDrefGather(ctx.F32[4], sampled_image, coords, dref, operands.mask,
|
||||||
operands.operands);
|
operands.operands);
|
||||||
}
|
}
|
||||||
|
@ -181,9 +196,17 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) {
|
||||||
return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
|
return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
|
||||||
Id derivatives, const IR::Value& offset, Id lod_clamp) {
|
const IR::Value& offset, Id lod_clamp) {
|
||||||
UNREACHABLE_MSG("SPIR-V Instruction");
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
|
ImageOperands operands;
|
||||||
|
operands.AddDerivatives(ctx, derivatives);
|
||||||
|
operands.AddOffset(ctx, offset);
|
||||||
|
return ctx.OpImageSampleExplicitLod(ctx.F32[4], sampled_image, coords, operands.mask,
|
||||||
|
operands.operands);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords) {
|
||||||
|
|
|
@ -387,8 +387,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
|
||||||
Id lod, Id ms);
|
Id lod, Id ms);
|
||||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
|
||||||
Id derivatives, const IR::Value& offset, Id lod_clamp);
|
const IR::Value& offset, Id lod_clamp);
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
||||||
|
|
||||||
|
@ -407,5 +407,8 @@ Id EmitImageAtomicExchange32(EmitContext& ctx, IR::Inst* inst, u32 handle, Id co
|
||||||
Id EmitLaneId(EmitContext& ctx);
|
Id EmitLaneId(EmitContext& ctx);
|
||||||
Id EmitWarpId(EmitContext& ctx);
|
Id EmitWarpId(EmitContext& ctx);
|
||||||
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index);
|
||||||
|
Id EmitReadFirstLane(EmitContext& ctx, Id value);
|
||||||
|
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane);
|
||||||
|
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane);
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -22,4 +22,16 @@ Id EmitQuadShuffle(EmitContext& ctx, Id value, Id index) {
|
||||||
return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index);
|
return ctx.OpGroupNonUniformQuadBroadcast(ctx.U32[1], SubgroupScope(ctx), value, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// SPIR-V emission hook for the ReadFirstLane IR opcode.
/// No code generation is provided: reaching this function trips UNREACHABLE().
Id EmitReadFirstLane(EmitContext& ctx, Id value) {
    UNREACHABLE();
}
|
||||||
|
|
||||||
|
/// SPIR-V emission hook for the ReadLane IR opcode.
/// No code generation is provided: reaching this function trips UNREACHABLE().
Id EmitReadLane(EmitContext& ctx, Id value, u32 lane) {
    UNREACHABLE();
}
|
||||||
|
|
||||||
|
/// SPIR-V emission hook for the WriteLane IR opcode.
/// Returns the context's zero constant and ignores value, write_value and lane.
/// NOTE(review): presumably a placeholder that relies on WriteLane results being
/// folded away before emission -- confirm.
Id EmitWriteLane(EmitContext& ctx, Id value, Id write_value, u32 lane) {
    return ctx.u32_zero_value;
}
|
||||||
|
|
||||||
} // namespace Shader::Backend::SPIRV
|
} // namespace Shader::Backend::SPIRV
|
||||||
|
|
|
@ -127,7 +127,6 @@ void Translator::DS_ADD_U32(const GcnInst& inst, bool rtn) {
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
const IR::U32 data{GetSrc(inst.src[1])};
|
||||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
const IR::Value original_val = ir.SharedAtomicIAdd(addr_offset, data);
|
||||||
if (rtn) {
|
if (rtn) {
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
SetDst(inst.dst[0], IR::U32{original_val});
|
||||||
|
@ -139,7 +138,6 @@ void Translator::DS_MIN_U32(const GcnInst& inst, bool rtn) {
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
const IR::U32 data{GetSrc(inst.src[1])};
|
||||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
|
const IR::Value original_val = ir.SharedAtomicIMin(addr_offset, data, false);
|
||||||
if (rtn) {
|
if (rtn) {
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
SetDst(inst.dst[0], IR::U32{original_val});
|
||||||
|
@ -151,7 +149,6 @@ void Translator::DS_MAX_U32(const GcnInst& inst, bool rtn) {
|
||||||
const IR::U32 data{GetSrc(inst.src[1])};
|
const IR::U32 data{GetSrc(inst.src[1])};
|
||||||
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
const IR::U32 offset = ir.Imm32(u32(inst.control.ds.offset0));
|
||||||
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
const IR::U32 addr_offset = ir.IAdd(addr, offset);
|
||||||
IR::VectorReg dst_reg{inst.dst[0].code};
|
|
||||||
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
|
const IR::Value original_val = ir.SharedAtomicIMax(addr_offset, data, false);
|
||||||
if (rtn) {
|
if (rtn) {
|
||||||
SetDst(inst.dst[0], IR::U32{original_val});
|
SetDst(inst.dst[0], IR::U32{original_val});
|
||||||
|
@ -168,13 +165,18 @@ void Translator::V_READFIRSTLANE_B32(const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_READLANE_B32(const GcnInst& inst) {
|
void Translator::V_READLANE_B32(const GcnInst& inst) {
|
||||||
ASSERT(info.stage != Stage::Compute);
|
const IR::ScalarReg dst{inst.dst[0].code};
|
||||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
const IR::U32 value{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 lane{GetSrc(inst.src[1])};
|
||||||
|
ir.SetScalarReg(dst, ir.ReadLane(value, lane));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
void Translator::V_WRITELANE_B32(const GcnInst& inst) {
|
||||||
ASSERT(info.stage != Stage::Compute);
|
const IR::VectorReg dst{inst.dst[0].code};
|
||||||
SetDst(inst.dst[0], GetSrc(inst.src[0]));
|
const IR::U32 value{GetSrc(inst.src[0])};
|
||||||
|
const IR::U32 lane{GetSrc(inst.src[1])};
|
||||||
|
const IR::U32 old_value{GetSrc(inst.dst[0])};
|
||||||
|
ir.SetVectorReg(dst, ir.WriteLane(old_value, value, lane));
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace Shader::Gcn
|
} // namespace Shader::Gcn
|
||||||
|
|
|
@ -440,13 +440,16 @@ void Translator::S_SUB_U32(const GcnInst& inst) {
|
||||||
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
void Translator::S_GETPC_B64(u32 pc, const GcnInst& inst) {
|
||||||
// This only really exists to let resource tracking pass know
|
// This only really exists to let resource tracking pass know
|
||||||
// there is an inline cbuf.
|
// there is an inline cbuf.
|
||||||
SetDst(inst.dst[0], ir.Imm32(pc));
|
const IR::ScalarReg dst{inst.dst[0].code};
|
||||||
|
ir.SetScalarReg(dst, ir.Imm32(pc));
|
||||||
|
ir.SetScalarReg(dst + 1, ir.Imm32(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
void Translator::S_ADDC_U32(const GcnInst& inst) {
|
||||||
const IR::U32 src0{GetSrc(inst.src[0])};
|
const IR::U32 src0{GetSrc(inst.src[0])};
|
||||||
const IR::U32 src1{GetSrc(inst.src[1])};
|
const IR::U32 src1{GetSrc(inst.src[1])};
|
||||||
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), ir.GetSccLo()));
|
const IR::U32 carry{ir.Select(ir.GetScc(), ir.Imm32(1U), ir.Imm32(0U))};
|
||||||
|
SetDst(inst.dst[0], ir.IAdd(ir.IAdd(src0, src1), carry));
|
||||||
}
|
}
|
||||||
|
|
||||||
void Translator::S_MAX_U32(const GcnInst& inst) {
|
void Translator::S_MAX_U32(const GcnInst& inst) {
|
||||||
|
|
|
@ -17,6 +17,7 @@ void Translator::EmitVectorMemory(const GcnInst& inst) {
|
||||||
case Opcode::IMAGE_SAMPLE_C_O:
|
case Opcode::IMAGE_SAMPLE_C_O:
|
||||||
case Opcode::IMAGE_SAMPLE_B:
|
case Opcode::IMAGE_SAMPLE_B:
|
||||||
case Opcode::IMAGE_SAMPLE_C_LZ_O:
|
case Opcode::IMAGE_SAMPLE_C_LZ_O:
|
||||||
|
case Opcode::IMAGE_SAMPLE_D:
|
||||||
return IMAGE_SAMPLE(inst);
|
return IMAGE_SAMPLE(inst);
|
||||||
case Opcode::IMAGE_GATHER4_C:
|
case Opcode::IMAGE_GATHER4_C:
|
||||||
case Opcode::IMAGE_GATHER4_LZ:
|
case Opcode::IMAGE_GATHER4_LZ:
|
||||||
|
@ -162,12 +163,15 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||||
const IR::F32 dref =
|
const IR::F32 dref =
|
||||||
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
||||||
|
const IR::Value derivatives = [&] -> IR::Value {
|
||||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
if (!flags.test(MimgModifier::Derivative)) {
|
||||||
// T#. We don't have access to T# though until resource tracking pass. For now assume no
|
return {};
|
||||||
// derivatives are present, otherwise we don't know where coordinates are placed in the address
|
}
|
||||||
// stream.
|
addr_reg = addr_reg + 4;
|
||||||
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
|
return ir.CompositeConstruct(
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg - 4), ir.GetVectorReg<IR::F32>(addr_reg - 3),
|
||||||
|
ir.GetVectorReg<IR::F32>(addr_reg - 2), ir.GetVectorReg<IR::F32>(addr_reg - 1));
|
||||||
|
}();
|
||||||
|
|
||||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
||||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
||||||
|
@ -177,6 +181,10 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
||||||
|
|
||||||
|
// Derivatives are tricky because their number depends on the texture type which is located in
|
||||||
|
// T#. We don't have access to T# though until resource tracking pass. For now assume if
|
||||||
|
// derivatives are present, that a 2D image is bound.
|
||||||
|
const bool has_derivatives = flags.test(MimgModifier::Derivative);
|
||||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
||||||
|
|
||||||
IR::TextureInstInfo info{};
|
IR::TextureInstInfo info{};
|
||||||
|
@ -186,9 +194,13 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||||
info.explicit_lod.Assign(explicit_lod);
|
info.explicit_lod.Assign(explicit_lod);
|
||||||
|
info.has_derivatives.Assign(has_derivatives);
|
||||||
|
|
||||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
// Issue IR instruction, leaving unknown fields blank to patch later.
|
||||||
const IR::Value texel = [&]() -> IR::Value {
|
const IR::Value texel = [&]() -> IR::Value {
|
||||||
|
if (has_derivatives) {
|
||||||
|
return ir.ImageGradient(handle, body, derivatives, offset, {}, info);
|
||||||
|
}
|
||||||
if (!flags.test(MimgModifier::Pcf)) {
|
if (!flags.test(MimgModifier::Pcf)) {
|
||||||
if (explicit_lod) {
|
if (explicit_lod) {
|
||||||
return ir.ImageSampleExplicitLod(handle, body, offset, info);
|
return ir.ImageSampleExplicitLod(handle, body, offset, info);
|
||||||
|
|
|
@ -209,10 +209,6 @@ U1 IREmitter::GetVcc() {
|
||||||
return Inst<U1>(Opcode::GetVcc);
|
return Inst<U1>(Opcode::GetVcc);
|
||||||
}
|
}
|
||||||
|
|
||||||
U32 IREmitter::GetSccLo() {
|
|
||||||
return Inst<U32>(Opcode::GetSccLo);
|
|
||||||
}
|
|
||||||
|
|
||||||
U32 IREmitter::GetVccLo() {
|
U32 IREmitter::GetVccLo() {
|
||||||
return Inst<U32>(Opcode::GetVccLo);
|
return Inst<U32>(Opcode::GetVccLo);
|
||||||
}
|
}
|
||||||
|
@ -445,6 +441,18 @@ U32 IREmitter::QuadShuffle(const U32& value, const U32& index) {
|
||||||
return Inst<U32>(Opcode::QuadShuffle, value, index);
|
return Inst<U32>(Opcode::QuadShuffle, value, index);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Emits a ReadFirstLane instruction with `value` as its only operand and returns
/// the U32 result.
U32 IREmitter::ReadFirstLane(const U32& value) {
    return Inst<U32>(Opcode::ReadFirstLane, value);
}
|
||||||
|
|
||||||
|
/// Emits a ReadLane instruction with operands (value, lane) and returns the U32 result.
U32 IREmitter::ReadLane(const U32& value, const U32& lane) {
    return Inst<U32>(Opcode::ReadLane, value, lane);
}
|
||||||
|
|
||||||
|
/// Emits a WriteLane instruction and returns the U32 result.
/// Operand order is (value, write_value, lane): `value` is the existing register
/// contents, `write_value` is the data to place into `lane`.
U32 IREmitter::WriteLane(const U32& value, const U32& write_value, const U32& lane) {
    return Inst<U32>(Opcode::WriteLane, value, write_value, lane);
}
|
||||||
|
|
||||||
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
F32F64 IREmitter::FPAdd(const F32F64& a, const F32F64& b) {
|
||||||
if (a.Type() != b.Type()) {
|
if (a.Type() != b.Type()) {
|
||||||
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
UNREACHABLE_MSG("Mismatching types {} and {}", a.Type(), b.Type());
|
||||||
|
|
|
@ -65,7 +65,6 @@ public:
|
||||||
[[nodiscard]] U1 GetScc();
|
[[nodiscard]] U1 GetScc();
|
||||||
[[nodiscard]] U1 GetExec();
|
[[nodiscard]] U1 GetExec();
|
||||||
[[nodiscard]] U1 GetVcc();
|
[[nodiscard]] U1 GetVcc();
|
||||||
[[nodiscard]] U32 GetSccLo();
|
|
||||||
[[nodiscard]] U32 GetVccLo();
|
[[nodiscard]] U32 GetVccLo();
|
||||||
[[nodiscard]] U32 GetVccHi();
|
[[nodiscard]] U32 GetVccHi();
|
||||||
void SetScc(const U1& value);
|
void SetScc(const U1& value);
|
||||||
|
@ -122,6 +121,9 @@ public:
|
||||||
[[nodiscard]] U32 LaneId();
|
[[nodiscard]] U32 LaneId();
|
||||||
[[nodiscard]] U32 WarpId();
|
[[nodiscard]] U32 WarpId();
|
||||||
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
[[nodiscard]] U32 QuadShuffle(const U32& value, const U32& index);
|
||||||
|
[[nodiscard]] U32 ReadFirstLane(const U32& value);
|
||||||
|
[[nodiscard]] U32 ReadLane(const U32& value, const U32& lane);
|
||||||
|
[[nodiscard]] U32 WriteLane(const U32& value, const U32& write_value, const U32& lane);
|
||||||
|
|
||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||||
|
|
|
@ -58,7 +58,6 @@ OPCODE(SetAttribute, Void, Attr
|
||||||
OPCODE(GetScc, U1, Void, )
|
OPCODE(GetScc, U1, Void, )
|
||||||
OPCODE(GetExec, U1, Void, )
|
OPCODE(GetExec, U1, Void, )
|
||||||
OPCODE(GetVcc, U1, Void, )
|
OPCODE(GetVcc, U1, Void, )
|
||||||
OPCODE(GetSccLo, U32, Void, )
|
|
||||||
OPCODE(GetVccLo, U32, Void, )
|
OPCODE(GetVccLo, U32, Void, )
|
||||||
OPCODE(GetVccHi, U32, Void, )
|
OPCODE(GetVccHi, U32, Void, )
|
||||||
OPCODE(SetScc, Void, U1, )
|
OPCODE(SetScc, Void, U1, )
|
||||||
|
@ -330,19 +329,22 @@ OPCODE(ImageRead, U32x4, Opaq
|
||||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||||
|
|
||||||
// Image atomic operations
|
// Image atomic operations
|
||||||
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicIAdd32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicSMin32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicUMin32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicSMax32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicUMax32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicInc32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicDec32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicAnd32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicOr32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicXor32, U32, Opaque, Opaque, U32, )
|
||||||
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
OPCODE(ImageAtomicExchange32, U32, Opaque, Opaque, U32, )
|
||||||
|
|
||||||
// Warp operations
|
// Warp operations
|
||||||
OPCODE(LaneId, U32, )
|
OPCODE(LaneId, U32, )
|
||||||
OPCODE(WarpId, U32, )
|
OPCODE(WarpId, U32, )
|
||||||
OPCODE(QuadShuffle, U32, U32, U32 )
|
OPCODE(QuadShuffle, U32, U32, U32 )
|
||||||
|
// Lane access opcodes. ReadFirstLane has a single U32 source (the emitter passes only
// `value`), ReadLane takes (value, lane) and WriteLane takes (value, write_value, lane).
OPCODE(ReadFirstLane, U32, U32, )
OPCODE(ReadLane, U32, U32, U32 )
OPCODE(WriteLane, U32, U32, U32, U32 )
|
||||||
|
|
|
@ -250,6 +250,18 @@ void FoldCmpClass(IR::Inst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tries to resolve a ReadLane by walking back through the chain of WriteLane
/// instructions that produced its source value. If a WriteLane to the same lane is
/// found, all uses of the ReadLane are replaced with the value that was written.
/// In every other case the instruction is left untouched.
void FoldReadLane(IR::Inst& inst) {
    const IR::Value lane_arg{inst.Arg(1)};
    if (!lane_arg.IsImmediate()) {
        // Lane index is not a compile-time constant, so it cannot be matched against
        // a WriteLane. NOTE(review): assumes U32() is only valid on immediates -- confirm.
        return;
    }
    const u32 lane = lane_arg.U32();

    IR::Value source{inst.Arg(0)};
    // NOTE(review): assumes InstRecursive() must not be called on an immediate value,
    // hence the walk stops as soon as the chain reaches one -- confirm.
    while (!source.IsImmediate()) {
        IR::Inst* const prod = source.InstRecursive();
        if (prod->GetOpcode() != IR::Opcode::WriteLane) {
            return;
        }
        const IR::Value written_lane{prod->Arg(2)};
        if (!written_lane.IsImmediate()) {
            // This write targets an unknown lane and may alias the one being read;
            // looking past it could pick up a stale value.
            return;
        }
        if (written_lane.U32() == lane) {
            inst.ReplaceUsesWith(prod->Arg(1));
            return;
        }
        source = prod->Arg(0);
    }
}
|
||||||
|
|
||||||
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::IAdd32:
|
case IR::Opcode::IAdd32:
|
||||||
|
@ -289,6 +301,8 @@ void ConstantPropagation(IR::Block& block, IR::Inst& inst) {
|
||||||
case IR::Opcode::SelectF32:
|
case IR::Opcode::SelectF32:
|
||||||
case IR::Opcode::SelectF64:
|
case IR::Opcode::SelectF64:
|
||||||
return FoldSelect(inst);
|
return FoldSelect(inst);
|
||||||
|
case IR::Opcode::ReadLane:
|
||||||
|
return FoldReadLane(inst);
|
||||||
case IR::Opcode::FPNeg32:
|
case IR::Opcode::FPNeg32:
|
||||||
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
|
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -345,6 +345,7 @@ SharpLocation TrackSharp(const IR::Inst* inst) {
|
||||||
|
|
||||||
// Retrieve SGPR pair that holds sbase
|
// Retrieve SGPR pair that holds sbase
|
||||||
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
const auto pred1 = [](const IR::Inst* inst) -> std::optional<IR::ScalarReg> {
|
||||||
|
ASSERT(inst->GetOpcode() != IR::Opcode::ReadConst);
|
||||||
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
if (inst->GetOpcode() == IR::Opcode::GetUserData) {
|
||||||
return inst->Arg(0).ScalarReg();
|
return inst->Arg(0).ScalarReg();
|
||||||
}
|
}
|
||||||
|
@ -402,24 +403,13 @@ s32 TryHandleInlineCbuf(IR::Inst& inst, Info& info, Descriptors& descriptors,
|
||||||
// is used to define an inline constant buffer
|
// is used to define an inline constant buffer
|
||||||
|
|
||||||
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
IR::Inst* handle = inst.Arg(0).InstRecursive();
|
||||||
IR::Inst* p0 = handle->Arg(0).InstRecursive();
|
if (!handle->AreAllArgsImmediates()) {
|
||||||
if (p0->GetOpcode() != IR::Opcode::IAdd32 || !p0->Arg(0).IsImmediate() ||
|
|
||||||
!p0->Arg(1).IsImmediate()) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
IR::Inst* p1 = handle->Arg(1).InstRecursive();
|
|
||||||
if (p1->GetOpcode() != IR::Opcode::IAdd32) {
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
if (!handle->Arg(3).IsImmediate() || !handle->Arg(2).IsImmediate()) {
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
// We have found this pattern. Build the sharp.
|
// We have found this pattern. Build the sharp.
|
||||||
std::array<u32, 4> buffer;
|
std::array<u64, 2> buffer;
|
||||||
buffer[0] = info.pgm_base + p0->Arg(0).U32() + p0->Arg(1).U32();
|
buffer[0] = info.pgm_base + (handle->Arg(0).U32() | u64(handle->Arg(1).U32()) << 32);
|
||||||
buffer[1] = 0;
|
buffer[1] = handle->Arg(2).U32() | u64(handle->Arg(3).U32()) << 32;
|
||||||
buffer[2] = handle->Arg(2).U32();
|
|
||||||
buffer[3] = handle->Arg(3).U32();
|
|
||||||
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
|
cbuf = std::bit_cast<AmdGpu::Buffer>(buffer);
|
||||||
// Assign a binding to this sharp.
|
// Assign a binding to this sharp.
|
||||||
return descriptors.Add(BufferResource{
|
return descriptors.Add(BufferResource{
|
||||||
|
@ -617,7 +607,11 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
const IR::Value arg = inst.Arg(arg_pos);
|
const IR::Value arg = inst.Arg(arg_pos);
|
||||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
||||||
|
|
||||||
const auto read = [&](u32 offset) -> auto {
|
const auto read = [&](u32 offset) -> IR::U32 {
|
||||||
|
if (arg.IsImmediate()) {
|
||||||
|
const u16 comp = (arg.U32() >> offset) & 0x3F;
|
||||||
|
return ir.Imm32(s32(comp << 26) >> 26);
|
||||||
|
}
|
||||||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -637,7 +631,10 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (inst_info.has_derivatives) {
|
||||||
|
ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D,
|
||||||
|
"User derivatives only supported for 2D images");
|
||||||
|
}
|
||||||
if (inst_info.has_lod_clamp) {
|
if (inst_info.has_lod_clamp) {
|
||||||
const u32 arg_pos = [&]() -> u32 {
|
const u32 arg_pos = [&]() -> u32 {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
|
|
|
@ -32,7 +32,6 @@ struct SccFlagTag : FlagTag {};
|
||||||
struct ExecFlagTag : FlagTag {};
|
struct ExecFlagTag : FlagTag {};
|
||||||
struct VccFlagTag : FlagTag {};
|
struct VccFlagTag : FlagTag {};
|
||||||
struct VccLoTag : FlagTag {};
|
struct VccLoTag : FlagTag {};
|
||||||
struct SccLoTag : FlagTag {};
|
|
||||||
struct VccHiTag : FlagTag {};
|
struct VccHiTag : FlagTag {};
|
||||||
|
|
||||||
struct GotoVariable : FlagTag {
|
struct GotoVariable : FlagTag {
|
||||||
|
@ -45,7 +44,7 @@ struct GotoVariable : FlagTag {
|
||||||
};
|
};
|
||||||
|
|
||||||
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
using Variant = std::variant<IR::ScalarReg, IR::VectorReg, GotoVariable, SccFlagTag, ExecFlagTag,
|
||||||
VccFlagTag, SccLoTag, VccLoTag, VccHiTag>;
|
VccFlagTag, VccLoTag, VccHiTag>;
|
||||||
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
using ValueMap = std::unordered_map<IR::Block*, IR::Value>;
|
||||||
|
|
||||||
struct DefTable {
|
struct DefTable {
|
||||||
|
@ -84,13 +83,6 @@ struct DefTable {
|
||||||
exec_flag.insert_or_assign(block, value);
|
exec_flag.insert_or_assign(block, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
const IR::Value& Def(IR::Block* block, SccLoTag) {
|
|
||||||
return scc_lo_flag[block];
|
|
||||||
}
|
|
||||||
void SetDef(IR::Block* block, SccLoTag, const IR::Value& value) {
|
|
||||||
scc_lo_flag.insert_or_assign(block, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
const IR::Value& Def(IR::Block* block, VccLoTag) {
|
||||||
return vcc_lo_flag[block];
|
return vcc_lo_flag[block];
|
||||||
}
|
}
|
||||||
|
@ -133,10 +125,6 @@ IR::Opcode UndefOpcode(const VccLoTag) noexcept {
|
||||||
return IR::Opcode::UndefU32;
|
return IR::Opcode::UndefU32;
|
||||||
}
|
}
|
||||||
|
|
||||||
IR::Opcode UndefOpcode(const SccLoTag) noexcept {
|
|
||||||
return IR::Opcode::UndefU32;
|
|
||||||
}
|
|
||||||
|
|
||||||
IR::Opcode UndefOpcode(const VccHiTag) noexcept {
|
IR::Opcode UndefOpcode(const VccHiTag) noexcept {
|
||||||
return IR::Opcode::UndefU32;
|
return IR::Opcode::UndefU32;
|
||||||
}
|
}
|
||||||
|
@ -336,9 +324,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||||
case IR::Opcode::SetVcc:
|
case IR::Opcode::SetVcc:
|
||||||
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
pass.WriteVariable(VccFlagTag{}, block, inst.Arg(0));
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::SetSccLo:
|
|
||||||
pass.WriteVariable(SccLoTag{}, block, inst.Arg(0));
|
|
||||||
break;
|
|
||||||
case IR::Opcode::SetVccLo:
|
case IR::Opcode::SetVccLo:
|
||||||
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
pass.WriteVariable(VccLoTag{}, block, inst.Arg(0));
|
||||||
break;
|
break;
|
||||||
|
@ -371,9 +356,6 @@ void VisitInst(Pass& pass, IR::Block* block, IR::Inst& inst) {
|
||||||
case IR::Opcode::GetVcc:
|
case IR::Opcode::GetVcc:
|
||||||
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
inst.ReplaceUsesWith(pass.ReadVariable(VccFlagTag{}, block));
|
||||||
break;
|
break;
|
||||||
case IR::Opcode::GetSccLo:
|
|
||||||
inst.ReplaceUsesWith(pass.ReadVariable(SccLoTag{}, block));
|
|
||||||
break;
|
|
||||||
case IR::Opcode::GetVccLo:
|
case IR::Opcode::GetVccLo:
|
||||||
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
inst.ReplaceUsesWith(pass.ReadVariable(VccLoTag{}, block));
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -58,6 +58,7 @@ union TextureInstInfo {
|
||||||
BitField<4, 1, u32> explicit_lod;
|
BitField<4, 1, u32> explicit_lod;
|
||||||
BitField<5, 1, u32> has_offset;
|
BitField<5, 1, u32> has_offset;
|
||||||
BitField<6, 2, u32> gather_comp;
|
BitField<6, 2, u32> gather_comp;
|
||||||
|
BitField<8, 1, u32> has_derivatives;
|
||||||
};
|
};
|
||||||
|
|
||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
|
|
|
@ -56,11 +56,11 @@ IR::Program TranslateProgram(Common::ObjectPool<IR::Inst>& inst_pool,
|
||||||
|
|
||||||
// Run optimization passes
|
// Run optimization passes
|
||||||
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
Shader::Optimization::SsaRewritePass(program.post_order_blocks);
|
||||||
Shader::Optimization::ResourceTrackingPass(program);
|
|
||||||
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
Shader::Optimization::ConstantPropagationPass(program.post_order_blocks);
|
||||||
if (program.info.stage != Stage::Compute) {
|
if (program.info.stage != Stage::Compute) {
|
||||||
Shader::Optimization::LowerSharedMemToRegisters(program);
|
Shader::Optimization::LowerSharedMemToRegisters(program);
|
||||||
}
|
}
|
||||||
|
Shader::Optimization::ResourceTrackingPass(program);
|
||||||
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
Shader::Optimization::IdentityRemovalPass(program.blocks);
|
||||||
Shader::Optimization::DeadCodeEliminationPass(program);
|
Shader::Optimization::DeadCodeEliminationPass(program);
|
||||||
Shader::Optimization::CollectShaderInfoPass(program);
|
Shader::Optimization::CollectShaderInfoPass(program);
|
||||||
|
|
|
@ -179,6 +179,10 @@ struct Image {
|
||||||
return base_address << 8;
|
return base_address << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
operator bool() const noexcept {
|
||||||
|
return base_address != 0;
|
||||||
|
}
|
||||||
|
|
||||||
u32 DstSelect() const {
|
u32 DstSelect() const {
|
||||||
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
|
return dst_sel_x | (dst_sel_y << 3) | (dst_sel_z << 6) | (dst_sel_w << 9);
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,8 @@ struct PageManager::Impl {
|
||||||
if (ec == EXCEPTION_ACCESS_VIOLATION) {
|
if (ec == EXCEPTION_ACCESS_VIOLATION) {
|
||||||
const auto info = pExp->ExceptionRecord->ExceptionInformation;
|
const auto info = pExp->ExceptionRecord->ExceptionInformation;
|
||||||
if (info[0] == 1) { // Write violation
|
if (info[0] == 1) { // Write violation
|
||||||
rasterizer->InvalidateMemory(info[1], sizeof(u64));
|
const VAddr addr_aligned = Common::AlignDown(info[1], PAGESIZE);
|
||||||
|
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);
|
||||||
return EXCEPTION_CONTINUE_EXECUTION;
|
return EXCEPTION_CONTINUE_EXECUTION;
|
||||||
} /* else {
|
} /* else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
@ -199,7 +200,8 @@ struct PageManager::Impl {
|
||||||
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
|
const greg_t err = ctx->uc_mcontext.gregs[REG_ERR];
|
||||||
#endif
|
#endif
|
||||||
if (err & 0x2) {
|
if (err & 0x2) {
|
||||||
rasterizer->InvalidateMemory(address, sizeof(u64));
|
const VAddr addr_aligned = Common::AlignDown(address, PAGESIZE);
|
||||||
|
rasterizer->InvalidateMemory(addr_aligned, PAGESIZE);
|
||||||
} else {
|
} else {
|
||||||
// Read not supported!
|
// Read not supported!
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
|
|
@ -396,13 +396,18 @@ void GraphicsPipeline::BindResources(const Liverpool::Regs& regs,
|
||||||
|
|
||||||
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
|
boost::container::static_vector<AmdGpu::Image, 16> tsharps;
|
||||||
for (const auto& image_desc : stage->images) {
|
for (const auto& image_desc : stage->images) {
|
||||||
const auto& tsharp = tsharps.emplace_back(
|
const auto tsharp =
|
||||||
stage->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset));
|
stage->ReadUd<AmdGpu::Image>(image_desc.sgpr_base, image_desc.dword_offset);
|
||||||
VideoCore::ImageInfo image_info{tsharp};
|
if (tsharp) {
|
||||||
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
tsharps.emplace_back(tsharp);
|
||||||
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
VideoCore::ImageInfo image_info{tsharp};
|
||||||
const auto& image = texture_cache.GetImage(image_view.image_id);
|
VideoCore::ImageViewInfo view_info{tsharp, image_desc.is_storage};
|
||||||
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
const auto& image_view = texture_cache.FindTexture(image_info, view_info);
|
||||||
|
const auto& image = texture_cache.GetImage(image_view.image_id);
|
||||||
|
image_infos.emplace_back(VK_NULL_HANDLE, *image_view.image_view, image.layout);
|
||||||
|
} else {
|
||||||
|
image_infos.emplace_back(VK_NULL_HANDLE, VK_NULL_HANDLE, vk::ImageLayout::eGeneral);
|
||||||
|
}
|
||||||
set_writes.push_back({
|
set_writes.push_back({
|
||||||
.dstSet = VK_NULL_HANDLE,
|
.dstSet = VK_NULL_HANDLE,
|
||||||
.dstBinding = binding++,
|
.dstBinding = binding++,
|
||||||
|
|
|
@ -210,6 +210,8 @@ bool Instance::CreateDevice() {
|
||||||
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
color_write_en &= add_extension(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME);
|
||||||
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
|
const bool calibrated_timestamps = add_extension(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME);
|
||||||
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
const bool robustness = add_extension(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME);
|
||||||
|
const bool topology_restart =
|
||||||
|
add_extension(VK_EXT_PRIMITIVE_TOPOLOGY_LIST_RESTART_EXTENSION_NAME);
|
||||||
|
|
||||||
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
// These extensions are promoted by Vulkan 1.3, but for greater compatibility we use Vulkan 1.2
|
||||||
// with extensions.
|
// with extensions.
|
||||||
|
@ -330,6 +332,9 @@ bool Instance::CreateDevice() {
|
||||||
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
|
vk::PhysicalDeviceVertexInputDynamicStateFeaturesEXT{
|
||||||
.vertexInputDynamicState = true,
|
.vertexInputDynamicState = true,
|
||||||
},
|
},
|
||||||
|
vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT{
|
||||||
|
.primitiveTopologyListRestart = true,
|
||||||
|
},
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
feature_chain.get<vk::PhysicalDevicePortabilitySubsetFeaturesKHR>(),
|
||||||
#endif
|
#endif
|
||||||
|
@ -351,6 +356,9 @@ bool Instance::CreateDevice() {
|
||||||
if (!workgroup_memory_explicit_layout) {
|
if (!workgroup_memory_explicit_layout) {
|
||||||
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
device_chain.unlink<vk::PhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR>();
|
||||||
}
|
}
|
||||||
|
if (!topology_restart) {
|
||||||
|
device_chain.unlink<vk::PhysicalDevicePrimitiveTopologyListRestartFeaturesEXT>();
|
||||||
|
}
|
||||||
if (robustness) {
|
if (robustness) {
|
||||||
device_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor =
|
device_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor =
|
||||||
feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor;
|
feature_chain.get<vk::PhysicalDeviceRobustness2FeaturesEXT>().nullDescriptor;
|
||||||
|
|
|
@ -280,9 +280,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
||||||
DumpShader(code, hash, stage, "bin");
|
DumpShader(code, hash, stage, "bin");
|
||||||
}
|
}
|
||||||
|
|
||||||
block_pool.ReleaseContents();
|
|
||||||
inst_pool.ReleaseContents();
|
|
||||||
|
|
||||||
if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) {
|
if (stage != Shader::Stage::Fragment && stage != Shader::Stage::Vertex) {
|
||||||
LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage);
|
LOG_ERROR(Render_Vulkan, "Unsupported shader stage {}. PL creation skipped.", stage);
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@ -219,7 +219,12 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||||
guest_address = image.Address();
|
guest_address = image.Address();
|
||||||
|
|
||||||
mips_layout.reserve(resources.levels);
|
mips_layout.reserve(resources.levels);
|
||||||
|
tiling_idx = image.tiling_index;
|
||||||
|
UpdateSize();
|
||||||
|
}
|
||||||
|
|
||||||
|
void ImageInfo::UpdateSize() {
|
||||||
|
mips_layout.clear();
|
||||||
MipInfo mip_info{};
|
MipInfo mip_info{};
|
||||||
guest_size_bytes = 0;
|
guest_size_bytes = 0;
|
||||||
for (auto mip = 0u; mip < resources.levels; ++mip) {
|
for (auto mip = 0u; mip < resources.levels; ++mip) {
|
||||||
|
@ -265,7 +270,7 @@ ImageInfo::ImageInfo(const AmdGpu::Image& image) noexcept {
|
||||||
ASSERT(!props.is_block);
|
ASSERT(!props.is_block);
|
||||||
ASSERT(num_samples == 1);
|
ASSERT(num_samples == 1);
|
||||||
std::tie(mip_info.pitch, mip_info.size) =
|
std::tie(mip_info.pitch, mip_info.size) =
|
||||||
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, image.tiling_index);
|
ImageSizeMacroTiled(mip_w, mip_h, bpp, num_samples, tiling_idx);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default: {
|
default: {
|
||||||
|
|
|
@ -29,6 +29,8 @@ struct ImageInfo {
|
||||||
bool IsPacked() const;
|
bool IsPacked() const;
|
||||||
bool IsDepthStencil() const;
|
bool IsDepthStencil() const;
|
||||||
|
|
||||||
|
void UpdateSize();
|
||||||
|
|
||||||
struct {
|
struct {
|
||||||
VAddr cmask_addr;
|
VAddr cmask_addr;
|
||||||
VAddr fmask_addr;
|
VAddr fmask_addr;
|
||||||
|
@ -69,6 +71,7 @@ struct ImageInfo {
|
||||||
boost::container::small_vector<MipInfo, 14> mips_layout;
|
boost::container::small_vector<MipInfo, 14> mips_layout;
|
||||||
VAddr guest_address{0};
|
VAddr guest_address{0};
|
||||||
u32 guest_size_bytes{0};
|
u32 guest_size_bytes{0};
|
||||||
|
u32 tiling_idx{0}; // TODO: merge with existing!
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace VideoCore
|
} // namespace VideoCore
|
||||||
|
|
|
@ -18,11 +18,15 @@ TextureCache::TextureCache(const Vulkan::Instance& instance_, Vulkan::Scheduler&
|
||||||
BufferCache& buffer_cache_, PageManager& tracker_)
|
BufferCache& buffer_cache_, PageManager& tracker_)
|
||||||
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
: instance{instance_}, scheduler{scheduler_}, buffer_cache{buffer_cache_}, tracker{tracker_},
|
||||||
tile_manager{instance, scheduler} {
|
tile_manager{instance, scheduler} {
|
||||||
ImageInfo info;
|
ImageInfo info{};
|
||||||
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
|
info.pixel_format = vk::Format::eR8G8B8A8Unorm;
|
||||||
info.type = vk::ImageType::e2D;
|
info.type = vk::ImageType::e2D;
|
||||||
|
info.tiling_idx = u32(AmdGpu::TilingMode::Texture_MicroTiled);
|
||||||
|
info.num_bits = 32;
|
||||||
|
info.UpdateSize();
|
||||||
const ImageId null_id = slot_images.insert(instance, scheduler, info);
|
const ImageId null_id = slot_images.insert(instance, scheduler, info);
|
||||||
ASSERT(null_id.index == 0);
|
ASSERT(null_id.index == 0);
|
||||||
|
slot_images[null_id].flags = ImageFlagBits{};
|
||||||
|
|
||||||
ImageViewInfo view_info;
|
ImageViewInfo view_info;
|
||||||
void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));
|
void(slot_image_views.insert(instance, view_info, slot_images[null_id], null_id));
|
||||||
|
|
|
@ -28,7 +28,7 @@ class TextureCache {
|
||||||
using Entry = boost::container::small_vector<ImageId, 16>;
|
using Entry = boost::container::small_vector<ImageId, 16>;
|
||||||
static constexpr size_t AddressSpaceBits = 39;
|
static constexpr size_t AddressSpaceBits = 39;
|
||||||
static constexpr size_t FirstLevelBits = 9;
|
static constexpr size_t FirstLevelBits = 9;
|
||||||
static constexpr size_t PageBits = 22;
|
static constexpr size_t PageBits = 20;
|
||||||
};
|
};
|
||||||
using PageTable = MultiLevelPageTable<Traits>;
|
using PageTable = MultiLevelPageTable<Traits>;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue