mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2025-01-07 15:46:01 +00:00
shader_recompiler: Move sampling parameter resolution to tracking pass and support more derivative types. (#1290)
* shader_recompiler: Move sampling parameter resolution to tracking pass and support more derivative types. * shader_recompiler: Only track sampler sharp on sample instructions. * shader_recompiler: Fix Inst args size.
This commit is contained in:
parent
2ce6ec1fdc
commit
b7a1986827
|
@ -59,19 +59,22 @@ struct ImageOperands {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void AddDerivatives(EmitContext& ctx, Id derivatives) {
|
void AddDerivatives(EmitContext& ctx, Id derivatives_dx, Id derivatives_dy) {
|
||||||
if (!Sirit::ValidId(derivatives)) {
|
if (!Sirit::ValidId(derivatives_dx) || !Sirit::ValidId(derivatives_dy)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const Id dx{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 0, 1)};
|
Add(spv::ImageOperandsMask::Grad, derivatives_dx, derivatives_dy);
|
||||||
const Id dy{ctx.OpVectorShuffle(ctx.F32[2], derivatives, derivatives, 2, 3)};
|
|
||||||
Add(spv::ImageOperandsMask::Grad, dx, dy);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
spv::ImageOperandsMask mask{};
|
spv::ImageOperandsMask mask{};
|
||||||
boost::container::static_vector<Id, 4> operands;
|
boost::container::static_vector<Id, 4> operands;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2,
|
||||||
|
Id address3, Id address4) {
|
||||||
|
UNREACHABLE_MSG("Unreachable instruction");
|
||||||
|
}
|
||||||
|
|
||||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
||||||
const IR::Value& offset) {
|
const IR::Value& offset) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
|
@ -114,7 +117,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||||
operands.AddOffset(ctx, offset);
|
operands.AddOffset(ctx, offset);
|
||||||
const Id sample = ctx.OpImageSampleDrefImplicitLod(result_type, sampled_image, coords, dref,
|
const Id sample = ctx.OpImageSampleDrefImplicitLod(result_type, sampled_image, coords, dref,
|
||||||
operands.mask, operands.operands);
|
operands.mask, operands.operands);
|
||||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
||||||
|
return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value,
|
||||||
|
ctx.f32_zero_value, ctx.f32_zero_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id dref,
|
||||||
|
@ -129,7 +134,9 @@ Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle,
|
||||||
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
operands.Add(spv::ImageOperandsMask::Lod, lod);
|
||||||
const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
|
const Id sample = ctx.OpImageSampleDrefExplicitLod(result_type, sampled_image, coords, dref,
|
||||||
operands.mask, operands.operands);
|
operands.mask, operands.operands);
|
||||||
return texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
const Id sample_typed = texture.is_integer ? ctx.OpBitcast(ctx.F32[1], sample) : sample;
|
||||||
|
return ctx.OpCompositeConstruct(ctx.F32[4], sample_typed, ctx.f32_zero_value,
|
||||||
|
ctx.f32_zero_value, ctx.f32_zero_value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
Id EmitImageGather(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords,
|
||||||
|
@ -212,15 +219,15 @@ Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords) {
|
||||||
return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
|
return ctx.OpImageQueryLod(ctx.F32[2], sampled_image, coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
|
||||||
const IR::Value& offset, Id lod_clamp) {
|
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp) {
|
||||||
const auto& texture = ctx.images[handle & 0xFFFF];
|
const auto& texture = ctx.images[handle & 0xFFFF];
|
||||||
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
const Id image = ctx.OpLoad(texture.image_type, texture.id);
|
||||||
const Id result_type = texture.data_types->Get(4);
|
const Id result_type = texture.data_types->Get(4);
|
||||||
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
const Id sampler = ctx.OpLoad(ctx.sampler_type, ctx.samplers[handle >> 16]);
|
||||||
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
const Id sampled_image = ctx.OpSampledImage(texture.sampled_type, image, sampler);
|
||||||
ImageOperands operands;
|
ImageOperands operands;
|
||||||
operands.AddDerivatives(ctx, derivatives);
|
operands.AddDerivatives(ctx, derivatives_dx, derivatives_dy);
|
||||||
operands.AddOffset(ctx, offset);
|
operands.AddOffset(ctx, offset);
|
||||||
const Id sample = ctx.OpImageSampleExplicitLod(result_type, sampled_image, coords,
|
const Id sample = ctx.OpImageSampleExplicitLod(result_type, sampled_image, coords,
|
||||||
operands.mask, operands.operands);
|
operands.mask, operands.operands);
|
||||||
|
|
|
@ -368,6 +368,8 @@ Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||||
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
Id EmitConvertU16U32(EmitContext& ctx, Id value);
|
||||||
Id EmitConvertU32U16(EmitContext& ctx, Id value);
|
Id EmitConvertU32U16(EmitContext& ctx, Id value);
|
||||||
|
|
||||||
|
Id EmitImageSampleRaw(EmitContext& ctx, IR::Inst* inst, u32 handle, Id address1, Id address2,
|
||||||
|
Id address3, Id address4);
|
||||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id bias,
|
||||||
const IR::Value& offset);
|
const IR::Value& offset);
|
||||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id lod,
|
||||||
|
@ -384,8 +386,8 @@ Id EmitImageFetch(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, const
|
||||||
Id lod, Id ms);
|
Id lod, Id ms);
|
||||||
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
Id EmitImageQueryDimensions(EmitContext& ctx, IR::Inst* inst, u32 handle, Id lod, bool skip_mips);
|
||||||
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
Id EmitImageQueryLod(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords);
|
||||||
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives,
|
Id EmitImageGradient(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id derivatives_dx,
|
||||||
const IR::Value& offset, Id lod_clamp);
|
Id derivatives_dy, const IR::Value& offset, const IR::Value& lod_clamp);
|
||||||
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
Id EmitImageRead(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords);
|
||||||
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
void EmitImageWrite(EmitContext& ctx, IR::Inst* inst, u32 handle, Id coords, Id color);
|
||||||
|
|
||||||
|
|
|
@ -411,7 +411,7 @@ void Translator::IMAGE_LOAD(bool has_mip, const GcnInst& inst) {
|
||||||
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
ir.GetVectorReg(addr_reg + 2), ir.GetVectorReg(addr_reg + 3));
|
||||||
|
|
||||||
IR::TextureInstInfo info{};
|
IR::TextureInstInfo info{};
|
||||||
info.explicit_lod.Assign(has_mip);
|
info.has_lod.Assign(has_mip);
|
||||||
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
const IR::Value texel = ir.ImageFetch(handle, body, {}, {}, {}, info);
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
|
@ -513,6 +513,76 @@ void Translator::IMAGE_ATOMIC(AtomicOp op, const GcnInst& inst) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
IR::Value EmitImageSample(IR::IREmitter& ir, const GcnInst& inst, const IR::ScalarReg tsharp_reg,
|
||||||
|
const IR::ScalarReg sampler_reg, const IR::VectorReg addr_reg,
|
||||||
|
bool gather) {
|
||||||
|
const auto& mimg = inst.control.mimg;
|
||||||
|
const auto flags = MimgModifierFlags(mimg.mod);
|
||||||
|
|
||||||
|
IR::TextureInstInfo info{};
|
||||||
|
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
||||||
|
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
||||||
|
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
||||||
|
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
||||||
|
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
||||||
|
info.has_lod.Assign(flags.any(MimgModifier::Lod));
|
||||||
|
info.is_array.Assign(mimg.da);
|
||||||
|
|
||||||
|
if (gather) {
|
||||||
|
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
||||||
|
info.is_gather.Assign(true);
|
||||||
|
} else {
|
||||||
|
info.has_derivatives.Assign(flags.test(MimgModifier::Derivative));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
||||||
|
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
||||||
|
// binding index.
|
||||||
|
const IR::Value handle =
|
||||||
|
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
||||||
|
|
||||||
|
// Determine how many address registers need to be passed.
|
||||||
|
// The image type is unknown, so add all 4 possible base registers and resolve later.
|
||||||
|
int num_addr_regs = 4;
|
||||||
|
if (info.has_offset) {
|
||||||
|
++num_addr_regs;
|
||||||
|
}
|
||||||
|
if (info.has_bias) {
|
||||||
|
++num_addr_regs;
|
||||||
|
}
|
||||||
|
if (info.is_depth) {
|
||||||
|
++num_addr_regs;
|
||||||
|
}
|
||||||
|
if (info.has_derivatives) {
|
||||||
|
// The image type is unknown, so add all 6 possible derivative registers and resolve later.
|
||||||
|
num_addr_regs += 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fetch all the address registers to pass in the IR instruction. There can be up to 13
|
||||||
|
// registers.
|
||||||
|
const auto get_addr_reg = [&](int index) -> IR::F32 {
|
||||||
|
if (index >= num_addr_regs) {
|
||||||
|
return ir.Imm32(0.f);
|
||||||
|
}
|
||||||
|
return ir.GetVectorReg<IR::F32>(addr_reg + index);
|
||||||
|
};
|
||||||
|
const IR::Value address1 =
|
||||||
|
ir.CompositeConstruct(get_addr_reg(0), get_addr_reg(1), get_addr_reg(2), get_addr_reg(3));
|
||||||
|
const IR::Value address2 =
|
||||||
|
ir.CompositeConstruct(get_addr_reg(4), get_addr_reg(5), get_addr_reg(6), get_addr_reg(7));
|
||||||
|
const IR::Value address3 =
|
||||||
|
ir.CompositeConstruct(get_addr_reg(8), get_addr_reg(9), get_addr_reg(10), get_addr_reg(11));
|
||||||
|
const IR::Value address4 = get_addr_reg(12);
|
||||||
|
|
||||||
|
// Issue the placeholder IR instruction.
|
||||||
|
IR::Value texel = ir.ImageSampleRaw(handle, address1, address2, address3, address4, info);
|
||||||
|
if (info.is_depth && !gather) {
|
||||||
|
// For non-gather depth sampling, only return a single value.
|
||||||
|
texel = ir.CompositeExtract(texel, 0);
|
||||||
|
}
|
||||||
|
return texel;
|
||||||
|
}
|
||||||
|
|
||||||
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
const auto& mimg = inst.control.mimg;
|
const auto& mimg = inst.control.mimg;
|
||||||
IR::VectorReg addr_reg{inst.src[0].code};
|
IR::VectorReg addr_reg{inst.src[0].code};
|
||||||
|
@ -521,72 +591,7 @@ void Translator::IMAGE_SAMPLE(const GcnInst& inst) {
|
||||||
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
||||||
const auto flags = MimgModifierFlags(mimg.mod);
|
const auto flags = MimgModifierFlags(mimg.mod);
|
||||||
|
|
||||||
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, false);
|
||||||
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
|
||||||
// binding index.
|
|
||||||
const IR::Value handle =
|
|
||||||
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
|
||||||
|
|
||||||
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
|
|
||||||
// Set Architecture
|
|
||||||
const IR::U32 offset =
|
|
||||||
flags.test(MimgModifier::Offset) ? ir.GetVectorReg<IR::U32>(addr_reg++) : IR::U32{};
|
|
||||||
const IR::F32 bias =
|
|
||||||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
|
||||||
const IR::F32 dref =
|
|
||||||
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
|
||||||
const IR::Value derivatives = [&] -> IR::Value {
|
|
||||||
if (!flags.test(MimgModifier::Derivative)) {
|
|
||||||
return {};
|
|
||||||
}
|
|
||||||
addr_reg = addr_reg + 4;
|
|
||||||
return ir.CompositeConstruct(
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg - 4), ir.GetVectorReg<IR::F32>(addr_reg - 3),
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg - 2), ir.GetVectorReg<IR::F32>(addr_reg - 1));
|
|
||||||
}();
|
|
||||||
|
|
||||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
|
||||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
|
||||||
// in coords field of the instruction. Then the resource tracking pass will patch the
|
|
||||||
// IR instruction to fill in lod_clamp field.
|
|
||||||
const IR::Value body = ir.CompositeConstruct(
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
|
||||||
|
|
||||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
|
||||||
// T#. We don't have access to T# though until resource tracking pass. For now assume if
|
|
||||||
// derivatives are present, that a 2D image is bound.
|
|
||||||
const bool has_derivatives = flags.test(MimgModifier::Derivative);
|
|
||||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
|
||||||
|
|
||||||
IR::TextureInstInfo info{};
|
|
||||||
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
|
||||||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
|
||||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
|
||||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
|
||||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
|
||||||
info.explicit_lod.Assign(explicit_lod);
|
|
||||||
info.has_derivatives.Assign(has_derivatives);
|
|
||||||
info.is_array.Assign(mimg.da);
|
|
||||||
|
|
||||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
|
||||||
const IR::Value texel = [&]() -> IR::Value {
|
|
||||||
if (has_derivatives) {
|
|
||||||
return ir.ImageGradient(handle, body, derivatives, offset, {}, info);
|
|
||||||
}
|
|
||||||
if (!flags.test(MimgModifier::Pcf)) {
|
|
||||||
if (explicit_lod) {
|
|
||||||
return ir.ImageSampleExplicitLod(handle, body, offset, info);
|
|
||||||
} else {
|
|
||||||
return ir.ImageSampleImplicitLod(handle, body, bias, offset, info);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (explicit_lod) {
|
|
||||||
return ir.ImageSampleDrefExplicitLod(handle, body, dref, offset, info);
|
|
||||||
}
|
|
||||||
return ir.ImageSampleDrefImplicitLod(handle, body, dref, bias, offset, info);
|
|
||||||
}();
|
|
||||||
|
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
if (((mimg.dmask >> i) & 1) == 0) {
|
if (((mimg.dmask >> i) & 1) == 0) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -609,60 +614,13 @@ void Translator::IMAGE_GATHER(const GcnInst& inst) {
|
||||||
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
const IR::ScalarReg sampler_reg{inst.src[3].code * 4};
|
||||||
const auto flags = MimgModifierFlags(mimg.mod);
|
const auto flags = MimgModifierFlags(mimg.mod);
|
||||||
|
|
||||||
// Load first dword of T# and S#. We will use them as the handle that will guide resource
|
|
||||||
// tracking pass where to read the sharps. This will later also get patched to the SPIRV texture
|
|
||||||
// binding index.
|
|
||||||
const IR::Value handle =
|
|
||||||
ir.CompositeConstruct(ir.GetScalarReg(tsharp_reg), ir.GetScalarReg(sampler_reg));
|
|
||||||
|
|
||||||
// Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
|
|
||||||
// Set Architecture
|
|
||||||
const IR::Value offset =
|
|
||||||
flags.test(MimgModifier::Offset) ? ir.GetVectorReg(addr_reg++) : IR::Value{};
|
|
||||||
const IR::F32 bias =
|
|
||||||
flags.test(MimgModifier::LodBias) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
|
||||||
const IR::F32 dref =
|
|
||||||
flags.test(MimgModifier::Pcf) ? ir.GetVectorReg<IR::F32>(addr_reg++) : IR::F32{};
|
|
||||||
|
|
||||||
// Derivatives are tricky because their number depends on the texture type which is located in
|
|
||||||
// T#. We don't have access to T# though until resource tracking pass. For now assume no
|
|
||||||
// derivatives are present, otherwise we don't know where coordinates are placed in the address
|
|
||||||
// stream.
|
|
||||||
ASSERT_MSG(!flags.test(MimgModifier::Derivative), "Derivative image instruction");
|
|
||||||
|
|
||||||
// Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
|
|
||||||
// Since these are at most 4 dwords, we load them into a single uvec4 and place them
|
|
||||||
// in coords field of the instruction. Then the resource tracking pass will patch the
|
|
||||||
// IR instruction to fill in lod_clamp field.
|
|
||||||
const IR::Value body = ir.CompositeConstruct(
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg), ir.GetVectorReg<IR::F32>(addr_reg + 1),
|
|
||||||
ir.GetVectorReg<IR::F32>(addr_reg + 2), ir.GetVectorReg<IR::F32>(addr_reg + 3));
|
|
||||||
|
|
||||||
const bool explicit_lod = flags.any(MimgModifier::Level0, MimgModifier::Lod);
|
|
||||||
|
|
||||||
IR::TextureInstInfo info{};
|
|
||||||
info.is_depth.Assign(flags.test(MimgModifier::Pcf));
|
|
||||||
info.has_bias.Assign(flags.test(MimgModifier::LodBias));
|
|
||||||
info.has_lod_clamp.Assign(flags.test(MimgModifier::LodClamp));
|
|
||||||
info.force_level0.Assign(flags.test(MimgModifier::Level0));
|
|
||||||
info.has_offset.Assign(flags.test(MimgModifier::Offset));
|
|
||||||
// info.explicit_lod.Assign(explicit_lod);
|
|
||||||
info.gather_comp.Assign(std::bit_width(mimg.dmask) - 1);
|
|
||||||
info.is_array.Assign(mimg.da);
|
|
||||||
|
|
||||||
// Issue IR instruction, leaving unknown fields blank to patch later.
|
|
||||||
const IR::Value texel = [&]() -> IR::Value {
|
|
||||||
const IR::F32 lod = flags.test(MimgModifier::Level0) ? ir.Imm32(0.f) : IR::F32{};
|
|
||||||
if (!flags.test(MimgModifier::Pcf)) {
|
|
||||||
return ir.ImageGather(handle, body, offset, info);
|
|
||||||
}
|
|
||||||
ASSERT(mimg.dmask & 1); // should be always 1st (R) component
|
|
||||||
return ir.ImageGatherDref(handle, body, offset, dref, info);
|
|
||||||
}();
|
|
||||||
|
|
||||||
// For gather4 instructions dmask selects which component to read and must have
|
// For gather4 instructions dmask selects which component to read and must have
|
||||||
// only one bit set to 1
|
// only one bit set to 1
|
||||||
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
|
ASSERT_MSG(std::popcount(mimg.dmask) == 1, "Unexpected bits in gather dmask");
|
||||||
|
// should be always 1st (R) component for depth
|
||||||
|
ASSERT(!flags.test(MimgModifier::Pcf) || mimg.dmask & 1);
|
||||||
|
|
||||||
|
const IR::Value texel = EmitImageSample(ir, inst, tsharp_reg, sampler_reg, addr_reg, true);
|
||||||
for (u32 i = 0; i < 4; i++) {
|
for (u32 i = 0; i < 4; i++) {
|
||||||
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
const IR::F32 value = IR::F32{ir.CompositeExtract(texel, i)};
|
||||||
ir.SetVectorReg(dest_reg++, value);
|
ir.SetVectorReg(dest_reg++, value);
|
||||||
|
|
|
@ -1492,27 +1492,34 @@ Value IREmitter::ImageAtomicExchange(const Value& handle, const Value& coords, c
|
||||||
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
|
return Inst(Opcode::ImageAtomicExchange32, Flags{info}, handle, coords, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& body, const F32& bias,
|
Value IREmitter::ImageSampleRaw(const Value& handle, const Value& address1, const Value& address2,
|
||||||
const U32& offset, TextureInstInfo info) {
|
const Value& address3, const Value& address4,
|
||||||
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, body, bias, offset);
|
TextureInstInfo info) {
|
||||||
|
return Inst(Opcode::ImageSampleRaw, Flags{info}, handle, address1, address2, address3,
|
||||||
|
address4);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& body, const U32& offset,
|
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
||||||
TextureInstInfo info) {
|
const Value& offset, TextureInstInfo info) {
|
||||||
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, body, IR::F32{}, offset);
|
return Inst(Opcode::ImageSampleImplicitLod, Flags{info}, handle, coords, bias, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& body, const F32& dref,
|
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
|
||||||
const F32& bias, const U32& offset,
|
const Value& offset, TextureInstInfo info) {
|
||||||
TextureInstInfo info) {
|
return Inst(Opcode::ImageSampleExplicitLod, Flags{info}, handle, coords, lod, offset);
|
||||||
return Inst<F32>(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, body, dref, bias,
|
|
||||||
offset);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& body, const F32& dref,
|
Value IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
|
||||||
const U32& offset, TextureInstInfo info) {
|
const F32& dref, const F32& bias, const Value& offset,
|
||||||
return Inst<F32>(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, body, dref, IR::F32{},
|
TextureInstInfo info) {
|
||||||
offset);
|
return Inst(Opcode::ImageSampleDrefImplicitLod, Flags{info}, handle, coords, dref, bias,
|
||||||
|
offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
Value IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
|
||||||
|
const F32& dref, const F32& lod, const Value& offset,
|
||||||
|
TextureInstInfo info) {
|
||||||
|
return Inst(Opcode::ImageSampleDrefExplicitLod, Flags{info}, handle, coords, dref, lod, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
Value IREmitter::ImageGather(const Value& handle, const Value& coords, const Value& offset,
|
||||||
|
@ -1544,9 +1551,11 @@ Value IREmitter::ImageQueryLod(const Value& handle, const Value& coords, Texture
|
||||||
return Inst(Opcode::ImageQueryLod, Flags{info}, handle, coords);
|
return Inst(Opcode::ImageQueryLod, Flags{info}, handle, coords);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageGradient(const Value& handle, const Value& coords, const Value& derivatives,
|
Value IREmitter::ImageGradient(const Value& handle, const Value& coords,
|
||||||
|
const Value& derivatives_dx, const Value& derivatives_dy,
|
||||||
const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
|
const Value& offset, const F32& lod_clamp, TextureInstInfo info) {
|
||||||
return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives, offset, lod_clamp);
|
return Inst(Opcode::ImageGradient, Flags{info}, handle, coords, derivatives_dx, derivatives_dy,
|
||||||
|
offset, lod_clamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
|
Value IREmitter::ImageRead(const Value& handle, const Value& coords, TextureInstInfo info) {
|
||||||
|
|
|
@ -277,20 +277,25 @@ public:
|
||||||
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageAtomicExchange(const Value& handle, const Value& coords,
|
||||||
const Value& value, TextureInstInfo info);
|
const Value& value, TextureInstInfo info);
|
||||||
|
|
||||||
|
[[nodiscard]] Value ImageSampleRaw(const Value& handle, const Value& address1,
|
||||||
|
const Value& address2, const Value& address3,
|
||||||
|
const Value& address4, TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
|
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& body,
|
||||||
const F32& bias, const U32& offset,
|
const F32& bias, const Value& offset,
|
||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body,
|
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& body,
|
||||||
const U32& offset, TextureInstInfo info);
|
const F32& lod, const Value& offset,
|
||||||
|
TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
|
[[nodiscard]] Value ImageSampleDrefImplicitLod(const Value& handle, const Value& body,
|
||||||
const F32& dref, const F32& bias,
|
const F32& dref, const F32& bias,
|
||||||
const U32& offset, TextureInstInfo info);
|
const Value& offset, TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
|
[[nodiscard]] Value ImageSampleDrefExplicitLod(const Value& handle, const Value& body,
|
||||||
const F32& dref, const U32& offset,
|
const F32& dref, const F32& lod,
|
||||||
TextureInstInfo info);
|
const Value& offset, TextureInstInfo info);
|
||||||
|
|
||||||
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
[[nodiscard]] Value ImageQueryDimension(const Value& handle, const U32& lod,
|
||||||
const U1& skip_mips);
|
const U1& skip_mips);
|
||||||
|
@ -306,8 +311,9 @@ public:
|
||||||
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
[[nodiscard]] Value ImageFetch(const Value& handle, const Value& coords, const Value& offset,
|
||||||
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
const U32& lod, const U32& multisampling, TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
[[nodiscard]] Value ImageGradient(const Value& handle, const Value& coords,
|
||||||
const Value& derivatives, const Value& offset,
|
const Value& derivatives_dx, const Value& derivatives_dy,
|
||||||
const F32& lod_clamp, TextureInstInfo info);
|
const Value& offset, const F32& lod_clamp,
|
||||||
|
TextureInstInfo info);
|
||||||
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
|
[[nodiscard]] Value ImageRead(const Value& handle, const Value& coords, TextureInstInfo info);
|
||||||
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
void ImageWrite(const Value& handle, const Value& coords, const Value& color,
|
||||||
TextureInstInfo info);
|
TextureInstInfo info);
|
||||||
|
|
|
@ -21,7 +21,7 @@ namespace Detail {
|
||||||
struct OpcodeMeta {
|
struct OpcodeMeta {
|
||||||
std::string_view name;
|
std::string_view name;
|
||||||
Type type;
|
Type type;
|
||||||
std::array<Type, 5> arg_types;
|
std::array<Type, 6> arg_types;
|
||||||
};
|
};
|
||||||
|
|
||||||
// using enum Type;
|
// using enum Type;
|
||||||
|
|
|
@ -317,16 +317,17 @@ OPCODE(ConvertU16U32, U16, U32,
|
||||||
OPCODE(ConvertU32U16, U32, U16, )
|
OPCODE(ConvertU32U16, U32, U16, )
|
||||||
|
|
||||||
// Image operations
|
// Image operations
|
||||||
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
OPCODE(ImageSampleRaw, F32x4, Opaque, F32x4, F32x4, F32x4, F32, )
|
||||||
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, U32, Opaque, )
|
OPCODE(ImageSampleImplicitLod, F32x4, Opaque, F32x4, F32, Opaque, )
|
||||||
OPCODE(ImageSampleDrefImplicitLod, F32, Opaque, Opaque, Opaque, F32, Opaque, )
|
OPCODE(ImageSampleExplicitLod, F32x4, Opaque, Opaque, F32, Opaque, )
|
||||||
OPCODE(ImageSampleDrefExplicitLod, F32, Opaque, Opaque, Opaque, U32, Opaque, )
|
OPCODE(ImageSampleDrefImplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
||||||
|
OPCODE(ImageSampleDrefExplicitLod, F32x4, Opaque, Opaque, F32, F32, Opaque, )
|
||||||
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
OPCODE(ImageGather, F32x4, Opaque, Opaque, Opaque, )
|
||||||
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
OPCODE(ImageGatherDref, F32x4, Opaque, Opaque, Opaque, F32, )
|
||||||
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
OPCODE(ImageFetch, F32x4, Opaque, Opaque, Opaque, U32, Opaque, )
|
||||||
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
OPCODE(ImageQueryDimensions, U32x4, Opaque, U32, U1, )
|
||||||
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
OPCODE(ImageQueryLod, F32x4, Opaque, Opaque, )
|
||||||
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, )
|
OPCODE(ImageGradient, F32x4, Opaque, Opaque, Opaque, Opaque, Opaque, F32, )
|
||||||
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
OPCODE(ImageRead, U32x4, Opaque, Opaque, )
|
||||||
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
OPCODE(ImageWrite, Void, Opaque, Opaque, U32x4, )
|
||||||
|
|
||||||
|
|
|
@ -132,38 +132,16 @@ bool IsImageStorageInstruction(const IR::Inst& inst) {
|
||||||
|
|
||||||
bool IsImageInstruction(const IR::Inst& inst) {
|
bool IsImageInstruction(const IR::Inst& inst) {
|
||||||
switch (inst.GetOpcode()) {
|
switch (inst.GetOpcode()) {
|
||||||
case IR::Opcode::ImageSampleExplicitLod:
|
|
||||||
case IR::Opcode::ImageSampleImplicitLod:
|
|
||||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
|
||||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
|
||||||
case IR::Opcode::ImageFetch:
|
case IR::Opcode::ImageFetch:
|
||||||
case IR::Opcode::ImageGather:
|
|
||||||
case IR::Opcode::ImageGatherDref:
|
|
||||||
case IR::Opcode::ImageQueryDimensions:
|
case IR::Opcode::ImageQueryDimensions:
|
||||||
case IR::Opcode::ImageQueryLod:
|
case IR::Opcode::ImageQueryLod:
|
||||||
case IR::Opcode::ImageGradient:
|
case IR::Opcode::ImageSampleRaw:
|
||||||
return true;
|
return true;
|
||||||
default:
|
default:
|
||||||
return IsImageStorageInstruction(inst);
|
return IsImageStorageInstruction(inst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 ImageOffsetArgumentPosition(const IR::Inst& inst) {
|
|
||||||
switch (inst.GetOpcode()) {
|
|
||||||
case IR::Opcode::ImageGather:
|
|
||||||
case IR::Opcode::ImageGatherDref:
|
|
||||||
return 2;
|
|
||||||
case IR::Opcode::ImageSampleExplicitLod:
|
|
||||||
case IR::Opcode::ImageSampleImplicitLod:
|
|
||||||
return 3;
|
|
||||||
case IR::Opcode::ImageSampleDrefExplicitLod:
|
|
||||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
|
||||||
return 4;
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class Descriptors {
|
class Descriptors {
|
||||||
public:
|
public:
|
||||||
explicit Descriptors(Info& info_)
|
explicit Descriptors(Info& info_)
|
||||||
|
@ -467,6 +445,185 @@ IR::Value PatchCubeCoord(IR::IREmitter& ir, const IR::Value& s, const IR::Value&
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resolves a raw sample instruction into a concrete gather / gradient / sample instruction.
//
// The sampler sharp referenced by `producer` is registered with `descriptors`, the packed
// address registers carried in arguments 1..4 of `inst` are decoded in hardware order
// (offset, bias, depth reference, derivatives, coordinates, lod / lod clamp), and every use
// of `inst` is redirected to the newly emitted instruction.
//
// block         - block that owns `inst`; new instructions are emitted right before `inst`.
// inst          - the raw sample instruction being resolved.
// info          - not referenced by this function.
// descriptors   - receives the sampler resource; its return value is the sampler binding.
// producer      - instruction that built the resource handle; argument 1 is the sampler handle.
// image_binding - binding index of the already registered image resource.
// image         - image sharp; its type selects how many address components are consumed.
void PatchImageSampleInstruction(IR::Block& block, IR::Inst& inst, Info& info,
                                 Descriptors& descriptors, const IR::Inst* producer,
                                 const u32 image_binding, const AmdGpu::Image& image) {
    // Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
    ASSERT(producer->GetOpcode() == IR::Opcode::CompositeConstructU32x2);
    const IR::Value& sampler_arg = producer->Arg(1);
    u32 sampler_binding{};
    if (sampler_arg.IsImmediate()) {
        // Inline sampler resource.
        LOG_WARNING(Render_Vulkan, "Inline sampler detected");
        sampler_binding = descriptors.Add(SamplerResource{
            .sgpr_base = std::numeric_limits<u32>::max(),
            .dword_offset = 0,
            .inline_sampler = AmdGpu::Sampler{.raw0 = sampler_arg.U32()},
        });
    } else {
        // Normal sampler resource.
        const auto ssharp_inst = sampler_arg.InstRecursive();
        const auto& [ssharp_source, no_aniso] = TryDisableAnisoLod0(ssharp_inst);
        const auto tracked_sharp = TrackSharp(ssharp_source);
        sampler_binding = descriptors.Add(SamplerResource{
            .sgpr_base = tracked_sharp.sgpr_base,
            .dword_offset = tracked_sharp.dword_offset,
            .associated_image = image_binding,
            .disable_aniso = no_aniso,
        });
    }

    IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};

    const auto inst_info = inst.Flags<IR::TextureInstInfo>();

    // The sampler binding is packed above the image binding, starting at bit 16.
    const u32 packed_bindings = image_binding | (sampler_binding << 16);
    const IR::U32 handle = ir.Imm32(packed_bindings);

    // Arguments 1..3 each contribute four address registers through their own arguments;
    // argument 4 is used directly as the thirteenth register.
    IR::Inst* const packed_regs[]{
        inst.Arg(1).InstRecursive(),
        inst.Arg(2).InstRecursive(),
        inst.Arg(3).InstRecursive(),
    };
    IR::Inst* const last_reg = inst.Arg(4).InstRecursive();
    const auto addr_reg_at = [&](u32 index) -> IR::F32 {
        if (index < 12) {
            return IR::F32{packed_regs[index / 4]->Arg(index % 4)};
        }
        if (index == 12) {
            return IR::F32{last_reg};
        }
        UNREACHABLE();
    };

    // Address registers are consumed strictly front to back; `take` hands out the next one.
    u32 cursor = 0;
    const auto take = [&]() -> IR::F32 { return addr_reg_at(cursor++); };
    const auto take_if = [&](bool present) -> IR::F32 { return present ? take() : IR::F32{}; };

    // Load first address components as denoted in 8.2.4 VGPR Usage Sea Islands Series Instruction
    // Set Architecture
    const IR::Value offset = [&]() -> IR::Value {
        if (!inst_info.has_offset) {
            return IR::U32{};
        }

        // The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
        const IR::Value packed_offset = take();

        const auto component = [&](u32 shift) -> IR::U32 {
            if (!packed_offset.IsImmediate()) {
                return ir.BitFieldExtract(IR::U32{packed_offset}, ir.Imm32(shift), ir.Imm32(6),
                                          true);
            }
            // Constant offset: sign-extend the six-bit field at translation time.
            const u16 bits = (packed_offset.U32() >> shift) & 0x3F;
            return ir.Imm32(s32(bits << 26) >> 26);
        };

        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D:
        case AmdGpu::ImageType::Color1DArray:
            return component(0);
        case AmdGpu::ImageType::Color2D:
        case AmdGpu::ImageType::Color2DArray:
        case AmdGpu::ImageType::Color2DMsaa:
            return ir.CompositeConstruct(component(0), component(8));
        case AmdGpu::ImageType::Color3D:
        case AmdGpu::ImageType::Cube:
            return ir.CompositeConstruct(component(0), component(8), component(16));
        default:
            UNREACHABLE();
        }
    }();
    const IR::F32 bias = take_if(bool(inst_info.has_bias));
    const IR::F32 dref = take_if(bool(inst_info.is_depth));
    const auto [derivatives_dx, derivatives_dy] = [&]() -> std::pair<IR::Value, IR::Value> {
        if (!inst_info.has_derivatives) {
            return {};
        }
        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D:
        case AmdGpu::ImageType::Color1DArray: {
            // du/dx, du/dy
            const IR::F32 du_dx = take();
            const IR::F32 du_dy = take();
            return {du_dx, du_dy};
        }
        case AmdGpu::ImageType::Color2D:
        case AmdGpu::ImageType::Color2DArray:
        case AmdGpu::ImageType::Color2DMsaa: {
            // (du/dx, dv/dx), (du/dy, dv/dy)
            const IR::F32 du_dx = take();
            const IR::F32 dv_dx = take();
            const IR::F32 du_dy = take();
            const IR::F32 dv_dy = take();
            return {ir.CompositeConstruct(du_dx, dv_dx), ir.CompositeConstruct(du_dy, dv_dy)};
        }
        case AmdGpu::ImageType::Color3D:
        case AmdGpu::ImageType::Cube: {
            // (du/dx, dv/dx, dw/dx), (du/dy, dv/dy, dw/dy)
            const IR::F32 du_dx = take();
            const IR::F32 dv_dx = take();
            const IR::F32 dw_dx = take();
            const IR::F32 du_dy = take();
            const IR::F32 dv_dy = take();
            const IR::F32 dw_dy = take();
            return {ir.CompositeConstruct(du_dx, dv_dx, dw_dx),
                    ir.CompositeConstruct(du_dy, dv_dy, dw_dy)};
        }
        default:
            UNREACHABLE();
        }
    }();

    // Now we can load body components as noted in Table 8.9 Image Opcodes with Sampler
    const IR::Value coords = [&]() -> IR::Value {
        switch (image.GetType()) {
        case AmdGpu::ImageType::Color1D: // x
            return take();
        case AmdGpu::ImageType::Color1DArray: // x, slice
        case AmdGpu::ImageType::Color2D: {    // x, y
            const IR::F32 c0 = take();
            const IR::F32 c1 = take();
            return ir.CompositeConstruct(c0, c1);
        }
        case AmdGpu::ImageType::Color2DArray: // x, y, slice
        case AmdGpu::ImageType::Color2DMsaa:  // x, y, frag
        case AmdGpu::ImageType::Color3D: {    // x, y, z
            const IR::F32 c0 = take();
            const IR::F32 c1 = take();
            const IR::F32 c2 = take();
            return ir.CompositeConstruct(c0, c1, c2);
        }
        case AmdGpu::ImageType::Cube: { // x, y, face
            const IR::F32 x = take();
            const IR::F32 y = take();
            const IR::F32 face = take();
            return PatchCubeCoord(ir, x, y, face, false, inst_info.is_array);
        }
        default:
            UNREACHABLE();
        }
    }();

    // An explicit lod and a lod clamp are never expected on the same instruction.
    ASSERT(!inst_info.has_lod || !inst_info.has_lod_clamp);
    const bool explicit_lod = inst_info.has_lod || inst_info.force_level0;
    const IR::F32 lod = [&]() -> IR::F32 {
        if (inst_info.has_lod) {
            return take();
        }
        if (inst_info.force_level0) {
            return ir.Imm32(0.0f);
        }
        return IR::F32{};
    }();
    const IR::F32 lod_clamp = take_if(bool(inst_info.has_lod_clamp));

    // Pick the concrete opcode: gather first, then user derivatives, then depth compare,
    // each of the latter two split by explicit vs. implicit lod.
    const IR::Value resolved = [&]() -> IR::Value {
        if (inst_info.is_gather) {
            if (!inst_info.is_depth) {
                return ir.ImageGather(handle, coords, offset, inst_info);
            }
            return ir.ImageGatherDref(handle, coords, offset, dref, inst_info);
        }
        if (inst_info.has_derivatives) {
            return ir.ImageGradient(handle, coords, derivatives_dx, derivatives_dy, offset,
                                    lod_clamp, inst_info);
        }
        if (inst_info.is_depth) {
            if (!explicit_lod) {
                return ir.ImageSampleDrefImplicitLod(handle, coords, dref, bias, offset,
                                                     inst_info);
            }
            return ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, inst_info);
        }
        if (!explicit_lod) {
            return ir.ImageSampleImplicitLod(handle, coords, bias, offset, inst_info);
        }
        return ir.ImageSampleExplicitLod(handle, coords, lod, offset, inst_info);
    }();
    inst.ReplaceUsesWith(resolved);
}
|
||||||
|
|
||||||
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descriptors& descriptors) {
|
||||||
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
const auto pred = [](const IR::Inst* inst) -> std::optional<const IR::Inst*> {
|
||||||
const auto opcode = inst->GetOpcode();
|
const auto opcode = inst->GetOpcode();
|
||||||
|
@ -498,40 +655,18 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
.sgpr_base = tsharp.sgpr_base,
|
.sgpr_base = tsharp.sgpr_base,
|
||||||
.dword_offset = tsharp.dword_offset,
|
.dword_offset = tsharp.dword_offset,
|
||||||
.type = type,
|
.type = type,
|
||||||
.nfmt = static_cast<AmdGpu::NumberFormat>(image.GetNumberFmt()),
|
.nfmt = image.GetNumberFmt(),
|
||||||
.is_storage = is_storage,
|
.is_storage = is_storage,
|
||||||
.is_depth = bool(inst_info.is_depth),
|
.is_depth = bool(inst_info.is_depth),
|
||||||
.is_atomic = IsImageAtomicInstruction(inst),
|
.is_atomic = IsImageAtomicInstruction(inst),
|
||||||
.is_array = bool(inst_info.is_array),
|
.is_array = bool(inst_info.is_array),
|
||||||
});
|
});
|
||||||
|
|
||||||
// Read sampler sharp. This doesn't exist for IMAGE_LOAD/IMAGE_STORE instructions
|
// Sample instructions must be resolved into a new instruction using address register data.
|
||||||
const u32 sampler_binding = [&] {
|
if (inst.GetOpcode() == IR::Opcode::ImageSampleRaw) {
|
||||||
if (!has_sampler) {
|
PatchImageSampleInstruction(block, inst, info, descriptors, producer, image_binding, image);
|
||||||
return 0U;
|
return;
|
||||||
}
|
}
|
||||||
const IR::Value& handle = producer->Arg(1);
|
|
||||||
// Inline sampler resource.
|
|
||||||
if (handle.IsImmediate()) {
|
|
||||||
LOG_WARNING(Render_Vulkan, "Inline sampler detected");
|
|
||||||
return descriptors.Add(SamplerResource{
|
|
||||||
.sgpr_base = std::numeric_limits<u32>::max(),
|
|
||||||
.dword_offset = 0,
|
|
||||||
.inline_sampler = AmdGpu::Sampler{.raw0 = handle.U32()},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
// Normal sampler resource.
|
|
||||||
const auto ssharp_handle = handle.InstRecursive();
|
|
||||||
const auto& [ssharp_ud, disable_aniso] = TryDisableAnisoLod0(ssharp_handle);
|
|
||||||
const auto ssharp = TrackSharp(ssharp_ud);
|
|
||||||
return descriptors.Add(SamplerResource{
|
|
||||||
.sgpr_base = ssharp.sgpr_base,
|
|
||||||
.dword_offset = ssharp.dword_offset,
|
|
||||||
.associated_image = image_binding,
|
|
||||||
.disable_aniso = disable_aniso,
|
|
||||||
});
|
|
||||||
}();
|
|
||||||
image_binding |= (sampler_binding << 16);
|
|
||||||
|
|
||||||
// Patch image handle
|
// Patch image handle
|
||||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||||
|
@ -568,62 +703,9 @@ void PatchImageInstruction(IR::Block& block, IR::Inst& inst, Info& info, Descrip
|
||||||
}();
|
}();
|
||||||
inst.SetArg(1, coords);
|
inst.SetArg(1, coords);
|
||||||
|
|
||||||
if (inst_info.has_offset) {
|
if (inst_info.has_lod) {
|
||||||
// The offsets are six-bit signed integers: X=[5:0], Y=[13:8], and Z=[21:16].
|
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch);
|
||||||
const u32 arg_pos = ImageOffsetArgumentPosition(inst);
|
inst.SetArg(3, arg);
|
||||||
const IR::Value arg = inst.Arg(arg_pos);
|
|
||||||
ASSERT_MSG(arg.Type() == IR::Type::U32, "Unexpected offset type");
|
|
||||||
|
|
||||||
const auto read = [&](u32 offset) -> IR::U32 {
|
|
||||||
if (arg.IsImmediate()) {
|
|
||||||
const u16 comp = (arg.U32() >> offset) & 0x3F;
|
|
||||||
return ir.Imm32(s32(comp << 26) >> 26);
|
|
||||||
}
|
|
||||||
return ir.BitFieldExtract(IR::U32{arg}, ir.Imm32(offset), ir.Imm32(6), true);
|
|
||||||
};
|
|
||||||
|
|
||||||
switch (image.GetType()) {
|
|
||||||
case AmdGpu::ImageType::Color1D:
|
|
||||||
case AmdGpu::ImageType::Color1DArray:
|
|
||||||
inst.SetArg(arg_pos, read(0));
|
|
||||||
break;
|
|
||||||
case AmdGpu::ImageType::Color2D:
|
|
||||||
case AmdGpu::ImageType::Color2DArray:
|
|
||||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8)));
|
|
||||||
break;
|
|
||||||
case AmdGpu::ImageType::Color3D:
|
|
||||||
inst.SetArg(arg_pos, ir.CompositeConstruct(read(0), read(8), read(16)));
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
UNREACHABLE();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (inst_info.has_derivatives) {
|
|
||||||
ASSERT_MSG(image.GetType() == AmdGpu::ImageType::Color2D ||
|
|
||||||
image.GetType() == AmdGpu::ImageType::Color2DArray,
|
|
||||||
"User derivatives only supported for 2D images");
|
|
||||||
}
|
|
||||||
if (inst_info.has_lod_clamp) {
|
|
||||||
const u32 arg_pos = [&]() -> u32 {
|
|
||||||
switch (inst.GetOpcode()) {
|
|
||||||
case IR::Opcode::ImageSampleImplicitLod:
|
|
||||||
return 2;
|
|
||||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
|
||||||
return 3;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return inst_info.is_depth ? 5 : 4;
|
|
||||||
}();
|
|
||||||
inst.SetArg(arg_pos, arg);
|
|
||||||
}
|
|
||||||
if (inst_info.explicit_lod) {
|
|
||||||
ASSERT(inst.GetOpcode() == IR::Opcode::ImageFetch ||
|
|
||||||
inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ||
|
|
||||||
inst.GetOpcode() == IR::Opcode::ImageSampleDrefExplicitLod);
|
|
||||||
const u32 pos = inst.GetOpcode() == IR::Opcode::ImageSampleExplicitLod ? 2 : 3;
|
|
||||||
const IR::Value value = inst_info.force_level0 ? ir.Imm32(0.f) : arg;
|
|
||||||
inst.SetArg(pos, value);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,11 +33,12 @@ union TextureInstInfo {
|
||||||
BitField<1, 1, u32> has_bias;
|
BitField<1, 1, u32> has_bias;
|
||||||
BitField<2, 1, u32> has_lod_clamp;
|
BitField<2, 1, u32> has_lod_clamp;
|
||||||
BitField<3, 1, u32> force_level0;
|
BitField<3, 1, u32> force_level0;
|
||||||
BitField<4, 1, u32> explicit_lod;
|
BitField<4, 1, u32> has_lod;
|
||||||
BitField<5, 1, u32> has_offset;
|
BitField<5, 1, u32> has_offset;
|
||||||
BitField<6, 2, u32> gather_comp;
|
BitField<6, 2, u32> gather_comp;
|
||||||
BitField<8, 1, u32> has_derivatives;
|
BitField<8, 1, u32> has_derivatives;
|
||||||
BitField<9, 1, u32> is_array;
|
BitField<9, 1, u32> is_array;
|
||||||
|
BitField<10, 1, u32> is_gather;
|
||||||
};
|
};
|
||||||
|
|
||||||
union BufferInstInfo {
|
union BufferInstInfo {
|
||||||
|
|
|
@ -209,7 +209,7 @@ private:
|
||||||
union {
|
union {
|
||||||
NonTriviallyDummy dummy{};
|
NonTriviallyDummy dummy{};
|
||||||
boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
|
boost::container::small_vector<std::pair<Block*, Value>, 2> phi_args;
|
||||||
std::array<Value, 5> args;
|
std::array<Value, 6> args;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
|
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
|
||||||
|
|
Loading…
Reference in a new issue