Add other 64-bit floating point shader instructions (#944)

This commit is contained in:
Raven 2024-09-18 00:01:33 +08:00 committed by GitHub
parent 6152fe8419
commit 2a9b353f8b
2 changed files with 27 additions and 0 deletions

View file

@ -138,9 +138,12 @@ public:
void V_FLOOR_F32(const GcnInst& inst);
void V_SUB_F32(const GcnInst& inst);
void V_RCP_F32(const GcnInst& inst);
// F64 variant: reads one source through GetSrc64, applies ir.FPRecip, writes via SetDst64.
void V_RCP_F64(const GcnInst& inst);
void V_FMA_F32(const GcnInst& inst);
// F64 variant: reads three sources through GetSrc64, applies ir.FPFma, writes via SetDst64.
void V_FMA_F64(const GcnInst& inst);
void V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst);
void V_MAX_F32(const GcnInst& inst, bool is_legacy = false);
// F64 multiply: reads two sources through GetSrc64, applies ir.FPMul, writes via SetDst64.
void V_MUL_F64(const GcnInst& inst);
void V_MAX_F64(const GcnInst& inst);
void V_MAX_U32(bool is_signed, const GcnInst& inst);
void V_RSQ_F32(const GcnInst& inst);

View file

@ -179,6 +179,8 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
return V_MUL_F32(inst);
case Opcode::V_RCP_F32:
return V_RCP_F32(inst);
case Opcode::V_RCP_F64:
return V_RCP_F64(inst);
case Opcode::V_LDEXP_F32:
return V_LDEXP_F32(inst);
case Opcode::V_FRACT_F32:
@ -196,8 +198,12 @@ void Translator::EmitVectorAlu(const GcnInst& inst) {
case Opcode::V_FMA_F32:
case Opcode::V_MADAK_F32:
return V_FMA_F32(inst);
case Opcode::V_FMA_F64:
return V_FMA_F64(inst);
case Opcode::V_MAX_F32:
return V_MAX_F32(inst);
case Opcode::V_MUL_F64:
return V_MUL_F64(inst);
case Opcode::V_MAX_F64:
return V_MAX_F64(inst);
case Opcode::V_RSQ_F32:
@ -537,6 +543,18 @@ void Translator::V_FMA_F32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.FPFma(src0, src1, src2));
}
void Translator::V_RCP_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
SetDst64(inst.dst[0], ir.FPRecip(src0));
}
void Translator::V_FMA_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};
const IR::F64 src2{GetSrc64<IR::F64>(inst.src[2])};
SetDst64(inst.dst[0], ir.FPFma(src0, src1, src2));
}
void Translator::V_CMP_F32(ConditionOp op, bool set_exec, const GcnInst& inst) {
const IR::F32 src0{GetSrc<IR::F32>(inst.src[0])};
const IR::F32 src1{GetSrc<IR::F32>(inst.src[1])};
@ -584,6 +602,12 @@ void Translator::V_MAX_F32(const GcnInst& inst, bool is_legacy) {
SetDst(inst.dst[0], ir.FPMax(src0, src1, is_legacy));
}
void Translator::V_MUL_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};
SetDst64(inst.dst[0], ir.FPMul(src0, src1));
}
void Translator::V_MAX_F64(const GcnInst& inst) {
const IR::F64 src0{GetSrc64<IR::F64>(inst.src[0])};
const IR::F64 src1{GetSrc64<IR::F64>(inst.src[1])};