Misc UE4 fixes (#1821)

* Add ExecLo case to S_SAVEEXEC_B64

Seen in CUSA38209

* Rename S_BCNT1_I32_B64 to S_BCNT1_I32_B32

Turtle said our implementation of S_BCNT1_I32_B64 was actually meant to implement S_BCNT1_I32_B32, so the fix is to rename the opcode handler.
This commit is contained in:
Stephen Miller 2024-12-18 14:05:35 -06:00 committed by GitHub
parent 28b19f38af
commit a507eed037
2 changed files with 6 additions and 4 deletions

View file

@ -98,8 +98,8 @@ void Translator::EmitScalarAlu(const GcnInst& inst) {
break;
case Opcode::S_BREV_B32:
return S_BREV_B32(inst);
case Opcode::S_BCNT1_I32_B64:
return S_BCNT1_I32_B64(inst);
case Opcode::S_BCNT1_I32_B32:
return S_BCNT1_I32_B32(inst);
case Opcode::S_FF1_I32_B32:
return S_FF1_I32_B32(inst);
case Opcode::S_AND_SAVEEXEC_B64:
@ -579,7 +579,7 @@ void Translator::S_BREV_B32(const GcnInst& inst) {
SetDst(inst.dst[0], ir.BitReverse(GetSrc(inst.src[0])));
}
void Translator::S_BCNT1_I32_B64(const GcnInst& inst) {
void Translator::S_BCNT1_I32_B32(const GcnInst& inst) {
const IR::U32 result = ir.BitCount(GetSrc(inst.src[0]));
SetDst(inst.dst[0], result);
ir.SetScc(ir.INotEqual(result, ir.Imm32(0)));
@ -602,6 +602,8 @@ void Translator::S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& in
return ir.GetVcc();
case OperandField::ScalarGPR:
return ir.GetThreadBitScalarReg(IR::ScalarReg(inst.src[0].code));
case OperandField::ExecLo:
return ir.GetExec();
default:
UNREACHABLE();
}

View file

@ -110,7 +110,7 @@ public:
void S_MOV_B64(const GcnInst& inst);
void S_NOT_B64(const GcnInst& inst);
void S_BREV_B32(const GcnInst& inst);
void S_BCNT1_I32_B64(const GcnInst& inst);
void S_BCNT1_I32_B32(const GcnInst& inst);
void S_FF1_I32_B32(const GcnInst& inst);
void S_GETPC_B64(u32 pc, const GcnInst& inst);
void S_SAVEEXEC_B64(NegateMode negate, bool is_or, const GcnInst& inst);