| author | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2019-10-23 17:51:42 +0000 |
| commit | 1d5ae1026e831016fc29fd927877c86af904481f (patch) | |
| tree | 2cdfd12620fcfa5d9e4a0389f85368e8e36f63f9 /lib/CodeGen/GlobalISel | |
| parent | e6d1592492a3a379186bfb02bd0f4eda0669c0d5 (diff) | |
Vendor import of stripped llvm trunk r375505, the last commit before the
upstream Subversion repository was made read-only, and the LLVM project
migrated to GitHub:
https://llvm.org/svn/llvm-project/llvm/trunk@375505
Notes:
svn path=/vendor/llvm/dist/; revision=353940
svn path=/vendor/llvm/llvm-r375505/; revision=353941; tag=vendor/llvm/llvm-trunk-r375505
Diffstat (limited to 'lib/CodeGen/GlobalISel')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | lib/CodeGen/GlobalISel/CSEInfo.cpp | 7 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 11 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/CallLowering.cpp | 288 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/Combiner.cpp | 14 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/CombinerHelper.cpp | 919 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 383 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/IRTranslator.cpp | 392 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/InstructionSelect.cpp | 38 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/Legalizer.cpp | 35 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 978 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 42 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/Localizer.cpp | 11 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 93 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/RegBankSelect.cpp | 13 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/RegisterBank.cpp | 1 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 17 |
| -rw-r--r-- | lib/CodeGen/GlobalISel/Utils.cpp | 98 |
18 files changed, 2938 insertions, 404 deletions
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp index 4518dbee1a9f..7d9d812d34bc 100644 --- a/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_ANYEXT: case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: + case TargetOpcode::G_GEP: return true; } return false; @@ -65,9 +66,9 @@ std::unique_ptr<CSEConfigBase> llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) { std::unique_ptr<CSEConfigBase> Config; if (Level == CodeGenOpt::None) - Config = make_unique<CSEConfigConstantOnly>(); + Config = std::make_unique<CSEConfigConstantOnly>(); else - Config = make_unique<CSEConfigFull>(); + Config = std::make_unique<CSEConfigFull>(); return Config; } @@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const { const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand( const MachineOperand &MO) const { if (MO.isReg()) { - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); if (!MO.isDef()) addNodeIDRegNum(Reg); LLT Ty = MRI.getType(Reg); diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 461bc6038c2c..51a74793f029 100644 --- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc, return buildConstant(DstOps[0], Cst->getSExtValue()); break; } + case TargetOpcode::G_SEXT_INREG: { + assert(DstOps.size() == 1 && "Invalid dst ops"); + assert(SrcOps.size() == 2 && "Invalid src ops"); + const DstOp &Dst = DstOps[0]; + const SrcOp &Src0 = SrcOps[0]; + const SrcOp &Src1 = SrcOps[1]; + if (auto MaybeCst = + ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI())) + return buildConstant(Dst, MaybeCst->getSExtValue()); + break; + } } bool CanCopy = checkCopyToDefsPossible(DstOps); if (!canPerformCSEForOpc(Opc)) diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp index a5d8205a34a8..cdad92f7db4f 100644 --- a/lib/CodeGen/GlobalISel/CallLowering.cpp +++ b/lib/CodeGen/GlobalISel/CallLowering.cpp @@ -11,14 +11,16 @@ /// //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/GlobalISel/CallLowering.h" #include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/GlobalISel/CallLowering.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #define DEBUG_TYPE "call-lowering" @@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, ArrayRef<ArrayRef<Register>> ArgRegs, Register SwiftErrorVReg, std::function<unsigned()> GetCalleeReg) const { + CallLoweringInfo Info; auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout(); // First step is to marshall all the function's parameters into the correct // physregs and memory locations. Gather the sequence of argument types that // we'll pass to the assigner function. 
- SmallVector<ArgInfo, 8> OrigArgs; unsigned i = 0; unsigned NumFixedArgs = CS.getFunctionType()->getNumParams(); for (auto &Arg : CS.args()) { ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}, i < NumFixedArgs}; setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS); - // We don't currently support swiftself args. - if (OrigArg.Flags.isSwiftSelf()) - return false; - OrigArgs.push_back(OrigArg); + Info.OrigArgs.push_back(OrigArg); ++i; } - MachineOperand Callee = MachineOperand::CreateImm(0); if (const Function *F = CS.getCalledFunction()) - Callee = MachineOperand::CreateGA(F, 0); + Info.Callee = MachineOperand::CreateGA(F, 0); else - Callee = MachineOperand::CreateReg(GetCalleeReg(), false); - - ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; - if (!OrigRet.Ty->isVoidTy()) - setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS); - - return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs, - SwiftErrorVReg); + Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false); + + Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}}; + if (!Info.OrigRet.Ty->isVoidTy()) + setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS); + + Info.KnownCallees = + CS.getInstruction()->getMetadata(LLVMContext::MD_callees); + Info.CallConv = CS.getCallingConv(); + Info.SwiftErrorVReg = SwiftErrorVReg; + Info.IsMustTailCall = CS.isMustTailCall(); + Info.IsTailCall = CS.isTailCall() && + isInTailCallPosition(CS, MIRBuilder.getMF().getTarget()); + Info.IsVarArg = CS.getFunctionType()->isVarArg(); + return lowerCall(MIRBuilder, Info); } template <typename FuncInfoTy> void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, const DataLayout &DL, const FuncInfoTy &FuncInfo) const { + auto &Flags = Arg.Flags[0]; const AttributeList &Attrs = FuncInfo.getAttributes(); if (Attrs.hasAttribute(OpIdx, Attribute::ZExt)) - Arg.Flags.setZExt(); + Flags.setZExt(); if (Attrs.hasAttribute(OpIdx, Attribute::SExt)) - Arg.Flags.setSExt(); + Flags.setSExt(); if (Attrs.hasAttribute(OpIdx, Attribute::InReg)) - Arg.Flags.setInReg(); + Flags.setInReg(); if (Attrs.hasAttribute(OpIdx, Attribute::StructRet)) - Arg.Flags.setSRet(); + Flags.setSRet(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf)) - Arg.Flags.setSwiftSelf(); + Flags.setSwiftSelf(); if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError)) - Arg.Flags.setSwiftError(); + Flags.setSwiftError(); if (Attrs.hasAttribute(OpIdx, Attribute::ByVal)) - Arg.Flags.setByVal(); + Flags.setByVal(); if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca)) - Arg.Flags.setInAlloca(); + Flags.setInAlloca(); - if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) { + if (Flags.isByVal() || Flags.isInAlloca()) { Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType(); auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType(); - Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); + Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy)); // For ByVal, alignment should be passed from FE. BE will guess if // this info is not there but there are cases it cannot get right. 
@@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx, FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2); else FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL); - Arg.Flags.setByValAlign(FrameAlign); + Flags.setByValAlign(Align(FrameAlign)); } if (Attrs.hasAttribute(OpIdx, Attribute::Nest)) - Arg.Flags.setNest(); - Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty)); + Flags.setNest(); + Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty))); } template void @@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg, } bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, - ArrayRef<ArgInfo> Args, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder, bool CallLowering::handleAssignments(CCState &CCInfo, SmallVectorImpl<CCValAssign> &ArgLocs, MachineIRBuilder &MIRBuilder, - ArrayRef<ArgInfo> Args, + SmallVectorImpl<ArgInfo> &Args, ValueHandler &Handler) const { MachineFunction &MF = MIRBuilder.getMF(); const Function &F = MF.getFunction(); @@ -180,14 +186,99 @@ bool CallLowering::handleAssignments(CCState &CCInfo, unsigned NumArgs = Args.size(); for (unsigned i = 0; i != NumArgs; ++i) { MVT CurVT = MVT::getVT(Args[i].Ty); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) { - // Try to use the register type if we couldn't assign the VT. - if (!Handler.isArgumentHandler() || !CurVT.isValid()) + if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) { + if (!CurVT.isValid()) return false; - CurVT = TLI->getRegisterTypeForCallingConv( + MVT NewVT = TLI->getRegisterTypeForCallingConv( F.getContext(), F.getCallingConv(), EVT(CurVT)); - if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) - return false; + + // If we need to split the type over multiple regs, check it's a scenario + // we currently support. + unsigned NumParts = TLI->getNumRegistersForCallingConv( + F.getContext(), F.getCallingConv(), CurVT); + if (NumParts > 1) { + // For now only handle exact splits. + if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits()) + return false; + } + + // For incoming arguments (physregs to vregs), we could have values in + // physregs (or memlocs) which we want to extract and copy to vregs. + // During this, we might have to deal with the LLT being split across + // multiple regs, so we have to record this information for later. + // + // If we have outgoing args, then we have the opposite case. We have a + // vreg with an LLT which we want to assign to a physical location, and + // we might have to record that the value has to be split later. + if (Handler.isIncomingArgumentHandler()) { + if (NumParts == 1) { + // Try to use the register type if we couldn't assign the VT. + if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i], + Args[i].Flags[0], CCInfo)) + return false; + } else { + // We're handling an incoming arg which is split over multiple regs. + // E.g. passing an s128 on AArch64. + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + Args[i].OrigRegs.push_back(Args[i].Regs[0]); + Args[i].Regs.clear(); + Args[i].Flags.clear(); + LLT NewLLT = getLLTForMVT(NewVT); + // For each split register, create and assign a vreg that will store + // the incoming component of the larger value. 
These will later be + // merged to form the final vreg. + for (unsigned Part = 0; Part < NumParts; ++Part) { + Register Reg = + MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT); + ISD::ArgFlagsTy Flags = OrigFlags; + if (Part == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (Part == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Reg); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[Part], CCInfo)) { + // Still couldn't assign this smaller part type for some reason. + return false; + } + } + } + } else { + // Handling an outgoing arg that might need to be split. + if (NumParts < 2) + return false; // Don't know how to deal with this type combination. + + // This type is passed via multiple registers in the calling convention. + // We need to extract the individual parts. + Register LargeReg = Args[i].Regs[0]; + LLT SmallTy = LLT::scalar(NewVT.getSizeInBits()); + auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg); + assert(Unmerge->getNumOperands() == NumParts + 1); + ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0]; + // We're going to replace the regs and flags with the split ones. + Args[i].Regs.clear(); + Args[i].Flags.clear(); + for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) { + ISD::ArgFlagsTy Flags = OrigFlags; + if (PartIdx == 0) { + Flags.setSplit(); + } else { + Flags.setOrigAlign(Align::None()); + if (PartIdx == NumParts - 1) + Flags.setSplitEnd(); + } + Args[i].Regs.push_back(Unmerge.getReg(PartIdx)); + Args[i].Flags.push_back(Flags); + if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full, + Args[i], Args[i].Flags[PartIdx], CCInfo)) + return false; + } + } } } @@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo, continue; } - assert(Args[i].Regs.size() == 1 && - "Can't handle multiple virtual regs yet"); - // FIXME: Pack registers if we have more than one. Register ArgReg = Args[i].Regs[0]; + MVT OrigVT = MVT::getVT(Args[i].Ty); + MVT VAVT = VA.getValVT(); if (VA.isRegLoc()) { - MVT OrigVT = MVT::getVT(Args[i].Ty); - MVT VAVT = VA.getValVT(); - if (Handler.isArgumentHandler() && VAVT != OrigVT) { - if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) - return false; // Can't handle this type of arg yet. + if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) { + if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) { + // Expected to be multiple regs for a single incoming arg. + unsigned NumArgRegs = Args[i].Regs.size(); + if (NumArgRegs < 2) + return false; + + assert((j + (NumArgRegs - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + for (unsigned Part = 0; Part < NumArgRegs; ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += NumArgRegs - 1; + // Merge the split registers into the expected larger result vreg + // of the original call. + MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs); + continue; + } const LLT VATy(VAVT); Register NewReg = MIRBuilder.getMRI()->createGenericVirtualRegister(VATy); @@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo, } else { MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0); } + } else if (!Handler.isIncomingArgumentHandler()) { + assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() && + "Too many regs for number of args"); + // This is an outgoing argument that might have been split. 
+ for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) { + // There should be Regs.size() ArgLocs per argument. + VA = ArgLocs[j + Part]; + Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA); + } + j += Args[i].Regs.size() - 1; } else { Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA); } } else if (VA.isMemLoc()) { + // Don't currently support loading/storing a type that needs to be split + // to the stack. Should be easy, just not implemented yet. + if (Args[i].Regs.size() > 1) { + LLVM_DEBUG( + dbgs() + << "Load/store a split arg to/from the stack not implemented yet"); + return false; + } MVT VT = MVT::getVT(Args[i].Ty); unsigned Size = VT == MVT::iPTR ? DL.getPointerSize() : alignTo(VT.getSizeInBits(), 8) / 8; @@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo, return true; } +bool CallLowering::analyzeArgInfo(CCState &CCState, + SmallVectorImpl<ArgInfo> &Args, + CCAssignFn &AssignFnFixed, + CCAssignFn &AssignFnVarArg) const { + for (unsigned i = 0, e = Args.size(); i < e; ++i) { + MVT VT = MVT::getVT(Args[i].Ty); + CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg; + if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) { + // Bail out on anything we can't handle. + LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString() + << " (arg number = " << i << "\n"); + return false; + } + } + return true; +} + +bool CallLowering::resultsCompatible(CallLoweringInfo &Info, + MachineFunction &MF, + SmallVectorImpl<ArgInfo> &InArgs, + CCAssignFn &CalleeAssignFnFixed, + CCAssignFn &CalleeAssignFnVarArg, + CCAssignFn &CallerAssignFnFixed, + CCAssignFn &CallerAssignFnVarArg) const { + const Function &F = MF.getFunction(); + CallingConv::ID CalleeCC = Info.CallConv; + CallingConv::ID CallerCC = F.getCallingConv(); + + if (CallerCC == CalleeCC) + return true; + + SmallVector<CCValAssign, 16> ArgLocs1; + CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext()); + if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + SmallVector<CCValAssign, 16> ArgLocs2; + CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext()); + if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed, + CalleeAssignFnVarArg)) + return false; + + // We need the argument locations to match up exactly. If there's more in + // one than the other, then we are done. + if (ArgLocs1.size() != ArgLocs2.size()) + return false; + + // Make sure that each location is passed in exactly the same way. + for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) { + const CCValAssign &Loc1 = ArgLocs1[i]; + const CCValAssign &Loc2 = ArgLocs2[i]; + + // We need both of them to be the same. So if one is a register and one + // isn't, we're done. + if (Loc1.isRegLoc() != Loc2.isRegLoc()) + return false; + + if (Loc1.isRegLoc()) { + // If they don't have the same register location, we're done. + if (Loc1.getLocReg() != Loc2.getLocReg()) + return false; + + // They matched, so we can move to the next ArgLoc. + continue; + } + + // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match. 
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset()) + return false; + } + + return true; +} + Register CallLowering::ValueHandler::extendRegister(Register ValReg, CCValAssign &VA) { LLT LocTy{VA.getLocVT()}; diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp index 31cb1dbbc9b5..b4562a5c6601 100644 --- a/lib/CodeGen/GlobalISel/Combiner.cpp +++ b/lib/CodeGen/GlobalISel/Combiner.cpp @@ -27,6 +27,18 @@ using namespace llvm; +namespace llvm { +cl::OptionCategory GICombinerOptionCategory( + "GlobalISel Combiner", + "Control the rules which are enabled. These options all take a comma " + "separated list of rules to disable and may be specified by number " + "or number range (e.g. 1-10)." +#ifndef NDEBUG + " They may also be specified by name." +#endif +); +} // end namespace llvm + namespace { /// This class acts as the glue the joins the CombinerHelper to the overall /// Combine algorithm. The CombinerHelper is intended to report the @@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF, return false; Builder = - CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>(); + CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>(); MRI = &MF.getRegInfo(); Builder->setMF(MF); if (CSEInfo) diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 9cbf3dd83ff1..854769d283f7 100644 --- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -8,19 +8,36 @@ #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" #include "llvm/CodeGen/GlobalISel/Combiner.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" #define DEBUG_TYPE "gi-combiner" using namespace llvm; +// Option to allow testing of the combiner while no targets know about indexed +// addressing. +static cl::opt<bool> + ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false), + cl::desc("Force all indexed operations to be " + "legal for the GlobalISel combiner")); + + CombinerHelper::CombinerHelper(GISelChangeObserver &Observer, - MachineIRBuilder &B) - : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {} + MachineIRBuilder &B, GISelKnownBits *KB, + MachineDominatorTree *MDT) + : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), + KB(KB), MDT(MDT) { + (void)this->KB; +} void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const { @@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) { bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { if (MI.getOpcode() != TargetOpcode::COPY) return false; - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); // Simple Copy Propagation. 
@@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) { return false; } void CombinerHelper::applyCombineCopy(MachineInstr &MI) { - unsigned DstReg = MI.getOperand(0).getReg(); - unsigned SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); MI.eraseFromParent(); replaceRegWith(MRI, DstReg, SrcReg); } +bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) { + bool IsUndef = false; + SmallVector<Register, 4> Ops; + if (matchCombineConcatVectors(MI, IsUndef, Ops)) { + applyCombineConcatVectors(MI, IsUndef, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef, + SmallVectorImpl<Register> &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS && + "Invalid instruction"); + IsUndef = true; + MachineInstr *Undef = nullptr; + + // Walk over all the operands of concat vectors and check if they are + // build_vector themselves or undef. + // Then collect their operands in Ops. + for (const MachineOperand &MO : MI.operands()) { + // Skip the instruction definition. + if (MO.isDef()) + continue; + Register Reg = MO.getReg(); + MachineInstr *Def = MRI.getVRegDef(Reg); + assert(Def && "Operand not defined"); + switch (Def->getOpcode()) { + case TargetOpcode::G_BUILD_VECTOR: + IsUndef = false; + // Remember the operands of the build_vector to fold + // them into the yet-to-build flattened concat vectors. + for (const MachineOperand &BuildVecMO : Def->operands()) { + // Skip the definition. + if (BuildVecMO.isDef()) + continue; + Ops.push_back(BuildVecMO.getReg()); + } + break; + case TargetOpcode::G_IMPLICIT_DEF: { + LLT OpType = MRI.getType(Reg); + // Keep one undef value for all the undef operands. + if (!Undef) { + Builder.setInsertPt(*MI.getParent(), MI); + Undef = Builder.buildUndef(OpType.getScalarType()); + } + assert(MRI.getType(Undef->getOperand(0).getReg()) == + OpType.getScalarType() && + "All undefs should have the same type"); + // Break the undef vector in as many scalar elements as needed + // for the flattening. + for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements(); + EltIdx != EltEnd; ++EltIdx) + Ops.push_back(Undef->getOperand(0).getReg()); + break; + } + default: + return false; + } + } + return true; +} +void CombinerHelper::applyCombineConcatVectors( + MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) { + // We determined that the concat_vectors can be flatten. + // Generate the flattened build_vector. + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + // Note: IsUndef is sort of redundant. We could have determine it by + // checking that at all Ops are undef. Alternatively, we could have + // generate a build_vector of undefs and rely on another combine to + // clean that up. For now, given we already gather this information + // in tryCombineConcatVectors, just save compile time and issue the + // right thing. 
+ if (IsUndef) + Builder.buildUndef(NewDstReg); + else + Builder.buildBuildVector(NewDstReg, Ops); + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + +bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) { + SmallVector<Register, 4> Ops; + if (matchCombineShuffleVector(MI, Ops)) { + applyCombineShuffleVector(MI, Ops); + return true; + } + return false; +} + +bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI, + SmallVectorImpl<Register> &Ops) { + assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR && + "Invalid instruction kind"); + LLT DstType = MRI.getType(MI.getOperand(0).getReg()); + Register Src1 = MI.getOperand(1).getReg(); + LLT SrcType = MRI.getType(Src1); + unsigned DstNumElts = DstType.getNumElements(); + unsigned SrcNumElts = SrcType.getNumElements(); + + // If the resulting vector is smaller than the size of the source + // vectors being concatenated, we won't be able to replace the + // shuffle vector into a concat_vectors. + // + // Note: We may still be able to produce a concat_vectors fed by + // extract_vector_elt and so on. It is less clear that would + // be better though, so don't bother for now. + if (DstNumElts < 2 * SrcNumElts) + return false; + + // Check that the shuffle mask can be broken evenly between the + // different sources. + if (DstNumElts % SrcNumElts != 0) + return false; + + // Mask length is a multiple of the source vector length. + // Check if the shuffle is some kind of concatenation of the input + // vectors. + unsigned NumConcat = DstNumElts / SrcNumElts; + SmallVector<int, 8> ConcatSrcs(NumConcat, -1); + SmallVector<int, 8> Mask; + ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask); + for (unsigned i = 0; i != DstNumElts; ++i) { + int Idx = Mask[i]; + // Undef value. + if (Idx < 0) + continue; + // Ensure the indices in each SrcType sized piece are sequential and that + // the same source is used for the whole piece. + if ((Idx % SrcNumElts != (i % SrcNumElts)) || + (ConcatSrcs[i / SrcNumElts] >= 0 && + ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) + return false; + // Remember which source this index came from. + ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts; + } + + // The shuffle is concatenating multiple vectors together. + // Collect the different operands for that. + Register UndefReg; + Register Src2 = MI.getOperand(2).getReg(); + for (auto Src : ConcatSrcs) { + if (Src < 0) { + if (!UndefReg) { + Builder.setInsertPt(*MI.getParent(), MI); + UndefReg = Builder.buildUndef(SrcType).getReg(0); + } + Ops.push_back(UndefReg); + } else if (Src == 0) + Ops.push_back(Src1); + else + Ops.push_back(Src2); + } + return true; +} + +void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI, + const ArrayRef<Register> Ops) { + Register DstReg = MI.getOperand(0).getReg(); + Builder.setInsertPt(*MI.getParent(), MI); + Register NewDstReg = MRI.cloneVirtualRegister(DstReg); + + Builder.buildConcatVectors(NewDstReg, Ops); + + MI.eraseFromParent(); + replaceRegWith(MRI, DstReg, NewDstReg); +} + namespace { /// Select a preference between two uses. CurrentUse is the current preference @@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, // up the type and extend so that it uses the preferred use. 
if (UseMI->getOpcode() == Preferred.ExtendOpcode || UseMI->getOpcode() == TargetOpcode::G_ANYEXT) { - unsigned UseDstReg = UseMI->getOperand(0).getReg(); + Register UseDstReg = UseMI->getOperand(0).getReg(); MachineOperand &UseSrcMO = UseMI->getOperand(1); const LLT &UseDstTy = MRI.getType(UseDstReg); if (UseDstReg != ChosenDstReg) { @@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI, Observer.changedInstr(MI); } -bool CombinerHelper::matchCombineBr(MachineInstr &MI) { - assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR"); +bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) { + assert(DefMI.getParent() == UseMI.getParent()); + if (&DefMI == &UseMI) + return false; + + // Loop through the basic block until we find one of the instructions. + MachineBasicBlock::const_iterator I = DefMI.getParent()->begin(); + for (; &*I != &DefMI && &*I != &UseMI; ++I) + return &*I == &DefMI; + + llvm_unreachable("Block must contain instructions"); +} + +bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) { + if (MDT) + return MDT->dominates(&DefMI, &UseMI); + else if (DefMI.getParent() != UseMI.getParent()) + return false; + + return isPredecessor(DefMI, UseMI); +} + +bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Base = MI.getOperand(1).getReg(); + MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base); + if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) + return false; + + LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI); + + for (auto &Use : MRI.use_instructions(Base)) { + if (Use.getOpcode() != TargetOpcode::G_GEP) + continue; + + Offset = Use.getOperand(2).getReg(); + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: " + << Use); + continue; + } + + // Make sure the offset calculation is before the potentially indexed op. + // FIXME: we really care about dependency here. The offset calculation might + // be movable. + MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset); + if (!OffsetDef || !dominates(*OffsetDef, MI)) { + LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: " + << Use); + continue; + } + + // FIXME: check whether all uses of Base are load/store with foldable + // addressing modes. If so, using the normal addr-modes is better than + // forming an indexed one. 
+ + bool MemOpDominatesAddrUses = true; + for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) { + if (!dominates(MI, GEPUse)) { + MemOpDominatesAddrUses = false; + break; + } + } + + if (!MemOpDominatesAddrUses) { + LLVM_DEBUG( + dbgs() << " Ignoring candidate as memop does not dominate uses: " + << Use); + continue; + } + + LLVM_DEBUG(dbgs() << " Found match: " << Use); + Addr = Use.getOperand(0).getReg(); + return true; + } + + return false; +} + +bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr, + Register &Base, Register &Offset) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + +#ifndef NDEBUG + unsigned Opcode = MI.getOpcode(); + assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD || + Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE); +#endif + + Addr = MI.getOperand(1).getReg(); + MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI); + if (!AddrDef || MRI.hasOneUse(Addr)) + return false; + + Base = AddrDef->getOperand(1).getReg(); + Offset = AddrDef->getOperand(2).getReg(); + + LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI); + + if (!ForceLegalIndexing && + !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) { + LLVM_DEBUG(dbgs() << " Skipping, not legal for target"); + return false; + } + + MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI); + if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) { + LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway."); + return false; + } + + if (MI.getOpcode() == TargetOpcode::G_STORE) { + // Would require a copy. + if (Base == MI.getOperand(0).getReg()) { + LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway."); + return false; + } + + // We're expecting one use of Addr in MI, but it could also be the + // value stored, which isn't actually dominated by the instruction. + if (MI.getOperand(0).getReg() == Addr) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses"); + return false; + } + } + + // FIXME: check whether all uses of the base pointer are constant GEPs. That + // might allow us to end base's liveness here by adjusting the constant. 
+ + for (auto &UseMI : MRI.use_instructions(Addr)) { + if (!dominates(MI, UseMI)) { + LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses."); + return false; + } + } + + return true; +} + +bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD && + Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE) + return false; + + bool IsStore = Opcode == TargetOpcode::G_STORE; + Register Addr, Base, Offset; + bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset); + if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset)) + return false; + + + unsigned NewOpcode; + switch (Opcode) { + case TargetOpcode::G_LOAD: + NewOpcode = TargetOpcode::G_INDEXED_LOAD; + break; + case TargetOpcode::G_SEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD; + break; + case TargetOpcode::G_ZEXTLOAD: + NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD; + break; + case TargetOpcode::G_STORE: + NewOpcode = TargetOpcode::G_INDEXED_STORE; + break; + default: + llvm_unreachable("Unknown load/store opcode"); + } + + MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr); + MachineIRBuilder MIRBuilder(MI); + auto MIB = MIRBuilder.buildInstr(NewOpcode); + if (IsStore) { + MIB.addDef(Addr); + MIB.addUse(MI.getOperand(0).getReg()); + } else { + MIB.addDef(MI.getOperand(0).getReg()); + MIB.addDef(Addr); + } + + MIB.addUse(Base); + MIB.addUse(Offset); + MIB.addImm(IsPre); + MI.eraseFromParent(); + AddrDef.eraseFromParent(); + + LLVM_DEBUG(dbgs() << " Combinined to indexed operation"); + return true; +} + +bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) { + if (MI.getOpcode() != TargetOpcode::G_BR) + return false; + // Try to match the following: // bb1: // %c(s32) = G_ICMP pred, %a, %b @@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) { return true; } -bool CombinerHelper::tryCombineBr(MachineInstr &MI) { - if (!matchCombineBr(MI)) +bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) { + if (!matchElideBrByInvertingCond(MI)) return false; + applyElideBrByInvertingCond(MI); + return true; +} + +void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) { MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB(); MachineBasicBlock::iterator BrIt(MI); MachineInstr *BrCond = &*std::prev(BrIt); @@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) { BrCond->getOperand(1).setMBB(BrTarget); Observer.changedInstr(*BrCond); MI.eraseFromParent(); +} + +static bool shouldLowerMemFuncForSize(const MachineFunction &MF) { + // On Darwin, -Os means optimize for size without hurting performance, so + // only really optimize for size when -Oz (MinSize) is used. + if (MF.getTarget().getTargetTriple().isOSDarwin()) + return MF.getFunction().hasMinSize(); + return MF.getFunction().hasOptSize(); +} + +// Returns a list of types to use for memory op lowering in MemOps. A partial +// port of findOptimalMemOpLowering in TargetLowering. +static bool findGISelOptimalMemOpLowering( + std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, + unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, unsigned DstAS, unsigned SrcAS, + const AttributeList &FuncAttributes, const TargetLowering &TLI) { + // If 'SrcAlign' is zero, that means the memory operation does not need to + // load the value, i.e. memset or memcpy from constant string. 
Otherwise, + // it's the inferred alignment of the source. 'DstAlign', on the other hand, + // is the specified alignment of the memory operation. If it is zero, that + // means it's possible to change the alignment of the destination. + // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does + // not need to be loaded. + if (SrcAlign != 0 && SrcAlign < DstAlign) + return false; + + LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset, + ZeroMemset, MemcpyStrSrc, FuncAttributes); + + if (Ty == LLT()) { + // Use the largest scalar type whose alignment constraints are satisfied. + // We only need to check DstAlign here as SrcAlign is always greater or + // equal to DstAlign (or zero). + Ty = LLT::scalar(64); + while (DstAlign && DstAlign < Ty.getSizeInBytes() && + !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign)) + Ty = LLT::scalar(Ty.getSizeInBytes()); + assert(Ty.getSizeInBits() > 0 && "Could not find valid type"); + // FIXME: check for the largest legal type we can load/store to. + } + + unsigned NumMemOps = 0; + while (Size != 0) { + unsigned TySize = Ty.getSizeInBytes(); + while (TySize > Size) { + // For now, only use non-vector load / store's for the left-over pieces. + LLT NewTy = Ty; + // FIXME: check for mem op safety and legality of the types. Not all of + // SDAGisms map cleanly to GISel concepts. + if (NewTy.isVector()) + NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32); + NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1)); + unsigned NewTySize = NewTy.getSizeInBytes(); + assert(NewTySize > 0 && "Could not find appropriate type"); + + // If the new LLT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. + bool Fast; + // Need to get a VT equivalent for allowMisalignedMemoryAccesses(). + MVT VT = getMVTForLLT(Ty); + if (NumMemOps && AllowOverlap && NewTySize < Size && + TLI.allowsMisalignedMemoryAccesses( + VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) && + Fast) + TySize = Size; + else { + Ty = NewTy; + TySize = NewTySize; + } + } + + if (++NumMemOps > Limit) + return false; + + MemOps.push_back(Ty); + Size -= TySize; + } + + return true; +} + +static Type *getTypeForLLT(LLT Ty, LLVMContext &C) { + if (Ty.isVector()) + return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()), + Ty.getNumElements()); + return IntegerType::get(C, Ty.getSizeInBits()); +} + +// Get a vectorized representation of the memset value operand, GISel edition. +static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) { + MachineRegisterInfo &MRI = *MIB.getMRI(); + unsigned NumBits = Ty.getScalarSizeInBits(); + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + if (!Ty.isVector() && ValVRegAndVal) { + unsigned KnownVal = ValVRegAndVal->Value; + APInt Scalar = APInt(8, KnownVal); + APInt SplatVal = APInt::getSplat(NumBits, Scalar); + return MIB.buildConstant(Ty, SplatVal).getReg(0); + } + // FIXME: for vector types create a G_BUILD_VECTOR. + if (Ty.isVector()) + return Register(); + + // Extend the byte value to the larger type, and then multiply by a magic + // value 0x010101... in order to replicate it across every byte. 
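The comment above describes the classic byte-splat trick: zero-extend the s8 memset value and multiply by the 0x0101... magic constant so every byte of the wider type carries the same value. A standalone arithmetic check in plain C++ (independent of the G_ZEXT/G_MUL builders used in the diff):

```cpp
#include <cstdint>
#include <cstdio>

// Replicate an 8-bit value across a 64-bit word by multiplying with the
// 0x01010101... magic constant -- the same pattern getMemsetValue builds
// with a zero-extend followed by a multiply.
int main() {
  uint8_t Byte = 0xAB;
  uint64_t Magic = 0x0101010101010101ULL;
  uint64_t Splat = uint64_t(Byte) * Magic;
  std::printf("0x%016llx\n", (unsigned long long)Splat); // 0xabababababababab
  return 0;
}
```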
+ LLT ExtType = Ty.getScalarType(); + auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val); + if (NumBits > 8) { + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); + auto MagicMI = MIB.buildConstant(ExtType, Magic); + Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0); + } + + assert(ExtType == Ty && "Vector memset value type not supported yet"); + return Val; +} + +bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, Register Val, + unsigned KnownLen, unsigned Align, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memset length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemset(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + + auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI); + bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0; + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0, + /*IsMemset=*/true, + /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u, + MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + if (NewAlign > Align) { + Align = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Align) + MFI.setObjectAlignment(FI, Align); + } + } + + MachineIRBuilder MIB(MI); + // Find the largest store and generate the bit pattern for it. + LLT LargestTy = MemOps[0]; + for (unsigned i = 1; i < MemOps.size(); i++) + if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits()) + LargestTy = MemOps[i]; + + // The memset stored value is always defined as an s8, so in order to make it + // work with larger store types we need to repeat the bit pattern across the + // wider type. + Register MemSetValue = getMemsetValue(Val, LargestTy, MIB); + + if (!MemSetValue) + return false; + + // Generate the stores. For each store type in the list, we generate the + // matching store of that type to the destination address. + LLT PtrTy = MRI.getType(Dst); + unsigned DstOff = 0; + unsigned Size = KnownLen; + for (unsigned I = 0; I < MemOps.size(); I++) { + LLT Ty = MemOps[I]; + unsigned TySize = Ty.getSizeInBytes(); + if (TySize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(I == MemOps.size() - 1 && I != 0); + DstOff -= TySize - Size; + } + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. 
+ Register Value = MemSetValue; + if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) { + MVT VT = getMVTForLLT(Ty); + MVT LargestVT = getMVTForLLT(LargestTy); + if (!LargestTy.isVector() && !Ty.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0); + else + Value = getMemsetValue(Val, Ty, MIB); + if (!Value) + return false; + } + + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes()); + + Register Ptr = Dst; + if (DstOff != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff); + Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + + MIB.buildStore(Value, Ptr, *StoreMMO); + DstOff += Ty.getSizeInBytes(); + Size -= TySize; + } + + MI.eraseFromParent(); + return true; +} + + +bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memcpy length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + // FIXME: infer better src pointer alignment like SelectionDAG does here. + // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining + // if the memcpy is in a tail call position. + + unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. + if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Now we need to emit a pair of load and stores for each of the types we've + // collected. I.e. for each type, generate a load from the source pointer of + // that type width, and then generate a corresponding store to the dest buffer + // of that value loaded. 
This can result in a sequence of loads and stores + // mixed types, depending on what the target specifies as good types to use. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + unsigned Size = KnownLen; + for (auto CopyTy : MemOps) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + if (CopyTy.getSizeInBytes() > Size) + CurrOffset -= CopyTy.getSizeInBytes() - Size; + + // Construct MMOs for the accesses. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + Register Offset; + if (CurrOffset != 0) { + Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset) + .getReg(0); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO); + + // Create the store. + Register StorePtr = + CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + MIB.buildStore(LdVal, StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + Size -= CopyTy.getSizeInBytes(); + } + + MI.eraseFromParent(); return true; } +bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, + Register Src, unsigned KnownLen, + unsigned DstAlign, unsigned SrcAlign, + bool IsVolatile) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + auto &DL = MF.getDataLayout(); + LLVMContext &C = MF.getFunction().getContext(); + + assert(KnownLen != 0 && "Have a zero length memmove length!"); + + bool DstAlignCanChange = false; + MachineFrameInfo &MFI = MF.getFrameInfo(); + bool OptSize = shouldLowerMemFuncForSize(MF); + unsigned Alignment = MinAlign(DstAlign, SrcAlign); + + MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); + if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex())) + DstAlignCanChange = true; + + unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize); + std::vector<LLT> MemOps; + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo(); + MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo(); + + // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due + // to a bug in it's findOptimalMemOpLowering implementation. For now do the + // same thing here. + if (!findGISelOptimalMemOpLowering( + MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment), + SrcAlign, + /*IsMemset=*/false, + /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false, + /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(), + SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI)) + return false; + + if (DstAlignCanChange) { + // Get an estimate of the type from the LLT. + Type *IRTy = getTypeForLLT(MemOps[0], C); + unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Alignment && + DL.exceedsNaturalStackAlignment(Align(NewAlign))) + NewAlign /= 2; + + if (NewAlign > Alignment) { + Alignment = NewAlign; + unsigned FI = FIDef->getOperand(1).getIndex(); + // Give the stack frame object a larger alignment if needed. 
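The overlap adjustment in the memcpy expansion above ("Issuing an unaligned load / store pair that overlaps with the previous pair") is easiest to see with concrete numbers: a 15-byte copy done with 8-byte operations becomes accesses at offsets 0 and 7. A minimal sketch of just that offset arithmetic, with the operation sizes hard-coded here rather than coming from findGISelOptimalMemOpLowering:

```cpp
#include <cstdio>
#include <vector>

// Offsets used when covering Len bytes with the chosen operation sizes,
// mirroring the CurrOffset adjustment in the inlined memcpy lowering above:
// the final, too-wide piece is slid back so it overlaps the previous one.
int main() {
  const unsigned Len = 15;
  const std::vector<unsigned> OpSizes = {8, 8}; // e.g. two s64 copies
  unsigned Size = Len, CurrOffset = 0;
  for (unsigned TySize : OpSizes) {
    if (TySize > Size)               // last piece would run past the end...
      CurrOffset -= TySize - Size;   // ...so overlap it with the previous one
    std::printf("copy %u bytes at offset %u\n", TySize, CurrOffset);
    CurrOffset += TySize;
    Size = (TySize > Size) ? 0 : Size - TySize;
  }
  return 0;
}
```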
+ if (MFI.getObjectAlignment(FI) < Alignment) + MFI.setObjectAlignment(FI, Alignment); + } + } + + LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n"); + + MachineIRBuilder MIB(MI); + // Memmove requires that we perform the loads first before issuing the stores. + // Apart from that, this loop is pretty much doing the same thing as the + // memcpy codegen function. + unsigned CurrOffset = 0; + LLT PtrTy = MRI.getType(Src); + SmallVector<Register, 16> LoadVals; + for (auto CopyTy : MemOps) { + // Construct MMO for the load. + auto *LoadMMO = + MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes()); + + // Create the load. + Register LoadPtr = Src; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0); + } + LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0)); + CurrOffset += CopyTy.getSizeInBytes(); + } + + CurrOffset = 0; + for (unsigned I = 0; I < MemOps.size(); ++I) { + LLT CopyTy = MemOps[I]; + // Now store the values loaded. + auto *StoreMMO = + MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes()); + + Register StorePtr = Dst; + if (CurrOffset != 0) { + auto Offset = + MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset); + StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0); + } + MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO); + CurrOffset += CopyTy.getSizeInBytes(); + } + MI.eraseFromParent(); + return true; +} + +bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) { + // This combine is fairly complex so it's not written with a separate + // matcher function. + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID(); + assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove || + ID == Intrinsic::memset) && + "Expected a memcpy like intrinsic"); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + // Don't try to optimize volatile. + if (IsVolatile) + return false; + + unsigned DstAlign = MemOp->getBaseAlignment(); + unsigned SrcAlign = 0; + Register Dst = MI.getOperand(1).getReg(); + Register Src = MI.getOperand(2).getReg(); + Register Len = MI.getOperand(3).getReg(); + + if (ID != Intrinsic::memset) { + assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI"); + MemOp = *(++MMOIt); + SrcAlign = MemOp->getBaseAlignment(); + } + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return false; // Leave it to the legalizer to lower it to a libcall. 
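The memmove expansion above deliberately emits every load before any store. The reason is the usual overlapping-buffer hazard; a short plain-C++ illustration (nothing here is LLVM API):

```cpp
#include <cstdio>
#include <cstring>

// Moving bytes to an overlapping, higher destination: storing as we go would
// re-read bytes we already overwrote, so load everything first, then store --
// the same ordering the GISel memmove expansion above enforces.
int main() {
  char Buf[9] = "abcdefgh";       // move Buf[0..5] to Buf[2..7]
  char Tmp[6];
  std::memcpy(Tmp, Buf, 6);       // phase 1: all loads
  std::memcpy(Buf + 2, Tmp, 6);   // phase 2: all stores
  std::printf("%s\n", Buf);       // prints "ababcdef"
  return 0;
}
```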
+ unsigned KnownLen = LenVRegAndVal->Value; + + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + if (MaxLen && KnownLen > MaxLen) + return false; + + if (ID == Intrinsic::memcpy) + return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memmove) + return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (ID == Intrinsic::memset) + return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile); + return false; +} + bool CombinerHelper::tryCombine(MachineInstr &MI) { if (tryCombineCopy(MI)) return true; - return tryCombineExtendingLoads(MI); + if (tryCombineExtendingLoads(MI)) + return true; + if (tryCombineIndexedLoadStore(MI)) + return true; + return false; } diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp new file mode 100644 index 000000000000..be8efa8795f3 --- /dev/null +++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp @@ -0,0 +1,383 @@ +//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp --------------*- C++ *-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Provides analysis for querying information about KnownBits during GISel +/// passes. +// +//===------------------ +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetOpcodes.h" + +#define DEBUG_TYPE "gisel-known-bits" + +using namespace llvm; + +char llvm::GISelKnownBitsAnalysis::ID = 0; + +INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) +INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE, + "Analysis for ComputingKnownBits", false, true) + +GISelKnownBits::GISelKnownBits(MachineFunction &MF) + : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()), + DL(MF.getFunction().getParent()->getDataLayout()) {} + +Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset, + const MachineFunction &MF) { + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset); + // TODO: How to handle cases with Base + Offset? +} + +MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) { + if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) { + int FrameIdx = MI.getOperand(1).getIndex(); + return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF()); + } + return None; +} + +void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + const MachineInstr &MI = *MRI.getVRegDef(R); + computeKnownBitsForAlignment(Known, inferPtrAlignment(MI)); +} + +void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known, + MaybeAlign Alignment) { + if (Alignment) + // The low bits are known zero if the pointer is aligned. 
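computeKnownBitsForAlignment above records a simple fact: an address aligned to 2^K bytes has its K low bits equal to zero, hence Known.Zero.setLowBits(Log2(Alignment)). A quick plain-C++ check of that arithmetic (not the LLVM KnownBits/Align API):

```cpp
#include <cstdint>
#include <cstdio>

// For an alignment of 2^K bytes, the K low bits of any aligned address are
// guaranteed to be zero.
int main() {
  const uint64_t Alignment = 16;   // e.g. a 16-byte aligned frame object
  unsigned K = 0;
  while ((1ULL << (K + 1)) <= Alignment)
    ++K;                           // K = log2(Alignment) = 4
  const uint64_t KnownZeroMask = (1ULL << K) - 1; // 0xF
  std::printf("log2(%llu) = %u, known-zero low-bit mask = 0x%llx\n",
              (unsigned long long)Alignment, K,
              (unsigned long long)KnownZeroMask);
  return 0;
}
```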
+ Known.Zero.setLowBits(Log2(Alignment)); +} + +KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) { + return getKnownBits(MI.getOperand(0).getReg()); +} + +KnownBits GISelKnownBits::getKnownBits(Register R) { + KnownBits Known; + LLT Ty = MRI.getType(R); + APInt DemandedElts = + Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1); + computeKnownBitsImpl(R, Known, DemandedElts); + return Known; +} + +bool GISelKnownBits::signBitIsZero(Register R) { + LLT Ty = MRI.getType(R); + unsigned BitWidth = Ty.getScalarSizeInBits(); + return maskedValueIsZero(R, APInt::getSignMask(BitWidth)); +} + +APInt GISelKnownBits::getKnownZeroes(Register R) { + return getKnownBits(R).Zero; +} + +APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; } + +void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known, + const APInt &DemandedElts, + unsigned Depth) { + MachineInstr &MI = *MRI.getVRegDef(R); + unsigned Opcode = MI.getOpcode(); + LLT DstTy = MRI.getType(R); + + // Handle the case where this is called on a register that does not have a + // type constraint (i.e. it has a register class constraint instead). This is + // unlikely to occur except by looking through copies but it is possible for + // the initial register being queried to be in this state. + if (!DstTy.isValid()) { + Known = KnownBits(); + return; + } + + unsigned BitWidth = DstTy.getSizeInBits(); + Known = KnownBits(BitWidth); // Don't know anything + + if (DstTy.isVector()) + return; // TODO: Handle vectors. + + if (Depth == getMaxDepth()) + return; + + if (!DemandedElts) + return; // No demanded elts, better to assume we don't know anything. + + KnownBits Known2; + + switch (Opcode) { + default: + TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI, + Depth); + break; + case TargetOpcode::COPY: { + MachineOperand Dst = MI.getOperand(0); + MachineOperand Src = MI.getOperand(1); + // Look through trivial copies but don't look through trivial copies of the + // form `%1:(s32) = OP %0:gpr32` known-bits analysis is currently unable to + // determine the bit width of a register class. + // + // We can't use NoSubRegister by name as it's defined by each target but + // it's always defined to be 0 by tablegen. + if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() && + Src.getSubReg() == 0 /*NoSubRegister*/ && + MRI.getType(Src.getReg()).isValid()) { + // Don't increment Depth for this one since we didn't do any work. + computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth); + } + break; + } + case TargetOpcode::G_CONSTANT: { + auto CstVal = getConstantVRegVal(R, MRI); + if (!CstVal) + break; + Known.One = *CstVal; + Known.Zero = ~Known.One; + break; + } + case TargetOpcode::G_FRAME_INDEX: { + computeKnownBitsForFrameIndex(R, Known, DemandedElts); + break; + } + case TargetOpcode::G_SUB: { + // If low bits are known to be zero in both operands, then we know they are + // going to be 0 in the result. Both addition and complement operations + // preserve the low zero bits. 
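// Worked example (illustrative, not from this patch): if both operands are
// known multiples of 8 -- at least three trailing zero bits each -- then their
// difference is also a multiple of 8, so the code below may claim
// min(trailingZeros(LHS), trailingZeros(RHS)) known-zero low bits for the
// result.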
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + if (KnownZeroLow == 0) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + break; + } + case TargetOpcode::G_XOR: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are known if clear or set in both the LHS & RHS. + APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One); + // Output known-1 are known to be set if set in only one of the LHS, RHS. + Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero); + Known.Zero = KnownZeroOut; + break; + } + case TargetOpcode::G_GEP: { + // G_GEP is like G_ADD. FIXME: Is this true for all targets? + LLT Ty = MRI.getType(MI.getOperand(1).getReg()); + if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace())) + break; + LLVM_FALLTHROUGH; + } + case TargetOpcode::G_ADD: { + // Output known-0 bits are known if clear or set in both the low clear bits + // common to both LHS & RHS. For example, 8+(X<<3) is known to have the + // low 3 bits clear. + // Output known-0 bits are also known if the top bits of each input are + // known to be clear. For example, if one input has the top 10 bits clear + // and the other has the top 8 bits clear, we know the top 7 bits of the + // output must be clear. + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + unsigned KnownZeroHigh = Known2.countMinLeadingZeros(); + unsigned KnownZeroLow = Known2.countMinTrailingZeros(); + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros()); + KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros()); + Known.Zero.setLowBits(KnownZeroLow); + if (KnownZeroHigh > 1) + Known.Zero.setHighBits(KnownZeroHigh - 1); + break; + } + case TargetOpcode::G_AND: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-1 bits are only known if set in both the LHS & RHS. + Known.One &= Known2.One; + // Output known-0 are known to be clear if zero in either the LHS | RHS. + Known.Zero |= Known2.Zero; + break; + } + case TargetOpcode::G_OR: { + // If either the LHS or the RHS are Zero, the result is zero. + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + + // Output known-0 bits are only known if clear in both the LHS & RHS. + Known.Zero &= Known2.Zero; + // Output known-1 are known to be set if set in either the LHS | RHS. + Known.One |= Known2.One; + break; + } + case TargetOpcode::G_MUL: { + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts, + Depth + 1); + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts, + Depth + 1); + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits. 
+ // More trickiness is possible, but this is sufficient for the + // interesting case of alignment computation. + unsigned TrailZ = + Known.countMinTrailingZeros() + Known2.countMinTrailingZeros(); + unsigned LeadZ = + std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(), + BitWidth) - + BitWidth; + + Known.resetAll(); + Known.Zero.setLowBits(std::min(TrailZ, BitWidth)); + Known.Zero.setHighBits(std::min(LeadZ, BitWidth)); + break; + } + case TargetOpcode::G_SELECT: { + computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts, + Depth + 1); + // If we don't know any bits, early out. + if (Known.isUnknown()) + break; + computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts, + Depth + 1); + // Only known if known in both the LHS and RHS. + Known.One &= Known2.One; + Known.Zero &= Known2.Zero; + break; + } + case TargetOpcode::G_FCMP: + case TargetOpcode::G_ICMP: { + if (TL.getBooleanContents(DstTy.isVector(), + Opcode == TargetOpcode::G_FCMP) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + Known.Zero.setBitsFrom(1); + break; + } + case TargetOpcode::G_SEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + // If the sign bit is known to be zero or one, then sext will extend + // it to the top bits, else it will just zext. + Known = Known.sext(BitWidth); + break; + } + case TargetOpcode::G_ANYEXT: { + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */); + break; + } + case TargetOpcode::G_LOAD: { + if (MI.hasOneMemOperand()) { + const MachineMemOperand *MMO = *MI.memoperands_begin(); + if (const MDNode *Ranges = MMO->getRanges()) { + computeKnownBitsFromRangeMetadata(*Ranges, Known); + } + } + break; + } + case TargetOpcode::G_ZEXTLOAD: { + // Everything above the retrieved bits is zero + if (MI.hasOneMemOperand()) + Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits()); + break; + } + case TargetOpcode::G_ASHR: + case TargetOpcode::G_LSHR: + case TargetOpcode::G_SHL: { + KnownBits RHSKnown; + computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts, + Depth + 1); + if (!RHSKnown.isConstant()) { + LLVM_DEBUG( + MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg()); + dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI); + break; + } + uint64_t Shift = RHSKnown.getConstant().getZExtValue(); + LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n'); + + computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts, + Depth + 1); + + switch (Opcode) { + case TargetOpcode::G_ASHR: + Known.Zero = Known.Zero.ashr(Shift); + Known.One = Known.One.ashr(Shift); + break; + case TargetOpcode::G_LSHR: + Known.Zero = Known.Zero.lshr(Shift); + Known.One = Known.One.lshr(Shift); + Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift); + break; + case TargetOpcode::G_SHL: + Known.Zero = Known.Zero.shl(Shift); + Known.One = Known.One.shl(Shift); + Known.Zero.setBits(0, Shift); + break; + } + break; + } + case TargetOpcode::G_INTTOPTR: + case TargetOpcode::G_PTRTOINT: + // Fall through and handle them the same as zext/trunc. + LLVM_FALLTHROUGH; + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_TRUNC: { + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + unsigned SrcBitWidth = SrcTy.isPointer() + ? 
DL.getIndexSizeInBits(SrcTy.getAddressSpace()) + : SrcTy.getSizeInBits(); + assert(SrcBitWidth && "SrcBitWidth can't be zero"); + Known = Known.zextOrTrunc(SrcBitWidth, true); + computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1); + Known = Known.zextOrTrunc(BitWidth, true); + if (BitWidth > SrcBitWidth) + Known.Zero.setBitsFrom(SrcBitWidth); + break; + } + } + + assert(!Known.hasConflict() && "Bits known to be one AND zero?"); + LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "[" + << Depth << "] Computed for: " << MI << "[" << Depth + << "] Known: 0x" + << (Known.Zero | Known.One).toString(16, false) << "\n" + << "[" << Depth << "] Zero: 0x" + << Known.Zero.toString(16, false) << "\n" + << "[" << Depth << "] One: 0x" + << Known.One.toString(16, false) << "\n"); +} + +void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) { + return false; +} diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index 6e99bdbd8264..45cef4aca888 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -32,6 +32,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/StackProtector.h" #include "llvm/CodeGen/TargetFrameLowering.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetRegisterInfo.h" @@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) { bool IRTranslator::translateCompare(const User &U, MachineIRBuilder &MIRBuilder) { - const CmpInst *CI = dyn_cast<CmpInst>(&U); + auto *CI = dyn_cast<CmpInst>(&U); Register Op0 = getOrCreateVReg(*U.getOperand(0)); Register Op1 = getOrCreateVReg(*U.getOperand(1)); Register Res = getOrCreateVReg(U); @@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U, MIRBuilder.buildICmp(Pred, Res, Op0, Op1); else if (Pred == CmpInst::FCMP_FALSE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getNullValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getNullValue(U.getType()))); else if (Pred == CmpInst::FCMP_TRUE) MIRBuilder.buildCopy( - Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType()))); + Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType()))); else { + assert(CI && "Instruction should be CmpInst"); MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1}, MachineInstr::copyFlagsFromInstruction(*CI)); } @@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0); } else { - assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE && - "Can only handle ULE ranges"); + assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE && + "Can only handle SLE ranges"); const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); @@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB, if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { Register CondRHS = getOrCreateVReg(*CB.CmpRHS); Cond = - MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0); + MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0); } else { const LLT &CmpTy = MRI->getType(CmpOpReg); 
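// Illustrative note (not from this patch): the general range check built below
// follows the usual switch-lowering trick of forming Sub = Cond - Low and then
// comparing Sub against (High - Low) with a single G_ICMP, rather than testing
// both bounds separately.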
auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS); @@ -728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I, MHS = nullptr; } else { // Check I->Low <= Cond <= I->High. - Pred = CmpInst::ICMP_ULE; + Pred = CmpInst::ICMP_SLE; LHS = I->Low; MHS = Cond; RHS = I->High; @@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { return true; } - + const MDNode *Ranges = + Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr; for (unsigned i = 0; i < Regs.size(); ++i) { Register Addr; MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8); @@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) { unsigned BaseAlign = getMemOpAlignment(LI); auto MMO = MF->getMachineMemOperand( Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8, - MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr, + MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges, LI.getSyncScopeID(), LI.getOrdering()); MIRBuilder.buildLoad(Regs[i], Addr, *MMO); } @@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U, } if (Offset != 0) { - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL); auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0)); - - BaseReg = NewBaseReg; + BaseReg = + MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0); Offset = 0; } Register IdxReg = getOrCreateVReg(*Idx); - if (MRI->getType(IdxReg) != OffsetTy) { - Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy); - MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg); - IdxReg = NewIdxReg; - } + if (MRI->getType(IdxReg) != OffsetTy) + IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0); // N = N + Idx * ElementSize; // Avoid doing it for ElementSize of 1. Register GepOffsetReg; if (ElementSize != 1) { - GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy); auto ElementSizeMIB = MIRBuilder.buildConstant( getLLTForType(*OffsetIRTy, *DL), ElementSize); - MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg); + GepOffsetReg = + MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0); } else GepOffsetReg = IdxReg; - Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg); - BaseReg = NewBaseReg; + BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0); } } @@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U, return true; } -bool IRTranslator::translateMemfunc(const CallInst &CI, +bool IRTranslator::translateMemFunc(const CallInst &CI, MachineIRBuilder &MIRBuilder, - unsigned ID) { + Intrinsic::ID ID) { // If the source is undef, then just emit a nop. 
- if (isa<UndefValue>(CI.getArgOperand(1))) { - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: - case Intrinsic::memset: - return true; - default: - break; - } - } - - LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL); - Type *DstTy = CI.getArgOperand(0)->getType(); - if (cast<PointerType>(DstTy)->getAddressSpace() != 0 || - SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0)) - return false; + if (isa<UndefValue>(CI.getArgOperand(1))) + return true; - SmallVector<CallLowering::ArgInfo, 8> Args; - for (int i = 0; i < 3; ++i) { - const auto &Arg = CI.getArgOperand(i); - Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType()); + ArrayRef<Register> Res; + auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true); + for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI) + ICall.addUse(getOrCreateVReg(**AI)); + + unsigned DstAlign = 0, SrcAlign = 0; + unsigned IsVol = + cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1)) + ->getZExtValue(); + + if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) { + DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1); + SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1); + } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) { + DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1); + SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1); + } else { + auto *MSI = cast<MemSetInst>(&CI); + DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1); } - const char *Callee; - switch (ID) { - case Intrinsic::memmove: - case Intrinsic::memcpy: { - Type *SrcTy = CI.getArgOperand(1)->getType(); - if(cast<PointerType>(SrcTy)->getAddressSpace() != 0) - return false; - Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove"; - break; - } - case Intrinsic::memset: - Callee = "memset"; - break; - default: - return false; - } + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); - return CLI->lowerCall(MIRBuilder, CI.getCallingConv(), - MachineOperand::CreateES(Callee), - CallLowering::ArgInfo({0}, CI.getType()), Args); + // Create mem operands to store the alignment and volatile info. + auto VolFlag = IsVol ? 
MachineMemOperand::MOVolatile : MachineMemOperand::MONone; + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(0)), + MachineMemOperand::MOStore | VolFlag, 1, DstAlign)); + if (ID != Intrinsic::memset) + ICall.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(CI.getArgOperand(1)), + MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign)); + + return true; } void IRTranslator::getStackGuard(Register DstReg, @@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg, MachineMemOperand::MODereferenceable; MachineMemOperand *MemRef = MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8, - DL->getPointerABIAlignment(0)); + DL->getPointerABIAlignment(0).value()); MIB.setMemRefs({MemRef}); } @@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) { break; case Intrinsic::bswap: return TargetOpcode::G_BSWAP; + case Intrinsic::bitreverse: + return TargetOpcode::G_BITREVERSE; case Intrinsic::ceil: return TargetOpcode::G_FCEIL; case Intrinsic::cos: @@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression()); + MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression()); } else if (const auto *CI = dyn_cast<Constant>(V)) { MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression()); } else { - Register Reg = getOrCreateVReg(*V); - // FIXME: This does not handle register-indirect values at offset 0. The - // direct/indirect thing shouldn't really be handled by something as - // implicit as reg+noreg vs reg+imm in the first palce, but it seems - // pretty baked in right now. - MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + for (Register Reg : getOrCreateVRegs(*V)) { + // FIXME: This does not handle register-indirect values at offset 0. The + // direct/indirect thing shouldn't really be handled by something as + // implicit as reg+noreg vs reg+imm in the first place, but it seems + // pretty baked in right now. + MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression()); + } } return true; } @@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: - return translateMemfunc(CI, MIRBuilder, ID); + return translateMemFunc(CI, MIRBuilder, ID); case Intrinsic::eh_typeid_for: { GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0)); Register Reg = getOrCreateVReg(CI); @@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, MIRBuilder.buildConstant(Reg, TypeID); return true; } - case Intrinsic::objectsize: { - // If we don't know by now, we're never going to know. - const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1)); + case Intrinsic::objectsize: + llvm_unreachable("llvm.objectsize.* should have been lowered already"); - MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0); - return true; - } case Intrinsic::is_constant: - // If this wasn't constant-folded away by now, then it's not a - // constant. 
- MIRBuilder.buildConstant(getOrCreateVReg(CI), 0); - return true; + llvm_unreachable("llvm.is.constant.* should have been lowered already"); + case Intrinsic::stackguard: getStackGuard(getOrCreateVReg(CI), MIRBuilder); return true; @@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI, return true; } +bool IRTranslator::translateCallSite(const ImmutableCallSite &CS, + MachineIRBuilder &MIRBuilder) { + const Instruction &I = *CS.getInstruction(); + ArrayRef<Register> Res = getOrCreateVRegs(I); + + SmallVector<ArrayRef<Register>, 8> Args; + Register SwiftInVReg = 0; + Register SwiftErrorVReg = 0; + for (auto &Arg : CS.args()) { + if (CLI->supportSwiftError() && isSwiftError(Arg)) { + assert(SwiftInVReg == 0 && "Expected only one swift error argument"); + LLT Ty = getLLTForType(*Arg->getType(), *DL); + SwiftInVReg = MRI->createGenericVirtualRegister(Ty); + MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( + &I, &MIRBuilder.getMBB(), Arg)); + Args.emplace_back(makeArrayRef(SwiftInVReg)); + SwiftErrorVReg = + SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); + continue; + } + Args.push_back(getOrCreateVRegs(*Arg)); + } + + // We don't set HasCalls on MFI here yet because call lowering may decide to + // optimize into tail calls. Instead, we defer that to selection where a final + // scan is done to check if any instructions are calls. + bool Success = + CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg, + [&]() { return getOrCreateVReg(*CS.getCalledValue()); }); + + // Check if we just inserted a tail call. + if (Success) { + assert(!HasTailCall && "Can't tail call return twice from block?"); + const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); + HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt())); + } + + return Success; +} + bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { const CallInst &CI = cast<CallInst>(U); auto TII = MF->getTarget().getIntrinsicInfo(); @@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F)); } - if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) { - ArrayRef<Register> Res = getOrCreateVRegs(CI); - - SmallVector<ArrayRef<Register>, 8> Args; - Register SwiftInVReg = 0; - Register SwiftErrorVReg = 0; - for (auto &Arg: CI.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &CI, &MIRBuilder.getMBB(), Arg)); - Args.emplace_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg); - continue; - } - Args.push_back(getOrCreateVRegs(*Arg)); - } - - MF->getFrameInfo().setHasCalls(true); - bool Success = - CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*CI.getCalledValue()); }); - - return Success; - } + if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) + return translateCallSite(&CI, MIRBuilder); assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); @@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { if (isa<FPMathOperator>(CI)) MIB->copyIRFlags(CI); - for (auto &Arg : CI.arg_operands()) { + for (auto &Arg 
: enumerate(CI.arg_operands())) { // Some intrinsics take metadata parameters. Reject them. - if (isa<MetadataAsValue>(Arg)) - return false; - ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg); - if (VRegs.size() > 1) + if (isa<MetadataAsValue>(Arg.value())) return false; - MIB.addUse(VRegs[0]); + + // If this is required to be an immediate, don't materialize it in a + // register. + if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) { + // imm arguments are more convenient than cimm (and realistically + // probably sufficient), so use them. + assert(CI->getBitWidth() <= 64 && + "large intrinsic immediates not handled"); + MIB.addImm(CI->getSExtValue()); + } else { + MIB.addFPImm(cast<ConstantFP>(Arg.value())); + } + } else { + ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value()); + if (VRegs.size() > 1) + return false; + MIB.addUse(VRegs[0]); + } } // Add a MachineMemOperand if it is a target mem intrinsic. @@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) { TargetLowering::IntrinsicInfo Info; // TODO: Add a GlobalISel version of getTgtMemIntrinsic. if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) { - unsigned Align = Info.align; - if (Align == 0) - Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())); + MaybeAlign Align = Info.align; + if (!Align) + Align = MaybeAlign( + DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()))); uint64_t Size = Info.memVT.getStoreSize(); - MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal), - Info.flags, Size, Align)); + MIB.addMemOperand(MF->getMachineMemOperand( + MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value())); } return true; @@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U, MCSymbol *BeginSymbol = Context.createTempSymbol(); MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol); - ArrayRef<Register> Res; - if (!I.getType()->isVoidTy()) - Res = getOrCreateVRegs(I); - SmallVector<ArrayRef<Register>, 8> Args; - Register SwiftErrorVReg = 0; - Register SwiftInVReg = 0; - for (auto &Arg : I.arg_operands()) { - if (CLI->supportSwiftError() && isSwiftError(Arg)) { - assert(SwiftInVReg == 0 && "Expected only one swift error argument"); - LLT Ty = getLLTForType(*Arg->getType(), *DL); - SwiftInVReg = MRI->createGenericVirtualRegister(Ty); - MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt( - &I, &MIRBuilder.getMBB(), Arg)); - Args.push_back(makeArrayRef(SwiftInVReg)); - SwiftErrorVReg = - SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg); - continue; - } - - Args.push_back(getOrCreateVRegs(*Arg)); - } - - if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg, - [&]() { return getOrCreateVReg(*I.getCalledValue()); })) + if (!translateCallSite(&I, MIRBuilder)) return false; MCSymbol *EndSymbol = Context.createTempSymbol(); @@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U, Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy); Register TySize = - getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty))); + getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty))); MIRBuilder.buildMul(AllocSize, NumElts, TySize); - LLT PtrTy = getLLTForType(*AI.getType(), *DL); - auto &TLI = *MF->getSubtarget().getTargetLowering(); - Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); - - Register SPTmp = 
MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildCopy(SPTmp, SPReg); - - Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize); - - // Handle alignment. We have to realign if the allocation granule was smaller - // than stack alignment, or the specific alloca requires more than stack - // alignment. unsigned StackAlign = MF->getSubtarget().getFrameLowering()->getStackAlignment(); - Align = std::max(Align, StackAlign); - if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) { - // Round the size of the allocation up to the stack alignment size - // by add SA-1 to the size. This doesn't overflow because we're computing - // an address inside an alloca. - Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy); - MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align)); - AllocTmp = AlignedAlloc; - } + if (Align <= StackAlign) + Align = 0; + + // Round the size of the allocation up to the stack alignment size + // by add SA-1 to the size. This doesn't overflow because we're computing + // an address inside an alloca. + auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1); + auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne, + MachineInstr::NoUWrap); + auto AlignCst = + MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1)); + auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst); - MIRBuilder.buildCopy(SPReg, AllocTmp); - MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp); + MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align); MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI); assert(MF->getFrameInfo().hasVarSizedObjects()); @@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U, .addDef(getOrCreateVReg(U)) .addUse(getOrCreateVReg(*U.getOperand(0))) .addUse(getOrCreateVReg(*U.getOperand(1))) - .addUse(getOrCreateVReg(*U.getOperand(2))); + .addShuffleMask(cast<Constant>(U.getOperand(2))); return true; } @@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U, unsigned Opcode = 0; switch (I.getOperation()) { default: - llvm_unreachable("Unknown atomicrmw op"); return false; case AtomicRMWInst::Xchg: Opcode = TargetOpcode::G_ATOMICRMW_XCHG; @@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U, case AtomicRMWInst::UMin: Opcode = TargetOpcode::G_ATOMICRMW_UMIN; break; + case AtomicRMWInst::FAdd: + Opcode = TargetOpcode::G_ATOMICRMW_FADD; + break; + case AtomicRMWInst::FSub: + Opcode = TargetOpcode::G_ATOMICRMW_FSUB; + break; } MIRBuilder.buildAtomicRMW( @@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() { FuncInfo.clear(); } +/// Returns true if a BasicBlock \p BB within a variadic function contains a +/// variadic musttail call. +static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) { + if (!IsVarArg) + return false; + + // Walk the block backwards, because tail calls usually only appear at the end + // of a block. 
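// A minimal illustration (not from this patch) of what the scan is looking for,
// in a variadic caller that forwards its arguments:
//   define i32 @thunk(i32 %x, ...) {
//     %r = musttail call i32 (i32, ...) @impl(i32 %x, ...)
//     ret i32 %r
//   }
// Finding such a call sets HasMustTailInVarArgFn, which in turn drives
// MachineFrameInfo::setHasMustTailInVarArgFunc() below so that targets can keep
// the variadic register save area available for forwarding.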
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) { + const auto *CI = dyn_cast<CallInst>(&I); + return CI && CI->isMustTailCall(); + }); +} + bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { MF = &CurMF; const Function &F = MF->getFunction(); @@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { : TPC->isGISelCSEEnabled(); if (EnableCSE) { - EntryBuilder = make_unique<CSEMIRBuilder>(CurMF); + EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF); CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); - CurBuilder = make_unique<CSEMIRBuilder>(CurMF); + CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF); CurBuilder->setCSEInfo(CSEInfo); } else { - EntryBuilder = make_unique<MachineIRBuilder>(); - CurBuilder = make_unique<MachineIRBuilder>(); + EntryBuilder = std::make_unique<MachineIRBuilder>(); + CurBuilder = std::make_unique<MachineIRBuilder>(); } CLI = MF->getSubtarget().getCallLowering(); CurBuilder->setMF(*MF); EntryBuilder->setMF(*MF); MRI = &MF->getRegInfo(); DL = &F.getParent()->getDataLayout(); - ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F); + ORE = std::make_unique<OptimizationRemarkEmitter>(&F); FuncInfo.MF = MF; FuncInfo.BPI = nullptr; const auto &TLI = *MF->getSubtarget().getTargetLowering(); const TargetMachine &TM = MF->getTarget(); - SL = make_unique<GISelSwitchLowering>(this, FuncInfo); + SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { SwiftError.setFunction(CurMF); SwiftError.createEntriesInEntryBlock(DbgLoc); + bool IsVarArg = F.isVarArg(); + bool HasMustTailInVarArgFn = false; + // Create all blocks, in IR order, to preserve the layout. for (const BasicBlock &BB: F) { auto *&MBB = BBToMBB[&BB]; @@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { if (BB.hasAddressTaken()) MBB->setHasAddressTaken(); + + if (!HasMustTailInVarArgFn) + HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB); } + MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn); + // Make our arguments/constants entry block fallthrough to the IR entry block. EntryBB->addSuccessor(&getMBB(F.front())); @@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { } } - // We don't currently support translating swifterror or swiftself functions. - for (auto &Arg : F.args()) { - if (Arg.hasSwiftSelfAttr()) { - OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", - F.getSubprogram(), &F.getEntryBlock()); - R << "unable to lower arguments due to swiftself: " - << ore::NV("Prototype", F.getType()); - reportTranslationError(*MF, *TPC, *ORE, R); - return false; - } - } - if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) { OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure", F.getSubprogram(), &F.getEntryBlock()); @@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) { // Set the insertion point of all the following translations to // the end of this basic block. CurBuilder->setMBB(MBB); - + HasTailCall = false; for (const Instruction &Inst : *BB) { + // If we translated a tail call in the last step, then we know + // everything after the call is either a return, or something that is + // handled by the call itself. (E.g. 
a lifetime marker or assume + // intrinsic.) In this case, we should stop translating the block and + // move on. + if (HasTailCall) + break; #ifndef NDEBUG Verifier.setCurrentInst(&Inst); #endif // ifndef NDEBUG diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp index 70694fe6b6c8..7c4fd2d140d3 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp @@ -12,11 +12,14 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Twine.h" +#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) +INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE, "Select target instructions out of generic instructions", false, false) @@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { } void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<TargetPassConfig>(); + AU.addRequired<GISelKnownBitsAnalysis>(); + AU.addPreserved<GISelKnownBitsAnalysis>(); getSelectionDAGFallbackAnalysisUsage(AU); MachineFunctionPass::getAnalysisUsage(AU); } @@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { return false; LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n'); + GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF); const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>(); - const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); + InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector(); CodeGenCoverage CoverageInfo; assert(ISel && "Cannot work without InstructionSelector"); + ISel->setupMF(MF, KB, CoverageInfo); // An optimization remark emitter. Used to report failures. MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr); @@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { continue; } - if (!ISel->select(MI, CoverageInfo)) { + if (!ISel->select(MI)) { // FIXME: It would be nice to dump all inserted instructions. It's // not obvious how, esp. considering select() can insert after MI. 
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI); @@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { --MII; if (MI.getOpcode() != TargetOpcode::COPY) continue; - unsigned SrcReg = MI.getOperand(1).getReg(); - unsigned DstReg = MI.getOperand(0).getReg(); - if (TargetRegisterInfo::isVirtualRegister(SrcReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { + Register SrcReg = MI.getOperand(1).getReg(); + Register DstReg = MI.getOperand(0).getReg(); + if (Register::isVirtualRegister(SrcReg) && + Register::isVirtualRegister(DstReg)) { auto SrcRC = MRI.getRegClass(SrcReg); auto DstRC = MRI.getRegClass(DstReg); if (SrcRC == DstRC) { @@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { // that the size of the now-constrained vreg is unchanged and that it has a // register class. for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) { - unsigned VReg = TargetRegisterInfo::index2VirtReg(I); + unsigned VReg = Register::index2VirtReg(I); MachineInstr *MI = nullptr; if (!MRI.def_empty(VReg)) @@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) { auto &TLI = *MF.getSubtarget().getTargetLowering(); TLI.finalizeLowering(MF); + // Determine if there are any calls in this machine function. Ported from + // SelectionDAG. + MachineFrameInfo &MFI = MF.getFrameInfo(); + for (const auto &MBB : MF) { + if (MFI.hasCalls() && MF.hasInlineAsm()) + break; + + for (const auto &MI : MBB) { + if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm()) + MFI.setHasCalls(true); + if (MI.isInlineAsm()) + MF.setHasInlineAsm(true); + } + } + + LLVM_DEBUG({ dbgs() << "Rules covered by selecting function: " << MF.getName() << ":"; for (auto RuleID : CoverageInfo.covered()) diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp index 2ad35b3a72c9..28143b30d4e8 100644 --- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp +++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp @@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI, return true; return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() && - !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands()); + !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty(); } diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp index b5b26bff34bb..1593e21fe07e 100644 --- a/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { : TPC.isGISelCSEEnabled(); if (EnableCSE) { - MIRBuilder = make_unique<CSEMIRBuilder>(); + MIRBuilder = std::make_unique<CSEMIRBuilder>(); CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); } else - MIRBuilder = make_unique<MachineIRBuilder>(); + MIRBuilder = std::make_unique<MachineIRBuilder>(); // This observer keeps the worklist updated. LegalizerWorkListManager WorkListObserver(InstList, ArtifactList); // We want both WorkListObserver as well as CSEInfo to observe all changes. 
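// Illustrative note (not from this patch): routing changes through both
// observers matters because a single legalization step can create several new
// instructions -- e.g. expanding one wide G_ADD into G_UADDO/G_UADDE pieces --
// and each createdInstr() callback both re-queues the new instruction on the
// worklist and lets CSEInfo hash it for later reuse.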
@@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) { WrapperObserver.erasingInstr(*DeadMI); }; + auto stopLegalizing = [&](MachineInstr &MI) { + Helper.MIRBuilder.stopObservingChanges(); + reportGISelFailure(MF, TPC, MORE, "gisel-legalize", + "unable to legalize instruction", MI); + }; bool Changed = false; + SmallVector<MachineInstr *, 128> RetryList; do { + assert(RetryList.empty() && "Expected no instructions in RetryList"); + unsigned NumArtifacts = ArtifactList.size(); while (!InstList.empty()) { MachineInstr &MI = *InstList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); @@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { // Error out if we couldn't legalize this instruction. We may want to // fall back to DAG ISel instead in the future. if (Res == LegalizerHelper::UnableToLegalize) { - Helper.MIRBuilder.stopObservingChanges(); - reportGISelFailure(MF, TPC, MORE, "gisel-legalize", - "unable to legalize instruction", MI); + // Move illegal artifacts to RetryList instead of aborting because + // legalizing InstList may generate artifacts that allow + // ArtifactCombiner to combine away them. + if (isArtifact(MI)) { + RetryList.push_back(&MI); + continue; + } + stopLegalizing(MI); return false; } WorkListObserver.printNewInstrs(); Changed |= Res == LegalizerHelper::Legalized; } + // Try to combine the instructions in RetryList again if there + // are new artifacts. If not, stop legalizing. + if (!RetryList.empty()) { + if (ArtifactList.size() > NumArtifacts) { + while (!RetryList.empty()) + ArtifactList.insert(RetryList.pop_back_val()); + } else { + MachineInstr *MI = *RetryList.begin(); + stopLegalizing(*MI); + return false; + } + } while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode"); diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index f5cf7fc9bd9b..21512e543878 100644 --- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetFrameLowering.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" @@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy, return true; } +static LLT getGCDType(LLT OrigTy, LLT TargetTy) { + if (OrigTy.isVector() && TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy.getElementType()); + int GCD = greatestCommonDivisor(OrigTy.getNumElements(), + TargetTy.getNumElements()); + return LLT::scalarOrVector(GCD, OrigTy.getElementType()); + } + + if (OrigTy.isVector() && !TargetTy.isVector()) { + assert(OrigTy.getElementType() == TargetTy); + return TargetTy; + } + + assert(!OrigTy.isVector() && !TargetTy.isVector()); + + int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(), + TargetTy.getSizeInBits()); + return LLT::scalar(GCD); +} + void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy, LLT PartTy, ArrayRef<Register> PartRegs, @@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg, static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { 
switch (Opcode) { case TargetOpcode::G_SDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::SDIV_I32; + case 64: + return RTLIB::SDIV_I64; + case 128: + return RTLIB::SDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_UDIV: - assert((Size == 32 || Size == 64) && "Unsupported size"); - return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32; + assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size"); + switch (Size) { + case 32: + return RTLIB::UDIV_I32; + case 64: + return RTLIB::UDIV_I64; + case 128: + return RTLIB::UDIV_I128; + default: + llvm_unreachable("unexpected size"); + } case TargetOpcode::G_SREM: assert((Size == 32 || Size == 64) && "Unsupported size"); return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32; @@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) { llvm_unreachable("Unknown libcall function"); } +/// True if an instruction is in tail position in its caller. Intended for +/// legalizing libcalls as tail calls when possible. +static bool isLibCallInTailPosition(MachineInstr &MI) { + const Function &F = MI.getParent()->getParent()->getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore NoAlias and NonNull because they don't affect the + // call sequence. + AttributeList CallerAttrs = F.getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex) + .removeAttribute(Attribute::NoAlias) + .removeAttribute(Attribute::NonNull) + .hasAttributes()) + return false; + + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + return false; + + // Only tail call if the following instruction is a standard return. 
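// Illustrative example (not from this patch): a caller declared as
//   define zeroext i8 @f(...)
// fails the attribute check above, because the implicit zero-extension of its
// return value could no longer be performed once the libcall becomes the
// terminating tail call; a caller returning void (typical around
// memcpy/memmove/memset lowering) passes, provided the instruction after MI is
// a plain return.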
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); + MachineInstr *Next = MI.getNextNode(); + if (!Next || TII.isTailCall(*Next) || !Next->isReturn()) + return false; + + return true; +} + LegalizerHelper::LegalizeResult llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, @@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); const char *Name = TLI.getLibcallName(Libcall); - MIRBuilder.getMF().getFrameInfo().setHasCalls(true); - if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall), - MachineOperand::CreateES(Name), Result, Args)) + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(Libcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = Result; + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) return LegalizerHelper::UnableToLegalize; return LegalizerHelper::Legalized; @@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Args); } +LegalizerHelper::LegalizeResult +llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, + MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS); + auto &Ctx = MIRBuilder.getMF().getFunction().getContext(); + + SmallVector<CallLowering::ArgInfo, 3> Args; + // Add all the args, except for the last which is an imm denoting 'tail'. + for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) { + Register Reg = MI.getOperand(i).getReg(); + + // Need derive an IR type for call lowering. + LLT OpLLT = MRI.getType(Reg); + Type *OpTy = nullptr; + if (OpLLT.isPointer()) + OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace()); + else + OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits()); + Args.push_back({Reg, OpTy}); + } + + auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering(); + auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering(); + Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID(); + RTLIB::Libcall RTLibcall; + switch (ID) { + case Intrinsic::memcpy: + RTLibcall = RTLIB::MEMCPY; + break; + case Intrinsic::memset: + RTLibcall = RTLIB::MEMSET; + break; + case Intrinsic::memmove: + RTLibcall = RTLIB::MEMMOVE; + break; + default: + return LegalizerHelper::UnableToLegalize; + } + const char *Name = TLI.getLibcallName(RTLibcall); + + MIRBuilder.setInstr(MI); + + CallLowering::CallLoweringInfo Info; + Info.CallConv = TLI.getLibcallCallingConv(RTLibcall); + Info.Callee = MachineOperand::CreateES(Name); + Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)); + Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 && + isLibCallInTailPosition(MI); + + std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs)); + if (!CLI.lowerCall(MIRBuilder, Info)) + return LegalizerHelper::UnableToLegalize; + + if (Info.LoweredTailCall) { + assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?"); + // We must have a return following the call to get past + // isLibCallInTailPosition. + assert(MI.getNextNode() && MI.getNextNode()->isReturn() && + "Expected instr following MI to be a return?"); + + // We lowered a tail call, so the call is now the return from the block. + // Delete the old return. 
+ MI.getNextNode()->eraseFromParent(); + } + + return LegalizerHelper::Legalized; +} + static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType) { auto ToMVT = MVT::getVT(ToType); @@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + Register SrcReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // FIXME: support the general case where the requested NarrowTy may not be + // the same as the source type. E.g. s128 = sext(s32) + if ((SrcTy.getSizeInBits() != SizeOp0 / 2) || + SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) { + LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + // Shift the sign bit of the low register through the high register. + auto ShiftAmt = + MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1); + auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ZEXT: { + if (TypeIdx != 0) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + uint64_t SizeOp1 = SrcTy.getSizeInBits(); + if (SizeOp0 % SizeOp1 != 0) + return UnableToLegalize; + + // Generate a merge where the bottom bits are taken from the source, and + // zero everything else. + Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0); + unsigned NumParts = SizeOp0 / SizeOp1; + SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()}; + for (unsigned Part = 1; Part < NumParts; ++Part) + Srcs.push_back(ZeroReg); + MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_TRUNC: { + if (TypeIdx != 1) + return UnableToLegalize; + + uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); + if (NarrowTy.getSizeInBits() * 2 != SizeOp1) { + LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n"); + return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg()); + MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0)); + MI.eraseFromParent(); + return Legalized; + } + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. 
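// Sketch (illustrative, not from this patch) of the narrowing performed in the
// hunk below: splitting %d:_(s128) = G_ADD %a, %b into s64 parts produces an
// add-with-carry chain,
//   %lo:_(s64), %c1:_(s1) = G_UADDO %a0, %b0
//   %hi:_(s64), %c2:_(s1) = G_UADDE %a1, %b1, %c1
//   %d:_(s128)            = G_MERGE_VALUES %lo(s64), %hi(s64)
// where only the first part uses G_UADDO and every later part consumes the
// previous carry via G_UADDE.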
@@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs); extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs); - Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildConstant(CarryIn, 0); - + Register CarryIn; for (int i = 0; i < NumParts; ++i) { Register DstReg = MRI.createGenericVirtualRegister(NarrowTy); Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1)); - MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], - Src2Regs[i], CarryIn); + if (i == 0) + MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]); + else { + MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i], + Src2Regs[i], CarryIn); + } DstRegs.push_back(DstReg); CarryIn = CarryOut; @@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, for (unsigned j = 1; j < MI.getNumOperands(); j += 2) MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1)); } - MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI()); + MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI()); MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs); Observer.changedInstr(MI); MI.eraseFromParent(); @@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, CmpInst::Predicate Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()); + LLT ResTy = MRI.getType(MI.getOperand(0).getReg()); if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL); @@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0); MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero); } else { - const LLT s1 = LLT::scalar(1); - MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH); + MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH); MachineInstrBuilder CmpHEQ = - MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH); + MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH); MachineInstrBuilder CmpLU = MIRBuilder.buildICmp( - ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL); + ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL); MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH); } Observer.changedInstr(MI); MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SEXT_INREG: { + if (TypeIdx != 0) + return UnableToLegalize; + + if (!MI.getOperand(2).isImm()) + return UnableToLegalize; + int64_t SizeInBits = MI.getOperand(2).getImm(); + + // So long as the new type has more bits than the bits we're extending we + // don't need to break it apart. + if (NarrowTy.getScalarSizeInBits() >= SizeInBits) { + Observer.changingInstr(MI); + // We don't lose any non-extension bits by truncating the src and + // sign-extending the dst. 
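// Worked example (illustrative, not from this patch) for the "new type still
// covers the extension" case handled below: with NarrowTy = s32,
//   %d:_(s64) = G_SEXT_INREG %x(s64), 8
// becomes
//   %t:_(s32) = G_TRUNC %x(s64)
//   %n:_(s32) = G_SEXT_INREG %t, 8
//   %d:_(s64) = G_SEXT %n(s32)
// since bits 8..63 of the result are all copies of bit 7 anyway, nothing is
// lost by truncating first and sign-extending afterwards.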
+ MachineOperand &MO1 = MI.getOperand(1); + auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg()); + MO1.setReg(TruncMIB->getOperand(0).getReg()); + + MachineOperand &MO2 = MI.getOperand(0); + Register DstExt = MRI.createGenericVirtualRegister(NarrowTy); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt}); + MO2.setReg(DstExt); + Observer.changedInstr(MI); + return Legalized; + } + + // Break it apart. Components below the extension point are unmodified. The + // component containing the extension point becomes a narrower SEXT_INREG. + // Components above it are ashr'd from the component containing the + // extension point. + if (SizeOp0 % NarrowSize != 0) + return UnableToLegalize; + int NumParts = SizeOp0 / NarrowSize; + + // List the registers where the destination will be scattered. + SmallVector<Register, 2> DstRegs; + // List the registers where the source will be split. + SmallVector<Register, 2> SrcRegs; + + // Create all the temporary registers. + for (int i = 0; i < NumParts; ++i) { + Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy); + + SrcRegs.push_back(SrcReg); + } + + // Explode the big arguments into smaller chunks. + MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg()); + + Register AshrCstReg = + MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1) + ->getOperand(0) + .getReg(); + Register FullExtensionReg = 0; + Register PartialExtensionReg = 0; + + // Do the operation on each small part. + for (int i = 0; i < NumParts; ++i) { + if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits) + DstRegs.push_back(SrcRegs[i]); + else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) { + assert(PartialExtensionReg && + "Expected to visit partial extension before full"); + if (FullExtensionReg) { + DstRegs.push_back(FullExtensionReg); + continue; + } + DstRegs.push_back(MIRBuilder + .buildInstr(TargetOpcode::G_ASHR, {NarrowTy}, + {PartialExtensionReg, AshrCstReg}) + ->getOperand(0) + .getReg()); + FullExtensionReg = DstRegs.back(); + } else { + DstRegs.push_back( + MIRBuilder + .buildInstr( + TargetOpcode::G_SEXT_INREG, {NarrowTy}, + {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()}) + ->getOperand(0) + .getReg()); + PartialExtensionReg = DstRegs.back(); + } + } + + // Gather the destination registers into the final destination. + Register DstReg = MI.getOperand(0).getReg(); + MIRBuilder.buildMerge(DstReg, DstRegs); + MI.eraseFromParent(); + return Legalized; + } } } @@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg); - Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg : + Register NextResult = I + 1 == NumOps && WideTy == DstTy ? 
DstReg : MRI.createGenericVirtualRegister(WideTy); auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset); @@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx, if (WideSize > DstSize) MIRBuilder.buildTrunc(DstReg, ResultReg); + else if (DstTy.isPointer()) + MIRBuilder.buildIntToPtr(DstReg, ResultReg); MI.eraseFromParent(); return Legalized; @@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_BITREVERSE: { + Observer.changingInstr(MI); + + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits(); + + Register DstExt = MRI.createGenericVirtualRegister(WideTy); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + MI.getOperand(0).setReg(DstExt); + MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt()); + + auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits); + auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt); + MIRBuilder.buildTrunc(DstReg, Shift); + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FPTOSI: case TargetOpcode::G_FPTOUI: - if (TypeIdx != 0) - return UnableToLegalize; Observer.changingInstr(MI); - widenScalarDst(MI, WideTy); + + if (TypeIdx == 0) + widenScalarDst(MI, WideTy); + else + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT); + Observer.changedInstr(MI); return Legalized; - case TargetOpcode::G_SITOFP: if (TypeIdx != 1) return UnableToLegalize; @@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { case TargetOpcode::G_FMUL: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMA: + case TargetOpcode::G_FMAD: case TargetOpcode::G_FNEG: case TargetOpcode::G_FABS: case TargetOpcode::G_FCANONICALIZE: @@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SEXT_INREG: + if (TypeIdx != 0) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC); + Observer.changedInstr(MI); + return Legalized; } } @@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDO: + case TargetOpcode::G_SSUBO: + return lowerSADDO_SSUBO(MI); case TargetOpcode::G_SMULO: case TargetOpcode::G_UMULO: { // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the @@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_FMAD: + return lowerFMad(MI); case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: { Register OldValRes = MI.getOperand(0).getReg(); Register SuccessRes = MI.getOperand(1).getReg(); @@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { LLT DstTy = MRI.getType(DstReg); auto &MMO = **MI.memoperands_begin(); - if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) { - // In the case of G_LOAD, this was a non-extending load already and we're - // about to lower to the 
same instruction. - if (MI.getOpcode() == TargetOpcode::G_LOAD) + if (DstTy.getSizeInBits() == MMO.getSizeInBits()) { + if (MI.getOpcode() == TargetOpcode::G_LOAD) { + // This load needs splitting into power of 2 sized loads. + if (DstTy.isVector()) return UnableToLegalize; + if (isPowerOf2_32(DstTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Our strategy here is to generate anyextending loads for the smaller + // types up to next power-2 result type, and then combine the two larger + // result values together, before truncating back down to the non-pow-2 + // type. + // E.g. v1 = i24 load => + // v2 = i32 load (2 byte) + // v3 = i32 load (1 byte) + // v4 = i32 shl v3, 16 + // v5 = i32 or v4, v2 + // v1 = i24 trunc v5 + // By doing this we generate the correct truncate which should get + // combined away as an artifact with a matching extend. + uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits()); + uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize; + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = MF.getMachineMemOperand( + &MMO, LargeSplitSize / 8, SmallSplitSize / 8); + + LLT PtrTy = MRI.getType(PtrReg); + unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits()); + LLT AnyExtTy = LLT::scalar(AnyExtSize); + Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy); + auto LargeLoad = + MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO); + + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0), + *SmallMMO); + + auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize); + auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt); + auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad); + MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)}); + MI.eraseFromParent(); + return Legalized; + } MIRBuilder.buildLoad(DstReg, PtrReg, MMO); MI.eraseFromParent(); return Legalized; @@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } + case TargetOpcode::G_STORE: { + // Lower a non-power of 2 store into multiple pow-2 stores. + // E.g. split an i24 store into an i16 store + i8 store. + // We do this by first extending the stored value to the next largest power + // of 2 type, and then using truncating stores to store the components. + // By doing this, likewise with G_LOAD, generate an extend that can be + // artifact-combined away instead of leaving behind extracts. + Register SrcReg = MI.getOperand(0).getReg(); + Register PtrReg = MI.getOperand(1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + MachineMemOperand &MMO = **MI.memoperands_begin(); + if (SrcTy.getSizeInBits() != MMO.getSizeInBits()) + return UnableToLegalize; + if (SrcTy.isVector()) + return UnableToLegalize; + if (isPowerOf2_32(SrcTy.getSizeInBits())) + return UnableToLegalize; // Don't know what we're being asked to do. + + // Extend to the next pow-2. + const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits())); + auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg); + + // Obtain the smaller value by shifting away the larger value. 
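As a plain-integer sketch of what this G_STORE lowering produces for a 24-bit value, together with the shift-and-OR reassembly emitted by the matching G_LOAD hunk above (the constant 0xABCDEF and the variable names are illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Val = 0xABCDEF;            // 24-bit payload after G_ANYEXT to s32
  uint32_t Large = Val & 0xFFFF;      // low 16 bits, kept by the 2-byte MMO at offset 0
  uint32_t Small = Val >> 16;         // G_LSHR by LargeSplitSize, 1-byte MMO at offset 2
  // The non-power-of-2 G_LOAD lowering rebuilds the value with G_SHL + G_OR
  // and then truncates back down to 24 bits.
  uint32_t Reloaded = (Small << 16) | Large;
  assert((Reloaded & 0xFFFFFF) == Val);
  return 0;
}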
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits()); + uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize; + auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize); + auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt); + + // Generate the GEP and truncating stores. + LLT PtrTy = MRI.getType(PtrReg); + auto OffsetCst = + MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8); + Register GEPReg = MRI.createGenericVirtualRegister(PtrTy); + auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0)); + + MachineFunction &MF = MIRBuilder.getMF(); + MachineMemOperand *LargeMMO = + MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8); + MachineMemOperand *SmallMMO = + MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8); + MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO); + MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_CTLZ_ZERO_UNDEF: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: @@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return lowerUITOFP(MI, TypeIdx, Ty); case G_SITOFP: return lowerSITOFP(MI, TypeIdx, Ty); + case G_FPTOUI: + return lowerFPTOUI(MI, TypeIdx, Ty); case G_SMIN: case G_SMAX: case G_UMIN: @@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { case G_FMINNUM: case G_FMAXNUM: return lowerFMinNumMaxNum(MI); + case G_UNMERGE_VALUES: + return lowerUnmergeValues(MI); + case TargetOpcode::G_SEXT_INREG: { + assert(MI.getOperand(2).isImm() && "Expected immediate"); + int64_t SizeInBits = MI.getOperand(2).getImm(); + + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(DstReg); + Register TmpRes = MRI.createGenericVirtualRegister(DstTy); + + auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits); + MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes}, {SrcReg, MIBSz->getOperand(0).getReg()}); + MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg}, {TmpRes, MIBSz->getOperand(0).getReg()}); + MI.eraseFromParent(); + return Legalized; + } + case G_SHUFFLE_VECTOR: + return lowerShuffleVector(MI); + case G_DYN_STACKALLOC: + return lowerDynStackAlloc(MI); + case G_EXTRACT: + return lowerExtract(MI); + case G_INSERT: + return lowerInsert(MI); } } @@ -2283,6 +2730,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, } LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + const int NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + + // TODO: Create sequence of extracts. + if (DstTy == NarrowTy) + return UnableToLegalize; + + LLT GCDTy = getGCDType(SrcTy, NarrowTy); + if (DstTy == GCDTy) { + // This would just be a copy of the same unmerge. + // TODO: Create extracts, pad with undef and create intermediate merges. 
+ return UnableToLegalize; + } + + auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg); + const int NumUnmerge = Unmerge->getNumOperands() - 1; + const int PartsPerUnmerge = NumDst / NumUnmerge; + + for (int I = 0; I != NumUnmerge; ++I) { + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + + for (int J = 0; J != PartsPerUnmerge; ++J) + MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg()); + MIB.addUse(Unmerge.getReg(I)); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI, + unsigned TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "not a vector type index"); + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + LLT SrcTy = DstTy.getElementType(); + + int DstNumElts = DstTy.getNumElements(); + int NarrowNumElts = NarrowTy.getNumElements(); + int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts; + LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy); + + SmallVector<Register, 8> ConcatOps; + SmallVector<Register, 8> SubBuildVector; + + Register UndefReg; + if (WidenedDstTy != DstTy) + UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0); + + // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as + // necessary. + // + // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2 + // -> <2 x s16> + // + // %4:_(s16) = G_IMPLICIT_DEF + // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1 + // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4 + // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6 + // %3:_(<3 x s16>) = G_EXTRACT %7, 0 + for (int I = 0; I != NumConcat; ++I) { + for (int J = 0; J != NarrowNumElts; ++J) { + int SrcIdx = NarrowNumElts * I + J; + + if (SrcIdx < DstNumElts) { + Register SrcReg = MI.getOperand(SrcIdx + 1).getReg(); + SubBuildVector.push_back(SrcReg); + } else + SubBuildVector.push_back(UndefReg); + } + + auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector); + ConcatOps.push_back(BuildVec.getReg(0)); + SubBuildVector.clear(); + } + + if (DstTy == WidenedDstTy) + MIRBuilder.buildConcatVectors(DstReg, ConcatOps); + else { + auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps); + MIRBuilder.buildExtract(DstReg, Concat, 0); + } + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { // FIXME: Don't know how to handle secondary types yet. 
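For symmetry with the G_BUILD_VECTOR example above, the G_UNMERGE_VALUES splitting added in this hunk routes the source through the GCD type; in the same comment notation, with NarrowTy = <2 x s32> and illustrative register numbers:

// %0:_(s32), %1:_(s32), %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %4(<4 x s32>)
//   -> (GCD type is <2 x s32>, so two destinations per new unmerge)
// %5:_(<2 x s32>), %6:_(<2 x s32>) = G_UNMERGE_VALUES %4(<4 x s32>)
// %0:_(s32), %1:_(s32) = G_UNMERGE_VALUES %5(<2 x s32>)
// %2:_(s32), %3:_(s32) = G_UNMERGE_VALUES %6(<2 x s32>)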
@@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FDIV: case G_FREM: case G_FMA: + case G_FMAD: case G_FPOW: case G_FEXP: case G_FEXP2: @@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSIN: case G_FSQRT: case G_BSWAP: + case G_BITREVERSE: case G_SDIV: case G_SMIN: case G_SMAX: @@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy); case G_PHI: return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy); + case G_UNMERGE_VALUES: + return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy); + case G_BUILD_VECTOR: + return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy); case G_LOAD: case G_STORE: return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy); @@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, switch (MI.getOpcode()) { case TargetOpcode::G_SHL: { // Short: ShAmt < NewBitSize - auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt); - auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); - auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero. @@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, ResultRegs[1] = Hi.getReg(0); break; } - case TargetOpcode::G_LSHR: { - // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt); - - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); - - // Long: ShAmt >= NewBitSize - auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. - auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part. - - auto Lo = MIRBuilder.buildSelect( - HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); - auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL); - - ResultRegs[0] = Lo.getReg(0); - ResultRegs[1] = Hi.getReg(0); - break; - } + case TargetOpcode::G_LSHR: case TargetOpcode::G_ASHR: { // Short: ShAmt < NewBitSize - auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt); + auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt}); - auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt); - auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack); - auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS); + auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt); + auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack); + auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr); // Long: ShAmt >= NewBitSize - - // Sign of Hi part. - auto HiL = MIRBuilder.buildAShr( - HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1)); - - auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part. + MachineInstrBuilder HiL; + if (MI.getOpcode() == TargetOpcode::G_LSHR) { + HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero. + } else { + auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1); + HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part. 
+ } + auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, + {InH, AmtExcess}); // Lo from Hi part. auto Lo = MIRBuilder.buildSelect( HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL)); @@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, MIRBuilder.setInstr(MI); unsigned Opc = MI.getOpcode(); switch (Opc) { - case TargetOpcode::G_IMPLICIT_DEF: { + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_LOAD: { + if (TypeIdx != 0) + return UnableToLegalize; Observer.changingInstr(MI); moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_STORE: + if (TypeIdx != 0) + return UnableToLegalize; + Observer.changingInstr(MI); + moreElementsVectorSrc(MI, MoreTy, 0); + Observer.changedInstr(MI); + return Legalized; case TargetOpcode::G_AND: case TargetOpcode::G_OR: case TargetOpcode::G_XOR: @@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, moreElementsVectorDst(MI, MoreTy, 0); Observer.changedInstr(MI); return Legalized; + case TargetOpcode::G_UNMERGE_VALUES: { + if (TypeIdx != 1) + return UnableToLegalize; + + LLT DstTy = MRI.getType(MI.getOperand(0).getReg()); + int NumDst = MI.getNumOperands() - 1; + moreElementsVectorSrc(MI, MoreTy, NumDst); + + auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES); + for (int I = 0; I != NumDst; ++I) + MIB.addDef(MI.getOperand(I).getReg()); + + int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits(); + for (int I = NumDst; I != NewNumDst; ++I) + MIB.addDef(MRI.createGenericVirtualRegister(DstTy)); + + MIB.addUse(MI.getOperand(NumDst).getReg()); + MI.eraseFromParent(); + return Legalized; + } case TargetOpcode::G_PHI: return moreElementsVectorPhi(MI, TypeIdx, MoreTy); default: @@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { return UnableToLegalize; } +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + const LLT S64 = LLT::scalar(64); + const LLT S32 = LLT::scalar(32); + + if (SrcTy != S64 && SrcTy != S32) + return UnableToLegalize; + if (DstTy != S32 && DstTy != S64) + return UnableToLegalize; + + // FPTOSI gives same result as FPTOUI for positive signed integers. + // FPTOUI needs to deal with fp values that convert to unsigned integers + // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp. + + APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits()); + APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle() + : APFloat::IEEEdouble(), + APInt::getNullValue(SrcTy.getSizeInBits())); + TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven); + + MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src); + + MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP); + // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on + // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1. 
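The same arithmetic as a self-contained C++ sketch for an f32 -> u32 conversion (the function name is hypothetical; NaN and out-of-range inputs are undefined for G_FPTOUI anyway). The threshold is 2^31 because that is where a signed 32-bit conversion stops being able to represent the result directly:

#include <cstdint>

uint32_t fptouiViaFptosi(float Src) {
  const float Threshold = 2147483648.0f;           // 2^31, the G_FCONSTANT built above
  if (Src < Threshold)                             // G_FCMP ult + G_SELECT
    return (uint32_t)(int32_t)Src;                 // small values: plain G_FPTOSI is exact
  // Large values: convert Src - 2^31 (exact for f32 in this range), then set
  // the top bit back via the G_XOR with 2^31.
  return (uint32_t)(int32_t)(Src - Threshold) ^ 0x80000000u;
}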
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold); + MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub); + MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt); + MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit); + + MachineInstrBuilder FCMP = + MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold); + MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res); + + MI.eraseFromParent(); + return Legalized; +} + static CmpInst::Predicate minMaxToCompare(unsigned Opc) { switch (Opc) { case TargetOpcode::G_SMIN: @@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) { MI.eraseFromParent(); return Legalized; } + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) { + // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c + Register DstReg = MI.getOperand(0).getReg(); + LLT Ty = MRI.getType(DstReg); + unsigned Flags = MI.getFlags(); + + auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2), + Flags); + MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) { + const unsigned NumDst = MI.getNumOperands() - 1; + const Register SrcReg = MI.getOperand(NumDst).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + Register Dst0Reg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(Dst0Reg); + + + // Expand scalarizing unmerge as bitcast to integer and shift. + if (!DstTy.isVector() && SrcTy.isVector() && + SrcTy.getElementType() == DstTy) { + LLT IntTy = LLT::scalar(SrcTy.getSizeInBits()); + Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0); + + MIRBuilder.buildTrunc(Dst0Reg, Cast); + + const unsigned DstSize = DstTy.getSizeInBits(); + unsigned Offset = DstSize; + for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) { + auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset); + auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt); + MIRBuilder.buildTrunc(MI.getOperand(I), Shift); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerShuffleVector(MachineInstr &MI) { + Register DstReg = MI.getOperand(0).getReg(); + Register Src0Reg = MI.getOperand(1).getReg(); + Register Src1Reg = MI.getOperand(2).getReg(); + LLT Src0Ty = MRI.getType(Src0Reg); + LLT DstTy = MRI.getType(DstReg); + LLT IdxTy = LLT::scalar(32); + + const Constant *ShufMask = MI.getOperand(3).getShuffleMask(); + + SmallVector<int, 32> Mask; + ShuffleVectorInst::getShuffleMask(ShufMask, Mask); + + if (DstTy.isScalar()) { + if (Src0Ty.isVector()) + return UnableToLegalize; + + // This is just a SELECT. + assert(Mask.size() == 1 && "Expected a single mask element"); + Register Val; + if (Mask[0] < 0 || Mask[0] > 1) + Val = MIRBuilder.buildUndef(DstTy).getReg(0); + else + Val = Mask[0] == 0 ? Src0Reg : Src1Reg; + MIRBuilder.buildCopy(DstReg, Val); + MI.eraseFromParent(); + return Legalized; + } + + Register Undef; + SmallVector<Register, 32> BuildVec; + LLT EltTy = DstTy.getElementType(); + + for (int Idx : Mask) { + if (Idx < 0) { + if (!Undef.isValid()) + Undef = MIRBuilder.buildUndef(EltTy).getReg(0); + BuildVec.push_back(Undef); + continue; + } + + if (Src0Ty.isScalar()) { + BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg); + } else { + int NumElts = Src0Ty.getNumElements(); + Register SrcVec = Idx < NumElts ? 
Src0Reg : Src1Reg; + int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts; + auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx); + auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK); + BuildVec.push_back(Extract.getReg(0)); + } + } + + MIRBuilder.buildBuildVector(DstReg, BuildVec); + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register AllocSize = MI.getOperand(1).getReg(); + unsigned Align = MI.getOperand(2).getImm(); + + const auto &MF = *MI.getMF(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + + LLT PtrTy = MRI.getType(Dst); + LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits()); + + Register SPReg = TLI.getStackPointerRegisterToSaveRestore(); + auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg); + SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp); + + // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't + // have to generate an extra instruction to negate the alloc and then use + // G_GEP to add the negative offset. + auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize); + if (Align) { + APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true); + AlignMask.negate(); + auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask); + Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst); + } + + SPTmp = MIRBuilder.buildCast(PtrTy, Alloc); + MIRBuilder.buildCopy(SPReg, SPTmp); + MIRBuilder.buildCopy(Dst, SPTmp); + + MI.eraseFromParent(); + return Legalized; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerExtract(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + unsigned Offset = MI.getOperand(2).getImm(); + + LLT DstTy = MRI.getType(Dst); + LLT SrcTy = MRI.getType(Src); + + if (DstTy.isScalar() && + (SrcTy.isScalar() || + (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) { + LLT SrcIntTy = SrcTy; + if (!SrcTy.isScalar()) { + SrcIntTy = LLT::scalar(SrcTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0); + } + + if (Offset == 0) + MIRBuilder.buildTrunc(Dst, Src); + else { + auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset); + auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt); + MIRBuilder.buildTrunc(Dst, Shr); + } + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) { + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register InsertSrc = MI.getOperand(2).getReg(); + uint64_t Offset = MI.getOperand(3).getImm(); + + LLT DstTy = MRI.getType(Src); + LLT InsertTy = MRI.getType(InsertSrc); + + if (InsertTy.isScalar() && + (DstTy.isScalar() || + (DstTy.isVector() && DstTy.getElementType() == InsertTy))) { + LLT IntDstTy = DstTy; + if (!DstTy.isScalar()) { + IntDstTy = LLT::scalar(DstTy.getSizeInBits()); + Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0); + } + + Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0); + if (Offset != 0) { + auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset); + ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0); + } + + APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset, + InsertTy.getSizeInBits()); + + auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal); + auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask); + auto Or = 
MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc); + + MIRBuilder.buildBitcast(Dst, Or); + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult +LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) { + Register Dst0 = MI.getOperand(0).getReg(); + Register Dst1 = MI.getOperand(1).getReg(); + Register LHS = MI.getOperand(2).getReg(); + Register RHS = MI.getOperand(3).getReg(); + const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO; + + LLT Ty = MRI.getType(Dst0); + LLT BoolTy = MRI.getType(Dst1); + + if (IsAdd) + MIRBuilder.buildAdd(Dst0, LHS, RHS); + else + MIRBuilder.buildSub(Dst0, LHS, RHS); + + // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow. + + auto Zero = MIRBuilder.buildConstant(Ty, 0); + + // For an addition, the result should be less than one of the operands (LHS) + // if and only if the other operand (RHS) is negative, otherwise there will + // be overflow. + // For a subtraction, the result should be less than one of the operands + // (LHS) if and only if the other operand (RHS) is (non-zero) positive, + // otherwise there will be overflow. + auto ResultLowerThanLHS = + MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS); + auto ConditionRHS = MIRBuilder.buildICmp( + IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero); + + MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS); + MI.eraseFromParent(); + return Legalized; +} diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp index 6e1de95b3277..70045512fae5 100644 --- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp +++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp @@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const { return true; } const bool AllCovered = (FirstUncovered >= NumTypeIdxs); - LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + if (NumTypeIdxs > 0) + LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered + << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); + return AllCovered; +#else + return true; +#endif +} + +bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const { +#ifndef NDEBUG + if (Rules.empty()) { + LLVM_DEBUG( + dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n"); + return true; + } + const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset(); + if (FirstUncovered < 0) { + LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:" + " user-defined predicate detected\n"); + return true; + } + const bool AllCovered = (FirstUncovered >= NumImmIdxs); + LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered << ", " << (AllCovered ? "OK" : "FAIL") << "\n"); return AllCovered; #else @@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const { LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias << "\n"); OpcodeIdx = getOpcodeIdxForOpcode(Alias); - LLVM_DEBUG(dbgs() << ".. 
opcode " << Alias << " is aliased to " - << RulesForOpcode[OpcodeIdx].getAlias() << "\n"); assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases"); } @@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder( std::initializer_list<unsigned> Opcodes) { unsigned Representative = *Opcodes.begin(); - assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && + assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() && "Initializer list must have at least two opcodes"); for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I) @@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const { ? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc) : Acc; }); + const unsigned NumImmIdxs = std::accumulate( + MCID.opInfo_begin(), MCID.opInfo_end(), 0U, + [](unsigned Acc, const MCOperandInfo &OpInfo) { + return OpInfo.isGenericImm() + ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc) + : Acc; + }); LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode << "): " << NumTypeIdxs << " type ind" - << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n"); + << (NumTypeIdxs == 1 ? "ex" : "ices") << ", " + << NumImmIdxs << " imm ind" + << (NumImmIdxs == 1 ? "ex" : "ices") << "\n"); const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode); if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs)) FailedOpcodes.push_back(Opcode); + else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs)) + FailedOpcodes.push_back(Opcode); } if (!FailedOpcodes.empty()) { errs() << "The following opcodes have ill-defined legalization rules:"; diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp index 3592409710a7..f882ecbf5db3 100644 --- a/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/lib/CodeGen/GlobalISel/Localizer.cpp @@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) { return true; case TargetOpcode::G_GLOBAL_VALUE: { unsigned RematCost = TTI->getGISelRematGlobalCost(); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); unsigned MaxUses = maxUses(RematCost); if (MaxUses == UINT_MAX) return true; // Remats are "free" so always localize. @@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LLVM_DEBUG(dbgs() << "Should localize: " << MI); assert(MI.getDesc().getNumDefs() == 1 && "More than one definition not supported yet"); - unsigned Reg = MI.getOperand(0).getReg(); + Register Reg = MI.getOperand(0).getReg(); // Check if all the users of MI are local. // We are going to invalidation the list of use operands, so we // can't use range iterator. @@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF, LocalizedMI); // Set a new register for the definition. - unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); + Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg)); MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg)); LocalizedMI->getOperand(0).setReg(NewReg); NewVRegIt = @@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) { // many users, but this case may be better served by regalloc improvements. for (MachineInstr *MI : LocalizedInstrs) { - unsigned Reg = MI->getOperand(0).getReg(); + Register Reg = MI->getOperand(0).getReg(); MachineBasicBlock &MBB = *MI->getParent(); // All of the user MIs of this reg. 
SmallPtrSet<MachineInstr *, 32> Users; @@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) { LocalizedSetVecT LocalizedInstrs; bool Changed = localizeInterBlock(MF, LocalizedInstrs); - return Changed |= localizeIntraBlock(LocalizedInstrs); + Changed |= localizeIntraBlock(LocalizedInstrs); + return Changed; } diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b7a73326b85c..df770f6664ca 100644 --- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast<DIExpression>(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return insertInstr(BuildMI(getMF(), getDL(), getTII().get(TargetOpcode::DBG_VALUE), - /*IsIndirect*/ true, Reg, Variable, Expr)); + /*IsIndirect*/ false, Reg, Variable, DIExpr)); } MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, @@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI, assert( cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) && "Expected inlined-at fields to agree"); + // DBG_VALUE insts now carry IR-level indirection in their DIExpression + // rather than encoding it in the instruction itself. + const DIExpression *DIExpr = cast<DIExpression>(Expr); + DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref}); return buildInstr(TargetOpcode::DBG_VALUE) .addFrameIndex(FI) - .addImm(0) + .addReg(0) .addMetadata(Variable) - .addMetadata(Expr); + .addMetadata(DIExpr); } MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, @@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C, MIB.addReg(0U); } - return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr); + return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr); } MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { @@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) { return MIB.addMetadata(Label); } +MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res, + const SrcOp &Size, + unsigned Align) { + assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type"); + auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC); + Res.addDefToMIB(*getMRI(), MIB); + Size.addSrcToMIB(MIB); + MIB.addImm(Align); + return MIB; +} + MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res, int Idx) { assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type"); @@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res, Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch"); assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type"); - auto MIB = buildInstr(TargetOpcode::G_GEP); - Res.addDefToMIB(*getMRI(), MIB); - Op0.addSrcToMIB(MIB); - Op1.addSrcToMIB(MIB); - return MIB; + return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1}); } Optional<MachineInstrBuilder> @@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred, MachineInstrBuilder 
MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}); + return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags); } MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, - const SrcOp &Op1) { + const SrcOp &Op1, + Optional<unsigned> Flags) { - return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}); + return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags); } MachineInstrBuilder @@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr, .addMemOperand(&MMO); } -MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode, - Register OldValRes, - Register Addr, - Register Val, - MachineMemOperand &MMO) { +MachineInstrBuilder MachineIRBuilder::buildAtomicRMW( + unsigned Opcode, const DstOp &OldValRes, + const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + #ifndef NDEBUG - LLT OldValResTy = getMRI()->getType(OldValRes); - LLT AddrTy = getMRI()->getType(Addr); - LLT ValTy = getMRI()->getType(Val); + LLT OldValResTy = OldValRes.getLLTTy(*getMRI()); + LLT AddrTy = Addr.getLLTTy(*getMRI()); + LLT ValTy = Val.getLLTTy(*getMRI()); assert(OldValResTy.isScalar() && "invalid operand type"); assert(AddrTy.isPointer() && "invalid operand type"); assert(ValTy.isValid() && "invalid operand type"); assert(OldValResTy == ValTy && "type mismatch"); + assert(MMO.isAtomic() && "not atomic mem operand"); #endif - return buildInstr(Opcode) - .addDef(OldValRes) - .addUse(Addr) - .addUse(Val) - .addMemOperand(&MMO); + auto MIB = buildInstr(Opcode); + OldValRes.addDefToMIB(*getMRI(), MIB); + Addr.addSrcToMIB(MIB); + Val.addSrcToMIB(MIB); + MIB.addMemOperand(&MMO); + return MIB; } MachineInstrBuilder @@ -865,6 +884,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr, } MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFAdd( + const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder +MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val, + MachineMemOperand &MMO) { + return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val, + MMO); +} + +MachineInstrBuilder MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) { return buildInstr(TargetOpcode::G_FENCE) .addImm(Ordering) @@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc, "input operands do not cover output register"); if (SrcOps.size() == 1) return buildCast(DstOps[0], SrcOps[0]); - if (DstOps[0].getLLTTy(*getMRI()).isVector()) - return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + if (DstOps[0].getLLTTy(*getMRI()).isVector()) { + if (SrcOps[0].getLLTTy(*getMRI()).isVector()) + return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps); + return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps); + } break; } case TargetOpcode::G_EXTRACT_VECTOR_ELT: { diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp index 42be88fcf947..f0e35c65c53b 100644 --- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) { MBPI = 
nullptr; } MIRBuilder.setMF(MF); - MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); + MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI); } void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const { @@ -139,7 +139,7 @@ bool RegBankSelect::repairReg( "need new vreg for each breakdown"); // An empty range of new register means no repairing. - assert(!empty(NewVRegs) && "We should not have to repair"); + assert(!NewVRegs.empty() && "We should not have to repair"); MachineInstr *MI; if (ValMapping.NumBreakDowns == 1) { @@ -154,7 +154,7 @@ bool RegBankSelect::repairReg( std::swap(Src, Dst); assert((RepairPt.getNumInsertPoints() == 1 || - TargetRegisterInfo::isPhysicalRegister(Dst)) && + Register::isPhysicalRegister(Dst)) && "We are about to create several defs for Dst"); // Build the instruction used to repair, then clone it at the right @@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit( // Check if this is a physical or virtual register. Register Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // We are going to split every outgoing edges. // Check that this is possible. // FIXME: The machine representation is currently broken @@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) { // iterator before hand. MachineInstr &MI = *MII++; - // Ignore target-specific instructions: they should use proper regclasses. - if (isTargetSpecificOpcode(MI.getOpcode())) + // Ignore target-specific post-isel instructions: they should use proper + // regclasses. + if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode()) continue; if (!assignInstr(MI)) { diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp index 4e41f338934d..fc9c802693ab 100644 --- a/lib/CodeGen/GlobalISel/RegisterBank.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp @@ -12,6 +12,7 @@ #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/Config/llvm-config.h" +#include "llvm/Support/Debug.h" #define DEBUG_TYPE "registerbank" diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp index 159422e38878..3fcc55286beb 100644 --- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp +++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp @@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const { const RegisterBank * RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI)); assert(Reg && "NoRegister does not have a register bank"); @@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterClass & RegisterBankInfo::getMinimalPhysRegClass(Register Reg, const TargetRegisterInfo &TRI) const { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Reg must be a physreg"); + assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg"); const auto &RegRCIt = PhysRegMinimalRCs.find(Reg); if (RegRCIt != PhysRegMinimalRCs.end()) return *RegRCIt->second; @@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length, ++NumPartialMappingsCreated; auto &PartMapping = MapOfPartialMappings[Hash]; - PartMapping = 
llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank); + PartMapping = std::make_unique<PartialMapping>(StartIdx, Length, RegBank); return *PartMapping; } @@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown, ++NumValueMappingsCreated; auto &ValMapping = MapOfValueMappings[Hash]; - ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns); + ValMapping = std::make_unique<ValueMapping>(BreakDown, NumBreakDowns); return *ValMapping; } @@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const { // mapping, because we use the pointer of the ValueMapping // to hash and we expect them to uniquely identify an instance // of value mapping. - Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End)); + Res = std::make_unique<ValueMapping[]>(std::distance(Begin, End)); unsigned Idx = 0; for (Iterator It = Begin; It != End; ++It, ++Idx) { const ValueMapping *ValMap = *It; @@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl( ++NumInstructionMappingsCreated; auto &InstrMapping = MapOfInstructionMappings[Hash]; - InstrMapping = llvm::make_unique<InstructionMapping>( + InstrMapping = std::make_unique<InstructionMapping>( ID, Cost, OperandsMapping, NumOperands); return *InstrMapping; } @@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { "This mapping is too complex for this function"); iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs = OpdMapper.getVRegs(OpIdx); - if (empty(NewRegs)) { + if (NewRegs.empty()) { LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n"); continue; } @@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) { unsigned RegisterBankInfo::getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (Register::isPhysicalRegister(Reg)) { // The size is not directly available for physical registers. // Instead, we need to access a register class that contains Reg and // get the size of that register class. diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp index 766ea1d60bac..45618d7992ad 100644 --- a/lib/CodeGen/GlobalISel/Utils.cpp +++ b/lib/CodeGen/GlobalISel/Utils.cpp @@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass( const RegisterBankInfo &RBI, MachineInstr &InsertPt, const TargetRegisterClass &RegClass, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass); // If we created a new virtual register because the class is not compatible @@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass( MachineRegisterInfo &MRI, const TargetInstrInfo &TII, const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II, const MachineOperand &RegMO, unsigned OpIdx) { - unsigned Reg = RegMO.getReg(); + Register Reg = RegMO.getReg(); // Assume physical registers are properly constrained. 
- assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "PhysReg not implemented"); + assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented"); const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF); // Some of the target independent instructions, like COPY, may not impose any @@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I, LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n'); assert(MO.isReg() && "Unsupported non-reg operand"); - unsigned Reg = MO.getReg(); + Register Reg = MO.getReg(); // Physical registers don't need to be constrained. - if (TRI.isPhysicalRegister(Reg)) + if (Register::isPhysicalRegister(Reg)) continue; // Register operands with a value of 0 (e.g. predicate operands) don't need @@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg) || - !MRI.use_nodbg_empty(Reg)) + Register Reg = MO.getReg(); + if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg)) return false; } return true; @@ -219,11 +216,33 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg, } Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( - unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) { + unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs, + bool HandleFConstant) { SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes; MachineInstr *MI; - while ((MI = MRI.getVRegDef(VReg)) && - MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) { + auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) { + return Opcode == TargetOpcode::G_CONSTANT || + (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT); + }; + auto GetImmediateValue = [HandleFConstant, + &MRI](const MachineInstr &MI) -> Optional<APInt> { + const MachineOperand &CstVal = MI.getOperand(1); + if (!CstVal.isImm() && !CstVal.isCImm() && + (!HandleFConstant || !CstVal.isFPImm())) + return None; + if (!CstVal.isFPImm()) { + unsigned BitWidth = + MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm()) + : CstVal.getCImm()->getValue(); + assert(Val.getBitWidth() == BitWidth && + "Value bitwidth doesn't match definition type"); + return Val; + } + return CstVal.getFPImm()->getValueAPF().bitcastToAPInt(); + }; + while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) && + LookThroughInstrs) { switch (MI->getOpcode()) { case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXT: @@ -235,7 +254,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( break; case TargetOpcode::COPY: VReg = MI->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VReg)) + if (Register::isPhysicalRegister(VReg)) return None; break; case TargetOpcode::G_INTTOPTR: @@ -245,16 +264,13 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough( return None; } } - if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT || - (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm())) + if (!MI || !IsConstantOpcode(MI->getOpcode())) return None; - const MachineOperand &CstVal = MI->getOperand(1); - unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits(); - APInt Val = CstVal.isImm() ? 
APInt(BitWidth, CstVal.getImm()) - : CstVal.getCImm()->getValue(); - assert(Val.getBitWidth() == BitWidth && - "Value bitwidth doesn't match definition type"); + Optional<APInt> MaybeVal = GetImmediateValue(*MI); + if (!MaybeVal) + return None; + APInt &Val = *MaybeVal; while (!SeenOpcodes.empty()) { std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val(); switch (OpcodeAndSize.first) { @@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg, if (!DstTy.isValid()) return nullptr; while (DefMI->getOpcode() == TargetOpcode::COPY) { - unsigned SrcReg = DefMI->getOperand(1).getReg(); + Register SrcReg = DefMI->getOperand(1).getReg(); auto SrcTy = MRI.getType(SrcReg); if (!SrcTy.isValid() || SrcTy != DstTy) break; @@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI, return false; } +Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1, + uint64_t Imm, + const MachineRegisterInfo &MRI) { + auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI); + if (MaybeOp1Cst) { + LLT Ty = MRI.getType(Op1); + APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true); + switch (Opcode) { + default: + break; + case TargetOpcode::G_SEXT_INREG: + return C1.trunc(Imm).sext(C1.getBitWidth()); + } + } + return None; +} + void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) { AU.addPreserved<StackProtector>(); } + +MVT llvm::getMVTForLLT(LLT Ty) { + if (!Ty.isVector()) + return MVT::getIntegerVT(Ty.getSizeInBits()); + + return MVT::getVectorVT( + MVT::getIntegerVT(Ty.getElementType().getSizeInBits()), + Ty.getNumElements()); +} + +LLT llvm::getLLTForMVT(MVT Ty) { + if (!Ty.isVector()) + return LLT::scalar(Ty.getSizeInBits()); + + return LLT::vector(Ty.getVectorNumElements(), + Ty.getVectorElementType().getSizeInBits()); +} |
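A two-line check of the G_SEXT_INREG constant fold added to Utils.cpp above (C1.trunc(Imm).sext(width)), using plain C++ casts to stand in for the trunc-then-sign-extend; the constants are illustrative and two's-complement narrowing is assumed, as C++20 guarantees:

#include <cassert>
#include <cstdint>

int main() {
  // Folding G_SEXT_INREG %c(s32), 8 truncates to 8 bits and sign-extends back.
  assert((int32_t)(int8_t)0x000000FF == -1);    // bit 7 set -> sign-extends to all ones
  assert((int32_t)(int8_t)0x0000017F == 0x7F);  // bits above bit 7 are simply dropped
  return 0;
}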
