Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 145 |
1 file changed, 133 insertions, 12 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a26bbc77f248..c539c8617d99 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
 #include "llvm/Analysis/ObjCARCUtil.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/Analysis.h"
@@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
   // In case of strict alignment, avoid an excessive number of byte wide stores.
   MaxStoresPerMemsetOptSize = 8;
-  MaxStoresPerMemset = Subtarget->requiresStrictAlign()
-                           ? MaxStoresPerMemsetOptSize : 32;
+  MaxStoresPerMemset =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
 
   MaxGluedStoresPerMemcpy = 4;
   MaxStoresPerMemcpyOptSize = 4;
-  MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
-                           ? MaxStoresPerMemcpyOptSize : 16;
+  MaxStoresPerMemcpy =
+      Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
 
-  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+  MaxStoresPerMemmoveOptSize = 4;
+  MaxStoresPerMemmove = 4;
 
   MaxLoadsPerMemcmpOptSize = 4;
-  MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
-                          ? MaxLoadsPerMemcmpOptSize : 8;
+  MaxLoadsPerMemcmp =
+      Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
 
   setStackPointerRegisterToSaveRestore(AArch64::SP);
 
@@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
   }
 
+  if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
+    // Only required for llvm.aarch64.mops.memset.tag
+    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+  }
+
   PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
 }
 
@@ -2201,7 +2208,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::INSR)
     MAKE_CASE(AArch64ISD::PTEST)
     MAKE_CASE(AArch64ISD::PTRUE)
-    MAKE_CASE(AArch64ISD::PFALSE)
     MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -2268,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::UADDLP)
     MAKE_CASE(AArch64ISD::CALL_RVMARKER)
     MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
+    MAKE_CASE(AArch64ISD::MOPS_MEMSET)
+    MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
+    MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
+    MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
   }
 #undef MAKE_CASE
   return nullptr;
@@ -3746,6 +3756,10 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
   if (OpVT != MVT::f16 && OpVT != MVT::bf16)
     return SDValue();
 
+  // Bitcasts between f16 and bf16 are legal.
+  if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
+    return Op;
+
   assert(ArgVT == MVT::i16);
   SDLoc DL(Op);
 
@@ -4056,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
   return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
 }
 
+SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+                                                      SelectionDAG &DAG) const {
+  unsigned IntNo = Op.getConstantOperandVal(1);
+  switch (IntNo) {
+  default:
+    return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::aarch64_mops_memset_tag: {
+    auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
+    SDLoc DL(Op);
+    SDValue Chain = Node->getChain();
+    SDValue Dst = Op.getOperand(2);
+    SDValue Val = Op.getOperand(3);
+    Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+    SDValue Size = Op.getOperand(4);
+    auto Alignment = Node->getMemOperand()->getAlign();
+    bool IsVol = Node->isVolatile();
+    auto DstPtrInfo = Node->getPointerInfo();
+
+    const auto &SDI =
+        static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
+    SDValue MS =
+        SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
+                     Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
+
+    // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
+    // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
+    // LowerOperationWrapper will complain that the number of results has
+    // changed.
+    return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
+  }
+  }
+}
+
 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                        SelectionDAG &DAG) const {
   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -5123,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::MULHU:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
                                /*OverrideNEON=*/true);
+  case ISD::INTRINSIC_W_CHAIN:
+    return LowerINTRINSIC_W_CHAIN(Op, DAG);
   case ISD::INTRINSIC_WO_CHAIN:
     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::ATOMIC_STORE:
@@ -6475,12 +6524,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   unsigned CallOpc = AArch64ISD::CALL;
 
   // Calls with operand bundle "clang.arc.attachedcall" are special. They should
-  // be expanded to the call, directly followed by a special marker sequence.
-  // Use the CALL_RVMARKER to do that.
+  // be expanded to the call, directly followed by a special marker sequence and
+  // a call to an ObjC library function. Use CALL_RVMARKER to do that.
   if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
     assert(!IsTailCall &&
            "tail calls cannot be marked with clang.arc.attachedcall");
     CallOpc = AArch64ISD::CALL_RVMARKER;
+
+    // Add a target global address for the retainRV/claimRV runtime function
+    // just before the call target.
+    Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+    auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
+    Ops.insert(Ops.begin() + 1, GA);
   }
 
   // Returns a chain and a flag for retval copy to use.
@@ -9985,8 +10040,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
   // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
   // lowering code.
   if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+    // We can handle the zero case during isel.
     if (ConstVal->isZero())
-      return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
+      return Op;
     if (ConstVal->isOne())
       return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
   }
@@ -11869,6 +11925,19 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
     Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
     return true;
   }
+  case Intrinsic::aarch64_mops_memset_tag: {
+    Value *Dst = I.getArgOperand(0);
+    Value *Val = I.getArgOperand(1);
+    Info.opc = ISD::INTRINSIC_W_CHAIN;
+    Info.memVT = MVT::getVT(Val->getType());
+    Info.ptrVal = Dst;
+    Info.offset = 0;
+    Info.align = I.getParamAlign(0).valueOrOne();
+    Info.flags = MachineMemOperand::MOStore;
+    // The size of the memory being operated on is unknown at this point
+    Info.size = MemoryLocation::UnknownSize;
+    return true;
+  }
   default:
     break;
   }
@@ -15092,7 +15161,7 @@ static bool isAllInactivePredicate(SDValue N) {
   while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
     N = N.getOperand(0);
 
-  return N.getOpcode() == AArch64ISD::PFALSE;
+  return ISD::isConstantSplatVectorAllZeros(N.getNode());
 }
 
 static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
@@ -15393,6 +15462,52 @@ static SDValue performIntrinsicCombine(SDNode *N,
   return SDValue();
 }
 
+static bool isCheapToExtend(const SDValue &N) {
+  unsigned OC = N->getOpcode();
+  return OC == ISD::LOAD || OC == ISD::MLOAD ||
+         ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static SDValue
+performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+                              SelectionDAG &DAG) {
+  // If we have (sext (setcc A B)) and A and B are cheap to extend,
+  // we can move the sext into the arguments and have the same result. For
+  // example, if A and B are both loads, we can make those extending loads and
+  // avoid an extra instruction. This pattern appears often in VLS code
+  // generation where the inputs to the setcc have a different size to the
+  // instruction that wants to use the result of the setcc.
+  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+         N->getOperand(0)->getOpcode() == ISD::SETCC);
+  const SDValue SetCC = N->getOperand(0);
+
+  const SDValue CCOp0 = SetCC.getOperand(0);
+  const SDValue CCOp1 = SetCC.getOperand(1);
+  if (!CCOp0->getValueType(0).isInteger() ||
+      !CCOp1->getValueType(0).isInteger())
+    return SDValue();
+
+  ISD::CondCode Code =
+      cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
+
+  ISD::NodeType ExtType =
+      isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+  if (isCheapToExtend(SetCC.getOperand(0)) &&
+      isCheapToExtend(SetCC.getOperand(1))) {
+    const SDValue Ext1 =
+        DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
+    const SDValue Ext2 =
+        DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
+
+    return DAG.getSetCC(
+        SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
+        cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
+  }
+
+  return SDValue();
+}
+
 static SDValue performExtendCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     SelectionDAG &DAG) {
@@ -15411,6 +15526,12 @@ static SDValue performExtendCombine(SDNode *N,
     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0),
                        NewABD);
   }
+
+  if (N->getValueType(0).isFixedLengthVector() &&
+      N->getOpcode() == ISD::SIGN_EXTEND &&
+      N->getOperand(0)->getOpcode() == ISD::SETCC)
+    return performSignExtendSetCCCombine(N, DCI, DAG);
+
   return SDValue();
 }
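
For context on the new INTRINSIC_W_CHAIN path: llvm.aarch64.mops.memset.tag backs the FEAT_MOPS + FEAT_MTE "memset with tag setting" sequence (SETGP/SETGM/SETGE), and the lowering above routes it through AArch64SelectionDAGInfo::EmitMOPS. A minimal sketch of source that should reach this path, assuming a toolchain whose arm_acle.h exposes the ACLE builtin __arm_mops_memset_tag and a target along the lines of -march=armv8.8-a+memtag (the exact flag spelling is illustrative):

```cpp
// Sketch only: assumes arm_acle.h provides __arm_mops_memset_tag (ACLE,
// guarded by __ARM_FEATURE_MOPS && __ARM_FEATURE_MEMORY_TAGGING) and a
// target with both FEAT_MOPS and FEAT_MTE enabled.
#include <arm_acle.h>
#include <cstddef>

void *zero_and_tag(void *p, std::size_t n) {
  // Stores the byte 0 to n bytes and writes the matching MTE allocation
  // tags in one pass. The frontend emits llvm.aarch64.mops.memset.tag,
  // which the patch custom-lowers to AArch64ISD::MOPS_MEMSET_TAGGING.
  return __arm_mops_memset_tag(p, 0, n);
}
```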
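The LowerBITCAST change treats f16 <-> bf16 bitcasts as no-ops, since both types occupy the low 16 bits of an FP register; previously the lowering only expected an i16 source. A small sketch of code that produces such a bitcast, assuming the toolchain supports the _Float16 and __bf16 types on this target:

```cpp
#include <cstring>

// Reinterpret the bits of an IEEE half as bfloat16. memcpy between
// equal-sized scalars is the portable spelling of a bitcast; with this
// patch the resulting (bitcast f16 to bf16) node is recognized as legal
// and lowered to nothing, instead of bouncing through an i16 GPR.
__bf16 as_bf16(_Float16 x) {
  __bf16 y;
  std::memcpy(&y, &x, sizeof y);
  return y;
}
```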
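On the PFALSE removal: an all-false SVE predicate is now represented as the generic all-zeros splat and matched during instruction selection, rather than built as a dedicated AArch64ISD::PFALSE node, which is why isAllInactivePredicate switches to ISD::isConstantSplatVectorAllZeros. One way such a predicate arises in source, as a sketch assuming arm_sve.h and an SVE-enabled target:

```cpp
#include <arm_sve.h>

// svpfalse_b() yields an all-false predicate. It previously lowered to
// the dedicated PFALSE target node; after this change it is a generic
// zero splat that isel matches to the pfalse instruction.
svbool_t make_inactive_mask(void) { return svpfalse_b(); }
```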
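performSignExtendSetCCCombine moves a sign extension of a compare result into the compare's operands when those operands are cheap to extend (loads, masked loads, or zero splats), picking sign or zero extension to match the signedness of the condition code. A sketch of the kind of loop that can produce sext(setcc(load, load)) once vectorized for fixed-length SVE (the codegen claims here are illustrative, not verified output):

```cpp
// Each i8 compare result is widened to an i32 0/-1 mask, i.e.
// sext(setcc(load, load)). With the combine, the narrow loads can become
// extending loads and the compare runs at the wider element type,
// avoiding a separate extend of the compare result.
void widen_cmp_mask(const signed char *a, const signed char *b,
                    int *out, int n) {
  for (int i = 0; i < n; ++i)
    out[i] = (a[i] < b[i]) ? -1 : 0;
}
```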
