Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp  145
1 file changed, 133 insertions, 12 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a26bbc77f248..c539c8617d99 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
@@ -938,19 +939,20 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// In case of strict alignment, avoid an excessive number of byte wide stores.
MaxStoresPerMemsetOptSize = 8;
- MaxStoresPerMemset = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemsetOptSize : 32;
+ MaxStoresPerMemset =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemsetOptSize : 32;
MaxGluedStoresPerMemcpy = 4;
MaxStoresPerMemcpyOptSize = 4;
- MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
- ? MaxStoresPerMemcpyOptSize : 16;
+ MaxStoresPerMemcpy =
+ Subtarget->requiresStrictAlign() ? MaxStoresPerMemcpyOptSize : 16;
- MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
+ MaxStoresPerMemmoveOptSize = 4;
+ MaxStoresPerMemmove = 4;
MaxLoadsPerMemcmpOptSize = 4;
- MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
- ? MaxLoadsPerMemcmpOptSize : 8;
+ MaxLoadsPerMemcmp =
+ Subtarget->requiresStrictAlign() ? MaxLoadsPerMemcmpOptSize : 8;
setStackPointerRegisterToSaveRestore(AArch64::SP);
@@ -1426,6 +1428,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
}
+ if (Subtarget->hasMOPS() && Subtarget->hasMTE()) {
+ // Only required for llvm.aarch64.mops.memset.tag
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
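
For context, the custom INTRINSIC_W_CHAIN action registered above is exercised by IR that calls llvm.aarch64.mops.memset.tag. A minimal IRBuilder sketch of creating such a call follows; the helper name emitMemsetTag and the surrounding setup are illustrative assumptions, not part of this patch. The intrinsic takes a destination pointer, an i8 fill value and an i64 size, and returns the updated destination pointer (the DstWb result selected by the lowering added below).

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicsAArch64.h"

// Illustrative only: build a call to llvm.aarch64.mops.memset.tag.
// Dst is the (tagged) destination pointer, Val the i8 fill byte, Size the
// i64 byte count; the call returns the written-back destination pointer.
static llvm::Value *emitMemsetTag(llvm::IRBuilder<> &B, llvm::Value *Dst,
                                  llvm::Value *Val, llvm::Value *Size) {
  return B.CreateIntrinsic(llvm::Intrinsic::aarch64_mops_memset_tag,
                           /*Types=*/{}, {Dst, Val, Size});
}
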
@@ -2201,7 +2208,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::INSR)
MAKE_CASE(AArch64ISD::PTEST)
MAKE_CASE(AArch64ISD::PTRUE)
- MAKE_CASE(AArch64ISD::PFALSE)
MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
@@ -2268,6 +2274,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::UADDLP)
MAKE_CASE(AArch64ISD::CALL_RVMARKER)
MAKE_CASE(AArch64ISD::ASSERT_ZEXT_BOOL)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET)
+ MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING)
+ MAKE_CASE(AArch64ISD::MOPS_MEMCOPY)
+ MAKE_CASE(AArch64ISD::MOPS_MEMMOVE)
}
#undef MAKE_CASE
return nullptr;
@@ -3746,6 +3756,10 @@ SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
if (OpVT != MVT::f16 && OpVT != MVT::bf16)
return SDValue();
+ // Bitcasts between f16 and bf16 are legal.
+ if (ArgVT == MVT::f16 || ArgVT == MVT::bf16)
+ return Op;
+
assert(ArgVT == MVT::i16);
SDLoc DL(Op);
@@ -4056,6 +4070,39 @@ static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
}
+SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
+ SelectionDAG &DAG) const {
+ unsigned IntNo = Op.getConstantOperandVal(1);
+ switch (IntNo) {
+ default:
+ return SDValue(); // Don't custom lower most intrinsics.
+ case Intrinsic::aarch64_mops_memset_tag: {
+ auto Node = cast<MemIntrinsicSDNode>(Op.getNode());
+ SDLoc DL(Op);
+ SDValue Chain = Node->getChain();
+ SDValue Dst = Op.getOperand(2);
+ SDValue Val = Op.getOperand(3);
+ Val = DAG.getAnyExtOrTrunc(Val, DL, MVT::i64);
+ SDValue Size = Op.getOperand(4);
+ auto Alignment = Node->getMemOperand()->getAlign();
+ bool IsVol = Node->isVolatile();
+ auto DstPtrInfo = Node->getPointerInfo();
+
+ const auto &SDI =
+ static_cast<const AArch64SelectionDAGInfo &>(DAG.getSelectionDAGInfo());
+ SDValue MS =
+ SDI.EmitMOPS(AArch64ISD::MOPS_MEMSET_TAGGING, DAG, DL, Chain, Dst, Val,
+ Size, Alignment, IsVol, DstPtrInfo, MachinePointerInfo{});
+
+ // MOPS_MEMSET_TAGGING has 3 results (DstWb, SizeWb, Chain) whereas the
+ // intrinsic has 2. So hide SizeWb using MERGE_VALUES. Otherwise
+ // LowerOperationWrapper will complain that the number of results has
+ // changed.
+ return DAG.getMergeValues({MS.getValue(0), MS.getValue(2)}, DL);
+ }
+ }
+}
+
SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
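
As a reading aid, an ISD::INTRINSIC_W_CHAIN node carries its chain at operand 0, the intrinsic id at operand 1, and the call arguments after that, which is why the lowering above reads operands 1 through 4. A hedged helper restating that unpacking; unpackMemsetTag and MemsetTagOperands are not part of the patch.

#include <cassert>
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/IntrinsicsAArch64.h"

// Not part of the patch: unpack the fixed operand layout of the
// INTRINSIC_W_CHAIN node produced for llvm.aarch64.mops.memset.tag.
// Operand 0 is the chain, operand 1 the intrinsic id, operands 2-4 the
// destination, fill value and size.
struct MemsetTagOperands {
  llvm::SDValue Chain, Dst, Val, Size;
};

static MemsetTagOperands unpackMemsetTag(llvm::SDValue Op) {
  assert(Op.getOpcode() == llvm::ISD::INTRINSIC_W_CHAIN &&
         Op.getConstantOperandVal(1) ==
             llvm::Intrinsic::aarch64_mops_memset_tag);
  return {Op.getOperand(0), Op.getOperand(2), Op.getOperand(3),
          Op.getOperand(4)};
}
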
@@ -5123,6 +5170,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::MULHU:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
/*OverrideNEON=*/true);
+ case ISD::INTRINSIC_W_CHAIN:
+ return LowerINTRINSIC_W_CHAIN(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN:
return LowerINTRINSIC_WO_CHAIN(Op, DAG);
case ISD::ATOMIC_STORE:
@@ -6475,12 +6524,18 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
unsigned CallOpc = AArch64ISD::CALL;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
- // be expanded to the call, directly followed by a special marker sequence.
- // Use the CALL_RVMARKER to do that.
+ // be expanded to the call, directly followed by a special marker sequence and
+ // a call to an ObjC library function. Use CALL_RVMARKER to do that.
if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
assert(!IsTailCall &&
"tail calls cannot be marked with clang.arc.attachedcall");
CallOpc = AArch64ISD::CALL_RVMARKER;
+
+ // Add a target global address for the retainRV/claimRV runtime function
+ // just before the call target.
+ Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
+ auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT);
+ Ops.insert(Ops.begin() + 1, GA);
}
// Returns a chain and a flag for retval copy to use.
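
For reference, the "clang.arc.attachedcall" bundle handled in this branch carries the ObjC runtime function (e.g. objc_retainAutoreleasedReturnValue) as its operand; that is what getAttachedARCFunction retrieves and what is inserted ahead of the call target above. A minimal sketch of how a frontend might attach such a bundle with IRBuilder; the helper name and setup are assumptions, not from this patch.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"

// Illustrative only: emit `call ... [ "clang.arc.attachedcall"(@RuntimeFn) ]`,
// the IR shape that makes hasAttachedCallOpBundle() return true and selects
// CALL_RVMARKER above. RuntimeFn would be e.g.
// objc_retainAutoreleasedReturnValue or
// objc_unsafeClaimAutoreleasedReturnValue.
static llvm::CallInst *emitAttachedCall(llvm::IRBuilder<> &B,
                                        llvm::FunctionCallee Callee,
                                        llvm::Function *RuntimeFn) {
  llvm::Value *BundleArgs[] = {RuntimeFn};
  llvm::OperandBundleDef Bundle("clang.arc.attachedcall", BundleArgs);
  return B.CreateCall(Callee, /*Args=*/{}, {Bundle});
}
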
@@ -9985,8 +10040,9 @@ SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
// The only legal i1 vectors are SVE vectors, so we can use SVE-specific
// lowering code.
if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
+ // We can handle the zero case during isel.

if (ConstVal->isZero())
- return DAG.getNode(AArch64ISD::PFALSE, dl, VT);
+ return Op;
if (ConstVal->isOne())
return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
}
@@ -11869,6 +11925,19 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MONonTemporal;
return true;
}
+ case Intrinsic::aarch64_mops_memset_tag: {
+ Value *Dst = I.getArgOperand(0);
+ Value *Val = I.getArgOperand(1);
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(Val->getType());
+ Info.ptrVal = Dst;
+ Info.offset = 0;
+ Info.align = I.getParamAlign(0).valueOrOne();
+ Info.flags = MachineMemOperand::MOStore;
+ // The size of the memory being operated on is unknown at this point
+ Info.size = MemoryLocation::UnknownSize;
+ return true;
+ }
default:
break;
}
@@ -15092,7 +15161,7 @@ static bool isAllInactivePredicate(SDValue N) {
while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST)
N = N.getOperand(0);
- return N.getOpcode() == AArch64ISD::PFALSE;
+ return ISD::isConstantSplatVectorAllZeros(N.getNode());
}
static bool isAllActivePredicate(SelectionDAG &DAG, SDValue N) {
@@ -15393,6 +15462,52 @@ static SDValue performIntrinsicCombine(SDNode *N,
return SDValue();
}
+static bool isCheapToExtend(const SDValue &N) {
+ unsigned OC = N->getOpcode();
+ return OC == ISD::LOAD || OC == ISD::MLOAD ||
+ ISD::isConstantSplatVectorAllZeros(N.getNode());
+}
+
+static SDValue
+performSignExtendSetCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG) {
+ // If we have (sext (setcc A B)) and A and B are cheap to extend,
+ // we can move the sext into the arguments and have the same result. For
+ // example, if A and B are both loads, we can make those extending loads and
+ // avoid an extra instruction. This pattern appears often in VLS code
+ // generation where the inputs to the setcc have a different size to the
+ // instruction that wants to use the result of the setcc.
+ assert(N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC);
+ const SDValue SetCC = N->getOperand(0);
+
+ const SDValue CCOp0 = SetCC.getOperand(0);
+ const SDValue CCOp1 = SetCC.getOperand(1);
+ if (!CCOp0->getValueType(0).isInteger() ||
+ !CCOp1->getValueType(0).isInteger())
+ return SDValue();
+
+ ISD::CondCode Code =
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get();
+
+ ISD::NodeType ExtType =
+ isSignedIntSetCC(Code) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ if (isCheapToExtend(SetCC.getOperand(0)) &&
+ isCheapToExtend(SetCC.getOperand(1))) {
+ const SDValue Ext1 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp0);
+ const SDValue Ext2 =
+ DAG.getNode(ExtType, SDLoc(N), N->getValueType(0), CCOp1);
+
+ return DAG.getSetCC(
+ SDLoc(SetCC), N->getValueType(0), Ext1, Ext2,
+ cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get());
+ }
+
+ return SDValue();
+}
+
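
The combine above amounts to rewriting sext(setcc(A, B, cc)) into setcc(ext(A), ext(B), cc), with the extension kind chosen by the signedness of cc. A condensed restatement with hypothetical naming, assuming the caller has already verified that both operands are cheap to extend; widenSetCC is not part of the patch.

#include "llvm/CodeGen/SelectionDAG.h"

// Condensed restatement (not the patch itself): widen a setcc to WideVT by
// extending its operands, using sign- or zero-extension to match the
// signedness of the comparison.
static llvm::SDValue widenSetCC(llvm::SelectionDAG &DAG, const llvm::SDLoc &DL,
                                llvm::SDValue SetCC, llvm::EVT WideVT) {
  llvm::ISD::CondCode CC =
      llvm::cast<llvm::CondCodeSDNode>(SetCC.getOperand(2))->get();
  unsigned ExtOpc = llvm::ISD::isSignedIntSetCC(CC) ? llvm::ISD::SIGN_EXTEND
                                                    : llvm::ISD::ZERO_EXTEND;
  llvm::SDValue A = DAG.getNode(ExtOpc, DL, WideVT, SetCC.getOperand(0));
  llvm::SDValue B = DAG.getNode(ExtOpc, DL, WideVT, SetCC.getOperand(1));
  return DAG.getSetCC(DL, WideVT, A, B, CC);
}
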
static SDValue performExtendCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -15411,6 +15526,12 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
}
+
+ if (N->getValueType(0).isFixedLengthVector() &&
+ N->getOpcode() == ISD::SIGN_EXTEND &&
+ N->getOperand(0)->getOpcode() == ISD::SETCC)
+ return performSignExtendSetCCCombine(N, DCI, DAG);
+
return SDValue();
}