diff options
Diffstat (limited to 'llvm/lib/Transforms/Vectorize/VectorCombine.cpp')
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 39 |
1 file changed, 20 insertions, 19 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fe8d74c43dfd..ea9cbed0117b 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -74,7 +74,7 @@ public:
                 const DataLayout *DL, TTI::TargetCostKind CostKind,
                 bool TryEarlyFoldsOnly)
       : F(F), Builder(F.getContext(), InstSimplifyFolder(*DL)), TTI(TTI),
-        DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind),
+        DT(DT), AA(AA), AC(AC), DL(DL), CostKind(CostKind), SQ(*DL),
         TryEarlyFoldsOnly(TryEarlyFoldsOnly) {}
 
   bool run();
@@ -88,6 +88,7 @@ private:
   AssumptionCache &AC;
   const DataLayout *DL;
   TTI::TargetCostKind CostKind;
+  const SimplifyQuery SQ;
 
   /// If true, only perform beneficial early IR transforms. Do not introduce new
   /// vector operations.
@@ -1185,17 +1186,18 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   // Fold the vector constants in the original vectors into a new base vector to
   // get more accurate cost modelling.
   Value *NewVecC = nullptr;
-  TargetFolder Folder(*DL);
   if (CI)
-    NewVecC = Folder.FoldCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
+    NewVecC = simplifyCmpInst(CI->getPredicate(), VecCs[0], VecCs[1], SQ);
   else if (UO)
     NewVecC =
-        Folder.FoldUnOpFMF(UO->getOpcode(), VecCs[0], UO->getFastMathFlags());
+        simplifyUnOp(UO->getOpcode(), VecCs[0], UO->getFastMathFlags(), SQ);
   else if (BO)
-    NewVecC = Folder.FoldBinOp(BO->getOpcode(), VecCs[0], VecCs[1]);
-  else if (II->arg_size() == 2)
-    NewVecC = Folder.FoldBinaryIntrinsic(II->getIntrinsicID(), VecCs[0],
-                                         VecCs[1], II->getType(), &I);
+    NewVecC = simplifyBinOp(BO->getOpcode(), VecCs[0], VecCs[1], SQ);
+  else if (II)
+    NewVecC = simplifyCall(II, II->getCalledOperand(), VecCs, SQ);
+
+  if (!NewVecC)
+    return false;
 
   // Get cost estimate for the insert element. This cost will factor into
   // both sequences.
@@ -1203,6 +1205,7 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   InstructionCost NewCost =
       ScalarOpCost + TTI.getVectorInstrCost(Instruction::InsertElement, VecTy,
                                             CostKind, *Index, NewVecC);
+
   for (auto [Idx, Op, VecC, Scalar] : enumerate(Ops, VecCs, ScalarOps)) {
     if (!Scalar || (II && isVectorIntrinsicWithScalarOpAtArg(
                               II->getIntrinsicID(), Idx, &TTI)))
@@ -1247,15 +1250,6 @@ bool VectorCombine::scalarizeOpOrCmp(Instruction &I) {
   if (auto *ScalarInst = dyn_cast<Instruction>(Scalar))
     ScalarInst->copyIRFlags(&I);
 
-  // Create a new base vector if the constant folding failed.
-  if (!NewVecC) {
-    if (CI)
-      NewVecC = Builder.CreateCmp(CI->getPredicate(), VecCs[0], VecCs[1]);
-    else if (UO || BO)
-      NewVecC = Builder.CreateNAryOp(Opcode, VecCs);
-    else
-      NewVecC = Builder.CreateIntrinsic(VecTy, II->getIntrinsicID(), VecCs);
-  }
   Value *Insert = Builder.CreateInsertElement(NewVecC, Scalar, *Index);
   replaceValue(I, *Insert);
   return true;
@@ -1835,12 +1829,19 @@ bool VectorCombine::scalarizeExtExtract(Instruction &I) {
       IntegerType::get(SrcTy->getContext(), DL->getTypeSizeInBits(SrcTy)));
   uint64_t SrcEltSizeInBits = DL->getTypeSizeInBits(SrcTy->getElementType());
   uint64_t EltBitMask = (1ull << SrcEltSizeInBits) - 1;
+  uint64_t TotalBits = DL->getTypeSizeInBits(SrcTy);
+  Type *PackedTy = IntegerType::get(SrcTy->getContext(), TotalBits);
+  Value *Mask = ConstantInt::get(PackedTy, EltBitMask);
   for (User *U : Ext->users()) {
     auto *Extract = cast<ExtractElementInst>(U);
     uint64_t Idx =
         cast<ConstantInt>(Extract->getIndexOperand())->getZExtValue();
-    Value *LShr = Builder.CreateLShr(ScalarV, Idx * SrcEltSizeInBits);
-    Value *And = Builder.CreateAnd(LShr, EltBitMask);
+    uint64_t ShiftAmt =
+        DL->isBigEndian()
+            ? (TotalBits - SrcEltSizeInBits - Idx * SrcEltSizeInBits)
+            : (Idx * SrcEltSizeInBits);
+    Value *LShr = Builder.CreateLShr(ScalarV, ShiftAmt);
+    Value *And = Builder.CreateAnd(LShr, Mask);
     U->replaceAllUsesWith(And);
   }
   return true;
