Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp   88
1 file changed, 81 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 7d6845b287bc..bebf032b5535 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -14,9 +14,12 @@
 #include "AMDGPU.h"
 #include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/IntrinsicsAMDGPU.h"
 #include "llvm/InitializePasses.h"
 
 #define DEBUG_TYPE "amdgpu-annotate-uniform"
 
@@ -29,6 +32,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass,
                        public InstVisitor<AMDGPUAnnotateUniformValues> {
   LegacyDivergenceAnalysis *DA;
   MemorySSA *MSSA;
+  AliasAnalysis *AA;
   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
   bool isEntryFunc;
 
@@ -44,6 +48,7 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<LegacyDivergenceAnalysis>();
     AU.addRequired<MemorySSAWrapperPass>();
+    AU.addRequired<AAResultsWrapperPass>();
     AU.setPreservesAll();
   }
 
@@ -58,6 +63,7 @@ INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                       "Add AMDGPU uniform metadata", false, false)
 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                     "Add AMDGPU uniform metadata", false, false)
 
@@ -70,9 +76,79 @@ static void setNoClobberMetadata(Instruction *I) {
   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
 }
 
-bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
-  const MemoryAccess *MA = MSSA->getWalker()->getClobberingMemoryAccess(Load);
-  return !MSSA->isLiveOnEntryDef(MA);
+bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst *Load) {
+  MemorySSAWalker *Walker = MSSA->getWalker();
+  SmallVector<MemoryAccess *> WorkList{Walker->getClobberingMemoryAccess(Load)};
+  SmallSet<MemoryAccess *, 8> Visited;
+  MemoryLocation Loc(MemoryLocation::get(Load));
+
+  const auto isReallyAClobber = [this, Load](MemoryDef *Def) -> bool {
+    Instruction *DefInst = Def->getMemoryInst();
+    LLVM_DEBUG(dbgs() << "  Def: " << *DefInst << '\n');
+
+    if (isa<FenceInst>(DefInst))
+      return false;
+
+    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+      switch (II->getIntrinsicID()) {
+      case Intrinsic::amdgcn_s_barrier:
+      case Intrinsic::amdgcn_wave_barrier:
+        return false;
+      default:
+        break;
+      }
+    }
+
+    // Ignore atomics not aliasing with the original load, any atomic is a
+    // universal MemoryDef from MSSA's point of view too, just like a fence.
+    const auto checkNoAlias = [this, Load](auto I) -> bool {
+      return I && AA->isNoAlias(I->getPointerOperand(),
+                                Load->getPointerOperand());
+    };
+
+    if (checkNoAlias(dyn_cast<AtomicCmpXchgInst>(DefInst)) ||
+        checkNoAlias(dyn_cast<AtomicRMWInst>(DefInst)))
+      return false;
+
+    return true;
+  };
+
+  LLVM_DEBUG(dbgs() << "Checking clobbering of: " << *Load << '\n');
+
+  // Start with a nearest dominating clobbering access, it will be either
+  // live on entry (nothing to do, load is not clobbered), MemoryDef, or
+  // MemoryPhi if several MemoryDefs can define this memory state. In that
+  // case add all Defs to WorkList and continue going up and checking all
+  // the definitions of this memory location until the root. When all the
+  // defs are exhausted and came to the entry state we have no clobber.
+  // Along the scan ignore barriers and fences which are considered clobbers
+  // by the MemorySSA, but not really writing anything into the memory.
+  while (!WorkList.empty()) {
+    MemoryAccess *MA = WorkList.pop_back_val();
+    if (!Visited.insert(MA).second)
+      continue;
+
+    if (MSSA->isLiveOnEntryDef(MA))
+      continue;
+
+    if (MemoryDef *Def = dyn_cast<MemoryDef>(MA)) {
+      if (isReallyAClobber(Def)) {
+        LLVM_DEBUG(dbgs() << "  -> load is clobbered\n");
+        return true;
+      }
+
+      WorkList.push_back(
+          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
+      continue;
+    }
+
+    const MemoryPhi *Phi = cast<MemoryPhi>(MA);
+    for (auto &Use : Phi->incoming_values())
+      WorkList.push_back(cast<MemoryAccess>(&Use));
+  }
+
+  LLVM_DEBUG(dbgs() << "  -> no clobber\n");
+  return false;
 }
 
 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
@@ -84,9 +160,6 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
   Value *Ptr = I.getPointerOperand();
   if (!DA->isUniform(Ptr))
     return;
-  auto isGlobalLoad = [&](LoadInst &Load)->bool {
-    return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-  };
   // We're tracking up to the Function boundaries, and cannot go beyond because
   // of FunctionPass restrictions. We can ensure that is memory not clobbered
   // for memory operations that are live in to entry points only.
@@ -99,7 +172,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
   }
 
   bool NotClobbered = false;
-  bool GlobalLoad = isGlobalLoad(I);
+  bool GlobalLoad = I.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
   if (PtrI)
     NotClobbered = GlobalLoad && !isClobberedInFunction(&I);
   else if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
@@ -139,6 +212,7 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
 
   DA = &getAnalysis<LegacyDivergenceAnalysis>();
   MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
+  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   isEntryFunc = AMDGPU::isEntryFunctionCC(F.getCallingConv());
 
   visit(F);
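
The added function above shows the new walk in the context of the pass. For readers who want to reuse the same pattern outside this pass, the following is a minimal, self-contained sketch, not the committed code: the function name mayBeClobberedFromEntry is made up here, the AMDGPU barrier-intrinsic special case is omitted for brevity, and MemorySSA plus alias analysis are assumed to have been computed already for the function containing the load.

// Sketch of the worklist walk over MemorySSA: a load is treated as clobbered
// only if some MemoryDef between function entry and the load is a real store,
// where fences and atomics proven not to alias the load do not count.
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

static bool mayBeClobberedFromEntry(LoadInst *Load, MemorySSA &MSSA,
                                    AliasAnalysis &AA) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  SmallVector<MemoryAccess *, 8> WorkList{
      Walker->getClobberingMemoryAccess(Load)};
  SmallSet<MemoryAccess *, 8> Visited;
  MemoryLocation Loc = MemoryLocation::get(Load);

  while (!WorkList.empty()) {
    MemoryAccess *MA = WorkList.pop_back_val();
    // Skip already-seen accesses and the live-on-entry definition.
    if (!Visited.insert(MA).second || MSSA.isLiveOnEntryDef(MA))
      continue;

    if (auto *Def = dyn_cast<MemoryDef>(MA)) {
      Instruction *DefInst = Def->getMemoryInst();
      // Fences order memory but write nothing; atomics that provably do not
      // alias the load cannot clobber it either.
      bool Harmless = isa<FenceInst>(DefInst);
      if (auto *RMW = dyn_cast<AtomicRMWInst>(DefInst))
        Harmless |= AA.isNoAlias(RMW->getPointerOperand(),
                                 Load->getPointerOperand());
      if (auto *CX = dyn_cast<AtomicCmpXchgInst>(DefInst))
        Harmless |= AA.isNoAlias(CX->getPointerOperand(),
                                 Load->getPointerOperand());
      if (!Harmless)
        return true; // A real store may clobber the load.

      // Ignore this def and keep walking toward the entry state.
      WorkList.push_back(
          Walker->getClobberingMemoryAccess(Def->getDefiningAccess(), Loc));
      continue;
    }

    // A MemoryPhi merges several definitions; every incoming one must be
    // proven harmless as well.
    for (auto &U : cast<MemoryPhi>(MA)->incoming_values())
      WorkList.push_back(cast<MemoryAccess>(&U));
  }
  return false; // Every path reached live-on-entry: no clobber in function.
}

In the pass itself these analyses are supplied by MemorySSAWrapperPass and AAResultsWrapperPass, as wired up in getAnalysisUsage and runOnFunction in the diff above.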
