diff options
Diffstat (limited to 'clang/lib/Driver/Driver.cpp')
| -rw-r--r-- | clang/lib/Driver/Driver.cpp | 264 |
1 files changed, 188 insertions, 76 deletions
diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 2e4ebc10e9ba..3bfddeefc7b2 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -103,39 +103,58 @@ using namespace clang; using namespace llvm::opt; static llvm::Optional<llvm::Triple> -getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { - if (Args.hasArg(options::OPT_offload_EQ)) { - auto HIPOffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); +getOffloadTargetTriple(const Driver &D, const ArgList &Args) { + auto OffloadTargets = Args.getAllArgValues(options::OPT_offload_EQ); + // Offload compilation flow does not support multiple targets for now. We + // need the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) + // to support multiple tool chains first. + switch (OffloadTargets.size()) { + default: + D.Diag(diag::err_drv_only_one_offload_target_supported); + return llvm::None; + case 0: + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; + return llvm::None; + case 1: + break; + } + return llvm::Triple(OffloadTargets[0]); +} - // HIP compilation flow does not support multiple targets for now. We need - // the HIPActionBuilder (and possibly the CudaActionBuilder{,Base}too) to - // support multiple tool chains first. - switch (HIPOffloadTargets.size()) { - default: - D.Diag(diag::err_drv_only_one_offload_target_supported_in) << "HIP"; - return llvm::None; - case 0: - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << ""; - return llvm::None; - case 1: - break; - } - llvm::Triple TT(HIPOffloadTargets[0]); - if (TT.getArch() == llvm::Triple::amdgcn && - TT.getVendor() == llvm::Triple::AMD && - TT.getOS() == llvm::Triple::AMDHSA) - return TT; - if (TT.getArch() == llvm::Triple::spirv64 && - TT.getVendor() == llvm::Triple::UnknownVendor && - TT.getOS() == llvm::Triple::UnknownOS) +static llvm::Optional<llvm::Triple> +getNVIDIAOffloadTargetTriple(const Driver &D, const ArgList &Args, + const llvm::Triple &HostTriple) { + if (!Args.hasArg(options::OPT_offload_EQ)) { + return llvm::Triple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + } + auto TT = getOffloadTargetTriple(D, Args); + if (TT && (TT->getArch() == llvm::Triple::spirv32 || + TT->getArch() == llvm::Triple::spirv64)) { + if (Args.hasArg(options::OPT_emit_llvm)) return TT; - D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) - << HIPOffloadTargets[0]; + D.Diag(diag::err_drv_cuda_offload_only_emit_bc); return llvm::None; } - - static const llvm::Triple T("amdgcn-amd-amdhsa"); // Default HIP triple. - return T; + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); + return llvm::None; +} +static llvm::Optional<llvm::Triple> +getHIPOffloadTargetTriple(const Driver &D, const ArgList &Args) { + if (!Args.hasArg(options::OPT_offload_EQ)) { + return llvm::Triple("amdgcn-amd-amdhsa"); // Default HIP triple. + } + auto TT = getOffloadTargetTriple(D, Args); + if (!TT) + return llvm::None; + if (TT->getArch() == llvm::Triple::amdgcn && + TT->getVendor() == llvm::Triple::AMD && + TT->getOS() == llvm::Triple::AMDHSA) + return TT; + if (TT->getArch() == llvm::Triple::spirv64) + return TT; + D.Diag(diag::err_drv_invalid_or_unsupported_offload_target) << TT->str(); + return llvm::None; } // static @@ -719,17 +738,17 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (IsCuda) { const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); const llvm::Triple &HostTriple = HostTC->getTriple(); - StringRef DeviceTripleStr; auto OFK = Action::OFK_Cuda; - DeviceTripleStr = - HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" : "nvptx-nvidia-cuda"; - llvm::Triple CudaTriple(DeviceTripleStr); + auto CudaTriple = + getNVIDIAOffloadTargetTriple(*this, C.getInputArgs(), HostTriple); + if (!CudaTriple) + return; // Use the CUDA and host triples as the key into the ToolChains map, // because the device toolchain we create depends on both. - auto &CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; + auto &CudaTC = ToolChains[CudaTriple->str() + "/" + HostTriple.str()]; if (!CudaTC) { CudaTC = std::make_unique<toolchains::CudaToolChain>( - *this, CudaTriple, *HostTC, C.getInputArgs(), OFK); + *this, *CudaTriple, *HostTC, C.getInputArgs(), OFK); } C.addOffloadDeviceToolChain(CudaTC.get(), OFK); } else if (IsHIP) { @@ -773,21 +792,9 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (HasValidOpenMPRuntime) { llvm::StringMap<const char *> FoundNormalizedTriples; for (const char *Val : OpenMPTargets->getValues()) { - llvm::Triple TT(Val); + llvm::Triple TT(ToolChain::getOpenMPTriple(Val)); std::string NormalizedName = TT.normalize(); - // We want to expand the shortened versions of the triples passed in to - // the values used for the bitcode libraries for convenience. - if (TT.getVendor() == llvm::Triple::UnknownVendor || - TT.getOS() == llvm::Triple::UnknownOS) { - if (TT.getArch() == llvm::Triple::nvptx) - TT = llvm::Triple("nvptx-nvidia-cuda"); - else if (TT.getArch() == llvm::Triple::nvptx64) - TT = llvm::Triple("nvptx64-nvidia-cuda"); - else if (TT.getArch() == llvm::Triple::amdgcn) - TT = llvm::Triple("amdgcn-amd-amdhsa"); - } - // Make sure we don't have a duplicate triple. auto Duplicate = FoundNormalizedTriples.find(NormalizedName); if (Duplicate != FoundNormalizedTriples.end()) { @@ -3823,6 +3830,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Builder to be used to build offloading actions. OffloadingActionBuilder OffloadBuilder(C, Args, Inputs); + // Offload kinds active for this compilation. + unsigned OffloadKinds = Action::OFK_None; + if (C.hasOffloadToolChain<Action::OFK_OpenMP>()) + OffloadKinds |= Action::OFK_OpenMP; + // Construct the actions to perform. HeaderModulePrecompileJobAction *HeaderModuleAction = nullptr; ActionList LinkerInputs; @@ -3843,14 +3855,16 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; for (phases::ID Phase : PL) { // Add any offload action the host action depends on. - Current = OffloadBuilder.addDeviceDependencesToHostAction( - Current, InputArg, Phase, PL.back(), FullPL); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = OffloadBuilder.addDeviceDependencesToHostAction( + Current, InputArg, Phase, PL.back(), FullPL); if (!Current) break; @@ -3883,6 +3897,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } + // Try to build the offloading actions and add the result as a dependency + // to the host. + if (Args.hasArg(options::OPT_fopenmp_new_driver)) + Current = BuildOffloadingActions(C, Args, I, Current); + // FIXME: Should we include any prior module file outputs as inputs of // later actions in the same command line? @@ -3900,8 +3919,9 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, // Use the current host action in any of the offloading actions, if // required. - if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) - break; + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; if (Current->getType() == types::TY_Nothing) break; @@ -3912,7 +3932,11 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Actions.push_back(Current); // Add any top level actions generated for offloading. - OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); + else if (Current) + Current->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } // Add a link action if necessary. @@ -3924,16 +3948,23 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, } if (!LinkerInputs.empty()) { - if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) - LinkerInputs.push_back(Wrapper); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + if (Action *Wrapper = OffloadBuilder.makeHostLinkAction()) + LinkerInputs.push_back(Wrapper); Action *LA; // Check if this Linker Job should emit a static library. if (ShouldEmitStaticLibrary(Args)) { LA = C.MakeAction<StaticLibJobAction>(LinkerInputs, types::TY_Image); + } else if (Args.hasArg(options::OPT_fopenmp_new_driver) && + OffloadKinds != Action::OFK_None) { + LA = C.MakeAction<LinkerWrapperJobAction>(LinkerInputs, types::TY_Image); + LA->propagateHostOffloadInfo(OffloadKinds, + /*BoundArch=*/nullptr); } else { LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image); } - LA = OffloadBuilder.processHostLinkAction(LA); + if (!Args.hasArg(options::OPT_fopenmp_new_driver)) + LA = OffloadBuilder.processHostLinkAction(LA); Actions.push_back(LA); } @@ -4019,6 +4050,68 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Args.ClaimAllArgs(options::OPT_cuda_compile_host_device); } +Action *Driver::BuildOffloadingActions(Compilation &C, + llvm::opt::DerivedArgList &Args, + const InputTy &Input, + Action *HostAction) const { + if (!isa<CompileJobAction>(HostAction)) + return HostAction; + + SmallVector<const ToolChain *, 2> ToolChains; + ActionList DeviceActions; + + types::ID InputType = Input.first; + const Arg *InputArg = Input.second; + + auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; ++TI) + ToolChains.push_back(TI->second); + + for (unsigned I = 0; I < ToolChains.size(); ++I) + DeviceActions.push_back(C.MakeAction<InputAction>(*InputArg, InputType)); + + if (DeviceActions.empty()) + return HostAction; + + auto PL = types::getCompilationPhases(*this, Args, InputType); + + for (phases::ID Phase : PL) { + if (Phase == phases::Link) { + assert(Phase == PL.back() && "linking must be final compilation step."); + break; + } + + auto TC = ToolChains.begin(); + for (Action *&A : DeviceActions) { + A = ConstructPhaseAction(C, Args, Phase, A, Action::OFK_OpenMP); + + if (isa<CompileJobAction>(A)) { + HostAction->setCannotBeCollapsedWithNextDependentAction(); + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BourdArch=*/nullptr, Action::OFK_OpenMP); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + A = C.MakeAction<OffloadAction>(HDep, DDep); + } + ++TC; + } + } + + OffloadAction::DeviceDependences DDeps; + + auto TC = ToolChains.begin(); + for (Action *A : DeviceActions) { + DDeps.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + TC++; + } + + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, DDeps); + return C.MakeAction<OffloadAction>(HDep, DDeps); +} + Action *Driver::ConstructPhaseAction( Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input, Action::OffloadKind TargetDeviceOffloadKind) const { @@ -4110,6 +4203,12 @@ Action *Driver::ConstructPhaseAction( Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; return C.MakeAction<BackendJobAction>(Input, Output); } + if (isUsingLTO(/* IsOffload */ true) && + TargetDeviceOffloadKind == Action::OFK_OpenMP) { + types::ID Output = + Args.hasArg(options::OPT_S) ? types::TY_LTO_IR : types::TY_LTO_BC; + return C.MakeAction<BackendJobAction>(Input, Output); + } if (Args.hasArg(options::OPT_emit_llvm) || (TargetDeviceOffloadKind == Action::OFK_HIP && Args.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc, @@ -4181,7 +4280,7 @@ void Driver::BuildJobs(Compilation &C) const { ArchNames.insert(A->getValue()); // Set of (Action, canonical ToolChain triple) pairs we've built jobs for. - std::map<std::pair<const Action *, std::string>, InputInfo> CachedResults; + std::map<std::pair<const Action *, std::string>, InputInfoList> CachedResults; for (Action *A : C.getActions()) { // If we are linking an image for multiple archs then the linker wants // -arch_multiple and -final_output <final image name>. Unfortunately, this @@ -4638,10 +4737,11 @@ static std::string GetTriplePlusArchString(const ToolChain *TC, return TriplePlusArch; } -InputInfo Driver::BuildJobsForAction( +InputInfoList Driver::BuildJobsForAction( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { std::pair<const Action *, std::string> ActionTC = { A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; @@ -4649,17 +4749,18 @@ InputInfo Driver::BuildJobsForAction( if (CachedResult != CachedResults.end()) { return CachedResult->second; } - InputInfo Result = BuildJobsForActionNoCache( + InputInfoList Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } -InputInfo Driver::BuildJobsForActionNoCache( +InputInfoList Driver::BuildJobsForActionNoCache( Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + std::map<std::pair<const Action *, std::string>, InputInfoList> + &CachedResults, Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); @@ -4697,7 +4798,7 @@ InputInfo Driver::BuildJobsForActionNoCache( // If there is a single device option, just generate the job for it. if (OA->hasSingleDeviceDependence()) { - InputInfo DevA; + InputInfoList DevA; OA->doOnEachDeviceDependence([&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { DevA = @@ -4715,7 +4816,7 @@ InputInfo Driver::BuildJobsForActionNoCache( OA->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4724,6 +4825,17 @@ InputInfo Driver::BuildJobsForActionNoCache( A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); + + // We may have already built this action as a part of the offloading + // toolchain, return the cached input if so. + std::pair<const Action *, std::string> ActionTC = { + OA->getHostDependence(), + GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + if (CachedResults.find(ActionTC) != CachedResults.end()) { + InputInfoList Inputs = CachedResults[ActionTC]; + Inputs.append(OffloadDependencesInputInfo); + return Inputs; + } } if (const InputAction *IA = dyn_cast<InputAction>(A)) { @@ -4733,9 +4845,9 @@ InputInfo Driver::BuildJobsForActionNoCache( Input.claim(); if (Input.getOption().matches(options::OPT_INPUT)) { const char *Name = Input.getValue(); - return InputInfo(A, Name, /* _BaseInput = */ Name); + return {InputInfo(A, Name, /* _BaseInput = */ Name)}; } - return InputInfo(A, &Input, /* _BaseInput = */ ""); + return {InputInfo(A, &Input, /* _BaseInput = */ "")}; } if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { @@ -4765,7 +4877,7 @@ InputInfo Driver::BuildJobsForActionNoCache( const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); if (!T) - return InputInfo(); + return {InputInfo()}; if (BuildingForOffloadDevice && A->getOffloadingDeviceKind() == Action::OFK_OpenMP) { @@ -4792,7 +4904,7 @@ InputInfo Driver::BuildJobsForActionNoCache( cast<OffloadAction>(OA)->doOnEachDependence( /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { - OffloadDependencesInputInfo.push_back(BuildJobsForAction( + OffloadDependencesInputInfo.append(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, DepA->getOffloadingDeviceKind())); @@ -4806,7 +4918,7 @@ InputInfo Driver::BuildJobsForActionNoCache( // FIXME: Clean this up. bool SubJobAtTopLevel = AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); - InputInfos.push_back(BuildJobsForAction( + InputInfos.append(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, CachedResults, A->getOffloadingDeviceKind())); } @@ -4890,8 +5002,8 @@ InputInfo Driver::BuildJobsForActionNoCache( Arch = BoundArch; CachedResults[{A, GetTriplePlusArchString(UI.DependentToolChain, Arch, - UI.DependentOffloadKind)}] = - CurI; + UI.DependentOffloadKind)}] = { + CurI}; } // Now that we have all the results generated, select the one that should be @@ -4900,9 +5012,9 @@ InputInfo Driver::BuildJobsForActionNoCache( A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; assert(CachedResults.find(ActionTC) != CachedResults.end() && "Result does not exist??"); - Result = CachedResults[ActionTC]; + Result = CachedResults[ActionTC].front(); } else if (JA->getType() == types::TY_Nothing) - Result = InputInfo(A, BaseInput); + Result = {InputInfo(A, BaseInput)}; else { // We only have to generate a prefix for the host if this is not a top-level // action. @@ -4955,7 +5067,7 @@ InputInfo Driver::BuildJobsForActionNoCache( C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), LinkingOutput); } - return Result; + return {Result}; } const char *Driver::getDefaultImageName() const { |
