From 9bebd9e72d6b552fcfd3d1e6716eca6563944f42 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Thu, 14 Dec 2023 22:19:02 +0000 Subject: tree-wide: cudaPackages should not break default eval cudaPackages: guard expressions against null values --- pkgs/development/python-modules/jaxlib/default.nix | 3 ++- pkgs/development/python-modules/torch/default.nix | 13 ++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'pkgs/development/python-modules') diff --git a/pkgs/development/python-modules/jaxlib/default.nix b/pkgs/development/python-modules/jaxlib/default.nix index 27b9e61fbc82..d8dc4d67a594 100644 --- a/pkgs/development/python-modules/jaxlib/default.nix +++ b/pkgs/development/python-modules/jaxlib/default.nix @@ -64,7 +64,8 @@ let # aarch64-darwin is broken because of https://github.com/bazelbuild/rules_cc/pull/136 # however even with that fix applied, it doesn't work for everyone: # https://github.com/NixOS/nixpkgs/pull/184395#issuecomment-1207287129 - broken = stdenv.isDarwin; + # NOTE: We always build with NCCL; if it is unsupported, then our build is broken. + broken = stdenv.isDarwin || nccl.meta.unsupported; }; cudatoolkit_joined = symlinkJoin { diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 8fb227cbd36b..802d1a920141 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -7,7 +7,8 @@ magma, magma-hip, magma-cuda-static, - useSystemNccl ? true, + # Use the system NCCL as long as it is supported. + useSystemNccl ? !cudaPackages.nccl.meta.unsupported, MPISupport ? false, mpi, buildDocs ? false, @@ -57,6 +58,7 @@ let inherit (lib) attrsets lists strings trivial; inherit (cudaPackages) cudaFlags cudnn nccl; + ncclSupported = cudaSupport && !cudaPackages.nccl.meta.unsupported; setBool = v: if v then "1" else "0"; @@ -121,6 +123,7 @@ let "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]); "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages != cudaPackages); + "Requested system NCCL, but cudaPackages.nccl is not supported" = useSystemNccl && !ncclSupported; }; in buildPythonPackage rec { pname = "torch"; @@ -273,9 +276,9 @@ in buildPythonPackage rec { PYTORCH_BUILD_VERSION = version; PYTORCH_BUILD_NUMBER = 0; - USE_NCCL = setBool (cudaSupport && cudaPackages ? nccl); - USE_SYSTEM_NCCL = setBool useSystemNccl; # don't build pytorch's third_party NCCL - USE_STATIC_NCCL = setBool useSystemNccl; + USE_NCCL = setBool (cudaSupport && ncclSupported); + USE_SYSTEM_NCCL = setBool (cudaSupport && useSystemNccl); # don't build pytorch's third_party NCCL + USE_STATIC_NCCL = setBool (cudaSupport && useSystemNccl); # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) @@ -363,7 +366,7 @@ in buildPythonPackage rec { ] ++ lists.optionals (cudaPackages ? cudnn) [ cudnn.dev cudnn.lib - ] ++ lists.optionals (useSystemNccl && cudaPackages ? nccl) [ + ] ++ lists.optionals (useSystemNccl && ncclSupported) [ # Some platforms do not support NCCL (i.e., Jetson) nccl.dev # Provides nccl.h AND a static copy of NCCL! ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ -- cgit v1.2.3 From 39cab2b768ef7a4b692f2d1b1516a7d44957e1c5 Mon Sep 17 00:00:00 2001 From: Connor Baker Date: Tue, 9 Jan 2024 22:31:43 +0000 Subject: python3Packages.torch: only build with NCCL when targeting CUDA on a supported platform --- pkgs/development/python-modules/torch/default.nix | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'pkgs/development/python-modules') diff --git a/pkgs/development/python-modules/torch/default.nix b/pkgs/development/python-modules/torch/default.nix index 802d1a920141..8a499d763a4a 100644 --- a/pkgs/development/python-modules/torch/default.nix +++ b/pkgs/development/python-modules/torch/default.nix @@ -7,8 +7,8 @@ magma, magma-hip, magma-cuda-static, - # Use the system NCCL as long as it is supported. - useSystemNccl ? !cudaPackages.nccl.meta.unsupported, + # Use the system NCCL as long as we're targeting CUDA on a supported platform. + useSystemNccl ? (cudaSupport && !cudaPackages.nccl.meta.unsupported), MPISupport ? false, mpi, buildDocs ? false, @@ -58,7 +58,6 @@ let inherit (lib) attrsets lists strings trivial; inherit (cudaPackages) cudaFlags cudnn nccl; - ncclSupported = cudaSupport && !cudaPackages.nccl.meta.unsupported; setBool = v: if v then "1" else "0"; @@ -123,7 +122,6 @@ let "Unsupported CUDA version" = cudaSupport && !(builtins.elem cudaPackages.cudaMajorVersion [ "11" "12" ]); "MPI cudatoolkit does not match cudaPackages.cudatoolkit" = MPISupport && cudaSupport && (mpi.cudatoolkit != cudaPackages.cudatoolkit); "Magma cudaPackages does not match cudaPackages" = cudaSupport && (effectiveMagma.cudaPackages != cudaPackages); - "Requested system NCCL, but cudaPackages.nccl is not supported" = useSystemNccl && !ncclSupported; }; in buildPythonPackage rec { pname = "torch"; @@ -276,9 +274,11 @@ in buildPythonPackage rec { PYTORCH_BUILD_VERSION = version; PYTORCH_BUILD_NUMBER = 0; - USE_NCCL = setBool (cudaSupport && ncclSupported); - USE_SYSTEM_NCCL = setBool (cudaSupport && useSystemNccl); # don't build pytorch's third_party NCCL - USE_STATIC_NCCL = setBool (cudaSupport && useSystemNccl); + # In-tree builds of NCCL are not supported. + # Use NCCL when cudaSupport is enabled and nccl is available. + USE_NCCL = setBool useSystemNccl; + USE_SYSTEM_NCCL = USE_NCCL; + USE_STATIC_NCCL = USE_NCCL; # Suppress a weird warning in mkl-dnn, part of ideep in pytorch # (upstream seems to have fixed this in the wrong place?) @@ -366,7 +366,7 @@ in buildPythonPackage rec { ] ++ lists.optionals (cudaPackages ? cudnn) [ cudnn.dev cudnn.lib - ] ++ lists.optionals (useSystemNccl && ncclSupported) [ + ] ++ lists.optionals useSystemNccl [ # Some platforms do not support NCCL (i.e., Jetson) nccl.dev # Provides nccl.h AND a static copy of NCCL! ] ++ lists.optionals (strings.versionOlder cudaVersion "11.8") [ -- cgit v1.2.3