{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  cython,
  fastrlock,
  numpy,
  pytestCheckHook,
  mock,
  setuptools,
  cudaPackages,
  addDriverRunpath,
  symlinkJoin,
}:
let
  inherit (cudaPackages) cudnn;

  # Use a package only when it is actually available on the host platform.
  # lib.mapNullable passes null straight through, so a missing attribute and
  # an unavailable package both end up as null.
  shouldUsePkg = lib.mapNullable (
    pkg: if lib.meta.availableOn stdenv.hostPlatform pkg then pkg else null
  );

  # Some packages are not available on all platforms.
  cuda_nvprof = shouldUsePkg (cudaPackages.cuda_nvprof or null);
  libcutensor = shouldUsePkg (cudaPackages.libcutensor or null);
  nccl = shouldUsePkg (cudaPackages.nccl or null);

  # Let-bound names shadow `with cudaPackages;`, so cuda_nvprof below refers
  # to the filtered binding above, not to the raw attribute; the resulting
  # nulls are then dropped by lib.filter.
  outpaths = lib.filter (outpath: outpath != null) (
    with cudaPackages;
    [
      cuda_cccl # <nv/target>
      cuda_cudart
      cuda_nvcc # <crt/host_defines.h>
      cuda_nvprof
      cuda_nvrtc
      cuda_nvtx
      cuda_profiler_api
      libcublas
      libcufft
      libcurand
      libcusolver
      libcusparse
      # NOTE: libcusparse_lt is too new for CuPy, so we must do without.
      # libcusparse_lt
    ]
  );

  # Join the listed packages, and all of their outputs, into one tree.
  cudatoolkit-joined = symlinkJoin {
    name = "cudatoolkit-joined-${cudaPackages.cudaMajorMinorVersion}";
    paths =
      outpaths ++ lib.concatMap (outpath: map (output: outpath.${output}) outpath.outputs) outpaths;
  };
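
  # For illustration: the joined tree looks like a monolithic CUDA toolkit
  # (bin/nvcc, include/cuda.h, lib/libcublas.so, ...) because each package's
  # outputs (out, dev, lib, static, ...) are all merged into a single prefix.
  # CuPy's build expects this single-root layout (see CUDA_PATH below) rather
  # than the split outputs nixpkgs normally uses.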
in
buildPythonPackage.override { stdenv = cudaPackages.backendStdenv; } rec {
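  # cudaPackages.backendStdenv carries a host compiler compatible with this
  # CUDA release's nvcc; overriding stdenv makes the whole Python build use it.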
pname = "cupy";
version = "13.6.0";
pyproject = true;
src = fetchFromGitHub {
owner = "cupy";
repo = "cupy";
tag = "v${version}";
hash = "sha256-nU3VL0MSCN+mI5m7C5sKAjBSL6ybM6YAk5lJiIDY0ck=";
fetchSubmodules = true;
};
  env.LDFLAGS = toString [
    # Fake libcuda.so (the real one is deployed impurely)
    "-L${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs"
  ];

  # See https://docs.cupy.dev/en/stable/reference/environment.html. Setting both
  # CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in
  # a small amount of thrashing, but it turns out there are a large number of
  # very short builds and a few extremely long ones, so setting both ends up
  # working nicely in practice.
  preConfigure = ''
    export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
    export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
  '';
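
  # CUPY_NUM_BUILD_JOBS controls how many extension-build processes run in
  # parallel; CUPY_NUM_NVCC_THREADS controls parallelism within each nvcc
  # invocation (nvcc's --threads option).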
  build-system = [
    cython
    fastrlock
    setuptools
  ];

  nativeBuildInputs = [
    addDriverRunpath
    cudatoolkit-joined
  ];

  buildInputs = [
    cudatoolkit-joined
    cudnn
    libcutensor
    nccl
  ];

  # NVCC = "${lib.getExe cudaPackages.cuda_nvcc}"; # FIXME: splicing/buildPackages
  # CuPy's setup.py discovers the CUDA toolkit via CUDA_PATH.
  CUDA_PATH = "${cudatoolkit-joined}";

  dependencies = [
    fastrlock
    numpy
  ];

  nativeCheckInputs = [
    pytestCheckHook
    mock
  ];
  # Tests require a GPU, and the CUDA driver is not accessible from the build
  # sandbox.
  doCheck = false;

  postFixup = ''
    find "$out" -type f \( -name '*.so' -or -name '*.so.*' \) | while read -r lib; do
      addDriverRunpath "$lib"
    done
  '';
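
  # addDriverRunpath prepends the driver library directory
  # (/run/opengl-driver/lib on NixOS) to each shared object's DT_RUNPATH, so
  # the stub libcuda.so used at link time is superseded by the host's real
  # driver at run time.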
  enableParallelBuilding = true;

  meta = {
    description = "NumPy/SciPy-compatible array library for GPU-accelerated computing with Python";
    homepage = "https://cupy.dev/";
    changelog = "https://github.com/cupy/cupy/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    maintainers = [ ];
  };
}
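
# Usage sketch (hypothetical paths): this file is meant to be called from the
# Python package set via callPackage, e.g.
#   cupy = callPackage ../development/python-modules/cupy { };
# after which it can be built with:
#   nix-build -A python3Packages.cupy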