{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  cython,
  fastrlock,
  numpy,
  pytestCheckHook,
  mock,
  setuptools,
  cudaPackages,
  addDriverRunpath,
  symlinkJoin,
}:
let
  inherit (cudaPackages) cudnn;

  # Use a package only when it is actually available on the host platform.
  # lib.mapNullable passes null straight through, so a missing attribute and
  # an unavailable package both end up as null.
  shouldUsePkg = lib.mapNullable (
    pkg: if lib.meta.availableOn stdenv.hostPlatform pkg then pkg else null
  );

  # Some packages are not available on all platforms.
  cuda_nvprof = shouldUsePkg (cudaPackages.cuda_nvprof or null);
  libcutensor = shouldUsePkg (cudaPackages.libcutensor or null);
  nccl = shouldUsePkg (cudaPackages.nccl or null);

  # Let-bound names shadow `with cudaPackages;`, so cuda_nvprof below refers
  # to the filtered binding above, not to the raw attribute; the resulting
  # nulls are then dropped by lib.filter.
  outpaths = lib.filter (outpath: outpath != null) (
    with cudaPackages;
    [
      cuda_cccl # <nv/target>
      cuda_cudart
      cuda_nvcc # <crt/host_defines.h>
      cuda_nvprof
      cuda_nvrtc
      cuda_nvtx
      cuda_profiler_api
      libcublas
      libcufft
      libcurand
      libcusolver
      libcusparse
      # NOTE: libcusparse_lt is too new for CuPy, so we must do without.
      # libcusparse_lt
    ]
  );

  # Join the listed packages, and all of their outputs, into one tree.
  cudatoolkit-joined = symlinkJoin {
    name = "cudatoolkit-joined-${cudaPackages.cudaMajorMinorVersion}";
    paths =
      outpaths ++ lib.concatMap (outpath: map (output: outpath.${output}) outpath.outputs) outpaths;
  };
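
  # For illustration: the joined tree looks like a monolithic CUDA toolkit
  # (bin/nvcc, include/cuda.h, lib/libcublas.so, ...) because each package's
  # outputs (out, dev, lib, static, ...) are all merged into a single prefix.
  # CuPy's build expects this single-root layout (see CUDA_PATH below) rather
  # than the split outputs nixpkgs normally uses.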
in
buildPythonPackage.override { stdenv = cudaPackages.backendStdenv; } rec {
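  # cudaPackages.backendStdenv carries a host compiler compatible with this
  # CUDA release's nvcc; overriding stdenv makes the whole Python build use it.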
pname = "cupy";
version = "13.6.0";
pyproject = true;
src = fetchFromGitHub {
owner = "cupy";
repo = "cupy";
tag = "v${version}";
hash = "sha256-nU3VL0MSCN+mI5m7C5sKAjBSL6ybM6YAk5lJiIDY0ck=";
fetchSubmodules = true;
};
  env.LDFLAGS = toString [
    # Fake libcuda.so (the real one is deployed impurely)
    "-L${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs"
  ];

  # See https://docs.cupy.dev/en/stable/reference/environment.html. Setting both
  # CUPY_NUM_BUILD_JOBS and CUPY_NUM_NVCC_THREADS to NIX_BUILD_CORES results in
  # a small amount of thrashing, but it turns out there are a large number of
  # very short builds and a few extremely long ones, so setting both ends up
  # working nicely in practice.
  preConfigure = ''
    export CUPY_NUM_BUILD_JOBS="$NIX_BUILD_CORES"
    export CUPY_NUM_NVCC_THREADS="$NIX_BUILD_CORES"
  '';
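
  # CUPY_NUM_BUILD_JOBS controls how many extension-build processes run in
  # parallel; CUPY_NUM_NVCC_THREADS controls parallelism within each nvcc
  # invocation (nvcc's --threads option).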
  build-system = [
    cython
    fastrlock
    setuptools
  ];

  nativeBuildInputs = [
    addDriverRunpath
    cudatoolkit-joined
  ];

  buildInputs = [
    cudatoolkit-joined
    cudnn
    libcutensor
    nccl
  ];

  # NVCC = "${lib.getExe cudaPackages.cuda_nvcc}"; # FIXME: splicing/buildPackages
  # CuPy's setup.py discovers the CUDA toolkit via CUDA_PATH.
  CUDA_PATH = "${cudatoolkit-joined}";

  dependencies = [
    fastrlock
    numpy
  ];

  nativeCheckInputs = [
    pytestCheckHook
    mock
  ];
  # Tests require a GPU, and the CUDA driver is not accessible from the build
  # sandbox.
  doCheck = false;

  postFixup = ''
    find "$out" -type f \( -name '*.so' -or -name '*.so.*' \) | while read -r lib; do
      addDriverRunpath "$lib"
    done
  '';
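
  # addDriverRunpath prepends the driver library directory
  # (/run/opengl-driver/lib on NixOS) to each shared object's DT_RUNPATH, so
  # the stub libcuda.so used at link time is superseded by the host's real
  # driver at run time.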
  enableParallelBuilding = true;

  meta = {
    description = "NumPy/SciPy-compatible array library for GPU-accelerated computing with Python";
    homepage = "https://cupy.dev/";
    changelog = "https://github.com/cupy/cupy/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    platforms = [
      "aarch64-linux"
      "x86_64-linux"
    ];
    maintainers = [ ];
  };
}
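
# Usage sketch (hypothetical paths): this file is meant to be called from the
# Python package set via callPackage, e.g.
#   cupy = callPackage ../development/python-modules/cupy { };
# after which it can be built with:
#   nix-build -A python3Packages.cupy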