1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
|
# Python package expression for DeepGEMM (github.com/deepseek-ai/DeepGEMM).
#
# The package is built with the same stdenv as `torch` and is flagged as
# broken whenever `cudaSupport` is disabled. The regular check phase is turned
# off; a GPU-enabled variant is exposed as `passthru.gpuCheck`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  torch,

  # buildInputs
  fmt,
  pybind11,

  # nativeBuildInputs
  autoAddDriverRunpath,

  # tests
  pytestCheckHook,
  writableTmpDirAsHomeHook,

  # passthru
  deep-gemm,

  config,
  cudaPackages,
  cudaSupport ? config.cudaSupport,
}:

let
  # Environment variables that are only exported for CUDA-enabled builds.
  # Evaluated lazily, so nothing here is forced when cudaSupport is false.
  cudaEnv = {
    CUDA_HOME = (lib.getBin cudaPackages.cuda_nvcc).outPath;

    # Link against the stub libcuda.so at build time; the real library is
    # deployed impurely and is not available inside the build.
    LDFLAGS = "-L${lib.getOutput "stubs" cudaPackages.cuda_cudart}/lib/stubs";
  };

  # CUDA libraries added to buildInputs, each annotated with the header it
  # provides.
  cudaBuildInputs = [
    cudaPackages.cuda_cudart # cuda_runtime_api.h
    cudaPackages.cuda_nvrtc # nvrtc.h
    cudaPackages.cutlass # cute/arch/mma_sm100_desc.hpp
    cudaPackages.libcublas # cublas_v2.h
    cudaPackages.libcusolver # cusolverDn.h
    cudaPackages.libcusparse # cusparse.h
  ];
in
# Reuse torch's stdenv for this package's build.
buildPythonPackage.override { stdenv = torch.stdenv; } (finalAttrs: {
  pname = "deep-gemm";
  version = "2.1.1.post3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "deepseek-ai";
    repo = "DeepGEMM";
    tag = "v${finalAttrs.version}";
    hash = "sha256-2yEHiuTaNUodWlZk7waqBsVMip2qiVJPgQHwsY0I63k=";
  };

  patches = [ ./use-system-libraries.patch ];

  # Empty attribute set unless CUDA support is enabled.
  env = lib.optionalAttrs cudaSupport cudaEnv;

  build-system = [
    setuptools
    torch
  ];

  nativeBuildInputs = [ autoAddDriverRunpath ];

  buildInputs = [
    fmt
    pybind11
  ]
  ++ lib.optionals cudaSupport cudaBuildInputs;

  nativeCheckInputs = [
    pytestCheckHook
    writableTmpDirAsHomeHook
  ];

  # The test suite needs access to a GPU, so it is disabled for the default
  # build and only enabled in passthru.gpuCheck below.
  doCheck = false;

  passthru = {
    # Variant of the package that runs the checks on a builder advertising
    # the "cuda" system feature.
    gpuCheck = deep-gemm.overridePythonAttrs {
      requiredSystemFeatures = [ "cuda" ];
      # Importing the module dlopens libcuda.so, so the import check is only
      # requested here.
      pythonImportsCheck = [ "deep_gemm" ];
      doCheck = true;
    };
  };

  meta = {
    description = "Clean and efficient FP8 GEMM kernels with fine-grained scaling";
    homepage = "https://github.com/deepseek-ai/DeepGEMM";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.GaetanLepage ];
    # Marked broken for builds without CUDA support.
    broken = !cudaSupport;
  };
})
|