1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
# Python bindings for llama.cpp, built via scikit-build-core.
# CUDA acceleration is opt-in through `cudaSupport` (defaults to `config.cudaSupport`).
{
  lib,
  stdenv,
  gcc13Stdenv,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch2,

  # nativeBuildInputs
  cmake,
  ninja,

  # build-system
  pathspec,
  pyproject-metadata,
  scikit-build-core,

  # dependencies
  diskcache,
  jinja2,
  numpy,
  typing-extensions,

  # tests
  scipy,
  huggingface-hub,

  # passthru
  gitUpdater,
  pytestCheckHook,
  llama-cpp-python,

  config,
  cudaSupport ? config.cudaSupport,
  cudaPackages ? { },
}:
let
  # CUDA builds are compiled with the GCC 13 stdenv; every other build uses the
  # default stdenv.
  # NOTE(review): presumably pinned for nvcc host-compiler compatibility — confirm.
  stdenvTarget = if cudaSupport then gcc13Stdenv else stdenv;
in
buildPythonPackage rec {
  pname = "llama-cpp-python";
  version = "0.3.6";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "abetlen";
    repo = "llama-cpp-python";
    tag = "v${version}";
    hash = "sha256-d5nMgpS7m6WEILs222ztwphoqkAezJ+qt6sVKSlpIYI=";
    # The patch below is applied under vendor/llama.cpp/, so the submodules
    # must be part of the fetched source.
    fetchSubmodules = true;
  };

  patches = [
    # fix segfault when running tests due to missing default Metal devices
    (fetchpatch2 {
      url = "https://github.com/ggerganov/llama.cpp/commit/acd38efee316f3a5ed2e6afcbc5814807c347053.patch?full_index=1";
      # Re-root the upstream llama.cpp patch onto the vendored copy.
      stripLen = 1;
      extraPrefix = "vendor/llama.cpp/";
      hash = "sha256-71+Lpg9z5KPlaQTX9D85KS2LXFWLQNJJ18TJyyq3/pU=";
    })
  ];

  # cmake is only needed as a tool here: the build goes through the
  # scikit-build-core build-system, so the cmake configure hook is skipped.
  dontUseCmakeConfigure = true;

  # Extra CMake arguments handed to scikit-build-core (";"-separated).
  SKBUILD_CMAKE_ARGS = lib.strings.concatStringsSep ";" (
    [
      # Disable tuning for the build machine's CPU. Otherwise cmake tries to
      # build with -march=native / -mcpu=native*, which is either ignored by
      # the compiler wrapper or produces a non-reproducible binary, and has
      # been seen to fail outright with:
      #   cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
      "-DGGML_NATIVE=off"
    ]
    ++ lib.optionals cudaSupport [
      "-DGGML_CUDA=on"
      "-DCUDAToolkit_ROOT=${lib.getDev cudaPackages.cuda_nvcc}"
      "-DCMAKE_CUDA_COMPILER=${lib.getExe cudaPackages.cuda_nvcc}"
    ]
  );

  nativeBuildInputs = [
    cmake
    ninja
  ];

  build-system = [
    pathspec
    pyproject-metadata
    scikit-build-core
  ];

  buildInputs = lib.optionals cudaSupport (
    with cudaPackages;
    [
      cuda_cudart # cuda_runtime.h
      cuda_cccl # <thrust/*>
      libcublas # cublas_v2.h
    ]
  );

  # Build with the stdenv selected above (GCC 13 when CUDA is enabled).
  stdenv = stdenvTarget;

  dependencies = [
    diskcache
    jinja2
    numpy
    typing-extensions
  ];

  nativeCheckInputs = [
    pytestCheckHook
    scipy
    huggingface-hub
  ];

  disabledTests = [
    # tries to download model from huggingface-hub
    "test_real_model"
    "test_real_llama"
  ];

  pythonImportsCheck = [ "llama_cpp" ];

  passthru = {
    updateScript = gitUpdater { rev-prefix = "v"; };
    # The CUDA variant is only exposed as a test on Linux hosts.
    tests = lib.optionalAttrs stdenvTarget.hostPlatform.isLinux {
      withCuda = llama-cpp-python.override {
        cudaSupport = true;
      };
    };
  };

  meta = {
    description = "Python bindings for llama.cpp";
    homepage = "https://github.com/abetlen/llama-cpp-python";
    changelog = "https://github.com/abetlen/llama-cpp-python/blob/v${version}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ kirillrdy ];
    badPlatforms = [
      # cc1: error: unknown value ‘native+nodotprod+noi8mm+nosve’ for ‘-mcpu’
      # NOTE(review): -DGGML_NATIVE=off above is expected to avoid this error;
      # re-test on aarch64-linux and drop this entry if the build succeeds.
      "aarch64-linux"
    ];
  };
}
|