1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
# Package expression for the deepsearch-glm Python module.
#
# The function takes the package-set helpers plus every native and Python
# dependency as arguments and returns the result of buildPythonPackage.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cmake,
  cxxopts,
  deepsearch-toolkit,
  docling-core,
  fasttext,
  fmt,
  loguru,
  matplotlib,
  nlohmann_json,
  pandas,
  pcre2,
  pkg-config,
  poetry-core,
  pybind11,
  python-dotenv,
  requests,
  rich,
  sentencepiece,
  tabulate,
  tqdm,
  utf8cpp,
  zlib,
}:

buildPythonPackage rec {
  pname = "deepsearch-glm";
  version = "1.0.0";
  pyproject = true;

  # Sources are pinned to the upstream release tag derived from `version`.
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "deepsearch-glm";
    tag = "v${version}";
    hash = "sha256-3sJNkrx0tTm6RMYAwV8Aha7x8dZjf4tGdds8OScRff8=";
  };

  # Python-side build backend requirements.
  build-system = [
    poetry-core
    pybind11
  ];

  # Build-time tools. cmake is listed here while its configure step is
  # switched off below.
  # NOTE(review): presumably the Python build invokes CMake on its own - confirm.
  nativeBuildInputs = [
    cmake
    pkg-config
  ];

  dontUseCmakeConfigure = true;

  # Native libraries made available to the build.
  buildInputs = [
    cxxopts
    fasttext
    fmt
    loguru
    nlohmann_json
    pcre2
    sentencepiece
    utf8cpp
    zlib
  ];

  # Add the utf8cpp header directory (a subdirectory of the dev output's
  # include/) to the compiler flags.
  env.NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";

  # NOTE(review): looks like this makes the upstream build use the libraries
  # from buildInputs instead of fetching its own copies - confirm upstream.
  env.USE_SYSTEM_DEPS = true;

  # Extras, keyed by extra name.
  optional-dependencies = {
    docling = [
      docling-core
      pandas
    ];

    pyplot = [ matplotlib ];

    toolkit = [
      deepsearch-toolkit
      python-dotenv
    ];

    utils = [
      pandas
      python-dotenv
      requests
      rich
      tabulate
      tqdm
    ];
  };

  # Test suite insists on downloading models, data etc. from s3 bucket
  doCheck = false;

  # With the tests disabled, importing the module is the only check listed.
  pythonImportsCheck = [ "deepsearch_glm" ];

  meta = {
    description = "Create fast graph language models from converted PDF documents for knowledge extraction and Q&A";
    homepage = "https://github.com/DS4SD/deepsearch-glm";
    changelog = "https://github.com/DS4SD/deepsearch-glm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.booxter ];
  };
}
|