summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/deepsearch-glm/default.nix
blob: 734e301f697b47e8150d307eb17144db910e9fbe (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Package expression for the `deepsearch-glm` Python module.
#
# The arguments fall into three groups: nixpkgs helpers (`lib`,
# `buildPythonPackage`, `fetchFromGitHub`), native build tools and C/C++
# libraries, and Python packages used for the build backend and the
# optional extras.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  cmake,
  cxxopts,
  deepsearch-toolkit,
  docling-core,
  fasttext,
  fmt,
  loguru,
  matplotlib,
  nlohmann_json,
  pandas,
  pcre2,
  pkg-config,
  poetry-core,
  pybind11,
  python-dotenv,
  requests,
  rich,
  sentencepiece,
  tabulate,
  tqdm,
  utf8cpp,
  zlib,
}:

let
  pname = "deepsearch-glm";
  # Referenced below for both the source tag and the changelog URL.
  version = "1.0.0";
in
buildPythonPackage {
  inherit pname version;
  pyproject = true;

  # Sources are fetched from the upstream GitHub tag "v<version>".
  src = fetchFromGitHub {
    owner = "DS4SD";
    repo = "deepsearch-glm";
    tag = "v${version}";
    hash = "sha256-3sJNkrx0tTm6RMYAwV8Aha7x8dZjf4tGdds8OScRff8=";
  };

  # Python build backend requirements.
  build-system = [
    poetry-core
    pybind11
  ];

  # cmake is listed as a build tool, but its configure step is disabled.
  # NOTE(review): presumably the Python build backend invokes cmake itself -
  # confirm against upstream's build script.
  nativeBuildInputs = [
    cmake
    pkg-config
  ];
  dontUseCmakeConfigure = true;

  # Native libraries made available to the build.
  buildInputs = [
    cxxopts
    fasttext
    fmt
    loguru
    nlohmann_json
    pcre2
    sentencepiece
    utf8cpp
    zlib
  ];

  env = {
    # NOTE(review): presumably switches the upstream build to the libraries
    # from buildInputs rather than bundled copies - confirm against upstream.
    USE_SYSTEM_DEPS = true;
    # Add utf8cpp's `include/utf8cpp` directory to the compiler include path.
    NIX_CFLAGS_COMPILE = "-I${lib.getDev utf8cpp}/include/utf8cpp";
  };

  # Extras, keyed by extra name.
  optional-dependencies = {
    docling = [
      docling-core
      pandas
    ];
    pyplot = [ matplotlib ];
    toolkit = [
      deepsearch-toolkit
      python-dotenv
    ];
    utils = [
      pandas
      python-dotenv
      requests
      rich
      tabulate
      tqdm
    ];
  };

  # Test suite insists on downloading models, data etc. from s3 bucket
  doCheck = false;

  # With the test suite off, only check that the top-level module imports.
  pythonImportsCheck = [ "deepsearch_glm" ];

  meta = {
    description = "Create fast graph language models from converted PDF documents for knowledge extraction and Q&A";
    homepage = "https://github.com/DS4SD/deepsearch-glm";
    changelog = "https://github.com/DS4SD/deepsearch-glm/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.booxter ];
  };
}