summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/markitdown/default.nix
blob: c47f7361a2efb2a3ffb0eddd0b97dd3309e02464 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Package expression for the `markitdown` Python module, built from the
# `packages/markitdown` subdirectory of the microsoft/markitdown repository.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  hatchling,
  beautifulsoup4,
  defusedxml,
  ffmpeg-headless,
  magika,
  mammoth,
  markdownify,
  numpy,
  openai,
  openpyxl,
  pandas,
  pathvalidate,
  pdfminer-six,
  puremagic,
  pydub,
  python-pptx,
  requests,
  speechrecognition,
  youtube-transcript-api,
  olefile,
  xlrd,
  lxml,
  pytestCheckHook,
  gitUpdater,
}:

buildPythonPackage rec {
  pname = "markitdown";
  version = "0.1.4";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "microsoft";
    repo = "markitdown";
    tag = "v${version}";
    hash = "sha256-WKA2eY8wY3SM9xZ7Cek5eUcJbO5q6eMDx2aTKfQnFvE=";
  };

  # Only the `packages/markitdown` subdirectory of the fetched tree is built.
  sourceRoot = "${src.name}/packages/markitdown";

  build-system = [ hatchling ];

  # Do not enforce the version constraint upstream declares for magika.
  pythonRelaxDeps = [
    "magika"
  ];

  dependencies = [
    beautifulsoup4
    defusedxml
    # NOTE(review): not a Python module; presumably here so pydub can find an
    # ffmpeg binary at runtime — confirm.
    ffmpeg-headless
    lxml
    magika
    mammoth
    markdownify
    numpy
    olefile
    openai
    openpyxl
    pandas
    pathvalidate
    pdfminer-six
    puremagic
    pydub
    python-pptx
    requests
    speechrecognition
    xlrd
    youtube-transcript-api
  ];

  pythonImportsCheck = [ "markitdown" ];

  nativeCheckInputs = [ pytestCheckHook ];

  disabledTests = [
    # Require network access
    "test_markitdown_remote"
    "test_module_vectors"
    "test_cli_vectors"
    "test_module_misc"
  ];

  # The attribute must be the singular `updateScript` to be picked up by the
  # nixpkgs update tooling. Upstream tags carry a `v` prefix (see `src.tag`),
  # so it is stripped before the tag is written back into `version`.
  passthru.updateScript = gitUpdater { rev-prefix = "v"; };

  meta = {
    description = "Python tool for converting files and office documents to Markdown";
    homepage = "https://github.com/microsoft/markitdown";
    changelog = "https://github.com/microsoft/markitdown/releases/tag/${src.tag}";
    license = lib.licenses.mit;
    maintainers = [ ];
  };
}