summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/rapidocr-onnxruntime/default.nix
blob: 1783e28f58a37888857861315ef34837078510f2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
{
  # Inputs of the package function, grouped by the role each one plays below.
  lib,
  config,
  buildPythonPackage,
  fetchFromGitHub,

  # fetchzip retrieves the model archive; replaceVars fills the version into the setup.py patch.
  fetchzip,
  replaceVars,

  # build-system
  setuptools,
  # dependencies
  pyclipper,
  opencv-python,
  numpy,
  six,
  shapely,
  pyyaml,
  pillow,
  onnxruntime,
  tqdm,

  # nativeCheckInputs
  pytestCheckHook,
  requests,

  # Defaults to the global config.cudaSupport; the test phase is skipped when it is true,
  # and it selects the extra test disabled in passthru.gpuCheck.
  cudaSupport ? config.cudaSupport,
  # The resulting package itself, overridden below to build passthru.gpuCheck.
  # (presumably injected by the Python package set — confirm against the caller)
  rapidocr-onnxruntime,
}:
let
  # Upstream release being packaged; reused for the git tag and for the setup.py patch.
  version = "1.4.4";

  # Source checkout of the upstream repository at the tag matching `version`.
  src = fetchFromGitHub {
    repo = "RapidOCR";
    owner = "RapidAI";
    hash = "sha256-x0VELDKOffxbV3v0aDFJFuDC4YfsGM548XWgINmRc3M=";
    tag = "v${version}";
  };

  # Release asset that carries the model files. It is unpacked with stripRoot = false,
  # so the archive's own top-level directory is part of the path used for `models`.
  modelsArchive = fetchzip {
    stripRoot = false;
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
  };

  # Directory inside the unpacked archive whose contents get linked into the package in postPatch.
  models = modelsArchive + "/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
  pname = "rapidocr-onnxruntime";
  inherit src version;
  pyproject = true;

  # Build from the `python` subdirectory of the fetched source tree.
  sourceRoot = "${src.name}/python";

  build-system = [ setuptools ];

  dependencies = [
    pyclipper
    opencv-python
    numpy
    six
    shapely
    pyyaml
    pillow
    onnxruntime
    tqdm
  ];

  # HACK:
  # The upstream repository organizes its packages in a very unconventional way, so the
  # standard build infrastructure has to be coaxed into working with it.
  # The "intended" build procedure is the upstream workflow at
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92

  # Upstream's setup.py derives its version by asking PyPI for the latest published release
  # and bumping that number. The Nix build environment has no internet access, so the lookup
  # is patched out and the version is supplied from the build environment instead.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch {
      inherit version;
    })
  ];

  # After patching:
  #  - rename the onnxruntime-specific setup script to the name the build expects,
  #  - link the model files into the package's models directory,
  #  - write a top-level __init__.py that re-exports RapidOCR and VisRes.
  postPatch = ''
    mv setup_onnxruntime.py setup.py

    ln -s ${models}/* rapidocr_onnxruntime/models

    echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to build correctly, upstream wants the sources at
  # rapidocr_onnxruntime/rapidocr_onnxruntime rather than rapidocr_onnxruntime,
  # so nest the directory one level deeper via a temporary name.
  preBuild = ''
    mkdir rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t rapidocr_onnxruntime
  '';

  # Undo the nesting from preBuild once the wheel has been built.
  postBuild = ''
    mv rapidocr_onnxruntime rapidocr_onnxruntime_t
    mv rapidocr_onnxruntime_t/* .
  '';

  nativeCheckInputs = [
    pytestCheckHook
    requests
  ];

  pythonImportsCheck = [ "rapidocr_onnxruntime" ];

  # Test files that target other backends.
  disabledTestPaths = [
    "tests/test_vino.py"
    "tests/test_paddle.py"
  ];

  disabledTests = [
    # Needs Internet access
    "test_long_img"
  ];

  # With CUDA enabled the tests need a physical GPU; without one the interpreter crashes:
  # Fatal Python error: Aborted
  # File "/nix/store/..onnxruntime/capi/onnxruntime_inference_collection.py", line 561 in _create_inference_session
  doCheck = !cudaSupport;

  # Upstream renamed rapidocr-onnxruntime to rapidocr as of 2.0.0, but some packages such as
  # open-webui still require rapidocr-onnxruntime 1.4.4, so automatic updates are turned off.
  # nixpkgs-update: no auto update
  passthru.skipBulkUpdate = true;

  # Variant of this package that runs the test suite on a builder advertising the "cuda" feature.
  passthru.gpuCheck = rapidocr-onnxruntime.overridePythonAttrs (prev: {
    doCheck = true;
    requiredSystemFeatures = [ "cuda" ];

    disabledTests =
      (prev.disabledTests or [ ])
      ++ lib.optionals cudaSupport [
        # IndexError: list index out of range
        "test_ort_cuda_warning"
      ];
  });

  meta = {
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    mainProgram = "rapidocr_onnxruntime";
    maintainers = [ lib.maintainers.wrvsrx ];
  };
}