1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
|
# Builds the `rapidocr` Python package from the `python/` subdirectory of the
# RapidAI/RapidOCR repository.
{
  lib,
  buildPythonPackage,

  # fetchers and patch helpers
  fetchFromGitHub,
  fetchzip,
  replaceVars,

  # build-system
  setuptools,

  # dependencies
  colorlog,
  numpy,
  omegaconf,
  onnxruntime,
  opencv-python,
  pillow,
  pyclipper,
  pyyaml,
  requests,
  shapely,
  six,
  tqdm,

  # tests (accepted for interface compatibility; unused while doCheck = false)
  pytestCheckHook,
}:

let
  version = "3.4.1";

  src = fetchFromGitHub {
    owner = "RapidAI";
    repo = "RapidOCR";
    tag = "v${version}";
    hash = "sha256-Q8QtjI+5QDv6zQ96aXLyEepHfMh75DR+ZWj/ygVx3o0=";
  };

  # Upstream release archive. It is unpacked with its top-level directory kept
  # (stripRoot = false), which is why that directory appears in modelDir below.
  modelArchive = fetchzip {
    url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
    hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
    stripRoot = false;
  };

  # Directory inside the unpacked archive whose entries get linked into the
  # source tree during postPatch.
  modelDir = "${modelArchive}/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
  pname = "rapidocr";
  inherit src version;
  pyproject = true;

  # Only the python/ subdirectory of the repository is built.
  sourceRoot = "${src.name}/python";

  # HACK:
  # Upstream lays out its packages in a very unconventional way, so the
  # existing build infrastructure has to be coaxed into working with it.
  # The "intended" way of building this package is shown in
  # https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92
  #
  # Upstream's setup.py works out the current version by fetching the latest
  # release of the package from PyPI and bumping that version number.
  # The Nix build environment has no internet access, so this lookup is
  # patched out and the version is supplied by the build instead.
  patches = [
    (replaceVars ./setup-py-override-version-checking.patch { inherit version; })
  ];

  # Link the fetched model files into the package and write a top-level
  # __init__.py that re-exports RapidOCR and VisRes.
  postPatch = ''
    mkdir -p rapidocr/models
    ln -s ${modelDir}/* rapidocr/models
    echo "from .rapidocr.main import RapidOCR, VisRes" > __init__.py
  '';

  # For the wheel to build correctly, upstream expects the sources to live in
  # rapidocr/rapidocr rather than rapidocr, so nest the directory one level
  # deeper before building.
  preBuild = ''
    mkdir rapidocr_t
    mv rapidocr rapidocr_t
    mv rapidocr_t rapidocr
  '';

  # Undo the nesting performed in preBuild.
  postBuild = ''
    mv rapidocr rapidocr_t
    mv rapidocr_t/* .
  '';

  build-system = [ setuptools ];

  dependencies = [
    colorlog
    numpy
    omegaconf
    onnxruntime
    opencv-python
    pillow
    pyclipper
    pyyaml
    requests
    shapely
    six
    tqdm
  ];

  pythonImportsCheck = [ "rapidocr" ];

  # As of version 2.1.0, 61 of the 70 tests need internet access.
  # Hand-picking the few that still work in a hermetic build environment
  # is no longer practical, so the test suite is disabled entirely :(
  doCheck = false;

  meta = {
    changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/${src.tag}";
    description = "Cross platform OCR Library based on OnnxRuntime";
    homepage = "https://github.com/RapidAI/RapidOCR";
    license = [ lib.licenses.asl20 ];
    maintainers = [ lib.maintainers.pluiedev ];
    mainProgram = "rapidocr";
  };
}
|