1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
# Package definition for PaddleOCR, built from the upstream GitHub release tag
# with a pyproject-based build.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,
  setuptools-scm,

  # dependencies
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv-python,
  openpyxl,
  pdf2docx,
  pillow,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
  paddlex,
  pyyaml,
}:

buildPythonPackage rec {
  pname = "paddleocr";
  version = "3.3.2";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    tag = "v${version}";
    hash = "sha256-uNgYR9Hop/wNIDG4KQaJxn5m4tBKu5R+pfhO1aSa3iQ=";
  };

  patches = [
    # `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` classes,
    # both of which depend on `imgaug`. That package is unmaintained and has
    # been removed from nixpkgs, so the patch strips those two exports.
    #
    # PaddleOCR's image OCR feature does not use these classes and keeps
    # working without them. Some of the OCR model creation tooling shipped
    # with PaddleOCR is probably broken by this, however.
    ./remove-import-imaug.patch
  ];

  # Replace the exact `setuptools==72.1.0` pin in pyproject.toml with an
  # unpinned `setuptools` requirement.
  postPatch = ''
    substituteInPlace pyproject.toml \
    --replace-fail "setuptools==72.1.0" "setuptools"
  '';

  build-system = [
    setuptools
    setuptools-scm
  ];

  dependencies = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv-python
    openpyxl
    pdf2docx
    pillow
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
    paddlex
    pyyaml
  ];

  # Tests are switched off.
  # TODO: they depend on `cudatoolkit` (and possibly other things), and
  # Cudatoolkit fails to install. The settings below are kept as a starting
  # point for re-enabling them:
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/${src.tag}";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}
|