1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
# Python package expression for dedupe 3.0.3, built from the tagged GitHub
# source of dedupeio/dedupe with a pyproject build (Cython + setuptools).
#
# Besides the regular pytest-based check phase, the package exposes
# `passthru.tests.benchmarks`, a separate derivation that runs the benchmark
# modules shipped in the source tree against the built package.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  cython,
  setuptools,

  # dependencies
  affinegap,
  btrees,
  categorical-distance,
  dedupe-levenshtein-search,
  doublemetaphone,
  haversine,
  highered,
  numpy,
  scikit-learn,
  simplecosine,
  zope-index,

  # tests
  # `dedupe` is this package itself; it and `python` are only used to build
  # the interpreter environment for passthru.tests.benchmarks.
  dedupe,
  pytest-cov-stub,
  pytestCheckHook,
  python,
  runCommand,
}:

buildPythonPackage rec {
  pname = "dedupe";
  version = "3.0.3";
  pyproject = true;

  src = fetchFromGitHub {
    owner = "dedupeio";
    repo = "dedupe";
    tag = "v${version}";
    hash = "sha256-tfBJeaeZw5w5OwM+AOfy9H6P2zbShjN/kuzEbpxATHI=";
  };

  build-system = [
    cython
    setuptools
  ];

  dependencies = [
    affinegap
    btrees
    categorical-distance
    dedupe-levenshtein-search
    doublemetaphone
    haversine
    highered
    numpy
    scikit-learn
    simplecosine
    zope-index
  ];

  nativeCheckInputs = [
    pytest-cov-stub
    pytestCheckHook
  ];

  # Remove source directory so pytest imports compiled extension from $out
  preCheck = ''
    rm -rf dedupe
  '';

  pythonImportsCheck = [
    "dedupe"
  ];

  # Runs the benchmark modules from the source tree against the built package.
  passthru.tests.benchmarks =
    runCommand "dedupe-benchmarks-test"
      {
        nativeBuildInputs = [ (python.withPackages (ps: [ dedupe ])) ];
      }
      ''
        # Copy benchmarks to writable location
        cp -r ${src}/benchmarks benchmarks
        chmod -R +w benchmarks
        cd benchmarks

        # Run all three canonical benchmarks
        for benchmark in canonical canonical_gazetteer canonical_matching; do
          echo "Running $benchmark benchmark..."
          # Python 3.13 multiprocessing resource tracker warnings from
          # scikit-learn/joblib subprocesses are written to stderr. Capture
          # stderr in a file rather than discarding it: a passing run stays
          # quiet, while a failing run replays the captured output so the
          # build log shows why the benchmark failed.
          stderr_log="$TMPDIR/$benchmark.stderr"
          if ! PYTHONPATH=$PWD python -m benchmarks.$benchmark 2>"$stderr_log"; then
            echo "$benchmark benchmark failed; captured stderr follows:" >&2
            cat "$stderr_log" >&2
            # Fail fast: stop at the first failing benchmark
            exit 1
          fi
        done

        touch $out
      '';

  meta = {
    description = "Library for accurate and scalable fuzzy matching, deduplication and entity resolution";
    homepage = "https://github.com/dedupeio/dedupe";
    changelog = "https://github.com/dedupeio/dedupe/blob/${src.tag}/CHANGELOG.md";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ daniel-fahey ];
  };
}
|