summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/cleanlab/default.nix
blob: 827db22fe93cc842484c18179f519ef30f852527 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Package expression for the `cleanlab` Python module.
#
# This is a function: the attribute set below lists the inputs it expects
# (library helpers, the build backend, runtime dependencies and test-only
# dependencies), and the body passes a package description to
# `buildPythonPackage`.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  numpy,
  scikit-learn,
  termcolor,
  tqdm,
  pandas,

  # tests
  cleanvision,
  datasets,
  fasttext,
  hypothesis,
  keras,
  matplotlib,
  pytestCheckHook,
  pytest-lazy-fixture,
  skorch,
  tensorflow,
  torch,
  torchvision,
  wget,
  # Used below to disable a test only on newer interpreters.
  pythonAtLeast,
}:

# The package description is written as a function of `finalAttrs` so that
# attributes can refer to each other (`finalAttrs.version` in `src.tag`,
# `finalAttrs.src.tag` in `meta.changelog`).
buildPythonPackage (finalAttrs: {
  pname = "cleanlab";
  version = "2.9.0";
  pyproject = true;

  # Sources come from the upstream GitHub repository, at the tag derived from
  # `version` (i.e. "v2.9.0"). `hash` pins the expected content of that fetch
  # and has to be updated together with `version`.
  src = fetchFromGitHub {
    owner = "cleanlab";
    repo = "cleanlab";
    tag = "v${finalAttrs.version}";
    hash = "sha256-0H4JTAc2tCtIFklGciXQ+TCWOiJ6kRkqcycJNeIpero=";
  };

  # Replace the version-bounded setuptools requirement in pyproject.toml with
  # an unconstrained one, so the `setuptools` passed in above is accepted.
  # NOTE(review): `--replace-fail` presumably aborts the build when the exact
  # string is not found, so this needs revisiting if upstream changes the pin.
  postPatch = ''
    substituteInPlace pyproject.toml \
      --replace-fail "setuptools>=65.0,<70.0" "setuptools"
  '';

  build-system = [
    setuptools
  ];

  dependencies = [
    numpy
    scikit-learn
    termcolor
    tqdm
    pandas
  ];

  # Test-only inputs; `pytestCheckHook` is among them, so the test suite is
  # presumably run with pytest during the check phase.
  nativeCheckInputs = [
    cleanvision
    datasets
    fasttext
    hypothesis
    keras
    matplotlib
    pytestCheckHook
    pytest-lazy-fixture
    skorch
    tensorflow
    torch
    torchvision
    wget
  ];

  # Individual tests to skip, each with the reason (or observed failure)
  # recorded next to it.
  disabledTests = [
    # Incorrect snapshots (AssertionError)
    "test_color_sentence"

    # Requires the datasets we prevent from downloading
    "test_create_imagelab"

    # AssertionError: assert np.int64(36) == 35
    "test_num_label_issues"

    # Non-trivial numpy2 incompatibilities
    # assert np.float64(0.492) == 0.491
    "test_duplicate_points_have_similar_scores"
    # AssertionError: assert 'Annotators [1] did not label any examples.'
    "test_label_quality_scores_multiannotator"
  ]
  # The following entry is only appended when `pythonAtLeast "3.12"` holds.
  ++ lib.optionals (pythonAtLeast "3.12") [
    # AttributeError: 'called_once_with' is not a valid assertion.
    # Use a spec for the mock if 'called_once_with' is meant to be an attribute..
    # Did you mean: 'assert_called_once_with'?
    "test_custom_issue_manager_not_registered"
  ];

  # Whole test files to skip, as opposed to the single tests listed in
  # `disabledTests`.
  disabledTestPaths = [
    # Requires internet
    "tests/test_dataset.py"
    # Requires the datasets we just prevented from downloading
    "tests/datalab/test_cleanvision_integration.py"
    # Fails because of issues with the keras derivation
    "tests/test_frameworks.py"
  ];

  meta = {
    description = "Standard data-centric AI package for data quality and machine learning with messy, real-world data and labels";
    homepage = "https://github.com/cleanlab/cleanlab";
    # Reuses the source tag, so the link follows `version` automatically.
    changelog = "https://github.com/cleanlab/cleanlab/releases/tag/${finalAttrs.src.tag}";
    license = lib.licenses.agpl3Only;
    maintainers = with lib.maintainers; [ happysalada ];
  };
})