summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/dask-ml/default.nix
blob: d78a16803c216e56f0882a5aba1e5746800c6ed9 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# Package expression for dask-ml, built from the upstream GitHub sources
# with the hatchling/hatch-vcs build backend.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  hatch-vcs,
  hatchling,

  # dependencies
  dask-glm,
  distributed,
  multipledispatch,
  numba,
  numpy,
  packaging,
  pandas,
  scikit-learn,
  scipy,
  dask,

  # tests
  pytest-mock,
  pytestCheckHook,
}:

buildPythonPackage rec {
  pname = "dask-ml";
  version = "2025.1.0";
  pyproject = true;

  # The source is pinned to the upstream git tag derived from `version`.
  src = fetchFromGitHub {
    owner = "dask";
    repo = "dask-ml";
    tag = "v${version}";
    hash = "sha256-DHxx0LFuJmGWYuG/WGHj+a5XHAEekBmlHUUb90rl2IY=";
  };

  build-system = [
    hatch-vcs
    hatchling
  ];

  # `dask` is not listed on its own: only the dependency lists of its
  # `array` and `dataframe` extras are appended to the explicit list.
  dependencies = [
    dask-glm
    distributed
    multipledispatch
    numba
    numpy
    packaging
    pandas
    scikit-learn
    scipy
  ]
  ++ dask.optional-dependencies.array
  ++ dask.optional-dependencies.dataframe;

  # Modules that must be importable from the built package.
  pythonImportsCheck = [
    "dask_ml"
    "dask_ml.naive_bayes"
    "dask_ml.wrappers"
    "dask_ml.utils"
  ];

  nativeCheckInputs = [
    pytest-mock
    pytestCheckHook
  ];

  # Whole test files excluded from the check phase.
  disabledTestPaths = [
    # RuntimeError: Attempting to use an asynchronous Client in a synchronous context of `dask.compute`
    # https://github.com/dask/dask-ml/issues/1016
    "tests/model_selection/test_hyperband.py"
    "tests/model_selection/test_incremental.py"
    "tests/model_selection/test_incremental_warns.py"
    "tests/model_selection/test_successive_halving.py"
  ];

  # Individual tests excluded by name.
  disabledTests = [
    # AssertionError: Regex pattern did not match.
    "test_unknown_category_transform_array"

    # ValueError: cannot broadcast shape (nan,) to shape (nan,)
    # https://github.com/dask/dask-ml/issues/1012
    "test_fit_array"
    "test_fit_frame"
    "test_fit_transform_frame"
    "test_laziness"
    "test_lr_score"
    "test_ok"
    "test_scoring_string"
  ];

  # NOTE(review): presumably required because the test suite talks to a
  # local `distributed` cluster inside the Darwin sandbox -- confirm.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "Scalable Machine Learning with Dask";
    homepage = "https://github.com/dask/dask-ml";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ GaetanLepage ];
  };
}