# path: root/pkgs/development/python-modules/cnvkit/default.nix
# blob: eb6375367297b9fefd904702d5b1ea4ee17188c3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# Package definition for cnvkit: builds the Python package, points its
# segmentation code at a fixed Rscript, wraps the CLI so R can locate DNAcopy,
# and runs both the pytest suite and the Makefile-driven tests after install.
{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  fetchpatch,
  python,
  makeWrapper,
  # dependencies
  biopython,
  matplotlib,
  numpy,
  pandas,
  pomegranate,
  pyfaidx,
  pysam,
  reportlab,
  rPackages,
  scikit-learn,
  scipy,
  R,
  # tests
  pytestCheckHook,
}:
buildPythonPackage rec {
  pname = "cnvkit";
  version = "0.9.12";
  # NOTE(review): no build-system is declared in this file; presumably the
  # build backend is made available transitively -- confirm.
  pyproject = true;

  src = fetchFromGitHub {
    owner = "etal";
    repo = "cnvkit";
    tag = "v${version}";
    hash = "sha256-ZdE3EUNZpEXRHTRKwVhuj3BWQWczpdFbg4pVr0+AHiQ=";
  };

  patches = [
    (fetchpatch {
      name = "fix-numpy2-compat";
      url = "https://github.com/etal/cnvkit/commit/5cb6aeaf40ea5572063cf9914c456c307b7ddf7a.patch";
      hash = "sha256-VwGAMGKuX2Kx9xL9GX/PB94/7LkT0dSLbWIfVO8F9NI=";
    })
  ];

  pythonRelaxDeps = [
    # https://github.com/etal/cnvkit/issues/815
    "pomegranate"
  ];

  nativeBuildInputs = [
    makeWrapper
  ];

  buildInputs = [
    R
  ];

  postPatch =
    let
      # Absolute path of the Rscript executable shipped by the R package.
      rscript = lib.getExe' R "Rscript";
    in
    # Numpy 2 compatibility
    ''
      substituteInPlace skgenome/intersect.py \
        --replace-fail "np.string_" "np.bytes_"
    ''
    # Patch shebang lines in R scripts and the default Rscript lookups so they
    # use the Rscript from the Nix store instead of whatever is on PATH.
    + ''
      substituteInPlace cnvlib/segmentation/flasso.py \
        --replace-fail "#!/usr/bin/env Rscript" "#!${rscript}"

      substituteInPlace cnvlib/segmentation/cbs.py \
        --replace-fail "#!/usr/bin/env Rscript" "#!${rscript}"

      substituteInPlace cnvlib/segmentation/__init__.py \
        --replace-fail 'rscript_path="Rscript"' 'rscript_path="${rscript}"'

      substituteInPlace cnvlib/commands.py \
        --replace-fail 'default="Rscript"' 'default="${rscript}"'
    '';

  dependencies = [
    biopython
    matplotlib
    numpy
    pandas
    pomegranate
    pyfaidx
    pysam
    reportlab
    rPackages.DNAcopy
    scikit-learn
    scipy
  ];

  # Make sure R can find the DNAcopy package.
  # MPLCONFIGDIR is only a fallback (--set-default): a value already present
  # in the caller's environment is respected instead of being overwritten.
  postInstall = ''
    wrapProgram $out/bin/cnvkit.py \
      --set R_LIBS_SITE "${rPackages.DNAcopy}/library" \
      --set-default MPLCONFIGDIR "/tmp/matplotlib-config"
  '';

  installCheckPhase = ''
    runHook preInstallCheck

    # Provide a private, writable HOME and matplotlib config directory before
    # any test runs, so both the pytest suite and the make-driven tests see it.
    export HOME="$(mktemp -d)"
    export MPLCONFIGDIR="$HOME/matplotlib-config"
    mkdir -p "$MPLCONFIGDIR"

    # Unit tests, run from the source root; two command tests are deselected.
    ${python.executable} -m pytest \
      --deselect=test/test_commands.py::CommandTests::test_batch \
      --deselect=test/test_commands.py::CommandTests::test_segment_hmm

    # Makefile-driven tests against the installed binary, which is already
    # wrapped with R_LIBS_SITE.
    cd test
    make cnvkit="$out/bin/cnvkit.py" || {
      echo "Make tests failed"
      exit 1
    }

    runHook postInstallCheck
  '';

  doInstallCheck = true;

  pythonImportsCheck = [ "cnvlib" ];

  # NOTE(review): installCheckPhase is overridden above, so pytestCheckHook
  # presumably only contributes pytest itself here -- confirm.
  nativeCheckInputs = [
    pytestCheckHook
    R
  ];

  meta = {
    homepage = "https://cnvkit.readthedocs.io";
    description = "Python library and command-line software toolkit to infer and visualize copy number from high-throughput DNA sequencing data";
    changelog = "https://github.com/etal/cnvkit/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.jbedo ];
  };
}