summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/whisperx/default.nix
blob: 235bca7af2fe68d88a22de711c85a9bb9e35579c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Package expression for the `whisperx` Python module, written as a function
# whose argument set names every input referenced further down in this file.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  av,
  ctranslate2,
  faster-whisper,
  nltk,
  numpy,
  pandas,
  pyannote-audio,
  torch,
  torchaudio,
  transformers,
  # only added to `dependencies` on x86_64 Linux (see the `lib.optionals` guard below)
  triton,

  # native packages
  ffmpeg,
  ctranslate2-cpp, # alias for `pkgs.ctranslate2`, required due to colliding with the `ctranslate2` Python module.

  # enable GPU support
  # Defaults to `torch.cudaSupport`; the value is forwarded to the
  # `ctranslate2-cpp` override as both `withCUDA` and `withCuDNN`.
  cudaSupport ? torch.cudaSupport,
}:

let
  # The Python `ctranslate2` module with its `ctranslate2-cpp` argument
  # replaced by a variant of the native library whose `withCUDA` and
  # `withCuDNN` flags both follow `cudaSupport`.
  # This binding, not the plain `ctranslate2` argument, is what ends up in
  # `dependencies` below.
  ctranslate = ctranslate2.override {
    ctranslate2-cpp = ctranslate2-cpp.override {
      withCUDA = cudaSupport;
      withCuDNN = cudaSupport;
    };
  };
in
# whisperx, built as a pyproject package with setuptools from the upstream
# GitHub release tag.
buildPythonPackage rec {
  pname = "whisperx";
  version = "3.7.4";
  pyproject = true;

  # Fetches the `v${version}` tag of m-bain/whisperX.
  src = fetchFromGitHub {
    owner = "m-bain";
    repo = "whisperX";
    tag = "v${version}";
    hash = "sha256-wmCGHRx1JaOs5+7fp2jeh8PIR5dlmOl8hKrIw2550Bk=";
  };

  # As `makeWrapperArgs` does not apply to the module, and whisperx depends on `ffmpeg`,
  # we replace the `"ffmpeg"` string in `subprocess.run` with the full path to the binary.
  # This works for both the program and the module.
  # Every update, the codebase should be checked for further instances of `ffmpeg` calls.
  # `--replace-fail` is used, so the build is expected to stop if the
  # `"ffmpeg"` literal is no longer present in `whisperx/audio.py`.
  postPatch = ''
    substituteInPlace whisperx/audio.py --replace-fail \
      '"ffmpeg"' '"${lib.getExe ffmpeg}"'
  '';

  build-system = [ setuptools ];

  # Requirements whose upstream version constraints are relaxed.
  # NOTE(review): presumably the packaged versions fall outside upstream's
  # pins; re-check whether each entry is still needed on every update.
  pythonRelaxDeps = [
    "av"
    "numpy"
    "pandas"
    "pyannote-audio"
    "torch"
    "torchaudio"
  ];
  # `ctranslate` is the `let`-bound override defined above (CUDA/cuDNN follow
  # `cudaSupport`), not the unmodified `ctranslate2` argument.
  dependencies = [
    av
    ctranslate
    faster-whisper
    nltk
    numpy
    pandas
    pyannote-audio
    torch
    torchaudio
    transformers
  ]
  # `triton` is appended only when the host platform is x86_64 Linux.
  ++ lib.optionals (stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isx86_64) [
    triton
  ];

  # Import check fails on `aarch64-linux` ONLY in the sandbox due to onnxruntime
  # not finding its default logger, which then promptly segfaults.
  # Simply run the import check on every other platform instead.
  pythonImportsCheck = lib.optionals (
    !(stdenv.hostPlatform.isAarch64 && stdenv.hostPlatform.isLinux)
  ) [ "whisperx" ];

  # No tests in repository
  doCheck = false;

  meta = {
    mainProgram = "whisperx";
    description = "Automatic Speech Recognition with Word-level Timestamps (& Diarization)";
    homepage = "https://github.com/m-bain/whisperX";
    # Built from `src.tag`, so the link follows `version` automatically.
    changelog = "https://github.com/m-bain/whisperX/releases/tag/${src.tag}";
    license = lib.licenses.bsd2;
    maintainers = [ lib.maintainers.bengsparks ];
  };
}