summaryrefslogtreecommitdiff
path: root/pkgs/development/python-modules/holistic-trace-analysis/default.nix
blob: 6389d96bec6e12649ef14cbc25b54d4de6e9052d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build system
  setuptools,

  # dependencies
  jupyterlab,
  numpy,
  pandas,
  plotly,
  pydot,
  torch,

  # tests
  pytestCheckHook,
  writableTmpDirAsHomeHook,
}:

let
  # All test exclusions in this recipe apply to Darwin hosts only.
  # `lib.optionals` is curried, so fixing the condition here yields a
  # function that returns its list argument on Darwin and `[ ]` elsewhere.
  onDarwinOnly = lib.optionals stdenv.hostPlatform.isDarwin;
in
buildPythonPackage rec {
  pname = "HolisticTraceAnalysis";
  version = "0.5.0";
  pyproject = true;

  # Sources are fetched from the upstream GitHub release tag `v<version>`.
  src = fetchFromGitHub {
    owner = "facebookresearch";
    repo = "HolisticTraceAnalysis";
    tag = "v${version}";
    hash = "sha256-3DuoP9gQ0vLlAAJ2uWw/oOEH/DTbn2xulzvqk4W3BiY=";
  };

  build-system = [ setuptools ];

  dependencies = [
    jupyterlab
    numpy
    pandas
    plotly
    pydot
    torch
  ];

  # Check phase: run the test suite with pytest, then verify that the
  # top-level `hta` module can be imported.
  nativeCheckInputs = [
    pytestCheckHook
    writableTmpDirAsHomeHook
  ];

  pythonImportsCheck = [ "hta" ];

  # Individual tests skipped on Darwin; each entry notes the observed failure.
  disabledTests = onDarwinOnly [
    # Permission denied: '/tmp/my_saved_cp_graph/trace_data.csv'
    "test_critical_path_breakdown_and_save_restore"
    # I/O errors under Python 3.12 on Darwin:
    # Permission denied: '/tmp/path_does_not_exist/...'
    "test_critical_path_overlaid_trace"
    # Permission error: [Errno 1] Operation not permitted
    "test_get_mtia_aten_op_kernels_and_delay_inference_single_rank"
    # CUDA is unavailable on Darwin; these can hang nixpkgs-review runs
    "test_frequent_cuda_kernel_sequences"
    "test_get_cuda_kernel_launch_stats_for_h100"
    "test_get_cuda_kernel_launch_stats_inference_single_rank"
    "test_get_cuda_kernel_launch_stats_training_multiple_ranks"
  ];

  # Whole test modules skipped on Darwin.
  disabledTestPaths = onDarwinOnly [
    # Relies on assumptions about the filesystem layout
    "tests/test_config.py"
    # EOFError -- relies on assumptions about file I/O under Python 3.12
    # https://github.com/facebookresearch/HolisticTraceAnalysis/issues/300
    "tests/test_symbol_table.py"
  ];

  meta = {
    description = "Performance analysis tool to identify bottlenecks in distributed training workloads";
    homepage = "https://github.com/facebookresearch/HolisticTraceAnalysis";
    changelog = "https://github.com/facebookresearch/HolisticTraceAnalysis/releases/tag/v${version}";
    license = lib.licenses.mit;
    maintainers = [ lib.maintainers.jherland ];
  };
}