{
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  replaceVars,
  cudaPackages,

  # build-system
  setuptools,
  torch,

  # buildInputs
  pybind11,

  # passthru
  nix-update-script,

  config,
  cudaSupport ? config.cudaSupport,
}:

# Build with the same stdenv that torch itself was built with.
buildPythonPackage.override { inherit (torch) stdenv; } (
  finalAttrs:
  let
    # inject-git-rev.patch with its `git_rev` placeholder filled in from the pinned source revision.
    gitRevPatch = replaceVars ./inject-git-rev.patch {
      git_rev = "+${finalAttrs.src.rev}";
    };

    # CUDA libraries added to buildInputs when cudaSupport is enabled,
    # each annotated with the header it provides.
    cudaBuildInputs = [
      cudaPackages.cuda_cudart # cuda_runtime.h
      cudaPackages.libcublas # cublas_v2.h
      cudaPackages.libcurand # curand_kernel.h
      cudaPackages.libcusolver # cusolverDn.h
      cudaPackages.libcusparse # cusparse.h
    ];
  in
  {
    pname = "flash-mla";
    version = "0-unstable-2026-03-31";
    pyproject = true;

    src = fetchFromGitHub {
      owner = "deepseek-ai";
      repo = "FlashMLA";
      rev = "71c737929f2567bd0a094ae140f8f60f390b1232";
      # The cutlass git submodule is required because it provides
      # cutlass/util/command_line.h, which is not shipped in cudaPackages.cutlass.
      fetchSubmodules = true;
      hash = "sha256-d8Hh+1QFwD6cl9fE8/XSYdWiJJjY9bSRk5h4N2sEV2U=";
    };

    patches = [ gitRevPatch ];

    # Drop setup.py's own `git submodule update` invocation; the submodule
    # is already part of the source thanks to fetchSubmodules above.
    postPatch = ''
      substituteInPlace setup.py \
        --replace-fail \
          'subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])' \
          ""
    '';

    # CUDA_HOME is only set for CUDA-enabled builds and points at the nvcc package.
    env = lib.optionalAttrs cudaSupport {
      CUDA_HOME = (lib.getBin cudaPackages.cuda_nvcc).outPath;
    };

    build-system = [
      setuptools
      torch
    ];

    buildInputs = [ pybind11 ] ++ lib.optionals cudaSupport cudaBuildInputs;

    pythonImportsCheck = [ "flash_mla" ];

    # Tests are not meant to run with pytest
    doCheck = false;

    # The version is an unstable snapshot, so the updater follows the branch.
    passthru.updateScript = nix-update-script {
      extraArgs = [ "--version=branch" ];
    };

    meta = {
      description = "Efficient Multi-head Latent Attention Kernels";
      homepage = "https://github.com/deepseek-ai/FlashMLA";
      license = lib.licenses.mit;
      maintainers = [ lib.maintainers.GaetanLepage ];
      # Marked broken whenever CUDA support is disabled.
      broken = !cudaSupport;
    };
  }
)