# Package expression for MIOpen (pname "miopen"), built from the
# `projects/miopen` subtree of the ROCm/rocm-libraries repository.
#
# Tunable arguments:
#   gpuTargets           - requested GPU architectures; only those also present
#                          in the `supportedTargets` allow-list are built.
#   buildDocs            - adds the "doc" output and the documentation toolchain.
#   buildTests           - adds the "test" output plus gtest and zlib.
#   withComposableKernel - adds composable_kernel to buildInputs and sets
#                          MIOPEN_USE_COMPOSABLEKERNEL accordingly.
{
  lib,
  stdenv,
  callPackage,
  fetchFromGitHub,
  fetchpatch,
  fetchurl,
  runCommand,
  pkg-config,
  cmake,
  rocm-cmake,
  rocblas,
  rocmlir,
  rocrand,
  rocm-runtime,
  hipblas-common,
  hipblas,
  hipblaslt,
  clr,
  composable_kernel,
  frugally-deep,
  rocm-docs-core,
  half,
  boost,
  sqlite,
  symlinkJoin,
  bzip2,
  lbzip2,
  nlohmann_json,
  texliveSmall,
  doxygen,
  sphinx,
  zlib,
  gtest,
  rocm-comgr,
  roctracer,
  python3Packages,
  # Uses clr.localGpuTargets when that attribute exists, otherwise clr.gpuTargets.
  gpuTargets ? clr.localGpuTargets or clr.gpuTargets,
  buildDocs ? false, # Needs internet because of rocm-docs-core
  buildTests ? false,
  withComposableKernel ? true,
}:

let
  # FIXME: cmake files need patched to include this properly
  # Shared by env.CFLAGS and env.CXXFLAGS below.
  cFlags = "-Wno-documentation-pedantic --offload-compress -I${hipblas-common}/include -I${hipblas}/include -I${roctracer}/include -I${nlohmann_json}/include -I${sqlite.dev}/include -I${rocrand}/include";
  version = "7.2.3";

  # Targets outside this list will get
  # error: use of undeclared identifier 'CK_BUFFER_RESOURCE_3RD_DWORD'
  # The result is the intersection of this allow-list with `gpuTargets`.
  supportedTargets = lib.intersectLists [
    "gfx900"
    "gfx906"
    "gfx908"
    "gfx90a"
    "gfx942"
    "gfx950"
    "gfx1030"
    "gfx1031"
    "gfx1100"
    "gfx1101"
    "gfx1102"
    "gfx1150"
    "gfx1151"
    "gfx1200"
    "gfx1201"
  ] gpuTargets;

  # Semicolon-separated form of `supportedTargets`, computed once so that the
  # AMDGPU_TARGETS, GPU_TARGETS and GPU_ARCHS CMake flags cannot drift apart.
  cmakeTargetList = lib.concatStringsSep ";" supportedTargets;

  # LaTeX environment for the documentation build.
  # NOTE(review): lib.optionalAttrs yields `{ }` when buildDocs is false; in the
  # visible code `latex` is only referenced under `lib.optionals buildDocs`.
  latex = lib.optionalAttrs buildDocs (
    texliveSmall.withPackages (
      ps: with ps; [
        latexmk
        tex-gyre
        fncychap
        wrapfig
        capt-of
        framed
        needspace
        tabulary
        varwidth
        titlesec
      ]
    )
  );

  # for hiprtcCompileProgram (dropout kernels require rocrand in -I at runtime)
  # Single tree joining clr and rocrand.
  hiprtcCompileRocmPath = symlinkJoin {
    name = "miopen-hiprtc-compile-rocm-path";
    paths = [
      clr
      rocrand
    ];
  };

  # Kernel databases moved from Git LFS to DVC (anonymous s3 bucket s3://therock-dvc/rocm-libraries)
  # fetchKdb name { url, hash }: fetches `${name}.kdb.bz2` and decompresses it
  # with lbzip2 to a store path named `miopen-${name}.kdb`.
  fetchKdb =
    name:
    { url, hash }:
    runCommand "miopen-${name}.kdb" { preferLocalBuild = true; } ''
      ${lbzip2}/bin/lbzip2 -ckd ${
        fetchurl {
          inherit url hash;
          name = "${name}.kdb.bz2";
        }
      } > $out
    '';

  # Decompressed kernel databases for the entries of ./kdbs.nix whose attribute
  # name is one of `supportedTargets`.
  kdbs = lib.mapAttrs fetchKdb (
    lib.filterAttrs (name: _: lib.elem name supportedTargets) (import ./kdbs.nix)
  );

  # linkKDBsTo targetPath: shell snippet with one `ln -sf` per entry of `kdbs`,
  # linking it to `${targetPath}/${name}.kdb`. The command must stay on a single
  # line: a line break between `ln` and `-sf` would run them as two commands.
  linkKDBsTo =
    targetPath:
    lib.concatStringsSep "" (
      lib.mapAttrsToList (name: kdb: ''
        ln -sf ${kdb} ${targetPath}/${name}.kdb
      '') kdbs
    );
in
stdenv.mkDerivation (finalAttrs: {
  inherit version;
  pname = "miopen";

  # Sparse checkout: only the MIOpen project and the `shared` directory.
  src = fetchFromGitHub {
    owner = "ROCm";
    repo = "rocm-libraries";
    rev = "rocm-${finalAttrs.version}";
    sparseCheckout = [
      "projects/miopen"
      "shared"
    ];
    fetchSubmodules = true;
    hash = "sha256-plZpBTbEBVMa5CasjfbUsu45xP/BYstrEpWKK2H7QQ4=";
  };

  sourceRoot = "${finalAttrs.src.name}/projects/miopen";

  env.CFLAGS = cFlags;
  env.CXXFLAGS = cFlags;

  # Find zstd and add to target. Mainly for torch.
  patches = [
    ./skip-preexisting-dbs.patch
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/e608b4325646afeabb5e52846997b926d2019d19.patch";
      hash = "sha256-oxa3qlIC2bzbwGxrQOZXoY/S7CpLsMrnWRB7Og0tk0M=";
    })
    (fetchpatch {
      url = "https://github.com/ROCm/MIOpen/commit/3413d2daaeb44b7d6eadcc03033a5954a118491e.patch";
      hash = "sha256-ST4snUcTmmSI1Ogx815KEX9GdMnmubsavDzXCGJkiKs=";
    })
    (fetchpatch {
      # [miopen] Extend HIP ISA compatibility
      name = "Extend-MIOpen-ISA-compatibility.patch";
      url = "https://github.com/GZGavinZhao/rocm-libraries/commit/02f0fedffdc197f146dd45f41e10990a00cde3ee.patch";
      hash = "sha256-My32iZw75rvB4fyvUJJ2kw2bU9/39awGteFGjzijixw=";
      relative = "projects/miopen";
    })
  ];

  # "doc" and "test" outputs exist only when the matching flag is enabled.
  outputs = [
    "out"
  ]
  ++ lib.optionals buildDocs [
    "doc"
  ]
  ++ lib.optionals buildTests [
    "test"
  ];

  enableParallelBuilding = true;
  env.ROCM_PATH = clr;
  env.LD_LIBRARY_PATH = lib.makeLibraryPath [ rocm-runtime ];

  nativeBuildInputs = [
    pkg-config
    cmake
    rocm-cmake
    clr
  ];

  buildInputs = [
    hipblas
    hipblas-common
    rocblas
    rocmlir
    half
    boost
    sqlite
    bzip2
    nlohmann_json
    frugally-deep
    roctracer
    rocrand
    hipblaslt
  ]
  ++ lib.optionals withComposableKernel [
    composable_kernel
  ]
  ++ lib.optionals buildDocs [
    latex
    doxygen
    sphinx
    rocm-docs-core
    python3Packages.sphinx-rtd-theme
    python3Packages.breathe
    python3Packages.myst-parser
  ]
  ++ lib.optionals buildTests [
    gtest
    zlib
  ];

  cmakeFlags = [
    # All three target variables receive the same semicolon-separated list.
    "-DAMDGPU_TARGETS=${cmakeTargetList}"
    "-DGPU_TARGETS=${cmakeTargetList}"
    "-DGPU_ARCHS=${cmakeTargetList}"
    "-DCMAKE_VERBOSE_MAKEFILE=ON"
    "-DCMAKE_MODULE_PATH=${clr}/hip/cmake"
    "-DCMAKE_BUILD_TYPE=Release"
    # needs to stream to stdout so bzcat rather than bunzip2
    "-DUNZIPPER=${bzip2}/bin/bzcat"
    "-DCMAKE_C_COMPILER=amdclang"
    "-DCMAKE_CXX_COMPILER=amdclang++"
    "-DROCM_PATH=${clr}"
    "-DHIP_ROOT_DIR=${clr}"
    (lib.cmakeBool "MIOPEN_USE_ROCBLAS" true)
    (lib.cmakeBool "MIOPEN_USE_HIPBLASLT" true)
    (lib.cmakeBool "MIOPEN_USE_COMPOSABLEKERNEL" withComposableKernel)
    (lib.cmakeBool "MIOPEN_USE_HIPRTC" true)
    (lib.cmakeBool "MIOPEN_USE_COMGR" true)
    "-DCMAKE_HIP_COMPILER_ROCM_ROOT=${clr}"
    # Manually define CMAKE_INSTALL_