Spaces:
Runtime error
Runtime error
| # Build recipe for llama.cpp (see `pname` below): a function from | |
| # dependencies and feature toggles to a derivation. | |
| { | |
| lib, | |
| glibc, | |
| config, | |
| stdenv, | |
| mkShell, | |
| runCommand, | |
| cmake, | |
| ninja, | |
| pkg-config, | |
| git, | |
| python3, | |
| mpi, | |
| blas, | |
| cudaPackages, | |
| darwin, | |
| rocmPackages, | |
| vulkan-headers, | |
| vulkan-loader, | |
| clblast, | |
| # BLAS defaults to on only when none of the accelerator backends listed | |
| # here is enabled and the blas package is marked as available. | |
| useBlas ? builtins.all (x: !x) [ | |
| useCuda | |
| useMetalKit | |
| useOpenCL | |
| useRocm | |
| useVulkan | |
| ] && blas.meta.available, | |
| # CUDA and ROCm default to the nixpkgs-wide config.cudaSupport / | |
| # config.rocmSupport settings. | |
| useCuda ? config.cudaSupport, | |
| # Metal defaults to on for aarch64 Darwin unless OpenCL was requested. | |
| useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL, | |
| useMpi ? false, # Increases the runtime closure size by ~700M | |
| useOpenCL ? false, | |
| useRocm ? config.rocmSupport, | |
| useVulkan ? false, | |
| llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake | |
| # It's necessary to consistently use backendStdenv when building with CUDA support, | |
| # otherwise we get libstdc++ errors downstream. | |
| effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, | |
| # Static linking follows the host platform unless overridden. | |
| enableStatic ? effectiveStdenv.hostPlatform.isStatic, | |
| # When set together with Metal on Darwin, the derivation sets __noChroot, | |
| # adds xcrunHost to nativeBuildInputs and turns LLAMA_METAL_EMBED_LIBRARY off. | |
| precompileMetalShaders ? false | |
| # NOTE(review): the `inputs` alias is not referenced anywhere in the visible | |
| # file; confirm nothing relies on it before removing. | |
| }@inputs: | |
| let | |
| # Bring the nixpkgs lib helpers used below into scope under their bare names. | |
| # (`versionOlder` was inherited here but never referenced, so it is dropped.) | |
| inherit (lib) | |
| cmakeBool | |
| cmakeFeature | |
| optionals | |
| strings | |
| ; | |
| # Deliberately shadow `stdenv` for the rest of this expression: any reference | |
| # to it below aborts evaluation with this message, so that effectiveStdenv | |
| # (see the CUDA note on that argument) is used consistently. | |
| stdenv = throw "Use effectiveStdenv instead"; | |
| # Tags of every enabled backend/feature, in a fixed order; they feed both | |
| # the package name and the description. | |
| suffices = lib.concatLists [ | |
| (optionals useBlas [ "BLAS" ]) | |
| (optionals useCuda [ "CUDA" ]) | |
| (optionals useMetalKit [ "MetalKit" ]) | |
| (optionals useMpi [ "MPI" ]) | |
| (optionals useOpenCL [ "OpenCL" ]) | |
| (optionals useRocm [ "ROCm" ]) | |
| (optionals useVulkan [ "Vulkan" ]) | |
| ]; | |
| # Lower-cased tags joined with "-" and prefixed with "-"; empty when no | |
| # feature is enabled. | |
| pnameSuffix = | |
| if suffices == [ ] then | |
| "" | |
| else | |
| "-" + strings.concatMapStringsSep "-" strings.toLower suffices; | |
| # Human-readable counterpart that is appended to meta.description. | |
| descriptionSuffix = | |
| if suffices == [ ] then | |
| "" | |
| else | |
| ", accelerated with " + strings.concatStringsSep ", " suffices; | |
| # Executable file extension as reported by the host platform. | |
| executableSuffix = effectiveStdenv.hostPlatform.extensions.executable; | |
| # TODO: package the Python in this repository in a Nix-like way. | |
| # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo | |
| # is PEP 517-compatible, and ensure the correct .dist-info is generated. | |
| # https://peps.python.org/pep-0517/ | |
| # | |
| # TODO: Package up each Python script or service appropriately, by making | |
| # them into "entrypoints" | |
| # | |
| # Python packages shared by both interpreter environments below. | |
| llamaPythonBasePackages = ps: [ | |
| ps.numpy | |
| ps.sentencepiece | |
| ]; | |
| # Lightweight environment: only the shared base packages. | |
| llama-python = python3.withPackages llamaPythonBasePackages; | |
| # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime | |
| # Base packages followed by tiktoken, torchWithoutCuda and transformers. | |
| llama-python-extra = python3.withPackages ( | |
| ps: | |
| llamaPythonBasePackages ps | |
| ++ [ | |
| ps.tiktoken | |
| ps.torchWithoutCuda | |
| ps.transformers | |
| ] | |
| ); | |
| # Tiny derivation whose bin/ directory holds a symlink to the host's | |
| # /usr/bin/xcrun. It is added to nativeBuildInputs only when Metal shaders | |
| # are precompiled on Darwin, the same condition under which __noChroot is set. | |
| xcrunHost = runCommand "xcrunHost" {} '' | |
| mkdir -p $out/bin | |
| ln -s /usr/bin/xcrun $out/bin | |
| ''; | |
| # apple_sdk is supposed to choose sane defaults, so isAarch64 needs no | |
| # separate handling here. MetalKit is added only when Metal is enabled. | |
| darwinBuildInputs = | |
| let | |
| frameworks = darwin.apple_sdk.frameworks; | |
| in | |
| [ | |
| frameworks.Accelerate | |
| frameworks.CoreVideo | |
| frameworks.CoreGraphics | |
| ] | |
| ++ optionals useMetalKit [ frameworks.MetalKit ]; | |
| # CUDA dependencies, spelled out output by output. | |
| cudaBuildInputs = [ | |
| cudaPackages.cuda_cccl.dev # <nv/target> | |
| # A temporary hack for reducing the closure size, remove once cudaPackages | |
| # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792 | |
| cudaPackages.cuda_cudart.dev | |
| cudaPackages.cuda_cudart.lib | |
| cudaPackages.cuda_cudart.static | |
| cudaPackages.libcublas.dev | |
| cudaPackages.libcublas.lib | |
| cudaPackages.libcublas.static | |
| ]; | |
| # Dependencies added to buildInputs when useRocm is set. | |
| rocmBuildInputs = [ | |
| rocmPackages.clr | |
| rocmPackages.hipblas | |
| rocmPackages.rocblas | |
| ]; | |
| # Dependencies added to buildInputs when useVulkan is set. | |
| vulkanBuildInputs = [ vulkan-headers vulkan-loader ]; | |
| in | |
| effectiveStdenv.mkDerivation ( | |
| finalAttrs: { | |
| pname = "llama-cpp" + pnameSuffix; | |
| version = llamaVersion; | |
| # Note: none of the files discarded here are visible in the sandbox or | |
| # affect the output hash. This also means they can be modified without | |
| # triggering a rebuild. | |
| src = lib.cleanSourceWith { | |
| src = lib.cleanSource ../../.; | |
| # Keep a path unless it matches one of the exclusions below. | |
| filter = | |
| path: _type: | |
| let | |
| base = baseNameOf path; | |
| in | |
| !( | |
| lib.hasSuffix ".nix" path # Ignore *.nix files when computing outPaths | |
| || lib.hasSuffix ".md" path # Ignore *.md changes when computing outPaths | |
| || lib.hasPrefix "." base # Skip hidden files and directories | |
| || base == "flake.lock" | |
| ); | |
| }; | |
| # Replace the two NSBundle resource lookups in ggml-metal.m with fixed paths | |
| # under $out/bin (ggml-metal.metal and default.metallib respectively). | |
| postPatch = '' | |
| substituteInPlace ./ggml-metal.m \ | |
| --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" | |
| substituteInPlace ./ggml-metal.m \ | |
| --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" | |
| ''; | |
| # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, | |
| # `default.metallib` may be compiled with the Metal compiler from Xcode, | |
| # and we need to escape the sandbox on macOS to access the Metal compiler. | |
| # `xcrun` is used to find the path of the Metal compiler, which is variable | |
| # and not on $PATH. | |
| # See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion. | |
| __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; | |
| # Build-time tools: a fixed base set followed by groups that depend on the | |
| # selected backend and linkage. | |
| nativeBuildInputs = lib.concatLists [ | |
| [ | |
| cmake | |
| ninja | |
| pkg-config | |
| git | |
| ] | |
| (optionals useCuda [ | |
| cudaPackages.cuda_nvcc | |
| # TODO: Replace with autoAddDriverRunpath | |
| # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged | |
| cudaPackages.autoAddOpenGLRunpathHook | |
| ]) | |
| (optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]) | |
| # Same condition as __noChroot: xcrun is only needed for precompiled Metal shaders. | |
| (optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]) | |
| ]; | |
| # Libraries to link against, one group per platform/backend toggle. | |
| buildInputs = lib.concatLists [ | |
| (optionals effectiveStdenv.isDarwin darwinBuildInputs) | |
| (optionals useCuda cudaBuildInputs) | |
| (optionals useMpi [ mpi ]) | |
| (optionals useOpenCL [ clblast ]) | |
| (optionals useRocm rocmBuildInputs) | |
| (optionals useBlas [ blas ]) | |
| (optionals useVulkan vulkanBuildInputs) | |
| ]; | |
| # Flags handed to CMake at configure time. The LLAMA_* switches mirror this | |
| # package's feature toggles; backend-specific settings are appended per backend. | |
| # Every flag goes through cmakeBool/cmakeFeature so that all of them are | |
| # rendered the same way. | |
| cmakeFlags = | |
| [ | |
| (cmakeBool "LLAMA_NATIVE" false) | |
| (cmakeBool "LLAMA_BUILD_SERVER" true) | |
| (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) | |
| (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) | |
| (cmakeBool "LLAMA_BLAS" useBlas) | |
| (cmakeBool "LLAMA_CLBLAST" useOpenCL) | |
| (cmakeBool "LLAMA_CUDA" useCuda) | |
| (cmakeBool "LLAMA_HIPBLAS" useRocm) | |
| (cmakeBool "LLAMA_METAL" useMetalKit) | |
| (cmakeBool "LLAMA_MPI" useMpi) | |
| (cmakeBool "LLAMA_VULKAN" useVulkan) | |
| (cmakeBool "LLAMA_STATIC" enableStatic) | |
| ] | |
| ++ optionals useCuda [ | |
| # Semicolon-separated capability list with the dots removed by dropDot. | |
| (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( | |
| builtins.concatStringsSep ";" ( | |
| map cudaPackages.flags.dropDot cudaPackages.flags.cudaCapabilities | |
| ) | |
| )) | |
| ] | |
| ++ optionals useRocm [ | |
| (cmakeFeature "CMAKE_C_COMPILER" "hipcc") | |
| (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") | |
| # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM | |
| # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt | |
| # and select the line that matches the current nixpkgs version of rocBLAS. | |
| # Should likely use `rocmPackages.clr.gpuTargets`. | |
| (cmakeFeature "AMDGPU_TARGETS" "gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102") | |
| ] | |
| ++ optionals useMetalKit [ | |
| (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") | |
| # Embed the Metal library unless the shaders are precompiled. | |
| (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) | |
| ]; | |
| # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, | |
| # if they haven't been added yet. | |
| # Rename the installed `main` and `server` binaries to `llama` and | |
| # `llama-server`, and copy llama.h from the source tree into $out/include. | |
| postInstall = '' | |
| mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix} | |
| mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix} | |
| mkdir -p $out/include | |
| cp $src/llama.h $out/include/ | |
| ''; | |
| # Expose the feature toggles and two development shells on the package. | |
| # The shells are defined here and take their build inputs from the final | |
| # package via `inputsFrom = [ finalAttrs.finalPackage ]`. | |
| passthru = { | |
| inherit | |
| useBlas | |
| useCuda | |
| useMetalKit | |
| useMpi | |
| useOpenCL | |
| useRocm | |
| useVulkan | |
| ; | |
| # Shell with the lightweight Python environment. | |
| shell = mkShell { | |
| name = "shell-${finalAttrs.finalPackage.name}"; | |
| description = "contains numpy and sentencepiece"; | |
| buildInputs = [ llama-python ]; | |
| inputsFrom = [ finalAttrs.finalPackage ]; | |
| # Adds the lib directory of the compiler's runtime output to LD_LIBRARY_PATH. | |
| shellHook = '' | |
| addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib" | |
| ''; | |
| }; | |
| # Shell with the heavier Python environment; the description lists every | |
| # package that llama-python-extra is built from, including tiktoken. | |
| shell-extra = mkShell { | |
| name = "shell-extra-${finalAttrs.finalPackage.name}"; | |
| description = "contains numpy, sentencepiece, tiktoken, torchWithoutCuda, and transformers"; | |
| buildInputs = [ llama-python-extra ]; | |
| inputsFrom = [ finalAttrs.finalPackage ]; | |
| }; | |
| }; | |
| meta = { | |
| # Configurations that not even the CI should evaluate; they result in the | |
| # "unsupported platform" messages. Mostly a no-op, because cudaPackages | |
| # would've refused to evaluate anyway. | |
| badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin; | |
| # Configurations known to fail to build. Can be overridden by importing | |
| # Nixpkgs with `allowBroken = true`. | |
| broken = useMetalKit && !effectiveStdenv.isDarwin; | |
| description = "Inference of LLaMA model in pure C/C++" + descriptionSuffix; | |
| homepage = "https://github.com/ggerganov/llama.cpp/"; | |
| license = lib.licenses.mit; | |
| # Accommodates `nix run` and `lib.getExe` | |
| mainProgram = "llama"; | |
| # These people might respond, on a best-effort basis, if you ping them | |
| # about Nix-specific regressions or to review Nix-specific PRs. | |
| # Consider adding yourself to this list if you want to ensure this flake | |
| # stays maintained and you're willing to invest your time. Do not add | |
| # other people without their consent. Consider removing people after | |
| # they've been unreachable for long periods of time. | |
| # Note that lib.maintainers is defined in Nixpkgs, but you may just add | |
| # an attrset following the same format as in | |
| # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix | |
| maintainers = [ | |
| lib.maintainers.philiptaron | |
| lib.maintainers.SomeoneSerge | |
| ]; | |
| # Extend `badPlatforms` instead | |
| platforms = lib.platforms.all; | |
| }; | |
| } | |
| ) | |