Spaces:
Runtime error
Runtime error
# Arguments of the package function: build dependencies first, then the
# feature toggles that select the compute backend and linkage.
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,

  # BLAS is enabled by default only when none of the accelerated backends
  # (CUDA, MetalKit, ROCm, Vulkan) is selected and the `blas` package reports
  # itself as available.
  useBlas ? !(useCuda || useMetalKit || useRocm || useVulkan) && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  enableCurl ? true,
  useVulkan ? false,
  # Arbitrary version, substituted by the flake
  llamaVersion ? "0.0.0",

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:
| let | |
# Bring the `lib` helpers used throughout this file into scope.
inherit (lib) cmakeBool cmakeFeature optionals strings;

# Shadow the `stdenv` argument for everything inside this `let`: evaluating it
# throws, so the code below has to go through `effectiveStdenv`.
stdenv = throw "Use effectiveStdenv instead";
# Labels of the enabled backends, in a fixed order. They decorate both the
# package name and the description below.
suffices = builtins.concatLists [
  (optionals useBlas [ "BLAS" ])
  (optionals useCuda [ "CUDA" ])
  (optionals useMetalKit [ "MetalKit" ])
  (optionals useMpi [ "MPI" ])
  (optionals useRocm [ "ROCm" ])
  (optionals useVulkan [ "Vulkan" ])
];

# "-blas-cuda"-style suffix for `pname`: lower-cased labels joined by dashes,
# or the empty string when no backend label applies.
pnameSuffix =
  if suffices == [ ] then "" else "-" + strings.concatMapStringsSep "-" strings.toLower suffices;

# Human-readable counterpart appended to `meta.description`.
descriptionSuffix =
  if suffices == [ ] then "" else ", accelerated with " + strings.concatStringsSep ", " suffices;
# Derivation whose only content is bin/xcrun, a symlink to the host's
# /usr/bin/xcrun.
xcrunHost = runCommand "xcrunHost" { } ''
  mkdir -p $out/bin
  ln -s /usr/bin/xcrun $out/bin
'';

# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
# separately
darwinBuildInputs =
  let
    frameworks = darwin.apple_sdk.frameworks;
  in
  [
    frameworks.Accelerate
    frameworks.CoreVideo
    frameworks.CoreGraphics
  ]
  ++ optionals useMetalKit [ frameworks.MetalKit ];

# Libraries added to buildInputs when useCuda is set.
cudaBuildInputs = [
  cudaPackages.cuda_cudart
  cudaPackages.cuda_cccl # <nv/target>
  cudaPackages.libcublas
];

# Libraries added to buildInputs when useRocm is set.
rocmBuildInputs = [
  rocmPackages.clr
  rocmPackages.hipblas
  rocmPackages.rocblas
];

# Libraries added to buildInputs when useVulkan is set.
vulkanBuildInputs = [
  vulkan-headers
  vulkan-loader
  shaderc
];
| in | |
| effectiveStdenv.mkDerivation (finalAttrs: { | |
# The package name carries one dash-separated suffix per enabled backend.
pname = "llama-cpp" + pnameSuffix;
version = llamaVersion;
# Note: none of the files discarded here are visible in the sandbox or
# affect the output hash. This also means they can be modified without
# triggering a rebuild.
src = lib.cleanSourceWith {
  src = lib.cleanSource ../../.;
  # Keep a path only if none of the exclusion rules below matches it.
  filter =
    path: _type:
    let
      baseName = baseNameOf path;
    in
    !(
      lib.hasSuffix ".nix" path # Ignore *.nix files when computing outPaths
      || lib.hasSuffix ".md" path # Ignore *.md changes when computing outPaths
      || lib.hasPrefix "." baseName # Skip hidden files and directories
      || baseName == "flake.lock"
    );
};
# Replace the two bundle resource lookups in ggml-metal.m (the Metal shader
# source and the precompiled metallib) with fixed paths under $out/bin.
postPatch = ''
  substituteInPlace ./ggml/src/ggml-metal.m \
  --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
  substituteInPlace ./ggml/src/ggml-metal.m \
  --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
'';
# With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
# `default.metallib` may be compiled with the Metal compiler from Xcode,
# and we need to escape the sandbox on macOS to access that compiler.
# `xcrun` is used to find the path of the Metal compiler, which is variable
# and not on $PATH.
# See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
__noChroot = (
  effectiveStdenv.isDarwin
  && useMetalKit
  && precompileMetalShaders
);
# Build-time tools. The base tool set is always present; the remaining
# entries are appended only for the configurations that need them.
nativeBuildInputs = builtins.concatLists [
  [
    cmake
    ninja
    pkg-config
    git
  ]
  (optionals useCuda [
    cudaPackages.cuda_nvcc
    autoAddDriverRunpath
  ])
  (optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ])
  # Same condition as `__noChroot`: xcrun is only needed to precompile Metal shaders.
  (optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ])
];
# Libraries linked into the build; each group is included only when the
# corresponding platform or feature toggle is active.
buildInputs = builtins.concatLists [
  (optionals effectiveStdenv.isDarwin darwinBuildInputs)
  (optionals useCuda cudaBuildInputs)
  (optionals useMpi [ mpi ])
  (optionals useRocm rocmBuildInputs)
  (optionals useBlas [ blas ])
  (optionals useVulkan vulkanBuildInputs)
  (optionals enableCurl [ curl ])
];
# CMake configuration: a fixed set of boolean switches mirroring the feature
# toggles, followed by backend-specific settings.
cmakeFlags =
  [
    (cmakeBool "LLAMA_BUILD_SERVER" true)
    (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
    (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
    (cmakeBool "LLAMA_CURL" enableCurl)
    (cmakeBool "GGML_NATIVE" false)
    (cmakeBool "GGML_BLAS" useBlas)
    (cmakeBool "GGML_CUDA" useCuda)
    (cmakeBool "GGML_HIPBLAS" useRocm)
    (cmakeBool "GGML_METAL" useMetalKit)
    (cmakeBool "GGML_VULKAN" useVulkan)
    (cmakeBool "GGML_STATIC" enableStatic)
  ]
  # CUDA: pass the configured capabilities, each run through `dropDot`,
  # as a semicolon-separated list.
  ++ optionals useCuda [
    (cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
      builtins.concatStringsSep ";" (map cudaPackages.flags.dropDot cudaPackages.flags.cudaCapabilities)
    ))
  ]
  # ROCm: point CMake at the ROCm clang and the GPU targets exposed by clr.
  ++ optionals useRocm [
    (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
    (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
  ]
  # Metal: embed the shader library unless the shaders are precompiled.
  ++ optionals useMetalKit [
    (cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
    (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
  ];
# Environment variables needed for ROCm.
# `env` must be an attribute set. `lib.optionalAttrs` yields `{ }` when ROCm
# is disabled, whereas the list helper `optionals` would yield `[ ]` there.
env = lib.optionalAttrs useRocm {
  ROCM_PATH = "${rocmPackages.clr}";
  HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
};
# Copy the public header into $out/include by hand.
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
# if they haven't been added yet.
postInstall = ''
  mkdir -p $out/include
  cp $src/include/llama.h $out/include/
'';
meta = {
  description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
  homepage = "https://github.com/ggerganov/llama.cpp/";
  license = lib.licenses.mit;

  # Accommodates `nix run` and `lib.getExe`
  mainProgram = "llama-cli";

  # Extend `badPlatforms` instead
  platforms = lib.platforms.all;

  # Configurations we don't want even the CI to evaluate. Results in the
  # "unsupported platform" messages. This is mostly a no-op, because
  # cudaPackages would've refused to evaluate anyway.
  badPlatforms = if useCuda then lib.platforms.darwin else [ ];

  # Configurations that are known to result in build failures. Can be
  # overridden by importing Nixpkgs with `allowBroken = true`.
  broken = useMetalKit && !effectiveStdenv.isDarwin;

  # These people might respond, on the best effort basis, if you ping them
  # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
  # Consider adding yourself to this list if you want to ensure this flake
  # stays maintained and you're willing to invest your time. Do not add
  # other people without their consent. Consider removing people after
  # they've been unreachable for long periods of time.
  # Note that lib.maintainers is defined in Nixpkgs, but you may just add
  # an attrset following the same format as in
  # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
  maintainers = [
    lib.maintainers.philiptaron
    lib.maintainers.SomeoneSerge
  ];
};
| }) | |