Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- llama.cpp/.devops/cann.Dockerfile +130 -0
- llama.cpp/.devops/cpu.Dockerfile +88 -0
- llama.cpp/.devops/cuda-new.Dockerfile +95 -0
- llama.cpp/.devops/cuda.Dockerfile +94 -0
- llama.cpp/.devops/intel.Dockerfile +95 -0
- llama.cpp/.devops/llama-cli-cann.Dockerfile +45 -0
- llama.cpp/.devops/llama-cpp-cuda.srpm.spec +85 -0
- llama.cpp/.devops/llama-cpp.srpm.spec +87 -0
- llama.cpp/.devops/musa.Dockerfile +101 -0
- llama.cpp/.devops/rocm.Dockerfile +113 -0
- llama.cpp/.devops/s390x.Dockerfile +126 -0
- llama.cpp/.devops/tools.sh +53 -0
- llama.cpp/.devops/vulkan.Dockerfile +90 -0
- llama.cpp/.gemini/settings.json +1 -0
- llama.cpp/.github/labeler.yml +106 -0
- llama.cpp/.github/pull_request_template.md +1 -0
- llama.cpp/build/CMakeCache.txt +91 -0
- llama.cpp/ci/README-MUSA.md +35 -0
- llama.cpp/ci/README.md +33 -0
- llama.cpp/ci/run.sh +709 -0
- llama.cpp/cmake/arm64-apple-clang.cmake +16 -0
- llama.cpp/cmake/arm64-windows-llvm.cmake +16 -0
- llama.cpp/cmake/build-info.cmake +48 -0
- llama.cpp/cmake/common.cmake +58 -0
- llama.cpp/cmake/download-models.cmake +21 -0
- llama.cpp/cmake/git-vars.cmake +22 -0
- llama.cpp/cmake/license.cmake +40 -0
- llama.cpp/cmake/llama-config.cmake.in +30 -0
- llama.cpp/cmake/llama.pc.in +10 -0
- llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake +29 -0
- llama.cpp/cmake/x64-windows-llvm.cmake +5 -0
- llama.cpp/common/CMakeLists.txt +149 -0
- llama.cpp/common/arg.cpp +0 -0
- llama.cpp/common/arg.h +131 -0
- llama.cpp/common/base64.hpp +392 -0
- llama.cpp/common/build-info.cpp.in +4 -0
- llama.cpp/common/chat-parser-xml-toolcall.cpp +879 -0
- llama.cpp/common/chat-parser-xml-toolcall.h +45 -0
- llama.cpp/common/chat-parser.cpp +1649 -0
- llama.cpp/common/chat-parser.h +133 -0
- llama.cpp/common/chat-peg-parser.cpp +124 -0
- llama.cpp/common/chat-peg-parser.h +105 -0
- llama.cpp/common/chat.cpp +0 -0
- llama.cpp/common/chat.h +252 -0
- llama.cpp/common/common.cpp +1824 -0
- llama.cpp/common/common.h +931 -0
- llama.cpp/common/console.cpp +1137 -0
- llama.cpp/common/console.h +41 -0
- llama.cpp/common/debug.cpp +167 -0
- llama.cpp/common/debug.h +43 -0
llama.cpp/.devops/cann.Dockerfile
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ==============================================================================
|
| 2 |
+
# ARGUMENTS
|
| 3 |
+
# ==============================================================================
|
| 4 |
+
|
| 5 |
+
# Define the CANN base image for easier version updates later
|
| 6 |
+
ARG CHIP_TYPE=910b
|
| 7 |
+
ARG CANN_BASE_IMAGE=quay.io/ascend/cann:8.3.rc2-${CHIP_TYPE}-openeuler24.03-py3.11
|
| 8 |
+
|
| 9 |
+
# ==============================================================================
|
| 10 |
+
# BUILD STAGE
|
| 11 |
+
# Compile all binary files and libraries
|
| 12 |
+
# ==============================================================================
|
| 13 |
+
FROM ${CANN_BASE_IMAGE} AS build
|
| 14 |
+
|
| 15 |
+
# -- Install build dependencies --
|
| 16 |
+
RUN yum install -y gcc g++ cmake make git openssl-devel python3 python3-pip && \
|
| 17 |
+
yum clean all && \
|
| 18 |
+
rm -rf /var/cache/yum
|
| 19 |
+
|
| 20 |
+
# -- Set the working directory --
|
| 21 |
+
WORKDIR /app
|
| 22 |
+
|
| 23 |
+
# -- Copy project files --
|
| 24 |
+
COPY . .
|
| 25 |
+
|
| 26 |
+
# -- Set CANN environment variables (required for compilation) --
|
| 27 |
+
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
|
| 28 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
| 29 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
|
| 30 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
|
| 31 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
| 32 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
|
| 33 |
+
# ... You can add other environment variables from the original file as needed ...
|
| 34 |
+
# For brevity, only core variables are listed here. You can paste the original ENV list here.
|
| 35 |
+
|
| 36 |
+
# -- Build llama.cpp --
|
| 37 |
+
# Use the passed CHIP_TYPE argument and add general build options
|
| 38 |
+
ARG CHIP_TYPE
|
| 39 |
+
RUN source /usr/local/Ascend/ascend-toolkit/set_env.sh --force \
|
| 40 |
+
&& \
|
| 41 |
+
cmake -B build \
|
| 42 |
+
-DGGML_CANN=ON \
|
| 43 |
+
-DCMAKE_BUILD_TYPE=Release \
|
| 44 |
+
-DSOC_TYPE=ascend${CHIP_TYPE} \
|
| 45 |
+
-DUSE_ACL_GRAPH=ON \
|
| 46 |
+
. && \
|
| 47 |
+
cmake --build build --config Release -j$(nproc)
|
| 48 |
+
|
| 49 |
+
# -- Organize build artifacts for copying in later stages --
|
| 50 |
+
# Create a lib directory to store all .so files
|
| 51 |
+
RUN mkdir -p /app/lib && \
|
| 52 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 53 |
+
|
| 54 |
+
# Create a full directory to store all executables and Python scripts
|
| 55 |
+
RUN mkdir -p /app/full && \
|
| 56 |
+
cp build/bin/* /app/full/ && \
|
| 57 |
+
cp *.py /app/full/ && \
|
| 58 |
+
cp -r gguf-py /app/full/ && \
|
| 59 |
+
cp -r requirements /app/full/ && \
|
| 60 |
+
cp requirements.txt /app/full/
|
| 61 |
+
# If you have a tools.sh script, make sure it is copied here
|
| 62 |
+
# cp .devops/tools.sh /app/full/tools.sh
|
| 63 |
+
|
| 64 |
+
# ==============================================================================
|
| 65 |
+
# BASE STAGE
|
| 66 |
+
# Create a minimal base image with CANN runtime and common libraries
|
| 67 |
+
# ==============================================================================
|
| 68 |
+
FROM ${CANN_BASE_IMAGE} AS base
|
| 69 |
+
|
| 70 |
+
# -- Install runtime dependencies --
|
| 71 |
+
RUN yum install -y libgomp curl && \
|
| 72 |
+
yum clean all && \
|
| 73 |
+
rm -rf /var/cache/yum
|
| 74 |
+
|
| 75 |
+
# -- Set CANN environment variables (required for runtime) --
|
| 76 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
| 77 |
+
ENV LD_LIBRARY_PATH=/app:${ASCEND_TOOLKIT_HOME}/lib64:${LD_LIBRARY_PATH}
|
| 78 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${PATH}
|
| 79 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
| 80 |
+
# ... You can add other environment variables from the original file as needed ...
|
| 81 |
+
|
| 82 |
+
WORKDIR /app
|
| 83 |
+
|
| 84 |
+
# Copy compiled .so files from the build stage
|
| 85 |
+
COPY --from=build /app/lib/ /app
|
| 86 |
+
|
| 87 |
+
# ==============================================================================
|
| 88 |
+
# FINAL STAGES (TARGETS)
|
| 89 |
+
# ==============================================================================
|
| 90 |
+
|
| 91 |
+
### Target: full
|
| 92 |
+
# Complete image with all tools, Python bindings, and dependencies
|
| 93 |
+
# ==============================================================================
|
| 94 |
+
FROM base AS full
|
| 95 |
+
|
| 96 |
+
COPY --from=build /app/full /app
|
| 97 |
+
|
| 98 |
+
# Install Python dependencies
|
| 99 |
+
RUN yum install -y git python3 python3-pip && \
|
| 100 |
+
pip3 install --no-cache-dir --upgrade pip setuptools wheel && \
|
| 101 |
+
pip3 install --no-cache-dir -r requirements.txt && \
|
| 102 |
+
yum clean all && \
|
| 103 |
+
rm -rf /var/cache/yum
|
| 104 |
+
|
| 105 |
+
# You need to provide a tools.sh script as the entrypoint
|
| 106 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 107 |
+
# If there is no tools.sh, you can set the default to start the server
|
| 108 |
+
# ENTRYPOINT ["/app/llama-server"]
|
| 109 |
+
|
| 110 |
+
### Target: light
|
| 111 |
+
# Lightweight image containing only llama-cli and llama-completion
|
| 112 |
+
# ==============================================================================
|
| 113 |
+
FROM base AS light
|
| 114 |
+
|
| 115 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 116 |
+
|
| 117 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 118 |
+
|
| 119 |
+
### Target: server
|
| 120 |
+
# Dedicated server image containing only llama-server
|
| 121 |
+
# ==============================================================================
|
| 122 |
+
FROM base AS server
|
| 123 |
+
|
| 124 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 125 |
+
|
| 126 |
+
COPY --from=build /app/full/llama-server /app
|
| 127 |
+
|
| 128 |
+
HEALTHCHECK --interval=5m CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 129 |
+
|
| 130 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/cpu.Dockerfile
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=22.04
|
| 2 |
+
|
| 3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
| 4 |
+
|
| 5 |
+
ARG TARGETARCH
|
| 6 |
+
|
| 7 |
+
RUN apt-get update && \
|
| 8 |
+
apt-get install -y build-essential git cmake libssl-dev
|
| 9 |
+
|
| 10 |
+
WORKDIR /app
|
| 11 |
+
|
| 12 |
+
COPY . .
|
| 13 |
+
|
| 14 |
+
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
|
| 15 |
+
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
|
| 16 |
+
else \
|
| 17 |
+
echo "Unsupported architecture"; \
|
| 18 |
+
exit 1; \
|
| 19 |
+
fi && \
|
| 20 |
+
cmake --build build -j $(nproc)
|
| 21 |
+
|
| 22 |
+
RUN mkdir -p /app/lib && \
|
| 23 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 24 |
+
|
| 25 |
+
RUN mkdir -p /app/full \
|
| 26 |
+
&& cp build/bin/* /app/full \
|
| 27 |
+
&& cp *.py /app/full \
|
| 28 |
+
&& cp -r gguf-py /app/full \
|
| 29 |
+
&& cp -r requirements /app/full \
|
| 30 |
+
&& cp requirements.txt /app/full \
|
| 31 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 32 |
+
|
| 33 |
+
## Base image
|
| 34 |
+
FROM ubuntu:$UBUNTU_VERSION AS base
|
| 35 |
+
|
| 36 |
+
RUN apt-get update \
|
| 37 |
+
&& apt-get install -y libgomp1 curl\
|
| 38 |
+
&& apt autoremove -y \
|
| 39 |
+
&& apt clean -y \
|
| 40 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 41 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 42 |
+
&& find /var/cache -type f -delete
|
| 43 |
+
|
| 44 |
+
COPY --from=build /app/lib/ /app
|
| 45 |
+
|
| 46 |
+
### Full
|
| 47 |
+
FROM base AS full
|
| 48 |
+
|
| 49 |
+
COPY --from=build /app/full /app
|
| 50 |
+
|
| 51 |
+
WORKDIR /app
|
| 52 |
+
|
| 53 |
+
RUN apt-get update \
|
| 54 |
+
&& apt-get install -y \
|
| 55 |
+
git \
|
| 56 |
+
python3 \
|
| 57 |
+
python3-pip \
|
| 58 |
+
&& pip install --upgrade pip setuptools wheel \
|
| 59 |
+
&& pip install -r requirements.txt \
|
| 60 |
+
&& apt autoremove -y \
|
| 61 |
+
&& apt clean -y \
|
| 62 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 63 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 64 |
+
&& find /var/cache -type f -delete
|
| 65 |
+
|
| 66 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 67 |
+
|
| 68 |
+
### Light, CLI only
|
| 69 |
+
FROM base AS light
|
| 70 |
+
|
| 71 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 72 |
+
|
| 73 |
+
WORKDIR /app
|
| 74 |
+
|
| 75 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 76 |
+
|
| 77 |
+
### Server, Server only
|
| 78 |
+
FROM base AS server
|
| 79 |
+
|
| 80 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 81 |
+
|
| 82 |
+
COPY --from=build /app/full/llama-server /app
|
| 83 |
+
|
| 84 |
+
WORKDIR /app
|
| 85 |
+
|
| 86 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 87 |
+
|
| 88 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/cuda-new.Dockerfile
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=24.04
|
| 2 |
+
# This needs to generally match the container host's environment.
|
| 3 |
+
ARG CUDA_VERSION=13.1.0
|
| 4 |
+
# Target the CUDA build image
|
| 5 |
+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
| 6 |
+
|
| 7 |
+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
| 8 |
+
|
| 9 |
+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
| 10 |
+
|
| 11 |
+
# CUDA architecture to build for (defaults to all supported archs)
|
| 12 |
+
ARG CUDA_DOCKER_ARCH=default
|
| 13 |
+
|
| 14 |
+
RUN apt-get update && \
|
| 15 |
+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
|
| 16 |
+
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
| 22 |
+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
| 23 |
+
fi && \
|
| 24 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
| 25 |
+
cmake --build build --config Release -j$(nproc)
|
| 26 |
+
|
| 27 |
+
RUN mkdir -p /app/lib && \
|
| 28 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 29 |
+
|
| 30 |
+
RUN mkdir -p /app/full \
|
| 31 |
+
&& cp build/bin/* /app/full \
|
| 32 |
+
&& cp *.py /app/full \
|
| 33 |
+
&& cp -r gguf-py /app/full \
|
| 34 |
+
&& cp -r requirements /app/full \
|
| 35 |
+
&& cp requirements.txt /app/full \
|
| 36 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 37 |
+
|
| 38 |
+
## Base image
|
| 39 |
+
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
|
| 40 |
+
|
| 41 |
+
RUN apt-get update \
|
| 42 |
+
&& apt-get install -y libgomp1 curl\
|
| 43 |
+
&& apt autoremove -y \
|
| 44 |
+
&& apt clean -y \
|
| 45 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 46 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 47 |
+
&& find /var/cache -type f -delete
|
| 48 |
+
|
| 49 |
+
COPY --from=build /app/lib/ /app
|
| 50 |
+
|
| 51 |
+
### Full
|
| 52 |
+
FROM base AS full
|
| 53 |
+
|
| 54 |
+
COPY --from=build /app/full /app
|
| 55 |
+
|
| 56 |
+
WORKDIR /app
|
| 57 |
+
|
| 58 |
+
RUN apt-get update \
|
| 59 |
+
&& apt-get install -y \
|
| 60 |
+
git \
|
| 61 |
+
python3 \
|
| 62 |
+
python3-pip \
|
| 63 |
+
python3-wheel \
|
| 64 |
+
&& pip install --break-system-packages --upgrade setuptools \
|
| 65 |
+
&& pip install --break-system-packages -r requirements.txt \
|
| 66 |
+
&& apt autoremove -y \
|
| 67 |
+
&& apt clean -y \
|
| 68 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 69 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 70 |
+
&& find /var/cache -type f -delete
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 74 |
+
|
| 75 |
+
### Light, CLI only
|
| 76 |
+
FROM base AS light
|
| 77 |
+
|
| 78 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 79 |
+
|
| 80 |
+
WORKDIR /app
|
| 81 |
+
|
| 82 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 83 |
+
|
| 84 |
+
### Server, Server only
|
| 85 |
+
FROM base AS server
|
| 86 |
+
|
| 87 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 88 |
+
|
| 89 |
+
COPY --from=build /app/full/llama-server /app
|
| 90 |
+
|
| 91 |
+
WORKDIR /app
|
| 92 |
+
|
| 93 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 94 |
+
|
| 95 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/cuda.Dockerfile
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=22.04
|
| 2 |
+
# This needs to generally match the container host's environment.
|
| 3 |
+
ARG CUDA_VERSION=12.4.0
|
| 4 |
+
# Target the CUDA build image
|
| 5 |
+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
| 6 |
+
|
| 7 |
+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
| 8 |
+
|
| 9 |
+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
| 10 |
+
|
| 11 |
+
# CUDA architecture to build for (defaults to all supported archs)
|
| 12 |
+
ARG CUDA_DOCKER_ARCH=default
|
| 13 |
+
|
| 14 |
+
RUN apt-get update && \
|
| 15 |
+
apt-get install -y build-essential cmake python3 python3-pip git libssl-dev libgomp1
|
| 16 |
+
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
|
| 19 |
+
COPY . .
|
| 20 |
+
|
| 21 |
+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
| 22 |
+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
| 23 |
+
fi && \
|
| 24 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
| 25 |
+
cmake --build build --config Release -j$(nproc)
|
| 26 |
+
|
| 27 |
+
RUN mkdir -p /app/lib && \
|
| 28 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 29 |
+
|
| 30 |
+
RUN mkdir -p /app/full \
|
| 31 |
+
&& cp build/bin/* /app/full \
|
| 32 |
+
&& cp *.py /app/full \
|
| 33 |
+
&& cp -r gguf-py /app/full \
|
| 34 |
+
&& cp -r requirements /app/full \
|
| 35 |
+
&& cp requirements.txt /app/full \
|
| 36 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 37 |
+
|
| 38 |
+
## Base image
|
| 39 |
+
FROM ${BASE_CUDA_RUN_CONTAINER} AS base
|
| 40 |
+
|
| 41 |
+
RUN apt-get update \
|
| 42 |
+
&& apt-get install -y libgomp1 curl\
|
| 43 |
+
&& apt autoremove -y \
|
| 44 |
+
&& apt clean -y \
|
| 45 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 46 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 47 |
+
&& find /var/cache -type f -delete
|
| 48 |
+
|
| 49 |
+
COPY --from=build /app/lib/ /app
|
| 50 |
+
|
| 51 |
+
### Full
|
| 52 |
+
FROM base AS full
|
| 53 |
+
|
| 54 |
+
COPY --from=build /app/full /app
|
| 55 |
+
|
| 56 |
+
WORKDIR /app
|
| 57 |
+
|
| 58 |
+
RUN apt-get update \
|
| 59 |
+
&& apt-get install -y \
|
| 60 |
+
git \
|
| 61 |
+
python3 \
|
| 62 |
+
python3-pip \
|
| 63 |
+
&& pip install --upgrade pip setuptools wheel \
|
| 64 |
+
&& pip install --break-system-packages -r requirements.txt \
|
| 65 |
+
&& apt autoremove -y \
|
| 66 |
+
&& apt clean -y \
|
| 67 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 68 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 69 |
+
&& find /var/cache -type f -delete
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 73 |
+
|
| 74 |
+
### Light, CLI only
|
| 75 |
+
FROM base AS light
|
| 76 |
+
|
| 77 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 78 |
+
|
| 79 |
+
WORKDIR /app
|
| 80 |
+
|
| 81 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 82 |
+
|
| 83 |
+
### Server, Server only
|
| 84 |
+
FROM base AS server
|
| 85 |
+
|
| 86 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 87 |
+
|
| 88 |
+
COPY --from=build /app/full/llama-server /app
|
| 89 |
+
|
| 90 |
+
WORKDIR /app
|
| 91 |
+
|
| 92 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 93 |
+
|
| 94 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/intel.Dockerfile
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG ONEAPI_VERSION=2025.2.2-0-devel-ubuntu24.04
|
| 2 |
+
|
| 3 |
+
## Build Image
|
| 4 |
+
|
| 5 |
+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS build
|
| 6 |
+
|
| 7 |
+
ARG GGML_SYCL_F16=OFF
|
| 8 |
+
RUN apt-get update && \
|
| 9 |
+
apt-get install -y git libssl-dev
|
| 10 |
+
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
COPY . .
|
| 14 |
+
|
| 15 |
+
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
| 16 |
+
echo "GGML_SYCL_F16 is set" \
|
| 17 |
+
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
| 18 |
+
fi && \
|
| 19 |
+
echo "Building with dynamic libs" && \
|
| 20 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
|
| 21 |
+
cmake --build build --config Release -j$(nproc)
|
| 22 |
+
|
| 23 |
+
RUN mkdir -p /app/lib && \
|
| 24 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 25 |
+
|
| 26 |
+
RUN mkdir -p /app/full \
|
| 27 |
+
&& cp build/bin/* /app/full \
|
| 28 |
+
&& cp *.py /app/full \
|
| 29 |
+
&& cp -r gguf-py /app/full \
|
| 30 |
+
&& cp -r requirements /app/full \
|
| 31 |
+
&& cp requirements.txt /app/full \
|
| 32 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 33 |
+
|
| 34 |
+
FROM intel/deep-learning-essentials:$ONEAPI_VERSION AS base
|
| 35 |
+
|
| 36 |
+
RUN apt-get update \
|
| 37 |
+
&& apt-get install -y libgomp1 curl\
|
| 38 |
+
&& apt autoremove -y \
|
| 39 |
+
&& apt clean -y \
|
| 40 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 41 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 42 |
+
&& find /var/cache -type f -delete
|
| 43 |
+
|
| 44 |
+
### Full
|
| 45 |
+
FROM base AS full
|
| 46 |
+
|
| 47 |
+
COPY --from=build /app/lib/ /app
|
| 48 |
+
COPY --from=build /app/full /app
|
| 49 |
+
|
| 50 |
+
WORKDIR /app
|
| 51 |
+
|
| 52 |
+
RUN apt-get update && \
|
| 53 |
+
apt-get install -y \
|
| 54 |
+
git \
|
| 55 |
+
python3 \
|
| 56 |
+
python3-pip \
|
| 57 |
+
python3-venv && \
|
| 58 |
+
python3 -m venv /opt/venv && \
|
| 59 |
+
. /opt/venv/bin/activate && \
|
| 60 |
+
pip install --upgrade pip setuptools wheel && \
|
| 61 |
+
pip install -r requirements.txt && \
|
| 62 |
+
apt autoremove -y && \
|
| 63 |
+
apt clean -y && \
|
| 64 |
+
rm -rf /tmp/* /var/tmp/* && \
|
| 65 |
+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
|
| 66 |
+
find /var/cache -type f -delete
|
| 67 |
+
|
| 68 |
+
ENV PATH="/opt/venv/bin:$PATH"
|
| 69 |
+
|
| 70 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 71 |
+
|
| 72 |
+
### Light, CLI only
|
| 73 |
+
FROM base AS light
|
| 74 |
+
|
| 75 |
+
COPY --from=build /app/lib/ /app
|
| 76 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 77 |
+
|
| 78 |
+
WORKDIR /app
|
| 79 |
+
|
| 80 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 81 |
+
|
| 82 |
+
### Server, Server only
|
| 83 |
+
FROM base AS server
|
| 84 |
+
|
| 85 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 86 |
+
|
| 87 |
+
COPY --from=build /app/lib/ /app
|
| 88 |
+
COPY --from=build /app/full/llama-server /app
|
| 89 |
+
|
| 90 |
+
WORKDIR /app
|
| 91 |
+
|
| 92 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 93 |
+
|
| 94 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
| 95 |
+
|
llama.cpp/.devops/llama-cli-cann.Dockerfile
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG ASCEND_VERSION=8.1.RC1.alpha001-910b-openeuler22.03-py3.10
|
| 2 |
+
|
| 3 |
+
FROM ascendai/cann:$ASCEND_VERSION AS build
|
| 4 |
+
|
| 5 |
+
WORKDIR /app
|
| 6 |
+
|
| 7 |
+
COPY . .
|
| 8 |
+
|
| 9 |
+
RUN yum install -y gcc g++ cmake make openssl-devel
|
| 10 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
| 11 |
+
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
| 12 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
| 13 |
+
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
| 14 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
| 15 |
+
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
| 16 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
| 17 |
+
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
| 18 |
+
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
| 19 |
+
|
| 20 |
+
# find libascend_hal.so, because the drive hasn`t been mounted.
|
| 21 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
|
| 22 |
+
|
| 23 |
+
RUN echo "Building with static libs" && \
|
| 24 |
+
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
|
| 25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF -DLLAMA_BUILD_TESTS=OFF && \
|
| 26 |
+
cmake --build build --config Release --target llama-cli && \
|
| 27 |
+
cmake --build build --config Release --target llama-completion
|
| 28 |
+
|
| 29 |
+
# TODO: use image with NNRT
|
| 30 |
+
FROM ascendai/cann:$ASCEND_VERSION AS runtime
|
| 31 |
+
COPY --from=build /app/build/bin/llama-cli /app/build/bin/llama-completion /
|
| 32 |
+
|
| 33 |
+
ENV LC_ALL=C.utf8
|
| 34 |
+
|
| 35 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
| 36 |
+
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
| 37 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
| 38 |
+
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
| 39 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
| 40 |
+
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
| 41 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
| 42 |
+
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
| 43 |
+
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
| 44 |
+
|
| 45 |
+
ENTRYPOINT ["/llama-cli" ]
|
llama.cpp/.devops/llama-cpp-cuda.srpm.spec
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
| 2 |
+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
| 3 |
+
# Built and maintained by John Boero - boeroboy@gmail.com
|
| 4 |
+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
| 5 |
+
|
| 6 |
+
# Notes for llama.cpp:
|
| 7 |
+
# 1. Tags are currently based on hash - which will not sort asciibetically.
|
| 8 |
+
# We need to declare standard versioning if people want to sort latest releases.
|
| 9 |
+
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
|
| 10 |
+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
|
| 11 |
+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
|
| 12 |
+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
|
| 13 |
+
# It is up to the user to install the correct vendor-specific support.
|
| 14 |
+
|
| 15 |
+
Name: llama.cpp-cuda
|
| 16 |
+
Version: %( date "+%%Y%%m%%d" )
|
| 17 |
+
Release: 1%{?dist}
|
| 18 |
+
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
|
| 19 |
+
License: MIT
|
| 20 |
+
Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
|
| 21 |
+
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
|
| 22 |
+
Requires: cuda-toolkit
|
| 23 |
+
URL: https://github.com/ggml-org/llama.cpp
|
| 24 |
+
|
| 25 |
+
%define debug_package %{nil}
|
| 26 |
+
%define source_date_epoch_from_changelog 0
|
| 27 |
+
|
| 28 |
+
%description
|
| 29 |
+
CPU inference for Meta's Lllama2 models using default options.
|
| 30 |
+
|
| 31 |
+
%prep
|
| 32 |
+
%setup -n llama.cpp-master
|
| 33 |
+
|
| 34 |
+
%build
|
| 35 |
+
make -j GGML_CUDA=1
|
| 36 |
+
|
| 37 |
+
%install
|
| 38 |
+
mkdir -p %{buildroot}%{_bindir}/
|
| 39 |
+
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
|
| 40 |
+
cp -p llama-completion %{buildroot}%{_bindir}/llama-cuda-completion
|
| 41 |
+
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
|
| 42 |
+
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
|
| 43 |
+
|
| 44 |
+
mkdir -p %{buildroot}/usr/lib/systemd/system
|
| 45 |
+
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
|
| 46 |
+
[Unit]
|
| 47 |
+
Description=Llama.cpp server, CPU only (no GPU support in this build).
|
| 48 |
+
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
|
| 49 |
+
|
| 50 |
+
[Service]
|
| 51 |
+
Type=simple
|
| 52 |
+
EnvironmentFile=/etc/sysconfig/llama
|
| 53 |
+
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
|
| 54 |
+
ExecReload=/bin/kill -s HUP $MAINPID
|
| 55 |
+
Restart=never
|
| 56 |
+
|
| 57 |
+
[Install]
|
| 58 |
+
WantedBy=default.target
|
| 59 |
+
EOF
|
| 60 |
+
|
| 61 |
+
mkdir -p %{buildroot}/etc/sysconfig
|
| 62 |
+
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
|
| 63 |
+
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
|
| 64 |
+
EOF
|
| 65 |
+
|
| 66 |
+
%clean
|
| 67 |
+
rm -rf %{buildroot}
|
| 68 |
+
rm -rf %{_builddir}/*
|
| 69 |
+
|
| 70 |
+
%files
|
| 71 |
+
%{_bindir}/llama-cuda-cli
|
| 72 |
+
%{_bindir}/llama-cuda-completion
|
| 73 |
+
%{_bindir}/llama-cuda-server
|
| 74 |
+
%{_bindir}/llama-cuda-simple
|
| 75 |
+
/usr/lib/systemd/system/llamacuda.service
|
| 76 |
+
%config /etc/sysconfig/llama
|
| 77 |
+
|
| 78 |
+
%pre
|
| 79 |
+
|
| 80 |
+
%post
|
| 81 |
+
|
| 82 |
+
%preun
|
| 83 |
+
%postun
|
| 84 |
+
|
| 85 |
+
%changelog
|
llama.cpp/.devops/llama-cpp.srpm.spec
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
| 2 |
+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
| 3 |
+
# Built and maintained by John Boero - boeroboy@gmail.com
|
| 4 |
+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
| 5 |
+
|
| 6 |
+
# Notes for llama.cpp:
|
| 7 |
+
# 1. Tags are currently based on hash - which will not sort asciibetically.
|
| 8 |
+
# We need to declare standard versioning if people want to sort latest releases.
|
| 9 |
+
# In the meantime, YYYYMMDD format will be used.
|
| 10 |
+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
|
| 11 |
+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
|
| 12 |
+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
|
| 13 |
+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
|
| 14 |
+
# It is up to the user to install the correct vendor-specific support.
|
| 15 |
+
|
| 16 |
+
Name: llama.cpp
|
| 17 |
+
Version: %( date "+%%Y%%m%%d" )
|
| 18 |
+
Release: 1%{?dist}
|
| 19 |
+
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
|
| 20 |
+
License: MIT
|
| 21 |
+
Source0: https://github.com/ggml-org/llama.cpp/archive/refs/heads/master.tar.gz
|
| 22 |
+
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
|
| 23 |
+
Requires: libstdc++
|
| 24 |
+
URL: https://github.com/ggml-org/llama.cpp
|
| 25 |
+
|
| 26 |
+
%define debug_package %{nil}
|
| 27 |
+
%define source_date_epoch_from_changelog 0
|
| 28 |
+
|
| 29 |
+
%description
|
| 30 |
+
CPU inference for Meta's Llama2 models using default options.
|
| 31 |
+
Models are not included in this package and must be downloaded separately.
|
| 32 |
+
|
| 33 |
+
%prep
|
| 34 |
+
%setup -n llama.cpp-master
|
| 35 |
+
|
| 36 |
+
%build
|
| 37 |
+
make -j
|
| 38 |
+
|
| 39 |
+
%install
|
| 40 |
+
mkdir -p %{buildroot}%{_bindir}/
|
| 41 |
+
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
|
| 42 |
+
cp -p llama-completion %{buildroot}%{_bindir}/llama-completion
|
| 43 |
+
cp -p llama-server %{buildroot}%{_bindir}/llama-server
|
| 44 |
+
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
|
| 45 |
+
|
| 46 |
+
mkdir -p %{buildroot}/usr/lib/systemd/system
|
| 47 |
+
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
|
| 48 |
+
[Unit]
|
| 49 |
+
Description=Llama.cpp server, CPU only (no GPU support in this build).
|
| 50 |
+
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
|
| 51 |
+
|
| 52 |
+
[Service]
|
| 53 |
+
Type=simple
|
| 54 |
+
EnvironmentFile=/etc/sysconfig/llama
|
| 55 |
+
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
|
| 56 |
+
ExecReload=/bin/kill -s HUP $MAINPID
|
| 57 |
+
Restart=no
|
| 58 |
+
|
| 59 |
+
[Install]
|
| 60 |
+
WantedBy=default.target
|
| 61 |
+
EOF
|
| 62 |
+
|
| 63 |
+
mkdir -p %{buildroot}/etc/sysconfig
|
| 64 |
+
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
|
| 65 |
+
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
|
| 66 |
+
EOF
|
| 67 |
+
|
| 68 |
+
%clean
|
| 69 |
+
rm -rf %{buildroot}
|
| 70 |
+
rm -rf %{_builddir}/*
|
| 71 |
+
|
| 72 |
+
%files
|
| 73 |
+
%{_bindir}/llama-cli
|
| 74 |
+
%{_bindir}/llama-completion
|
| 75 |
+
%{_bindir}/llama-server
|
| 76 |
+
%{_bindir}/llama-simple
|
| 77 |
+
/usr/lib/systemd/system/llama.service
|
| 78 |
+
%config /etc/sysconfig/llama
|
| 79 |
+
|
| 80 |
+
%pre
|
| 81 |
+
|
| 82 |
+
%post
|
| 83 |
+
|
| 84 |
+
%preun
|
| 85 |
+
%postun
|
| 86 |
+
|
| 87 |
+
%changelog
|
llama.cpp/.devops/musa.Dockerfile
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=22.04
|
| 2 |
+
# This needs to generally match the container host's environment.
|
| 3 |
+
ARG MUSA_VERSION=rc4.3.0
|
| 4 |
+
# Target the MUSA build image
|
| 5 |
+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}-amd64
|
| 6 |
+
|
| 7 |
+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}-amd64
|
| 8 |
+
|
| 9 |
+
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
| 10 |
+
|
| 11 |
+
# MUSA architecture to build for (defaults to all supported archs)
|
| 12 |
+
ARG MUSA_DOCKER_ARCH=default
|
| 13 |
+
|
| 14 |
+
RUN apt-get update && \
|
| 15 |
+
apt-get install -y \
|
| 16 |
+
build-essential \
|
| 17 |
+
cmake \
|
| 18 |
+
python3 \
|
| 19 |
+
python3-pip \
|
| 20 |
+
git \
|
| 21 |
+
libssl-dev \
|
| 22 |
+
libgomp1
|
| 23 |
+
|
| 24 |
+
WORKDIR /app
|
| 25 |
+
|
| 26 |
+
COPY . .
|
| 27 |
+
|
| 28 |
+
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
| 29 |
+
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
| 30 |
+
fi && \
|
| 31 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
| 32 |
+
cmake --build build --config Release -j$(nproc)
|
| 33 |
+
|
| 34 |
+
RUN mkdir -p /app/lib && \
|
| 35 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 36 |
+
|
| 37 |
+
RUN mkdir -p /app/full \
|
| 38 |
+
&& cp build/bin/* /app/full \
|
| 39 |
+
&& cp *.py /app/full \
|
| 40 |
+
&& cp -r gguf-py /app/full \
|
| 41 |
+
&& cp -r requirements /app/full \
|
| 42 |
+
&& cp requirements.txt /app/full \
|
| 43 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 44 |
+
|
| 45 |
+
## Base image
|
| 46 |
+
FROM ${BASE_MUSA_RUN_CONTAINER} AS base
|
| 47 |
+
|
| 48 |
+
RUN apt-get update \
|
| 49 |
+
&& apt-get install -y libgomp1 curl\
|
| 50 |
+
&& apt autoremove -y \
|
| 51 |
+
&& apt clean -y \
|
| 52 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 53 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 54 |
+
&& find /var/cache -type f -delete
|
| 55 |
+
|
| 56 |
+
COPY --from=build /app/lib/ /app
|
| 57 |
+
|
| 58 |
+
### Full
|
| 59 |
+
FROM base AS full
|
| 60 |
+
|
| 61 |
+
COPY --from=build /app/full /app
|
| 62 |
+
|
| 63 |
+
WORKDIR /app
|
| 64 |
+
|
| 65 |
+
RUN apt-get update \
|
| 66 |
+
&& apt-get install -y \
|
| 67 |
+
git \
|
| 68 |
+
python3 \
|
| 69 |
+
python3-pip \
|
| 70 |
+
&& pip install --upgrade pip setuptools wheel \
|
| 71 |
+
&& pip install -r requirements.txt \
|
| 72 |
+
&& apt autoremove -y \
|
| 73 |
+
&& apt clean -y \
|
| 74 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 75 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 76 |
+
&& find /var/cache -type f -delete
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 80 |
+
|
| 81 |
+
### Light, CLI only
|
| 82 |
+
FROM base AS light
|
| 83 |
+
|
| 84 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 85 |
+
|
| 86 |
+
WORKDIR /app
|
| 87 |
+
|
| 88 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 89 |
+
|
| 90 |
+
### Server, Server only
|
| 91 |
+
FROM base AS server
|
| 92 |
+
|
| 93 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 94 |
+
|
| 95 |
+
COPY --from=build /app/full/llama-server /app
|
| 96 |
+
|
| 97 |
+
WORKDIR /app
|
| 98 |
+
|
| 99 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 100 |
+
|
| 101 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/rocm.Dockerfile
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=24.04
|
| 2 |
+
|
| 3 |
+
# This needs to generally match the container host's environment.
|
| 4 |
+
ARG ROCM_VERSION=7.2
|
| 5 |
+
ARG AMDGPU_VERSION=7.2
|
| 6 |
+
|
| 7 |
+
# Target the ROCm build image
|
| 8 |
+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
| 9 |
+
|
| 10 |
+
### Build image
|
| 11 |
+
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
| 12 |
+
|
| 13 |
+
# Unless otherwise specified, we make a fat build.
|
| 14 |
+
# This is mostly tied to rocBLAS supported archs.
|
| 15 |
+
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-7.2.0/reference/system-requirements.html
|
| 16 |
+
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityrad/native_linux/native_linux_compatibility.html
|
| 17 |
+
# check https://rocm.docs.amd.com/projects/radeon-ryzen/en/latest/docs/compatibility/compatibilityryz/native_linux/native_linux_compatibility.html
|
| 18 |
+
|
| 19 |
+
ARG ROCM_DOCKER_ARCH='gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1151;gfx1150;gfx1200;gfx1201'
|
| 20 |
+
|
| 21 |
+
# Set ROCm architectures
|
| 22 |
+
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
| 23 |
+
|
| 24 |
+
RUN apt-get update \
|
| 25 |
+
&& apt-get install -y \
|
| 26 |
+
build-essential \
|
| 27 |
+
cmake \
|
| 28 |
+
git \
|
| 29 |
+
libssl-dev \
|
| 30 |
+
curl \
|
| 31 |
+
libgomp1
|
| 32 |
+
|
| 33 |
+
WORKDIR /app
|
| 34 |
+
|
| 35 |
+
COPY . .
|
| 36 |
+
|
| 37 |
+
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
|
| 38 |
+
cmake -S . -B build \
|
| 39 |
+
-DGGML_HIP=ON \
|
| 40 |
+
-DGGML_HIP_ROCWMMA_FATTN=ON \
|
| 41 |
+
-DAMDGPU_TARGETS="$ROCM_DOCKER_ARCH" \
|
| 42 |
+
-DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON \
|
| 43 |
+
-DCMAKE_BUILD_TYPE=Release -DLLAMA_BUILD_TESTS=OFF \
|
| 44 |
+
&& cmake --build build --config Release -j$(nproc)
|
| 45 |
+
|
| 46 |
+
RUN mkdir -p /app/lib \
|
| 47 |
+
&& find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 48 |
+
|
| 49 |
+
RUN mkdir -p /app/full \
|
| 50 |
+
&& cp build/bin/* /app/full \
|
| 51 |
+
&& cp *.py /app/full \
|
| 52 |
+
&& cp -r gguf-py /app/full \
|
| 53 |
+
&& cp -r requirements /app/full \
|
| 54 |
+
&& cp requirements.txt /app/full \
|
| 55 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 56 |
+
|
| 57 |
+
## Base image
|
| 58 |
+
FROM ${BASE_ROCM_DEV_CONTAINER} AS base
|
| 59 |
+
|
| 60 |
+
RUN apt-get update \
|
| 61 |
+
&& apt-get install -y libgomp1 curl\
|
| 62 |
+
&& apt autoremove -y \
|
| 63 |
+
&& apt clean -y \
|
| 64 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 65 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 66 |
+
&& find /var/cache -type f -delete
|
| 67 |
+
|
| 68 |
+
COPY --from=build /app/lib/ /app
|
| 69 |
+
|
| 70 |
+
### Full
|
| 71 |
+
FROM base AS full
|
| 72 |
+
|
| 73 |
+
COPY --from=build /app/full /app
|
| 74 |
+
|
| 75 |
+
WORKDIR /app
|
| 76 |
+
|
| 77 |
+
RUN apt-get update \
|
| 78 |
+
&& apt-get install -y \
|
| 79 |
+
git \
|
| 80 |
+
python3-pip \
|
| 81 |
+
python3 \
|
| 82 |
+
python3-wheel\
|
| 83 |
+
&& pip install --break-system-packages --upgrade setuptools \
|
| 84 |
+
&& pip install --break-system-packages -r requirements.txt \
|
| 85 |
+
&& apt autoremove -y \
|
| 86 |
+
&& apt clean -y \
|
| 87 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 88 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 89 |
+
&& find /var/cache -type f -delete
|
| 90 |
+
|
| 91 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 92 |
+
|
| 93 |
+
### Light, CLI only
|
| 94 |
+
FROM base AS light
|
| 95 |
+
|
| 96 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 97 |
+
|
| 98 |
+
WORKDIR /app
|
| 99 |
+
|
| 100 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 101 |
+
|
| 102 |
+
### Server, Server only
|
| 103 |
+
FROM base AS server
|
| 104 |
+
|
| 105 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 106 |
+
|
| 107 |
+
COPY --from=build /app/full/llama-server /app
|
| 108 |
+
|
| 109 |
+
WORKDIR /app
|
| 110 |
+
|
| 111 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 112 |
+
|
| 113 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/s390x.Dockerfile
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG GCC_VERSION=15.2.0
|
| 2 |
+
ARG UBUNTU_VERSION=24.04
|
| 3 |
+
|
| 4 |
+
### Build Llama.cpp stage
|
| 5 |
+
FROM gcc:${GCC_VERSION} AS build
|
| 6 |
+
|
| 7 |
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
| 8 |
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
| 9 |
+
apt update -y && \
|
| 10 |
+
apt upgrade -y && \
|
| 11 |
+
apt install -y --no-install-recommends \
|
| 12 |
+
git cmake ccache ninja-build \
|
| 13 |
+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
|
| 14 |
+
libopenblas-dev libssl-dev && \
|
| 15 |
+
rm -rf /var/lib/apt/lists/*
|
| 16 |
+
|
| 17 |
+
WORKDIR /app
|
| 18 |
+
COPY . .
|
| 19 |
+
|
| 20 |
+
RUN --mount=type=cache,target=/root/.ccache \
|
| 21 |
+
--mount=type=cache,target=/app/build \
|
| 22 |
+
cmake -S . -B build -G Ninja \
|
| 23 |
+
-DCMAKE_BUILD_TYPE=Release \
|
| 24 |
+
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
| 25 |
+
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
|
| 26 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
| 27 |
+
-DGGML_NATIVE=OFF \
|
| 28 |
+
-DGGML_BACKEND_DL=ON \
|
| 29 |
+
-DGGML_CPU_ALL_VARIANTS=ON \
|
| 30 |
+
-DGGML_BLAS=ON \
|
| 31 |
+
-DGGML_BLAS_VENDOR=OpenBLAS && \
|
| 32 |
+
cmake --build build --config Release -j $(nproc) && \
|
| 33 |
+
cmake --install build --prefix /opt/llama.cpp
|
| 34 |
+
|
| 35 |
+
COPY *.py /opt/llama.cpp/bin
|
| 36 |
+
COPY .devops/tools.sh /opt/llama.cpp/bin
|
| 37 |
+
|
| 38 |
+
COPY gguf-py /opt/llama.cpp/gguf-py
|
| 39 |
+
COPY requirements.txt /opt/llama.cpp/gguf-py
|
| 40 |
+
COPY requirements /opt/llama.cpp/gguf-py/requirements
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
### Collect all llama.cpp binaries, libraries and distro libraries
|
| 44 |
+
FROM scratch AS collector
|
| 45 |
+
|
| 46 |
+
# Copy llama.cpp binaries and libraries
|
| 47 |
+
COPY --from=build /opt/llama.cpp/bin /llama.cpp/bin
|
| 48 |
+
COPY --from=build /opt/llama.cpp/lib /llama.cpp/lib
|
| 49 |
+
COPY --from=build /opt/llama.cpp/gguf-py /llama.cpp/gguf-py
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
### Base image
|
| 53 |
+
FROM ubuntu:${UBUNTU_VERSION} AS base
|
| 54 |
+
|
| 55 |
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
| 56 |
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
| 57 |
+
apt update -y && \
|
| 58 |
+
apt install -y --no-install-recommends \
|
| 59 |
+
# WARNING: Do not use libopenblas-openmp-dev. libopenblas-dev is faster.
|
| 60 |
+
# See: https://github.com/ggml-org/llama.cpp/pull/15915#issuecomment-3317166506
|
| 61 |
+
curl libgomp1 libopenblas-dev && \
|
| 62 |
+
apt autoremove -y && \
|
| 63 |
+
apt clean -y && \
|
| 64 |
+
rm -rf /tmp/* /var/tmp/* && \
|
| 65 |
+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
|
| 66 |
+
find /var/cache -type f -delete
|
| 67 |
+
|
| 68 |
+
# Copy llama.cpp libraries
|
| 69 |
+
COPY --from=collector /llama.cpp/lib /usr/lib/s390x-linux-gnu
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
### Full
|
| 73 |
+
FROM base AS full
|
| 74 |
+
|
| 75 |
+
ENV PATH="/root/.cargo/bin:${PATH}"
|
| 76 |
+
WORKDIR /app
|
| 77 |
+
|
| 78 |
+
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
| 79 |
+
--mount=type=cache,target=/var/lib/apt/lists,sharing=locked \
|
| 80 |
+
apt update -y && \
|
| 81 |
+
apt install -y \
|
| 82 |
+
git cmake libjpeg-dev \
|
| 83 |
+
python3 python3-pip python3-dev && \
|
| 84 |
+
apt autoremove -y && \
|
| 85 |
+
apt clean -y && \
|
| 86 |
+
rm -rf /tmp/* /var/tmp/* && \
|
| 87 |
+
find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
|
| 88 |
+
find /var/cache -type f -delete
|
| 89 |
+
|
| 90 |
+
RUN curl https://sh.rustup.rs -sSf | bash -s -- -y
|
| 91 |
+
|
| 92 |
+
COPY --from=collector /llama.cpp/bin /app
|
| 93 |
+
COPY --from=collector /llama.cpp/gguf-py /app/gguf-py
|
| 94 |
+
|
| 95 |
+
RUN pip install --no-cache-dir --break-system-packages \
|
| 96 |
+
-r /app/gguf-py/requirements.txt
|
| 97 |
+
|
| 98 |
+
ENTRYPOINT [ "/app/tools.sh" ]
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
### CLI Only
|
| 102 |
+
FROM base AS light
|
| 103 |
+
|
| 104 |
+
WORKDIR /llama.cpp/bin
|
| 105 |
+
|
| 106 |
+
# Copy llama.cpp binaries and libraries
|
| 107 |
+
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
|
| 108 |
+
COPY --from=collector /llama.cpp/bin/llama-cli /llama.cpp/bin/llama-completion /llama.cpp/bin
|
| 109 |
+
|
| 110 |
+
ENTRYPOINT [ "/llama.cpp/bin/llama-cli" ]
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
### Server
|
| 114 |
+
FROM base AS server
|
| 115 |
+
|
| 116 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 117 |
+
|
| 118 |
+
WORKDIR /llama.cpp/bin
|
| 119 |
+
|
| 120 |
+
# Copy llama.cpp binaries and libraries
|
| 121 |
+
COPY --from=collector /llama.cpp/bin/*.so /llama.cpp/bin
|
| 122 |
+
COPY --from=collector /llama.cpp/bin/llama-server /llama.cpp/bin
|
| 123 |
+
|
| 124 |
+
EXPOSE 8080
|
| 125 |
+
|
| 126 |
+
ENTRYPOINT [ "/llama.cpp/bin/llama-server" ]
|
llama.cpp/.devops/tools.sh
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Dispatcher entry point for the llama.cpp container images: maps a short
# command flag to the matching bundled binary and replaces the shell with it.
set -e

# Read the first argument into a variable
arg1="$1"

# Shift the arguments to remove the first one
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
    exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
    exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
    exec ./llama-cli "$@"
elif [[ "$arg1" == '--run-legacy' || "$arg1" == '-l' ]]; then
    exec ./llama-completion "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
    exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
    exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    # Quantize every f16 model found under "$1/$2" to q4_0, skipping models
    # that were already quantized on a previous run.
    echo "Converting PTH to GGML..."
    for i in $(ls $1/$2/ggml-model-f16.bin*); do
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
            # BUGFIX: 'exec' here replaced the shell process on the first
            # iteration, so only the first model was ever quantized and the
            # loop never ran to completion. Invoke the quantizer normally so
            # every model is processed ('set -e' still aborts on failure).
            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
    exec ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo "  --run (-r): Run a model (chat) previously converted into ggml"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin"
    echo "  --run-legacy (-l): Run a model (legacy completion) previously converted into ggml"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -no-cnv -p \"Building a website can be done in 10 simple steps:\" -n 512"
    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
    echo "              ex: -m model.gguf"
    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
    echo "              ex: -m model.gguf -f file.txt"
    echo "  --convert (-c): Convert a llama model into ggml"
    echo "              ex: --outtype f16 \"/models/7B/\" "
    echo "  --quantize (-q): Optimize with quantization process ggml"
    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo "  --all-in-one (-a): Execute --convert & --quantize"
    echo "              ex: \"/models/\" 7B"
    echo "  --server (-s): Run a model on the server"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
fi
|
llama.cpp/.devops/vulkan.Dockerfile
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG UBUNTU_VERSION=26.04
|
| 2 |
+
|
| 3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
| 4 |
+
|
| 5 |
+
# Install build tools
|
| 6 |
+
RUN apt update && apt install -y git build-essential cmake wget xz-utils
|
| 7 |
+
|
| 8 |
+
# Install SSL and Vulkan SDK dependencies
|
| 9 |
+
RUN apt install -y libssl-dev curl \
|
| 10 |
+
libxcb-xinput0 libxcb-xinerama0 libxcb-cursor-dev libvulkan-dev glslc
|
| 11 |
+
|
| 12 |
+
# Build it
|
| 13 |
+
WORKDIR /app
|
| 14 |
+
|
| 15 |
+
COPY . .
|
| 16 |
+
|
| 17 |
+
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
|
| 18 |
+
cmake --build build --config Release -j$(nproc)
|
| 19 |
+
|
| 20 |
+
RUN mkdir -p /app/lib && \
|
| 21 |
+
find build -name "*.so*" -exec cp -P {} /app/lib \;
|
| 22 |
+
|
| 23 |
+
RUN mkdir -p /app/full \
|
| 24 |
+
&& cp build/bin/* /app/full \
|
| 25 |
+
&& cp *.py /app/full \
|
| 26 |
+
&& cp -r gguf-py /app/full \
|
| 27 |
+
&& cp -r requirements /app/full \
|
| 28 |
+
&& cp requirements.txt /app/full \
|
| 29 |
+
&& cp .devops/tools.sh /app/full/tools.sh
|
| 30 |
+
|
| 31 |
+
## Base image
|
| 32 |
+
FROM ubuntu:$UBUNTU_VERSION AS base
|
| 33 |
+
|
| 34 |
+
RUN apt-get update \
|
| 35 |
+
&& apt-get install -y libgomp1 curl libvulkan1 mesa-vulkan-drivers \
|
| 36 |
+
libglvnd0 libgl1 libglx0 libegl1 libgles2 \
|
| 37 |
+
&& apt autoremove -y \
|
| 38 |
+
&& apt clean -y \
|
| 39 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 40 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 41 |
+
&& find /var/cache -type f -delete
|
| 42 |
+
|
| 43 |
+
COPY --from=build /app/lib/ /app
|
| 44 |
+
|
| 45 |
+
### Full
|
| 46 |
+
FROM base AS full
|
| 47 |
+
|
| 48 |
+
COPY --from=build /app/full /app
|
| 49 |
+
|
| 50 |
+
WORKDIR /app
|
| 51 |
+
|
| 52 |
+
RUN apt-get update \
|
| 53 |
+
&& apt-get install -y \
|
| 54 |
+
build-essential \
|
| 55 |
+
git \
|
| 56 |
+
python3 \
|
| 57 |
+
python3-dev \
|
| 58 |
+
python3-pip \
|
| 59 |
+
python3-wheel \
|
| 60 |
+
&& pip install --break-system-packages --upgrade setuptools \
|
| 61 |
+
&& pip install --break-system-packages -r requirements.txt \
|
| 62 |
+
&& apt autoremove -y \
|
| 63 |
+
&& apt clean -y \
|
| 64 |
+
&& rm -rf /tmp/* /var/tmp/* \
|
| 65 |
+
&& find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
|
| 66 |
+
&& find /var/cache -type f -delete
|
| 67 |
+
|
| 68 |
+
ENTRYPOINT ["/app/tools.sh"]
|
| 69 |
+
|
| 70 |
+
### Light, CLI only
|
| 71 |
+
FROM base AS light
|
| 72 |
+
|
| 73 |
+
COPY --from=build /app/full/llama-cli /app/full/llama-completion /app
|
| 74 |
+
|
| 75 |
+
WORKDIR /app
|
| 76 |
+
|
| 77 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
| 78 |
+
|
| 79 |
+
### Server, Server only
|
| 80 |
+
FROM base AS server
|
| 81 |
+
|
| 82 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
| 83 |
+
|
| 84 |
+
COPY --from=build /app/full/llama-server /app
|
| 85 |
+
|
| 86 |
+
WORKDIR /app
|
| 87 |
+
|
| 88 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
| 89 |
+
|
| 90 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.gemini/settings.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{ "contextFileName": "AGENTS.md" }
|
llama.cpp/.github/labeler.yml
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://github.com/actions/labeler
|
| 2 |
+
Apple Metal:
|
| 3 |
+
- changed-files:
|
| 4 |
+
- any-glob-to-any-file:
|
| 5 |
+
- ggml/include/ggml-metal.h
|
| 6 |
+
- ggml/src/ggml-metal/**
|
| 7 |
+
- README-metal.md
|
| 8 |
+
SYCL:
|
| 9 |
+
- changed-files:
|
| 10 |
+
- any-glob-to-any-file:
|
| 11 |
+
- ggml/include/ggml-sycl.h
|
| 12 |
+
- ggml/src/ggml-sycl/**
|
| 13 |
+
- docs/backend/SYCL.md
|
| 14 |
+
- examples/sycl/**
|
| 15 |
+
Nvidia GPU:
|
| 16 |
+
- changed-files:
|
| 17 |
+
- any-glob-to-any-file:
|
| 18 |
+
- ggml/include/ggml-cuda.h
|
| 19 |
+
- ggml/src/ggml-cuda/**
|
| 20 |
+
Vulkan:
|
| 21 |
+
- changed-files:
|
| 22 |
+
- any-glob-to-any-file:
|
| 23 |
+
- ggml/include/ggml-vulkan.h
|
| 24 |
+
- ggml/src/ggml-vulkan/**
|
| 25 |
+
IBM zDNN:
|
| 26 |
+
- changed-files:
|
| 27 |
+
- any-glob-to-any-file:
|
| 28 |
+
- ggml/include/ggml-zdnn.h
|
| 29 |
+
- ggml/src/ggml-zdnn/**
|
| 30 |
+
documentation:
|
| 31 |
+
- changed-files:
|
| 32 |
+
- any-glob-to-any-file:
|
| 33 |
+
- docs/**
|
| 34 |
+
- media/**
|
| 35 |
+
testing:
|
| 36 |
+
- changed-files:
|
| 37 |
+
- any-glob-to-any-file:
|
| 38 |
+
- tests/**
|
| 39 |
+
build:
|
| 40 |
+
- changed-files:
|
| 41 |
+
- any-glob-to-any-file:
|
| 42 |
+
- cmake/**
|
| 43 |
+
- CMakeLists.txt
|
| 44 |
+
- CMakePresets.json
|
| 45 |
+
examples:
|
| 46 |
+
- changed-files:
|
| 47 |
+
- any-glob-to-any-file:
|
| 48 |
+
- examples/**
|
| 49 |
+
- tools/**
|
| 50 |
+
devops:
|
| 51 |
+
- changed-files:
|
| 52 |
+
- any-glob-to-any-file:
|
| 53 |
+
- .devops/**
|
| 54 |
+
- .github/**
|
| 55 |
+
- ci/**
|
| 56 |
+
python:
|
| 57 |
+
- changed-files:
|
| 58 |
+
- any-glob-to-any-file:
|
| 59 |
+
- "**/*.py"
|
| 60 |
+
- requirements/**
|
| 61 |
+
- gguf-py/**
|
| 62 |
+
- .flake8
|
| 63 |
+
script:
|
| 64 |
+
- changed-files:
|
| 65 |
+
- any-glob-to-any-file:
|
| 66 |
+
- scripts/**
|
| 67 |
+
android:
|
| 68 |
+
- changed-files:
|
| 69 |
+
- any-glob-to-any-file:
|
| 70 |
+
- examples/llama.android/**
|
| 71 |
+
server:
|
| 72 |
+
- changed-files:
|
| 73 |
+
- any-glob-to-any-file:
|
| 74 |
+
- tools/server/**
|
| 75 |
+
ggml:
|
| 76 |
+
- changed-files:
|
| 77 |
+
- any-glob-to-any-file:
|
| 78 |
+
- ggml/**
|
| 79 |
+
model:
|
| 80 |
+
- changed-files:
|
| 81 |
+
- any-glob-to-any-file:
|
| 82 |
+
- src/models/**
|
| 83 |
+
nix:
|
| 84 |
+
- changed-files:
|
| 85 |
+
- any-glob-to-any-file:
|
| 86 |
+
- "**/*.nix"
|
| 87 |
+
- .github/workflows/nix-*.yml
|
| 88 |
+
- .devops/nix/nixpkgs-instances.nix
|
| 89 |
+
embedding:
|
| 90 |
+
- changed-files:
|
| 91 |
+
- any-glob-to-any-file: examples/embedding/
|
| 92 |
+
jinja parser:
|
| 93 |
+
- changed-files:
|
| 94 |
+
- any-glob-to-any-file:
|
| 95 |
+
- common/jinja/**
|
| 96 |
+
Ascend NPU:
|
| 97 |
+
- changed-files:
|
| 98 |
+
- any-glob-to-any-file:
|
| 99 |
+
- ggml/include/ggml-cann.h
|
| 100 |
+
- ggml/src/ggml-cann/**
|
| 101 |
+
- docs/backend/CANN.md
|
| 102 |
+
OpenCL:
|
| 103 |
+
- changed-files:
|
| 104 |
+
- any-glob-to-any-file:
|
| 105 |
+
- ggml/include/ggml-opencl.h
|
| 106 |
+
- ggml/src/ggml-opencl/**
|
llama.cpp/.github/pull_request_template.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*Make sure to read the [contributing guidelines](https://github.com/ggml-org/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
|
llama.cpp/build/CMakeCache.txt
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This is the CMakeCache file.
|
| 2 |
+
# For build in directory: r:/Quillan/Quillan-v4.2-model/llama.cpp/build
|
| 3 |
+
# It was generated by CMake: C:/Program Files/CMake/bin/cmake.exe
|
| 4 |
+
# You can edit this file to change values found and used by cmake.
|
| 5 |
+
# If you do not want to change any of the values, simply exit the editor.
|
| 6 |
+
# If you do want to change a value, simply edit, save, and exit the editor.
|
| 7 |
+
# The syntax for the file is as follows:
|
| 8 |
+
# KEY:TYPE=VALUE
|
| 9 |
+
# KEY is the name of a variable in the cache.
|
| 10 |
+
# TYPE is a hint to GUIs for the type of VALUE, DO NOT EDIT TYPE!.
|
| 11 |
+
# VALUE is the current value for the KEY.
|
| 12 |
+
|
| 13 |
+
########################
|
| 14 |
+
# EXTERNAL cache entries
|
| 15 |
+
########################
|
| 16 |
+
|
| 17 |
+
//Value Computed by CMake.
|
| 18 |
+
CMAKE_FIND_PACKAGE_REDIRECTS_DIR:STATIC=R:/Quillan/Quillan-v4.2-model/llama.cpp/build/CMakeFiles/pkgRedirects
|
| 19 |
+
|
| 20 |
+
//Program used to build from makefiles.
|
| 21 |
+
CMAKE_MAKE_PROGRAM:STRING=nmake
|
| 22 |
+
|
| 23 |
+
//Value Computed by CMake
|
| 24 |
+
CMAKE_PROJECT_COMPAT_VERSION:STATIC=
|
| 25 |
+
|
| 26 |
+
//Value Computed by CMake
|
| 27 |
+
CMAKE_PROJECT_DESCRIPTION:STATIC=
|
| 28 |
+
|
| 29 |
+
//Value Computed by CMake
|
| 30 |
+
CMAKE_PROJECT_HOMEPAGE_URL:STATIC=
|
| 31 |
+
|
| 32 |
+
//Value Computed by CMake
|
| 33 |
+
CMAKE_PROJECT_NAME:STATIC=llama.cpp
|
| 34 |
+
|
| 35 |
+
//Value Computed by CMake
|
| 36 |
+
CMAKE_PROJECT_SPDX_LICENSE:STATIC=
|
| 37 |
+
|
| 38 |
+
//Value Computed by CMake
|
| 39 |
+
llama.cpp_BINARY_DIR:STATIC=R:/Quillan/Quillan-v4.2-model/llama.cpp/build
|
| 40 |
+
|
| 41 |
+
//Value Computed by CMake
|
| 42 |
+
llama.cpp_IS_TOP_LEVEL:STATIC=ON
|
| 43 |
+
|
| 44 |
+
//Value Computed by CMake
|
| 45 |
+
llama.cpp_SOURCE_DIR:STATIC=R:/Quillan/Quillan-v4.2-model/llama.cpp
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
########################
|
| 49 |
+
# INTERNAL cache entries
|
| 50 |
+
########################
|
| 51 |
+
|
| 52 |
+
//This is the directory where this CMakeCache.txt was created
|
| 53 |
+
CMAKE_CACHEFILE_DIR:INTERNAL=r:/Quillan/Quillan-v4.2-model/llama.cpp/build
|
| 54 |
+
//Major version of cmake used to create the current loaded cache
|
| 55 |
+
CMAKE_CACHE_MAJOR_VERSION:INTERNAL=4
|
| 56 |
+
//Minor version of cmake used to create the current loaded cache
|
| 57 |
+
CMAKE_CACHE_MINOR_VERSION:INTERNAL=2
|
| 58 |
+
//Patch version of cmake used to create the current loaded cache
|
| 59 |
+
CMAKE_CACHE_PATCH_VERSION:INTERNAL=3
|
| 60 |
+
//Path to CMake executable.
|
| 61 |
+
CMAKE_COMMAND:INTERNAL=C:/Program Files/CMake/bin/cmake.exe
|
| 62 |
+
//Path to cpack program executable.
|
| 63 |
+
CMAKE_CPACK_COMMAND:INTERNAL=C:/Program Files/CMake/bin/cpack.exe
|
| 64 |
+
//Path to ctest program executable.
|
| 65 |
+
CMAKE_CTEST_COMMAND:INTERNAL=C:/Program Files/CMake/bin/ctest.exe
|
| 66 |
+
//Path to cache edit program executable.
|
| 67 |
+
CMAKE_EDIT_COMMAND:INTERNAL=C:/Program Files/CMake/bin/cmake-gui.exe
|
| 68 |
+
//Name of external makefile project generator.
|
| 69 |
+
CMAKE_EXTRA_GENERATOR:INTERNAL=
|
| 70 |
+
//Name of generator.
|
| 71 |
+
CMAKE_GENERATOR:INTERNAL=NMake Makefiles
|
| 72 |
+
//Generator instance identifier.
|
| 73 |
+
CMAKE_GENERATOR_INSTANCE:INTERNAL=
|
| 74 |
+
//Name of generator platform.
|
| 75 |
+
CMAKE_GENERATOR_PLATFORM:INTERNAL=
|
| 76 |
+
//Name of generator toolset.
|
| 77 |
+
CMAKE_GENERATOR_TOOLSET:INTERNAL=
|
| 78 |
+
//Source directory with the top level CMakeLists.txt file for this
|
| 79 |
+
// project
|
| 80 |
+
CMAKE_HOME_DIRECTORY:INTERNAL=R:/Quillan/Quillan-v4.2-model/llama.cpp
|
| 81 |
+
//Name of CMakeLists files to read
|
| 82 |
+
CMAKE_LIST_FILE_NAME:INTERNAL=CMakeLists.txt
|
| 83 |
+
//ADVANCED property for variable: CMAKE_MAKE_PROGRAM
|
| 84 |
+
CMAKE_MAKE_PROGRAM-ADVANCED:INTERNAL=1
|
| 85 |
+
//number of local generators
|
| 86 |
+
CMAKE_NUMBER_OF_MAKEFILES:INTERNAL=1
|
| 87 |
+
//Platform information initialized
|
| 88 |
+
CMAKE_PLATFORM_INFO_INITIALIZED:INTERNAL=1
|
| 89 |
+
//Path to CMake installation.
|
| 90 |
+
CMAKE_ROOT:INTERNAL=C:/Program Files/CMake/share/cmake-4.2
|
| 91 |
+
|
llama.cpp/ci/README-MUSA.md
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Running MUSA CI in a Docker Container
|
| 2 |
+
|
| 3 |
+
Assuming `$PWD` is the root of the `llama.cpp` repository, follow these steps to set up and run MUSA CI in a Docker container:
|
| 4 |
+
|
| 5 |
+
### 1. Create a local directory to store cached models, configuration files and venv:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
mkdir -p $HOME/llama.cpp/ci-cache
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
### 2. Create a local directory to store CI run results:
|
| 12 |
+
|
| 13 |
+
```bash
|
| 14 |
+
mkdir -p $HOME/llama.cpp/ci-results
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
### 3. Start a Docker container and run the CI:
|
| 18 |
+
|
| 19 |
+
```bash
|
| 20 |
+
docker run --privileged -it \
|
| 21 |
+
-v $HOME/llama.cpp/ci-cache:/ci-cache \
|
| 22 |
+
-v $HOME/llama.cpp/ci-results:/ci-results \
|
| 23 |
+
-v $PWD:/ws -w /ws \
|
| 24 |
+
mthreads/musa:rc4.3.0-devel-ubuntu22.04-amd64
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
Inside the container, execute the following commands:
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
apt update -y && apt install -y bc cmake ccache git python3.10-venv time unzip wget
|
| 31 |
+
git config --global --add safe.directory /ws
|
| 32 |
+
GG_BUILD_MUSA=1 bash ./ci/run.sh /ci-results /ci-cache
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
This setup ensures that the CI runs within an isolated Docker environment while maintaining cached files and results across runs.
|
llama.cpp/ci/README.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CI
|
| 2 |
+
|
| 3 |
+
This CI implements heavy-duty workflows that run on self-hosted runners. Typically the purpose of these workflows is to
|
| 4 |
+
cover hardware configurations that are not available from Github-hosted runners and/or require more computational
|
| 5 |
+
resource than normally available.
|
| 6 |
+
|
| 7 |
+
It is a good practice, before publishing changes to execute the full CI locally on your machine. For example:
|
| 8 |
+
|
| 9 |
+
```bash
|
| 10 |
+
mkdir tmp
|
| 11 |
+
|
| 12 |
+
# CPU-only build
|
| 13 |
+
bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 14 |
+
|
| 15 |
+
# with CUDA support
|
| 16 |
+
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 17 |
+
|
| 18 |
+
# with SYCL support
|
| 19 |
+
source /opt/intel/oneapi/setvars.sh
|
| 20 |
+
GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 21 |
+
|
| 22 |
+
# with MUSA support
|
| 23 |
+
GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 24 |
+
|
| 25 |
+
# etc.
|
| 26 |
+
```
|
| 27 |
+
|
| 28 |
+
# Adding self-hosted runners
|
| 29 |
+
|
| 30 |
+
- Add a self-hosted `ggml-ci` workflow to [[.github/workflows/build.yml]] with an appropriate label
|
| 31 |
+
- Request a runner token from `ggml-org` (for example, via a comment in the PR or email)
|
| 32 |
+
- Set-up a machine using the received token ([docs](https://docs.github.com/en/actions/how-tos/manage-runners/self-hosted-runners/add-runners))
|
| 33 |
+
- Optionally update [ci/run.sh](https://github.com/ggml-org/llama.cpp/blob/master/ci/run.sh) to build and run on the target platform by gating the implementation with a `GG_BUILD_...` env
|
llama.cpp/ci/run.sh
ADDED
|
@@ -0,0 +1,709 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
#
|
| 3 |
+
# sample usage:
|
| 4 |
+
#
|
| 5 |
+
# mkdir tmp
|
| 6 |
+
#
|
| 7 |
+
# # CPU-only build
|
| 8 |
+
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 9 |
+
#
|
| 10 |
+
# # with CUDA support
|
| 11 |
+
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 12 |
+
#
|
| 13 |
+
# # with SYCL support
|
| 14 |
+
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 15 |
+
#
|
| 16 |
+
# # with VULKAN support
|
| 17 |
+
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 18 |
+
#
|
| 19 |
+
# # with WebGPU support
|
| 20 |
+
# GG_BUILD_WEBGPU=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 21 |
+
#
|
| 22 |
+
# # with MUSA support
|
| 23 |
+
# GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 24 |
+
#
|
| 25 |
+
# # with KLEIDIAI support
|
| 26 |
+
# GG_BUILD_KLEIDIAI=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
| 27 |
+
#
|
| 28 |
+
|
| 29 |
+
if [ -z "$2" ]; then
|
| 30 |
+
echo "usage: $0 <output-dir> <mnt-dir>"
|
| 31 |
+
exit 1
|
| 32 |
+
fi
|
| 33 |
+
|
| 34 |
+
mkdir -p "$1"
|
| 35 |
+
mkdir -p "$2"
|
| 36 |
+
|
| 37 |
+
OUT=$(realpath "$1")
|
| 38 |
+
MNT=$(realpath "$2")
|
| 39 |
+
|
| 40 |
+
rm -f $OUT/*.log
|
| 41 |
+
rm -f $OUT/*.exit
|
| 42 |
+
rm -f $OUT/*.md
|
| 43 |
+
|
| 44 |
+
sd=`dirname $0`
|
| 45 |
+
cd $sd/../
|
| 46 |
+
SRC=`pwd`
|
| 47 |
+
|
| 48 |
+
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=${LLAMA_FATAL_WARNINGS:-ON} -DLLAMA_OPENSSL=OFF -DGGML_SCHED_NO_REALLOC=ON"
|
| 49 |
+
|
| 50 |
+
if [ ! -z ${GG_BUILD_METAL} ]; then
|
| 51 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
|
| 52 |
+
fi
|
| 53 |
+
|
| 54 |
+
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
| 55 |
+
# TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
|
| 56 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DGGML_CUDA_CUB_3DOT2=ON"
|
| 57 |
+
|
| 58 |
+
if command -v nvidia-smi >/dev/null 2>&1; then
|
| 59 |
+
CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
|
| 60 |
+
if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
|
| 61 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
|
| 62 |
+
else
|
| 63 |
+
echo "Warning: Using fallback CUDA architectures"
|
| 64 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
|
| 65 |
+
fi
|
| 66 |
+
else
|
| 67 |
+
echo "Error: nvidia-smi not found, cannot build with CUDA"
|
| 68 |
+
exit 1
|
| 69 |
+
fi
|
| 70 |
+
fi
|
| 71 |
+
|
| 72 |
+
if [ ! -z ${GG_BUILD_ROCM} ]; then
|
| 73 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_HIP=ON"
|
| 74 |
+
if [ -z ${GG_BUILD_AMDGPU_TARGETS} ]; then
|
| 75 |
+
echo "Missing GG_BUILD_AMDGPU_TARGETS, please set it to your GPU architecture (e.g. gfx90a, gfx1100, etc.)"
|
| 76 |
+
exit 1
|
| 77 |
+
fi
|
| 78 |
+
|
| 79 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGPU_TARGETS=${GG_BUILD_AMDGPU_TARGETS}"
|
| 80 |
+
fi
|
| 81 |
+
|
| 82 |
+
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
| 83 |
+
if [ -z ${ONEAPI_ROOT} ]; then
|
| 84 |
+
echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
|
| 85 |
+
echo "source /opt/intel/oneapi/setvars.sh"
|
| 86 |
+
exit 1
|
| 87 |
+
fi
|
| 88 |
+
# Use only main GPU
|
| 89 |
+
export ONEAPI_DEVICE_SELECTOR="level_zero:0"
|
| 90 |
+
# Enable sysman for correct memory reporting
|
| 91 |
+
export ZES_ENABLE_SYSMAN=1
|
| 92 |
+
# to circumvent precision issues on CPY operations
|
| 93 |
+
export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"
|
| 94 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
| 95 |
+
fi
|
| 96 |
+
|
| 97 |
+
if [ ! -z ${GG_BUILD_VULKAN} ]; then
|
| 98 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
|
| 99 |
+
|
| 100 |
+
# if on Mac, disable METAL
|
| 101 |
+
if [[ "$OSTYPE" == "darwin"* ]]; then
|
| 102 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=OFF -DGGML_BLAS=OFF"
|
| 103 |
+
fi
|
| 104 |
+
|
| 105 |
+
fi
|
| 106 |
+
|
| 107 |
+
if [ ! -z ${GG_BUILD_WEBGPU} ]; then
|
| 108 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1 -DGGML_METAL=OFF -DGGML_BLAS=OFF"
|
| 109 |
+
|
| 110 |
+
if [ ! -z "${GG_BUILD_WEBGPU_DAWN_PREFIX}" ]; then
|
| 111 |
+
if [ -z "${CMAKE_PREFIX_PATH}" ]; then
|
| 112 |
+
export CMAKE_PREFIX_PATH="${GG_BUILD_WEBGPU_DAWN_PREFIX}"
|
| 113 |
+
else
|
| 114 |
+
export CMAKE_PREFIX_PATH="${GG_BUILD_WEBGPU_DAWN_PREFIX}:${CMAKE_PREFIX_PATH}"
|
| 115 |
+
fi
|
| 116 |
+
fi
|
| 117 |
+
|
| 118 |
+
# For some systems, Dawn_DIR needs to be set explicitly, e.g., the lib64 path
|
| 119 |
+
if [ ! -z "${GG_BUILD_WEBGPU_DAWN_DIR}" ]; then
|
| 120 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DDawn_DIR=${GG_BUILD_WEBGPU_DAWN_DIR}"
|
| 121 |
+
fi
|
| 122 |
+
fi
|
| 123 |
+
|
| 124 |
+
if [ ! -z ${GG_BUILD_MUSA} ]; then
|
| 125 |
+
# Use qy1 by default (MTT S80)
|
| 126 |
+
MUSA_ARCH=${MUSA_ARCH:-21}
|
| 127 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_MUSA=ON -DMUSA_ARCHITECTURES=${MUSA_ARCH}"
|
| 128 |
+
fi
|
| 129 |
+
|
| 130 |
+
if [ ! -z ${GG_BUILD_NO_SVE} ]; then
|
| 131 |
+
# arm 9 and newer enables sve by default, adjust these flags depending on the cpu used
|
| 132 |
+
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=armv8.5-a+fp16+i8mm"
|
| 133 |
+
fi
|
| 134 |
+
|
| 135 |
+
if [ -n "${GG_BUILD_KLEIDIAI}" ]; then
|
| 136 |
+
echo ">>===== Enabling KleidiAI support"
|
| 137 |
+
|
| 138 |
+
CANDIDATES=(
|
| 139 |
+
"armv9-a+dotprod+i8mm+sve2"
|
| 140 |
+
"armv9-a+dotprod+i8mm"
|
| 141 |
+
"armv8.6-a+dotprod+i8mm"
|
| 142 |
+
"armv8.2-a+dotprod"
|
| 143 |
+
)
|
| 144 |
+
CPU=""
|
| 145 |
+
|
| 146 |
+
for cpu in "${CANDIDATES[@]}"; do
|
| 147 |
+
if echo 'int main(){}' | ${CXX:-c++} -march="$cpu" -x c++ - -c -o /dev/null >/dev/null 2>&1; then
|
| 148 |
+
CPU="$cpu"
|
| 149 |
+
break
|
| 150 |
+
fi
|
| 151 |
+
done
|
| 152 |
+
|
| 153 |
+
if [ -z "$CPU" ]; then
|
| 154 |
+
echo "ERROR: None of the required ARM baselines (armv9/armv8.6/armv8.2 + dotprod) are supported by this compiler."
|
| 155 |
+
exit 1
|
| 156 |
+
fi
|
| 157 |
+
|
| 158 |
+
echo ">>===== Using ARM baseline: ${CPU}"
|
| 159 |
+
|
| 160 |
+
CMAKE_EXTRA="${CMAKE_EXTRA:+$CMAKE_EXTRA } \
|
| 161 |
+
-DGGML_NATIVE=OFF \
|
| 162 |
+
-DGGML_CPU_KLEIDIAI=ON \
|
| 163 |
+
-DGGML_CPU_AARCH64=ON \
|
| 164 |
+
-DGGML_CPU_ARM_ARCH=${CPU} \
|
| 165 |
+
-DBUILD_SHARED_LIBS=OFF"
|
| 166 |
+
fi
|
| 167 |
+
|
| 168 |
+
## helpers
|
| 169 |
+
|
| 170 |
+
# download a file if it does not exist or if it is outdated
|
| 171 |
+
function gg_wget {
|
| 172 |
+
local out=$1
|
| 173 |
+
local url=$2
|
| 174 |
+
|
| 175 |
+
local cwd=`pwd`
|
| 176 |
+
|
| 177 |
+
mkdir -p $out
|
| 178 |
+
cd $out
|
| 179 |
+
|
| 180 |
+
# should not re-download if file is the same
|
| 181 |
+
wget -nv -c -N $url
|
| 182 |
+
|
| 183 |
+
cd $cwd
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
function gg_printf {
|
| 187 |
+
printf -- "$@" >> $OUT/README.md
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
function gg_run {
|
| 191 |
+
ci=$1
|
| 192 |
+
|
| 193 |
+
set -o pipefail
|
| 194 |
+
set -x
|
| 195 |
+
|
| 196 |
+
gg_run_$ci | tee $OUT/$ci.log
|
| 197 |
+
cur=$?
|
| 198 |
+
echo "$cur" > $OUT/$ci.exit
|
| 199 |
+
|
| 200 |
+
set +x
|
| 201 |
+
set +o pipefail
|
| 202 |
+
|
| 203 |
+
gg_sum_$ci
|
| 204 |
+
|
| 205 |
+
ret=$((ret | cur))
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
## ci
|
| 209 |
+
|
| 210 |
+
# ctest_debug
|
| 211 |
+
|
| 212 |
+
function gg_run_ctest_debug {
|
| 213 |
+
cd ${SRC}
|
| 214 |
+
|
| 215 |
+
rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug
|
| 216 |
+
|
| 217 |
+
set -e
|
| 218 |
+
|
| 219 |
+
# Check cmake, make and ctest are installed
|
| 220 |
+
gg_check_build_requirements
|
| 221 |
+
|
| 222 |
+
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
| 223 |
+
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
| 224 |
+
|
| 225 |
+
(time ctest --output-on-failure -L main -E "test-opt|test-backend-ops" ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
| 226 |
+
|
| 227 |
+
set +e
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
function gg_sum_ctest_debug {
|
| 231 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 232 |
+
|
| 233 |
+
gg_printf 'Runs ctest in debug mode\n'
|
| 234 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 235 |
+
gg_printf '```\n'
|
| 236 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
|
| 237 |
+
gg_printf '```\n'
|
| 238 |
+
gg_printf '\n'
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
# ctest_release
|
| 242 |
+
|
| 243 |
+
function gg_run_ctest_release {
|
| 244 |
+
cd ${SRC}
|
| 245 |
+
|
| 246 |
+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
| 247 |
+
|
| 248 |
+
set -e
|
| 249 |
+
|
| 250 |
+
# Check cmake, make and ctest are installed
|
| 251 |
+
gg_check_build_requirements
|
| 252 |
+
|
| 253 |
+
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
| 254 |
+
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
| 255 |
+
|
| 256 |
+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
| 257 |
+
(time ctest --output-on-failure -L 'main|python' ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
| 258 |
+
else
|
| 259 |
+
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
| 260 |
+
fi
|
| 261 |
+
|
| 262 |
+
set +e
|
| 263 |
+
}
|
| 264 |
+
|
| 265 |
+
function gg_sum_ctest_release {
|
| 266 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 267 |
+
|
| 268 |
+
gg_printf 'Runs ctest in release mode\n'
|
| 269 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 270 |
+
gg_printf '```\n'
|
| 271 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
|
| 272 |
+
gg_printf '```\n'
|
| 273 |
+
}
|
| 274 |
+
|
| 275 |
+
# test_scripts
|
| 276 |
+
|
| 277 |
+
function gg_run_test_scripts {
|
| 278 |
+
cd ${SRC}
|
| 279 |
+
|
| 280 |
+
set -e
|
| 281 |
+
|
| 282 |
+
(cd ./tools/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
| 283 |
+
(cd ./tools/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log
|
| 284 |
+
|
| 285 |
+
set +e
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
function gg_sum_test_scripts {
|
| 289 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 290 |
+
|
| 291 |
+
gg_printf 'Runs test scripts\n'
|
| 292 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 293 |
+
gg_printf '```\n'
|
| 294 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)"
|
| 295 |
+
gg_printf '```\n'
|
| 296 |
+
gg_printf '\n'
|
| 297 |
+
}
|
| 298 |
+
|
| 299 |
+
function gg_get_model {
|
| 300 |
+
#local gguf_0="$MNT/models/qwen3/0.6B/ggml-model-f16.gguf"
|
| 301 |
+
local gguf_0="$MNT/models/qwen3/0.6B/ggml-model-q4_0.gguf"
|
| 302 |
+
if [[ -s $gguf_0 ]]; then
|
| 303 |
+
echo -n "$gguf_0"
|
| 304 |
+
else
|
| 305 |
+
echo >&2 "No model found. Can't run gg_run_ctest_with_model."
|
| 306 |
+
exit 1
|
| 307 |
+
fi
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
function gg_run_ctest_with_model_debug {
|
| 311 |
+
cd ${SRC}
|
| 312 |
+
|
| 313 |
+
local model; model=$(gg_get_model)
|
| 314 |
+
cd build-ci-debug
|
| 315 |
+
set -e
|
| 316 |
+
|
| 317 |
+
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
| 318 |
+
|
| 319 |
+
set +e
|
| 320 |
+
cd ..
|
| 321 |
+
}
|
| 322 |
+
|
| 323 |
+
function gg_run_ctest_with_model_release {
|
| 324 |
+
cd ${SRC}
|
| 325 |
+
|
| 326 |
+
local model; model=$(gg_get_model)
|
| 327 |
+
cd build-ci-release
|
| 328 |
+
set -e
|
| 329 |
+
|
| 330 |
+
(LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
| 331 |
+
|
| 332 |
+
# test memory leaks
|
| 333 |
+
#if [[ ! -z ${GG_BUILD_METAL} ]]; then
|
| 334 |
+
# # TODO: this hangs for some reason ...
|
| 335 |
+
# (time leaks -quiet -atExit -- ./bin/test-thread-safety -m $model --parallel 2 -t 2 -p "hello") 2>&1 | tee -a $OUT/${ci}-leaks.log
|
| 336 |
+
#fi
|
| 337 |
+
|
| 338 |
+
set +e
|
| 339 |
+
cd ..
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
function gg_sum_ctest_with_model_debug {
|
| 343 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 344 |
+
|
| 345 |
+
gg_printf 'Runs ctest with model files in debug mode\n'
|
| 346 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 347 |
+
gg_printf '```\n'
|
| 348 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
|
| 349 |
+
gg_printf '```\n'
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
function gg_sum_ctest_with_model_release {
|
| 353 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 354 |
+
|
| 355 |
+
gg_printf 'Runs ctest with model files in release mode\n'
|
| 356 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 357 |
+
gg_printf '```\n'
|
| 358 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)"
|
| 359 |
+
gg_printf '```\n'
|
| 360 |
+
}
|
| 361 |
+
|
| 362 |
+
# qwen3_0_6b
|
| 363 |
+
|
| 364 |
+
function gg_run_qwen3_0_6b {
|
| 365 |
+
cd ${SRC}
|
| 366 |
+
|
| 367 |
+
gg_wget models-mnt/qwen3/0.6B/ https://huggingface.co/Qwen/Qwen3-0.6B-Base/raw/main/config.json
|
| 368 |
+
gg_wget models-mnt/qwen3/0.6B/ https://huggingface.co/Qwen/Qwen3-0.6B-Base/raw/main/tokenizer.json
|
| 369 |
+
gg_wget models-mnt/qwen3/0.6B/ https://huggingface.co/Qwen/Qwen3-0.6B-Base/raw/main/tokenizer_config.json
|
| 370 |
+
#gg_wget models-mnt/qwen3/0.6B/ https://huggingface.co/Qwen/Qwen3-0.6B-Base/raw/main/special_tokens_map.json
|
| 371 |
+
gg_wget models-mnt/qwen3/0.6B/ https://huggingface.co/Qwen/Qwen3-0.6B-Base/resolve/main/model.safetensors
|
| 372 |
+
|
| 373 |
+
|
| 374 |
+
gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
|
| 375 |
+
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
|
| 376 |
+
|
| 377 |
+
path_models="../models-mnt/qwen3/0.6B"
|
| 378 |
+
path_wiki="../models-mnt/wikitext/wikitext-2-raw"
|
| 379 |
+
|
| 380 |
+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
| 381 |
+
|
| 382 |
+
set -e
|
| 383 |
+
|
| 384 |
+
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
| 385 |
+
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
| 386 |
+
|
| 387 |
+
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf --outtype f16
|
| 388 |
+
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-bf16.gguf --outtype bf16
|
| 389 |
+
|
| 390 |
+
model_f16="${path_models}/ggml-model-f16.gguf"
|
| 391 |
+
model_bf16="${path_models}/ggml-model-bf16.gguf"
|
| 392 |
+
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
| 393 |
+
model_q4_0="${path_models}/ggml-model-q4_0.gguf"
|
| 394 |
+
model_q4_1="${path_models}/ggml-model-q4_1.gguf"
|
| 395 |
+
model_q5_0="${path_models}/ggml-model-q5_0.gguf"
|
| 396 |
+
model_q5_1="${path_models}/ggml-model-q5_1.gguf"
|
| 397 |
+
model_q2_k="${path_models}/ggml-model-q2_k.gguf"
|
| 398 |
+
model_q3_k="${path_models}/ggml-model-q3_k.gguf"
|
| 399 |
+
model_q4_k="${path_models}/ggml-model-q4_k.gguf"
|
| 400 |
+
model_q5_k="${path_models}/ggml-model-q5_k.gguf"
|
| 401 |
+
model_q6_k="${path_models}/ggml-model-q6_k.gguf"
|
| 402 |
+
|
| 403 |
+
wiki_test="${path_wiki}/wiki.test.raw"
|
| 404 |
+
|
| 405 |
+
./bin/llama-quantize ${model_bf16} ${model_q8_0} q8_0 $(nproc)
|
| 406 |
+
./bin/llama-quantize ${model_bf16} ${model_q4_0} q4_0 $(nproc)
|
| 407 |
+
./bin/llama-quantize ${model_bf16} ${model_q4_1} q4_1 $(nproc)
|
| 408 |
+
./bin/llama-quantize ${model_bf16} ${model_q5_0} q5_0 $(nproc)
|
| 409 |
+
./bin/llama-quantize ${model_bf16} ${model_q5_1} q5_1 $(nproc)
|
| 410 |
+
./bin/llama-quantize ${model_bf16} ${model_q2_k} q2_k $(nproc)
|
| 411 |
+
./bin/llama-quantize ${model_bf16} ${model_q3_k} q3_k $(nproc)
|
| 412 |
+
./bin/llama-quantize ${model_bf16} ${model_q4_k} q4_k $(nproc)
|
| 413 |
+
./bin/llama-quantize ${model_bf16} ${model_q5_k} q5_k $(nproc)
|
| 414 |
+
./bin/llama-quantize ${model_bf16} ${model_q6_k} q6_k $(nproc)
|
| 415 |
+
|
| 416 |
+
(time ./bin/llama-fit-params --model ${model_f16} 2>&1 | tee -a $OUT/${ci}-fp-f16.log)
|
| 417 |
+
|
| 418 |
+
(time ./bin/llama-completion -no-cnv --model ${model_f16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
| 419 |
+
(time ./bin/llama-completion -no-cnv --model ${model_bf16} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
|
| 420 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q8_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
| 421 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q4_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
| 422 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q4_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
| 423 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q5_0} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
| 424 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q5_1} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
| 425 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q2_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
| 426 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q3_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
| 427 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q4_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
| 428 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q5_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
| 429 |
+
(time ./bin/llama-completion -no-cnv --model ${model_q6_k} -ngl 99 -c 1024 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
| 430 |
+
|
| 431 |
+
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
| 432 |
+
if [ -z ${GG_BUILD_NO_BF16} ]; then
|
| 433 |
+
(time ./bin/llama-perplexity --model ${model_bf16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-bf16.log
|
| 434 |
+
fi
|
| 435 |
+
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
| 436 |
+
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
| 437 |
+
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
| 438 |
+
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
| 439 |
+
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
| 440 |
+
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
| 441 |
+
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
| 442 |
+
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
| 443 |
+
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
| 444 |
+
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
| 445 |
+
|
| 446 |
+
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -ngl 99 -c 1024 -b 512 --chunks 2 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
| 447 |
+
|
| 448 |
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa off --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
| 449 |
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 1024 -fa on --no-op-offload) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
| 450 |
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa off ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
| 451 |
+
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 1024 -fa on ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
| 452 |
+
|
| 453 |
+
function check_ppl {
|
| 454 |
+
qnt="$1"
|
| 455 |
+
ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
| 456 |
+
|
| 457 |
+
if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
|
| 458 |
+
printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
|
| 459 |
+
return 20
|
| 460 |
+
fi
|
| 461 |
+
|
| 462 |
+
printf ' - %s @ %s OK\n' "$qnt" "$ppl"
|
| 463 |
+
return 0
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 467 |
+
if [ -z ${GG_BUILD_NO_BF16} ]; then
|
| 468 |
+
check_ppl "bf16" "$(cat $OUT/${ci}-tg-bf16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 469 |
+
fi
|
| 470 |
+
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 471 |
+
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 472 |
+
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 473 |
+
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 474 |
+
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 475 |
+
#check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model
|
| 476 |
+
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 477 |
+
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 478 |
+
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 479 |
+
check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
|
| 480 |
+
|
| 481 |
+
cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log
|
| 482 |
+
|
| 483 |
+
set +e
|
| 484 |
+
}
|
| 485 |
+
|
| 486 |
+
function gg_sum_qwen3_0_6b {
|
| 487 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 488 |
+
|
| 489 |
+
gg_printf 'Qwen3 0.6B:\n'
|
| 490 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 491 |
+
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
|
| 492 |
+
gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)"
|
| 493 |
+
gg_printf '- f16:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
| 494 |
+
if [ -z ${GG_BUILD_NO_BF16} ]; then
|
| 495 |
+
gg_printf '- bf16:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-bf16.log)"
|
| 496 |
+
fi
|
| 497 |
+
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
| 498 |
+
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
|
| 499 |
+
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
|
| 500 |
+
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
|
| 501 |
+
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
|
| 502 |
+
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
|
| 503 |
+
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
|
| 504 |
+
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
|
| 505 |
+
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
|
| 506 |
+
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
|
| 507 |
+
gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)"
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
# bge-small
|
| 511 |
+
|
| 512 |
+
function gg_run_embd_bge_small {
|
| 513 |
+
cd ${SRC}
|
| 514 |
+
|
| 515 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
|
| 516 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json
|
| 517 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
|
| 518 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
|
| 519 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
|
| 520 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
|
| 521 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt
|
| 522 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json
|
| 523 |
+
gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
|
| 524 |
+
|
| 525 |
+
gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json
|
| 526 |
+
|
| 527 |
+
path_models="../models-mnt/bge-small"
|
| 528 |
+
|
| 529 |
+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
| 530 |
+
|
| 531 |
+
set -e
|
| 532 |
+
|
| 533 |
+
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
| 534 |
+
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
| 535 |
+
|
| 536 |
+
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
| 537 |
+
|
| 538 |
+
model_f16="${path_models}/ggml-model-f16.gguf"
|
| 539 |
+
model_q8_0="${path_models}/ggml-model-q8_0.gguf"
|
| 540 |
+
|
| 541 |
+
./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0
|
| 542 |
+
|
| 543 |
+
(time ./bin/llama-fit-params --model ${model_f16} 2>&1 | tee -a $OUT/${ci}-fp-f16.log)
|
| 544 |
+
|
| 545 |
+
(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
| 546 |
+
(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 --no-op-offload) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
| 547 |
+
|
| 548 |
+
set +e
|
| 549 |
+
}
|
| 550 |
+
|
| 551 |
+
function gg_sum_embd_bge_small {
|
| 552 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 553 |
+
|
| 554 |
+
gg_printf 'BGE Small (BERT):\n'
|
| 555 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 556 |
+
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
|
| 557 |
+
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
| 558 |
+
}
|
| 559 |
+
|
| 560 |
+
# rerank_tiny
|
| 561 |
+
|
| 562 |
+
function gg_run_rerank_tiny {
|
| 563 |
+
cd ${SRC}
|
| 564 |
+
|
| 565 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
| 566 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
|
| 567 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
|
| 568 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
|
| 569 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
|
| 570 |
+
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.json
|
| 571 |
+
|
| 572 |
+
path_models="../models-mnt/rerank-tiny"
|
| 573 |
+
|
| 574 |
+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
| 575 |
+
|
| 576 |
+
set -e
|
| 577 |
+
|
| 578 |
+
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
| 579 |
+
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
| 580 |
+
|
| 581 |
+
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
| 582 |
+
|
| 583 |
+
model_f16="${path_models}/ggml-model-f16.gguf"
|
| 584 |
+
|
| 585 |
+
(time ./bin/llama-fit-params --model ${model_f16} 2>&1 | tee -a $OUT/${ci}-fp-f16.log)
|
| 586 |
+
|
| 587 |
+
# for this model, the SEP token is "</s>"
|
| 588 |
+
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --no-op-offload --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
|
| 589 |
+
|
| 590 |
+
# sample output
|
| 591 |
+
# rerank score 0: 0.029
|
| 592 |
+
# rerank score 1: 0.029
|
| 593 |
+
# rerank score 2: 0.135
|
| 594 |
+
|
| 595 |
+
# check that the score is in the range [$3, $4]
|
| 596 |
+
function check_score {
|
| 597 |
+
qnt="$1"
|
| 598 |
+
score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
| 599 |
+
|
| 600 |
+
if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
|
| 601 |
+
printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
|
| 602 |
+
return 20
|
| 603 |
+
fi
|
| 604 |
+
|
| 605 |
+
printf ' - %s @ %s OK\n' "$qnt" "$score"
|
| 606 |
+
return 0
|
| 607 |
+
}
|
| 608 |
+
|
| 609 |
+
check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
| 610 |
+
check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
| 611 |
+
check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
|
| 612 |
+
|
| 613 |
+
set +e
|
| 614 |
+
}
|
| 615 |
+
|
| 616 |
+
function gg_sum_rerank_tiny {
|
| 617 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 618 |
+
|
| 619 |
+
gg_printf 'Rerank Tiny (Jina):\n'
|
| 620 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 621 |
+
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
|
| 622 |
+
}
|
| 623 |
+
|
| 624 |
+
function gg_check_build_requirements {
|
| 625 |
+
if ! command -v cmake &> /dev/null; then
|
| 626 |
+
gg_printf 'cmake not found, please install'
|
| 627 |
+
fi
|
| 628 |
+
|
| 629 |
+
if ! command -v make &> /dev/null; then
|
| 630 |
+
gg_printf 'make not found, please install'
|
| 631 |
+
fi
|
| 632 |
+
|
| 633 |
+
if ! command -v ctest &> /dev/null; then
|
| 634 |
+
gg_printf 'ctest not found, please install'
|
| 635 |
+
fi
|
| 636 |
+
}
|
| 637 |
+
|
| 638 |
+
function gg_run_test_backend_ops_cpu {
|
| 639 |
+
cd ${SRC}
|
| 640 |
+
|
| 641 |
+
cd build-ci-release
|
| 642 |
+
|
| 643 |
+
set -e
|
| 644 |
+
|
| 645 |
+
(time ./bin/test-backend-ops -b CPU ) 2>&1 | tee -a $OUT/${ci}-test-backend-ops-cpu.log
|
| 646 |
+
|
| 647 |
+
set +e
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
function gg_sum_test_backend_ops_cpu {
|
| 651 |
+
gg_printf '### %s\n\n' "${ci}"
|
| 652 |
+
|
| 653 |
+
gg_printf 'Runs test-backend-ops for CPU backend\n'
|
| 654 |
+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
| 655 |
+
gg_printf '```\n'
|
| 656 |
+
gg_printf '%s\n' "$(cat $OUT/${ci}-test-backend-ops-cpu.log)"
|
| 657 |
+
gg_printf '```\n'
|
| 658 |
+
gg_printf '\n'
|
| 659 |
+
}
|
| 660 |
+
|
| 661 |
+
## main
|
| 662 |
+
|
| 663 |
+
export LLAMA_LOG_PREFIX=1
|
| 664 |
+
export LLAMA_LOG_TIMESTAMPS=1
|
| 665 |
+
|
| 666 |
+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
| 667 |
+
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models
|
| 668 |
+
rm -rf ${SRC}/models-mnt
|
| 669 |
+
mnt_models=${MNT}/models
|
| 670 |
+
mkdir -p ${mnt_models}
|
| 671 |
+
ln -sfn ${mnt_models} ${SRC}/models-mnt
|
| 672 |
+
|
| 673 |
+
# Create a fresh python3 venv and enter it
|
| 674 |
+
if ! python3 -m venv "$MNT/venv"; then
|
| 675 |
+
echo "Error: Failed to create Python virtual environment at $MNT/venv."
|
| 676 |
+
exit 1
|
| 677 |
+
fi
|
| 678 |
+
source "$MNT/venv/bin/activate"
|
| 679 |
+
|
| 680 |
+
pip install -r ${SRC}/requirements.txt --disable-pip-version-check
|
| 681 |
+
pip install --editable gguf-py --disable-pip-version-check
|
| 682 |
+
fi
|
| 683 |
+
|
| 684 |
+
ret=0
|
| 685 |
+
|
| 686 |
+
test $ret -eq 0 && gg_run ctest_debug
|
| 687 |
+
test $ret -eq 0 && gg_run ctest_release
|
| 688 |
+
|
| 689 |
+
if [ ! -z ${GG_BUILD_HIGH_PERF} ]; then
|
| 690 |
+
test $ret -eq 0 && gg_run test_backend_ops_cpu
|
| 691 |
+
fi
|
| 692 |
+
|
| 693 |
+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
| 694 |
+
test $ret -eq 0 && gg_run embd_bge_small
|
| 695 |
+
test $ret -eq 0 && gg_run rerank_tiny
|
| 696 |
+
|
| 697 |
+
if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
|
| 698 |
+
test $ret -eq 0 && gg_run test_scripts
|
| 699 |
+
fi
|
| 700 |
+
|
| 701 |
+
test $ret -eq 0 && gg_run qwen3_0_6b
|
| 702 |
+
|
| 703 |
+
test $ret -eq 0 && gg_run ctest_with_model_debug
|
| 704 |
+
test $ret -eq 0 && gg_run ctest_with_model_release
|
| 705 |
+
fi
|
| 706 |
+
|
| 707 |
+
cat $OUT/README.md
|
| 708 |
+
|
| 709 |
+
exit $ret
|
llama.cpp/cmake/arm64-apple-clang.cmake
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set( CMAKE_SYSTEM_NAME Darwin )
|
| 2 |
+
set( CMAKE_SYSTEM_PROCESSOR arm64 )
|
| 3 |
+
|
| 4 |
+
set( target arm64-apple-darwin-macho )
|
| 5 |
+
|
| 6 |
+
set( CMAKE_C_COMPILER clang )
|
| 7 |
+
set( CMAKE_CXX_COMPILER clang++ )
|
| 8 |
+
|
| 9 |
+
set( CMAKE_C_COMPILER_TARGET ${target} )
|
| 10 |
+
set( CMAKE_CXX_COMPILER_TARGET ${target} )
|
| 11 |
+
|
| 12 |
+
set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
|
| 13 |
+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
|
| 14 |
+
|
| 15 |
+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
| 16 |
+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
llama.cpp/cmake/arm64-windows-llvm.cmake
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set( CMAKE_SYSTEM_NAME Windows )
|
| 2 |
+
set( CMAKE_SYSTEM_PROCESSOR arm64 )
|
| 3 |
+
|
| 4 |
+
set( target arm64-pc-windows-msvc )
|
| 5 |
+
|
| 6 |
+
set( CMAKE_C_COMPILER clang )
|
| 7 |
+
set( CMAKE_CXX_COMPILER clang++ )
|
| 8 |
+
|
| 9 |
+
set( CMAKE_C_COMPILER_TARGET ${target} )
|
| 10 |
+
set( CMAKE_CXX_COMPILER_TARGET ${target} )
|
| 11 |
+
|
| 12 |
+
set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
|
| 13 |
+
set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" )
|
| 14 |
+
|
| 15 |
+
set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
| 16 |
+
set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
|
llama.cpp/cmake/build-info.cmake
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set(BUILD_NUMBER 0)
|
| 2 |
+
set(BUILD_COMMIT "unknown")
|
| 3 |
+
set(BUILD_COMPILER "unknown")
|
| 4 |
+
set(BUILD_TARGET "unknown")
|
| 5 |
+
|
| 6 |
+
# Look for git
|
| 7 |
+
find_package(Git)
|
| 8 |
+
if(NOT Git_FOUND)
|
| 9 |
+
find_program(GIT_EXECUTABLE NAMES git git.exe)
|
| 10 |
+
if(GIT_EXECUTABLE)
|
| 11 |
+
set(Git_FOUND TRUE)
|
| 12 |
+
message(STATUS "Found Git: ${GIT_EXECUTABLE}")
|
| 13 |
+
else()
|
| 14 |
+
message(WARNING "Git not found. Build info will not be accurate.")
|
| 15 |
+
endif()
|
| 16 |
+
endif()
|
| 17 |
+
|
| 18 |
+
# Get the commit count and hash
|
| 19 |
+
if(Git_FOUND)
|
| 20 |
+
execute_process(
|
| 21 |
+
COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD
|
| 22 |
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
| 23 |
+
OUTPUT_VARIABLE HEAD
|
| 24 |
+
OUTPUT_STRIP_TRAILING_WHITESPACE
|
| 25 |
+
RESULT_VARIABLE RES
|
| 26 |
+
)
|
| 27 |
+
if (RES EQUAL 0)
|
| 28 |
+
set(BUILD_COMMIT ${HEAD})
|
| 29 |
+
endif()
|
| 30 |
+
execute_process(
|
| 31 |
+
COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD
|
| 32 |
+
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
| 33 |
+
OUTPUT_VARIABLE COUNT
|
| 34 |
+
OUTPUT_STRIP_TRAILING_WHITESPACE
|
| 35 |
+
RESULT_VARIABLE RES
|
| 36 |
+
)
|
| 37 |
+
if (RES EQUAL 0)
|
| 38 |
+
set(BUILD_NUMBER ${COUNT})
|
| 39 |
+
endif()
|
| 40 |
+
endif()
|
| 41 |
+
|
| 42 |
+
set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
|
| 43 |
+
|
| 44 |
+
if(CMAKE_VS_PLATFORM_NAME)
|
| 45 |
+
set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
|
| 46 |
+
else()
|
| 47 |
+
set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
|
| 48 |
+
endif()
|
llama.cpp/cmake/common.cmake
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
include("ggml/cmake/common.cmake")
|
| 2 |
+
|
| 3 |
+
function(llama_add_compile_flags)
|
| 4 |
+
if (LLAMA_FATAL_WARNINGS)
|
| 5 |
+
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
| 6 |
+
list(APPEND C_FLAGS -Werror)
|
| 7 |
+
list(APPEND CXX_FLAGS -Werror)
|
| 8 |
+
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
| 9 |
+
add_compile_options(/WX)
|
| 10 |
+
endif()
|
| 11 |
+
endif()
|
| 12 |
+
|
| 13 |
+
if (LLAMA_ALL_WARNINGS)
|
| 14 |
+
if (NOT MSVC)
|
| 15 |
+
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
| 16 |
+
-Werror=implicit-int -Werror=implicit-function-declaration)
|
| 17 |
+
|
| 18 |
+
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
| 19 |
+
|
| 20 |
+
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
| 21 |
+
|
| 22 |
+
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
| 23 |
+
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
| 24 |
+
|
| 25 |
+
ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
| 26 |
+
|
| 27 |
+
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
| 28 |
+
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
| 29 |
+
else()
|
| 30 |
+
# todo : msvc
|
| 31 |
+
set(C_FLAGS "" PARENT_SCOPE)
|
| 32 |
+
set(CXX_FLAGS "" PARENT_SCOPE)
|
| 33 |
+
endif()
|
| 34 |
+
endif()
|
| 35 |
+
|
| 36 |
+
if (NOT MSVC)
|
| 37 |
+
if (LLAMA_SANITIZE_THREAD)
|
| 38 |
+
message(STATUS "Using -fsanitize=thread")
|
| 39 |
+
|
| 40 |
+
add_compile_options(-fsanitize=thread)
|
| 41 |
+
link_libraries (-fsanitize=thread)
|
| 42 |
+
endif()
|
| 43 |
+
|
| 44 |
+
if (LLAMA_SANITIZE_ADDRESS)
|
| 45 |
+
message(STATUS "Using -fsanitize=address")
|
| 46 |
+
|
| 47 |
+
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
|
| 48 |
+
link_libraries (-fsanitize=address)
|
| 49 |
+
endif()
|
| 50 |
+
|
| 51 |
+
if (LLAMA_SANITIZE_UNDEFINED)
|
| 52 |
+
message(STATUS "Using -fsanitize=undefined")
|
| 53 |
+
|
| 54 |
+
add_compile_options(-fsanitize=undefined)
|
| 55 |
+
link_libraries (-fsanitize=undefined)
|
| 56 |
+
endif()
|
| 57 |
+
endif()
|
| 58 |
+
endfunction()
|
llama.cpp/cmake/download-models.cmake
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
get_filename_component(DEST_DIR "${DEST}" DIRECTORY)
|
| 2 |
+
file(MAKE_DIRECTORY "${DEST_DIR}")
|
| 3 |
+
|
| 4 |
+
if(NOT EXISTS "${DEST}")
|
| 5 |
+
message(STATUS "Downloading ${NAME} from ggml-org/models...")
|
| 6 |
+
endif()
|
| 7 |
+
|
| 8 |
+
file(DOWNLOAD
|
| 9 |
+
"https://huggingface.co/ggml-org/models/resolve/main/${NAME}?download=true"
|
| 10 |
+
"${DEST}"
|
| 11 |
+
TLS_VERIFY ON
|
| 12 |
+
EXPECTED_HASH ${HASH}
|
| 13 |
+
STATUS status
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
list(GET status 0 code)
|
| 17 |
+
|
| 18 |
+
if(NOT code EQUAL 0)
|
| 19 |
+
list(GET status 1 msg)
|
| 20 |
+
message(FATAL_ERROR "Failed to download ${NAME}: ${msg}")
|
| 21 |
+
endif()
|
llama.cpp/cmake/git-vars.cmake
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
find_package(Git)
|
| 2 |
+
|
| 3 |
+
# the commit's SHA1
|
| 4 |
+
execute_process(COMMAND
|
| 5 |
+
"${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
|
| 6 |
+
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
| 7 |
+
OUTPUT_VARIABLE GIT_SHA1
|
| 8 |
+
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
| 9 |
+
|
| 10 |
+
# the date of the commit
|
| 11 |
+
execute_process(COMMAND
|
| 12 |
+
"${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
|
| 13 |
+
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
| 14 |
+
OUTPUT_VARIABLE GIT_DATE
|
| 15 |
+
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
| 16 |
+
|
| 17 |
+
# the subject of the commit
|
| 18 |
+
execute_process(COMMAND
|
| 19 |
+
"${GIT_EXECUTABLE}" log -1 --format=%s
|
| 20 |
+
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
|
| 21 |
+
OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
|
| 22 |
+
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
|
llama.cpp/cmake/license.cmake
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
define_property(GLOBAL PROPERTY LICENSE_TEXT
|
| 2 |
+
BRIEF_DOCS "Embedded licenses"
|
| 3 |
+
FULL_DOCS "Global string containing all aggregated licenses"
|
| 4 |
+
)
|
| 5 |
+
|
| 6 |
+
function(license_add_file NAME FILE)
|
| 7 |
+
if(NOT IS_ABSOLUTE "${FILE}")
|
| 8 |
+
set(FILE "${CMAKE_CURRENT_SOURCE_DIR}/${FILE}")
|
| 9 |
+
endif()
|
| 10 |
+
if(EXISTS "${FILE}")
|
| 11 |
+
set(TITLE "License for ${NAME}")
|
| 12 |
+
string(REGEX REPLACE "." "=" UNDERLINE "${TITLE}")
|
| 13 |
+
file(READ "${FILE}" TEXT)
|
| 14 |
+
get_property(TMP GLOBAL PROPERTY LICENSE_TEXT)
|
| 15 |
+
string(APPEND TMP "R\"=L=(${TITLE}\n${UNDERLINE}\n\n${TEXT})=L=\",\n")
|
| 16 |
+
set_property(GLOBAL PROPERTY LICENSE_TEXT "${TMP}")
|
| 17 |
+
else()
|
| 18 |
+
message(WARNING "License file '${FILE}' not found")
|
| 19 |
+
endif()
|
| 20 |
+
endfunction()
|
| 21 |
+
|
| 22 |
+
function(license_generate TARGET_NAME)
|
| 23 |
+
message(STATUS "Generating embedded license file for target: ${TARGET_NAME}")
|
| 24 |
+
get_property(TEXT GLOBAL PROPERTY LICENSE_TEXT)
|
| 25 |
+
|
| 26 |
+
set(CPP_CONTENT "// Generated by CMake\n\n")
|
| 27 |
+
string(APPEND CPP_CONTENT "const char* LICENSES[] = {\n")
|
| 28 |
+
string(APPEND CPP_CONTENT "${TEXT}")
|
| 29 |
+
string(APPEND CPP_CONTENT "nullptr\n")
|
| 30 |
+
string(APPEND CPP_CONTENT "};\n")
|
| 31 |
+
|
| 32 |
+
set(CPP_FILE "${CMAKE_BINARY_DIR}/license.cpp")
|
| 33 |
+
file(WRITE "${CPP_FILE}" "${CPP_CONTENT}")
|
| 34 |
+
|
| 35 |
+
if(TARGET ${TARGET_NAME})
|
| 36 |
+
target_sources(${TARGET_NAME} PRIVATE "${CPP_FILE}")
|
| 37 |
+
else()
|
| 38 |
+
message(FATAL_ERROR "Target '${TARGET_NAME}' does not exist")
|
| 39 |
+
endif()
|
| 40 |
+
endfunction()
|
llama.cpp/cmake/llama-config.cmake.in
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
|
| 2 |
+
set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
|
| 3 |
+
set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
|
| 4 |
+
set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
|
| 5 |
+
|
| 6 |
+
@PACKAGE_INIT@
|
| 7 |
+
|
| 8 |
+
set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
|
| 9 |
+
set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
|
| 10 |
+
set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")
|
| 11 |
+
|
| 12 |
+
find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
|
| 13 |
+
|
| 14 |
+
find_library(llama_LIBRARY llama
|
| 15 |
+
REQUIRED
|
| 16 |
+
HINTS ${LLAMA_LIB_DIR}
|
| 17 |
+
NO_CMAKE_FIND_ROOT_PATH
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
add_library(llama UNKNOWN IMPORTED)
|
| 21 |
+
set_target_properties(llama
|
| 22 |
+
PROPERTIES
|
| 23 |
+
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
| 24 |
+
INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
|
| 25 |
+
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
| 26 |
+
IMPORTED_LOCATION "${llama_LIBRARY}"
|
| 27 |
+
INTERFACE_COMPILE_FEATURES c_std_90
|
| 28 |
+
POSITION_INDEPENDENT_CODE ON)
|
| 29 |
+
|
| 30 |
+
check_required_components(Llama)
|
llama.cpp/cmake/llama.pc.in
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
prefix=@CMAKE_INSTALL_PREFIX@
|
| 2 |
+
exec_prefix=@CMAKE_INSTALL_PREFIX@
|
| 3 |
+
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
|
| 4 |
+
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
|
| 5 |
+
|
| 6 |
+
Name: llama
|
| 7 |
+
Description: Port of Facebook's LLaMA model in C/C++
|
| 8 |
+
Version: @LLAMA_INSTALL_VERSION@
|
| 9 |
+
Libs: -L${libdir} -lggml -lggml-base -lllama
|
| 10 |
+
Cflags: -I${includedir}
|
llama.cpp/cmake/riscv64-spacemit-linux-gnu-gcc.cmake
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set(CMAKE_SYSTEM_NAME Linux)
|
| 2 |
+
set(CMAKE_SYSTEM_PROCESSOR riscv64)
|
| 3 |
+
set(CMAKE_SYSTEM_VERSION 1)
|
| 4 |
+
|
| 5 |
+
if (CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "^(riscv)")
|
| 6 |
+
message(STATUS "HOST SYSTEM ${CMAKE_HOST_SYSTEM_PROCESSOR}")
|
| 7 |
+
else()
|
| 8 |
+
set(GNU_MACHINE riscv64-unknown-linux-gnu CACHE STRING "GNU compiler triple")
|
| 9 |
+
if (DEFINED ENV{RISCV_ROOT_PATH})
|
| 10 |
+
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
|
| 11 |
+
else()
|
| 12 |
+
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
|
| 13 |
+
endif()
|
| 14 |
+
|
| 15 |
+
set(RISCV_ROOT_PATH ${RISCV_ROOT_PATH} CACHE STRING "root path to riscv toolchain")
|
| 16 |
+
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
|
| 17 |
+
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
|
| 18 |
+
set(CMAKE_STRIP ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-strip)
|
| 19 |
+
set(CMAKE_FIND_ROOT_PATH "${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu")
|
| 20 |
+
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot")
|
| 21 |
+
endif()
|
| 22 |
+
|
| 23 |
+
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
|
| 24 |
+
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
|
| 25 |
+
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
|
| 26 |
+
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
|
| 27 |
+
set(CMAKE_C_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CMAKE_C_FLAGS}")
|
| 28 |
+
set(CMAKE_CXX_FLAGS "-march=rv64gcv_zfh_zba_zicbop -mabi=lp64d ${CXX_FLAGS}")
|
| 29 |
+
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -latomic")
|
llama.cpp/cmake/x64-windows-llvm.cmake
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
set( CMAKE_SYSTEM_NAME Windows )
|
| 2 |
+
set( CMAKE_SYSTEM_PROCESSOR x86_64 )
|
| 3 |
+
|
| 4 |
+
set( CMAKE_C_COMPILER clang )
|
| 5 |
+
set( CMAKE_CXX_COMPILER clang++ )
|
llama.cpp/common/CMakeLists.txt
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# common
|
| 2 |
+
|
| 3 |
+
find_package(Threads REQUIRED)
|
| 4 |
+
|
| 5 |
+
llama_add_compile_flags()
|
| 6 |
+
|
| 7 |
+
# Build info header
|
| 8 |
+
|
| 9 |
+
if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
|
| 10 |
+
set(GIT_DIR "${PROJECT_SOURCE_DIR}/.git")
|
| 11 |
+
|
| 12 |
+
# Is git submodule
|
| 13 |
+
if(NOT IS_DIRECTORY "${GIT_DIR}")
|
| 14 |
+
file(READ ${GIT_DIR} REAL_GIT_DIR_LINK)
|
| 15 |
+
string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK})
|
| 16 |
+
string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS)
|
| 17 |
+
if (SLASH_POS EQUAL 0)
|
| 18 |
+
set(GIT_DIR "${REAL_GIT_DIR}")
|
| 19 |
+
else()
|
| 20 |
+
set(GIT_DIR "${PROJECT_SOURCE_DIR}/${REAL_GIT_DIR}")
|
| 21 |
+
endif()
|
| 22 |
+
endif()
|
| 23 |
+
|
| 24 |
+
if(EXISTS "${GIT_DIR}/index")
|
| 25 |
+
# For build-info.cpp below
|
| 26 |
+
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${GIT_DIR}/index")
|
| 27 |
+
else()
|
| 28 |
+
message(WARNING "Git index not found in git repository.")
|
| 29 |
+
endif()
|
| 30 |
+
else()
|
| 31 |
+
message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
|
| 32 |
+
endif()
|
| 33 |
+
|
| 34 |
+
set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
|
| 35 |
+
set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
|
| 36 |
+
configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
|
| 37 |
+
|
| 38 |
+
set(TARGET build_info)
|
| 39 |
+
add_library(${TARGET} OBJECT ${OUTPUT_FILE})
|
| 40 |
+
if (BUILD_SHARED_LIBS)
|
| 41 |
+
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
| 42 |
+
endif()
|
| 43 |
+
|
| 44 |
+
set(TARGET common)
|
| 45 |
+
|
| 46 |
+
add_library(${TARGET} STATIC
|
| 47 |
+
arg.cpp
|
| 48 |
+
arg.h
|
| 49 |
+
base64.hpp
|
| 50 |
+
chat-parser.cpp
|
| 51 |
+
chat-parser.h
|
| 52 |
+
chat-parser-xml-toolcall.h
|
| 53 |
+
chat-parser-xml-toolcall.cpp
|
| 54 |
+
chat-peg-parser.cpp
|
| 55 |
+
chat-peg-parser.h
|
| 56 |
+
chat.cpp
|
| 57 |
+
chat.h
|
| 58 |
+
common.cpp
|
| 59 |
+
common.h
|
| 60 |
+
console.cpp
|
| 61 |
+
console.h
|
| 62 |
+
debug.cpp
|
| 63 |
+
debug.h
|
| 64 |
+
download.cpp
|
| 65 |
+
download.h
|
| 66 |
+
http.h
|
| 67 |
+
json-partial.cpp
|
| 68 |
+
json-partial.h
|
| 69 |
+
json-schema-to-grammar.cpp
|
| 70 |
+
llguidance.cpp
|
| 71 |
+
log.cpp
|
| 72 |
+
log.h
|
| 73 |
+
ngram-cache.cpp
|
| 74 |
+
ngram-cache.h
|
| 75 |
+
ngram-map.cpp
|
| 76 |
+
ngram-map.h
|
| 77 |
+
ngram-mod.cpp
|
| 78 |
+
ngram-mod.h
|
| 79 |
+
peg-parser.cpp
|
| 80 |
+
peg-parser.h
|
| 81 |
+
preset.cpp
|
| 82 |
+
preset.h
|
| 83 |
+
regex-partial.cpp
|
| 84 |
+
regex-partial.h
|
| 85 |
+
sampling.cpp
|
| 86 |
+
sampling.h
|
| 87 |
+
speculative.cpp
|
| 88 |
+
speculative.h
|
| 89 |
+
unicode.cpp
|
| 90 |
+
unicode.h
|
| 91 |
+
jinja/lexer.cpp
|
| 92 |
+
jinja/lexer.h
|
| 93 |
+
jinja/parser.cpp
|
| 94 |
+
jinja/parser.h
|
| 95 |
+
jinja/runtime.cpp
|
| 96 |
+
jinja/runtime.h
|
| 97 |
+
jinja/value.cpp
|
| 98 |
+
jinja/value.h
|
| 99 |
+
jinja/string.cpp
|
| 100 |
+
jinja/string.h
|
| 101 |
+
jinja/caps.cpp
|
| 102 |
+
jinja/caps.h
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
target_include_directories(${TARGET} PUBLIC . ../vendor)
|
| 106 |
+
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
| 107 |
+
|
| 108 |
+
if (BUILD_SHARED_LIBS)
|
| 109 |
+
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
| 110 |
+
endif()
|
| 111 |
+
|
| 112 |
+
target_link_libraries(${TARGET} PRIVATE
|
| 113 |
+
build_info
|
| 114 |
+
cpp-httplib
|
| 115 |
+
)
|
| 116 |
+
|
| 117 |
+
if (LLAMA_LLGUIDANCE)
|
| 118 |
+
include(ExternalProject)
|
| 119 |
+
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
|
| 120 |
+
set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
|
| 121 |
+
set(LLGUIDANCE_LIB_NAME "${CMAKE_STATIC_LIBRARY_PREFIX}llguidance${CMAKE_STATIC_LIBRARY_SUFFIX}")
|
| 122 |
+
|
| 123 |
+
ExternalProject_Add(llguidance_ext
|
| 124 |
+
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
|
| 125 |
+
# v1.0.1:
|
| 126 |
+
GIT_TAG d795912fedc7d393de740177ea9ea761e7905774
|
| 127 |
+
PREFIX ${CMAKE_BINARY_DIR}/llguidance
|
| 128 |
+
SOURCE_DIR ${LLGUIDANCE_SRC}
|
| 129 |
+
BUILD_IN_SOURCE TRUE
|
| 130 |
+
CONFIGURE_COMMAND ""
|
| 131 |
+
BUILD_COMMAND cargo build --release --package llguidance
|
| 132 |
+
INSTALL_COMMAND ""
|
| 133 |
+
BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h
|
| 134 |
+
UPDATE_COMMAND ""
|
| 135 |
+
)
|
| 136 |
+
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
|
| 137 |
+
|
| 138 |
+
add_library(llguidance STATIC IMPORTED)
|
| 139 |
+
set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME})
|
| 140 |
+
add_dependencies(llguidance llguidance_ext)
|
| 141 |
+
|
| 142 |
+
target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
|
| 143 |
+
target_link_libraries(${TARGET} PRIVATE llguidance)
|
| 144 |
+
if (WIN32)
|
| 145 |
+
target_link_libraries(${TARGET} PRIVATE ws2_32 userenv ntdll bcrypt)
|
| 146 |
+
endif()
|
| 147 |
+
endif()
|
| 148 |
+
|
| 149 |
+
target_link_libraries(${TARGET} PUBLIC llama Threads::Threads)
|
llama.cpp/common/arg.cpp
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
llama.cpp/common/arg.h
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "common.h"
|
| 4 |
+
|
| 5 |
+
#include <set>
|
| 6 |
+
#include <map>
|
| 7 |
+
#include <string>
|
| 8 |
+
#include <vector>
|
| 9 |
+
#include <cstring>
|
| 10 |
+
|
| 11 |
+
// pseudo-env variable to identify preset-only arguments
|
| 12 |
+
#define COMMON_ARG_PRESET_LOAD_ON_STARTUP "__PRESET_LOAD_ON_STARTUP"
|
| 13 |
+
#define COMMON_ARG_PRESET_STOP_TIMEOUT "__PRESET_STOP_TIMEOUT"
|
| 14 |
+
|
| 15 |
+
//
|
| 16 |
+
// CLI argument parsing
|
| 17 |
+
//
|
| 18 |
+
|
| 19 |
+
// Descriptor for a single command-line argument (and its optional associated
// environment variable): flag spellings, value arity, help text, and the
// handler that applies the parsed value to a common_params instance.
struct common_arg {
    // examples (programs) this argument is available in; LLAMA_EXAMPLE_COMMON by default
    std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
    // examples this argument must be hidden from, even if listed in `examples`
    std::set<enum llama_example> excludes = {};
    std::vector<const char *> args;     // accepted flag spellings, e.g. {"-m", "--model"}
    std::vector<const char *> args_neg; // for negated args like --no-xxx
    const char * value_hint   = nullptr; // help text or example for arg value
    const char * value_hint_2 = nullptr; // for second arg value
    const char * env          = nullptr; // associated environment variable name, if any
    std::string help;
    bool is_sparam = false;      // is current arg a sampling param?
    bool is_preset_only = false; // is current arg preset-only (not treated as CLI arg)
    // exactly one of the handlers below is set, depending on the arg's value arity/type
    void (*handler_void)   (common_params & params) = nullptr;
    void (*handler_string) (common_params & params, const std::string &) = nullptr;
    void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
    void (*handler_int)    (common_params & params, int) = nullptr;
    void (*handler_bool)   (common_params & params, bool) = nullptr;

    common_arg() = default;

    // flag taking a single string value
    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const std::string & help,
        void (*handler)(common_params & params, const std::string &)
    ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}

    // flag taking a single integer value
    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const std::string & help,
        void (*handler)(common_params & params, int)
    ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}

    // value-less flag
    common_arg(
        const std::initializer_list<const char *> & args,
        const std::string & help,
        void (*handler)(common_params & params)
    ) : args(args), help(help), handler_void(handler) {}

    // boolean flag with positive and negated spellings (e.g. --foo / --no-foo)
    common_arg(
        const std::initializer_list<const char *> & args,
        const std::initializer_list<const char *> & args_neg,
        const std::string & help,
        void (*handler)(common_params & params, bool)
    ) : args(args), args_neg(args_neg), help(help), handler_bool(handler) {}

    // support 2 values for arg
    common_arg(
        const std::initializer_list<const char *> & args,
        const char * value_hint,
        const char * value_hint_2,
        const std::string & help,
        void (*handler)(common_params & params, const std::string &, const std::string &)
    ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}

    // builder-style setters; each returns a common_arg& so calls can be chained
    // (definitions live in arg.cpp)
    common_arg & set_examples(std::initializer_list<enum llama_example> examples);
    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
    common_arg & set_env(const char * env);
    common_arg & set_sparam();
    common_arg & set_preset_only();
    bool in_example(enum llama_example ex);
    bool is_exclude(enum llama_example ex);
    // read the value of the associated env var into `output`
    // (exact semantics when `env` is unset are defined in arg.cpp — TODO confirm)
    bool get_value_from_env(std::string & output) const;
    bool has_value_from_env() const;
    std::string to_string() const;

    // for using as key in std::map
    // NOTE(review): ordering/equality use only the first spelling args[0]; an
    // instance with an empty `args` compares as equivalent to everything, so
    // keys are expected to always have at least one spelling.
    bool operator<(const common_arg& other) const {
        if (args.empty() || other.args.empty()) {
            return false;
        }
        return strcmp(args[0], other.args[0]) < 0;
    }
    bool operator==(const common_arg& other) const {
        if (args.empty() || other.args.empty()) {
            return false;
        }
        return strcmp(args[0], other.args[0]) == 0;
    }

    // get all args and env vars (including negated args/env)
    std::vector<std::string> get_args() const;
    std::vector<std::string> get_env() const;
};
|
| 103 |
+
|
| 104 |
+
// Helpers for interpreting boolean-ish string values coming from CLI arguments
// or environment variables (accepted spellings are defined in arg.cpp).
namespace common_arg_utils {
    bool is_truthy(const std::string & value); // value spells "true"
    bool is_falsey(const std::string & value); // value spells "false"
    bool is_autoy(const std::string & value);  // value spells "auto"
}
|
| 109 |
+
|
| 110 |
+
// Everything the argument parser needs for one run: the target example, the
// destination params struct, and the full list of known options.
struct common_params_context {
    enum llama_example ex = LLAMA_EXAMPLE_COMMON;
    common_params & params;          // destination that parsed values are written into
    std::vector<common_arg> options; // all arguments applicable to `ex`
    void(*print_usage)(int, char **) = nullptr; // optional example-specific usage printer
    common_params_context(common_params & params) : params(params) {}
};
|
| 117 |
+
|
| 118 |
+
// parse input arguments from CLI
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);

// parse input arguments from CLI into a map (arg descriptor -> raw string value)
bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map);

// populate preset-only arguments
// these arguments are not treated as command line arguments
// see: https://github.com/ggml-org/llama.cpp/issues/18163
void common_params_add_preset_options(std::vector<common_arg> & args);

// initialize argument parser context - used by test-arg-parser and preset
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
llama.cpp/common/base64.hpp
ADDED
|
@@ -0,0 +1,392 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
This is free and unencumbered software released into the public domain.
|
| 3 |
+
|
| 4 |
+
Anyone is free to copy, modify, publish, use, compile, sell, or
|
| 5 |
+
distribute this software, either in source code form or as a compiled
|
| 6 |
+
binary, for any purpose, commercial or non-commercial, and by any
|
| 7 |
+
means.
|
| 8 |
+
|
| 9 |
+
In jurisdictions that recognize copyright laws, the author or authors
|
| 10 |
+
of this software dedicate any and all copyright interest in the
|
| 11 |
+
software to the public domain. We make this dedication for the benefit
|
| 12 |
+
of the public at large and to the detriment of our heirs and
|
| 13 |
+
successors. We intend this dedication to be an overt act of
|
| 14 |
+
relinquishment in perpetuity of all present and future rights to this
|
| 15 |
+
software under copyright law.
|
| 16 |
+
|
| 17 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 18 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 19 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
| 20 |
+
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
| 21 |
+
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
| 22 |
+
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
| 23 |
+
OTHER DEALINGS IN THE SOFTWARE.
|
| 24 |
+
|
| 25 |
+
For more information, please refer to <http://unlicense.org>
|
| 26 |
+
*/
|
| 27 |
+
|
| 28 |
+
#ifndef PUBLIC_DOMAIN_BASE64_HPP_
|
| 29 |
+
#define PUBLIC_DOMAIN_BASE64_HPP_
|
| 30 |
+
|
| 31 |
+
#include <cstdint>
|
| 32 |
+
#include <iterator>
|
| 33 |
+
#include <stdexcept>
|
| 34 |
+
#include <string>
|
| 35 |
+
|
| 36 |
+
// Exception thrown by base64 decoding when an invalid input character is
// encountered (see base64::_base64_value).
class base64_error : public std::runtime_error
{
public:
    using std::runtime_error::runtime_error;
};
|
| 41 |
+
|
| 42 |
+
// Vendored public-domain base64 encoder/decoder (header-only).
// Supports the standard and URL/filename-safe alphabets, with optional
// auto-detection of the alphabet while decoding.
class base64
{
public:
    enum class alphabet
    {
        /** the alphabet is detected automatically */
        auto_,
        /** the standard base64 alphabet is used */
        standard,
        /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/
        url_filename_safe
    };

    enum class decoding_behavior
    {
        /** if the input is not padded, the remaining bits are ignored */
        moderate,
        /** if a padding character is encounter decoding is finished */
        loose
    };

    /**
     Encodes all the elements from `in_begin` to `in_end` to `out`.

     @warning The source and destination cannot overlap. The destination must be able to hold at least
     `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than
     8 bits
     @tparam Output_iterator the destination; the elements written to it are from the type `char`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used
     @returns the iterator to the next element past the last element copied
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet = alphabet::standard)
    {
        constexpr auto pad = '=';
        const char* alpha = alphabet == alphabet::url_filename_safe
                                ? "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"
                                : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

        // consume the input in groups of up to 3 bytes -> up to 4 output characters
        while (in_begin != in_end) {
            std::uint8_t i0 = 0, i1 = 0, i2 = 0;

            // first character
            i0 = static_cast<std::uint8_t>(*in_begin);
            ++in_begin;

            *out = alpha[i0 >> 2 & 0x3f]; // top 6 bits of byte 0
            ++out;

            // part of first character and second
            if (in_begin != in_end) {
                i1 = static_cast<std::uint8_t>(*in_begin);
                ++in_begin;

                *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)];
                ++out;
            } else {
                // only 1 byte in this group: low 2 bits of byte 0, then "=="
                *out = alpha[(i0 & 0x3) << 4];
                ++out;

                // last padding
                *out = pad;
                ++out;

                // last padding
                *out = pad;
                ++out;

                break;
            }

            // part of second character and third
            if (in_begin != in_end) {
                i2 = static_cast<std::uint8_t>(*in_begin);
                ++in_begin;

                *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)];
                ++out;
            } else {
                // only 2 bytes in this group: low 4 bits of byte 1, then "="
                *out = alpha[(i1 & 0xf) << 2];
                ++out;

                // last padding
                *out = pad;
                ++out;

                break;
            }

            // rest of third
            *out = alpha[i2 & 0x3f];
            ++out;
        }

        return out;
    }
    /**
     Encodes a string.

     @param str the string that should be encoded
     @param alphabet which alphabet should be used
     @returns the encoded base64 string
     @throws see base64::encode()
    */
    static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(str.length()) + 1);

        encode(str.begin(), str.end(), std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Encodes a char array.

     @param buffer the char array
     @param size the size of the array
     @param alphabet which alphabet should be used
     @returns the encoded string
    */
    static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard)
    {
        std::string result;

        result.reserve(required_encode_size(size) + 1);

        encode(buffer, buffer + size, std::back_inserter(result), alphabet);

        return result;
    }
    /**
     Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`,
     in other words: inplace decoding is possible.

     @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`,
     otherwise the behavior depends on the output iterator.

     @tparam Input_iterator the source; the returned elements are cast to `char`
     @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t`
     @param in_begin the beginning of the source
     @param in_end the ending of the source
     @param out the destination iterator
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the iterator to the next element past the last element copied
     @throws base64_error depending on the set behavior
     @throws see `Input_iterator` and `Output_iterator`
    */
    template<typename Input_iterator, typename Output_iterator>
    static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out,
                                  alphabet alphabet = alphabet::auto_,
                                  decoding_behavior behavior = decoding_behavior::moderate)
    {
        //constexpr auto pad = '=';
        // `last` buffers the most recent 6-bit group; `bits` counts how many
        // of its bits have not yet been emitted to the output
        std::uint8_t last = 0;
        auto bits = 0;

        while (in_begin != in_end) {
            auto c = *in_begin;
            ++in_begin;

            // first padding character ends the data portion
            if (c == '=') {
                break;
            }

            auto part = _base64_value(alphabet, c);

            // enough bits for one byte
            if (bits + 6 >= 8) {
                // combine buffered bits with the top of the new 6-bit group;
                // 6 new bits minus 8 emitted leaves `bits - 2` buffered
                *out = (last << (8 - bits)) | (part >> (bits - 2));
                ++out;

                bits -= 2;
            } else {
                bits += 6;
            }

            last = part;
        }

        // check padding
        if (behavior != decoding_behavior::loose) {
            // everything after the first '=' must also be padding
            while (in_begin != in_end) {
                auto c = *in_begin;
                ++in_begin;

                if (c != '=') {
                    throw base64_error("invalid base64 character.");
                }
            }
        }

        return out;
    }
    /**
     Decodes a string.

     @param str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(str.length()));

        decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a string.

     @param buffer the base64 encoded buffer
     @param size the size of the buffer
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the decoded string
     @throws see base64::decode()
    */
    static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_,
                              decoding_behavior behavior = decoding_behavior::moderate)
    {
        std::string result;

        result.reserve(max_decode_size(size));

        decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior);

        return result;
    }
    /**
     Decodes a string inplace.

     @param[in,out] str the base64 encoded string
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @throws base64::decode_inplace()
    */
    static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_,
                               decoding_behavior behavior = decoding_behavior::moderate)
    {
        // decoded output is never longer than the input, so in-place is safe
        str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin());
    }
    /**
     Decodes a char array inplace.

     @param[in,out] str the string array
     @param size the length of the array
     @param alphabet which alphabet should be used
     @param behavior the behavior when an error was detected
     @returns the pointer to the next element past the last element decoded
     @throws base64::decode_inplace()
    */
    static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_,
                                decoding_behavior behavior = decoding_behavior::moderate)
    {
        return decode(str, str + size, str, alphabet, behavior);
    }
    /**
     Returns the required decoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{4} \rceil \cdot 3
     $$

     @param size the size of the encoded input
     @returns the size of the resulting decoded buffer; this the absolute maximum
    */
    static std::size_t max_decode_size(std::size_t size) noexcept
    {
        return (size / 4 + (size % 4 ? 1 : 0)) * 3;
    }
    /**
     Returns the required encoding size for a given size. The value is calculated with the following formula:

     $$
     \lceil \frac{size}{3} \rceil \cdot 4
     $$

     @param size the size of the decoded input
     @returns the size of the resulting encoded buffer
    */
    static std::size_t required_encode_size(std::size_t size) noexcept
    {
        return (size / 3 + (size % 3 ? 1 : 0)) * 4;
    }

private:
    // Maps a base64 character to its 6-bit value. In auto-detect mode the
    // first alphabet-specific character ('+', '/', '-' or '_') locks
    // `alphabet` to the corresponding concrete alphabet.
    static std::uint8_t _base64_value(alphabet& alphabet, char c)
    {
        if (c >= 'A' && c <= 'Z') {
            return c - 'A';
        } else if (c >= 'a' && c <= 'z') {
            return c - 'a' + 26;
        } else if (c >= '0' && c <= '9') {
            return c - '0' + 52;
        }

        // comes down to alphabet
        if (alphabet == alphabet::standard) {
            if (c == '+') {
                return 62;
            } else if (c == '/') {
                return 63;
            }
        } else if (alphabet == alphabet::url_filename_safe) {
            if (c == '-') {
                return 62;
            } else if (c == '_') {
                return 63;
            }
        } // auto detect
        else {
            if (c == '+') {
                alphabet = alphabet::standard;

                return 62;
            } else if (c == '/') {
                alphabet = alphabet::standard;

                return 63;
            } else if (c == '-') {
                alphabet = alphabet::url_filename_safe;

                return 62;
            } else if (c == '_') {
                alphabet = alphabet::url_filename_safe;

                return 63;
            }
        }

        throw base64_error("invalid base64 character.");
    }
};
|
| 391 |
+
|
| 392 |
+
#endif // !PUBLIC_DOMAIN_BASE64_HPP_
|
llama.cpp/common/build-info.cpp.in
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Build metadata definitions; the @...@ placeholders are substituted by CMake
// at configure time (see cmake/build-info.cmake).
int LLAMA_BUILD_NUMBER = @LLAMA_BUILD_NUMBER@;
char const *LLAMA_COMMIT = "@LLAMA_BUILD_COMMIT@";
char const *LLAMA_COMPILER = "@BUILD_COMPILER@";
char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@";
|
llama.cpp/common/chat-parser-xml-toolcall.cpp
ADDED
|
@@ -0,0 +1,879 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "chat.h"
|
| 2 |
+
#include "chat-parser.h"
|
| 3 |
+
#include "common.h"
|
| 4 |
+
#include "json-partial.h"
|
| 5 |
+
#include "json-schema-to-grammar.h"
|
| 6 |
+
#include "log.h"
|
| 7 |
+
#include "regex-partial.h"
|
| 8 |
+
|
| 9 |
+
using json = nlohmann::ordered_json;
|
| 10 |
+
|
| 11 |
+
// Signals that the model's tool-call output violated the expected XML syntax.
class xml_toolcall_syntax_exception : public std::runtime_error {
  public:
    xml_toolcall_syntax_exception(const std::string & msg) : std::runtime_error(msg) {}
};
|
| 15 |
+
|
| 16 |
+
// Sort `vec` ascending and remove duplicate elements, in place.
template<typename T>
inline void sort_uniq(std::vector<T> &vec) {
    std::sort(vec.begin(), vec.end());
    const auto dup_begin = std::unique(vec.begin(), vec.end());
    vec.erase(dup_begin, vec.end());
}
|
| 21 |
+
|
| 22 |
+
// True when every character of `str` is whitespace (empty input counts as all-space).
template<typename T>
inline bool all_space(const T &str) {
    for (unsigned char ch : str) {
        if (!std::isspace(ch)) {
            return false;
        }
    }
    return true;
}
|
| 26 |
+
|
| 27 |
+
// Return the longest prefix length (in bytes) of `s` that does not end in the
// middle of a multi-byte UTF-8 sequence: a complete string keeps its full
// length, while a trailing partial sequence is cut off.
static size_t utf8_truncate_safe(const std::string_view s) {
    const size_t len = s.size();
    if (len == 0) {
        return 0;
    }
    // walk backwards over at most 3 continuation bytes plus one lead byte
    size_t pos = len;
    for (size_t scanned = 0; scanned < 4 && pos > 0; ++scanned) {
        --pos;
        const unsigned char byte = s[pos];
        if ((byte & 0x80) == 0) {
            // ASCII byte: the string cannot end mid-sequence
            return len;
        }
        if ((byte & 0xC0) == 0xC0) {
            // lead byte: how many bytes should its sequence occupy?
            size_t seq_len;
            if ((byte & 0xE0) == 0xC0) {
                seq_len = 2;
            } else if ((byte & 0xF0) == 0xE0) {
                seq_len = 3;
            } else if ((byte & 0xF8) == 0xF0) {
                seq_len = 4;
            } else {
                return pos; // malformed lead byte: drop it
            }
            // keep the sequence only when all of its bytes are present
            return (len - pos >= seq_len) ? len : pos;
        }
        // continuation byte (10xxxxxx): keep scanning backwards
    }
    // saw only continuation bytes: drop at most the last 3 bytes
    return len - std::min(len, size_t(3));
}

// Shrink `s` in place so it does not end with a partial UTF-8 sequence.
inline void utf8_truncate_safe_resize(std::string &s) {
    s.resize(utf8_truncate_safe(s));
}

// View of the longest prefix of `s` that does not end with a partial UTF-8 sequence.
inline std::string_view utf8_truncate_safe_view(const std::string_view s) {
    return s.substr(0, utf8_truncate_safe(s));
}
|
| 59 |
+
|
| 60 |
+
// Find the first occurrence of `literal1` followed by `literal2`, allowing only
// whitespace between the two. On success the returned match group spans from
// the start of `literal1` to the end of `literal2` and the parser is left just
// past `literal2`; on failure the parser position is restored and nullopt is
// returned. When `literal1` is empty this degenerates to a plain search for
// `literal2`.
static std::optional<common_chat_msg_parser::find_regex_result> try_find_2_literal_splited_by_spaces(common_chat_msg_parser & builder, const std::string & literal1, const std::string & literal2) {
    if (literal1.size() == 0) return builder.try_find_literal(literal2);
    const auto saved_pos = builder.pos();
    while (auto res = builder.try_find_literal(literal1)) {
        builder.consume_spaces();
        // Compare as much of literal2 as the remaining input allows, so a
        // truncated (streaming) suffix still counts as a match.
        const auto match_len = std::min(literal2.size(), builder.input().size() - builder.pos());
        if (builder.input().compare(builder.pos(), match_len, literal2, 0, match_len) == 0) {
            // Recompute the prelude relative to the original start position in
            // case this is not the first candidate we examined.
            if (res->prelude.size() != res->groups[0].begin - saved_pos) {
                res->prelude = builder.str({saved_pos, res->groups[0].begin});
            }
            builder.move_to(builder.pos() + match_len);
            res->groups[0].end = builder.pos();
            GGML_ASSERT(res->groups[0].begin != res->groups[0].end);
            return res;
        }
        // No literal2 after this literal1 occurrence: retry from one byte past
        // the candidate start to find a later occurrence.
        builder.move_to(res->groups[0].begin + 1);
    }
    builder.move_to(saved_pos);
    return std::nullopt;
}
|
| 80 |
+
|
| 81 |
+
/**
 * Make a GBNF expression that accepts any string except those containing any of
 * the forbidden strings.
 *
 * The forbidden strings are organized as a trie (via sorting + recursive
 * grouping on a shared prefix byte); the produced expression matches, at each
 * position, either a byte that cannot start/continue any forbidden string, or a
 * prefix byte followed by a sub-expression that avoids completing it.
 */
std::string make_gbnf_excluding(std::vector<std::string> forbids) {
    // Escape a byte for use inside a GBNF character class [...].
    constexpr auto charclass_escape = [](unsigned char c) -> std::string {
        if (c == '\\' || c == ']' || c == '^' || c == '-') {
            std::string s = "\\";
            s.push_back((char)c);
            return s;
        }
        if (isprint(c)) {
            return std::string(1, (char)c);
        }
        char buf[16];
        snprintf(buf, 15, "\\x%02X", c);
        return std::string(buf);
    };
    // Recursively build the expression for forbids[l, r) at byte index `depth`.
    // Returns "" when every string in the range is fully consumed (i.e. any
    // continuation would contain a forbidden string).
    constexpr auto build_expr = [charclass_escape](auto self, const std::vector<std::string>& forbids, int l, int r, int depth) -> std::string {
        // Group the (sorted) range by the byte at `depth`.
        std::vector<std::pair<unsigned char, std::pair<int,int>>> children;
        int i = l;
        while (i < r) {
            const std::string &s = forbids[i];
            if ((int)s.size() == depth) {
                // This forbidden string ends here; it contributes no child.
                ++i;
                continue;
            }
            unsigned char c = (unsigned char)s[depth];
            int j = i;
            while (j < r && (int)forbids[j].size() > depth &&
                   (unsigned char)forbids[j][depth] == c) {
                ++j;
            }
            children.push_back({c, {i, j}});
            i = j;
        }
        std::vector<std::string> alts;
        if (!children.empty()) {
            // Any byte that does not begin a forbidden continuation is safe.
            std::string cls;
            for (auto &ch : children) cls += charclass_escape(ch.first);
            alts.push_back(std::string("[^") + cls + "]");
        }
        for (auto &ch : children) {
            std::string childExpr = self(self, forbids, ch.second.first, ch.second.second, depth+1);
            if (!childExpr.empty()) {
                // Quote the prefix byte as a GBNF string literal.
                std::string quoted_ch = "\"";
                if (ch.first == '\\') quoted_ch += "\\\\";
                else if (ch.first == '"') quoted_ch += "\\\"";
                else if (isprint(ch.first)) quoted_ch.push_back(ch.first);
                else {
                    char buf[16];
                    snprintf(buf, 15, "\\x%02X", ch.first);
                    quoted_ch += buf;
                }
                quoted_ch += "\"";
                std::string branch = quoted_ch + std::string(" ") + childExpr;
                alts.push_back(branch);
            }
        }
        if (alts.empty()) return "";
        std::ostringstream oss;
        oss << "( ";
        for (size_t k = 0; k < alts.size(); ++k) {
            if (k) oss << " | ";
            oss << alts[k];
        }
        oss << " )";
        return oss.str();
    };
    if (forbids.empty()) return "( . )*";
    // Sorting makes equal-prefix strings adjacent, which build_expr relies on.
    sort(forbids.begin(), forbids.end());
    std::string expr = build_expr(build_expr, forbids, 0, forbids.size(), 0);
    if (expr.empty()) {
        // Degenerate case (e.g. a single one-byte forbidden string): forbid
        // exactly the first bytes of each forbidden string.
        std::string cls;
        for (auto &s : forbids) if (!s.empty()) cls += charclass_escape((unsigned char)s[0]);
        expr = std::string("( [^") + cls + "] )";
    }
    if (forbids.size() == 1)
        return expr + "*";
    else
        return std::string("( ") + expr + " )*";
}
|
| 162 |
+
|
| 163 |
+
/**
 * Build grammar for xml-style tool call.
 * form.scope_start and form.scope_end can be empty.
 * Requires data.format for model-specific hacks.
 *
 * Produces a GBNF grammar (stored in data.grammar) that constrains generation
 * to: scope_start? (tool_start name tool_sep args tool_end)+ scope_end?, with
 * one rule per declared tool and per tool argument. Also registers a grammar
 * trigger word so the grammar only activates once the model emits the tool
 * call opening sequence.
 */
void build_grammar_xml_tool_call(common_chat_params & data, const json & tools, const struct xml_tool_call_format & form) {
    GGML_ASSERT(!form.tool_start.empty());
    GGML_ASSERT(!form.tool_sep.empty());
    GGML_ASSERT(!form.key_start.empty());
    GGML_ASSERT(!form.val_end.empty());
    GGML_ASSERT(!form.tool_end.empty());

    // Combined key/value separator: when a secondary separator exists, the two
    // are joined by a newline (matching the template's emitted layout).
    std::string key_val_sep = form.key_val_sep;
    if (form.key_val_sep2) {
        key_val_sep += "\n";
        key_val_sep += *form.key_val_sep2;
    }
    GGML_ASSERT(!key_val_sep.empty());

    if (tools.is_array() && !tools.empty()) {
        data.grammar = build_grammar([&](const common_grammar_builder &builder) {
            // A raw string argument value may contain anything except the
            // value terminator(s).
            auto string_arg_val = form.last_val_end ?
                builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end, *form.last_val_end})) :
                builder.add_rule("string-arg-val", make_gbnf_excluding({form.val_end}));

            std::vector<std::string> tool_rules;
            for (const auto & tool : tools) {
                // Validate the OpenAI-style tool declaration; skip (with a
                // warning) rather than fail on malformed entries.
                if (!tool.contains("type") || tool.at("type") != "function" || !tool.contains("function")) {
                    LOG_WRN("Skipping tool without function: %s", tool.dump(2).c_str());
                    continue;
                }
                const auto & function = tool.at("function");
                if (!function.contains("name") || !function.at("name").is_string()) {
                    LOG_WRN("Skipping invalid function (invalid name): %s", function.dump(2).c_str());
                    continue;
                }
                if (!function.contains("parameters") || !function.at("parameters").is_object()) {
                    LOG_WRN("Skipping invalid function (invalid parameters): %s", function.dump(2).c_str());
                    continue;
                }
                std::string name = function.at("name");
                auto parameters = function.at("parameters");
                builder.resolve_refs(parameters);

                // One grammar symbol per parameter, plus whether it must appear.
                struct parameter_rule {
                    std::string symbol_name;
                    bool is_required;
                };
                std::vector<parameter_rule> arg_rules;
                if (!parameters.contains("properties") || !parameters.at("properties").is_object()) {
                    LOG_WRN("Skipping invalid function (invalid properties): %s", function.dump(2).c_str());
                    continue;
                } else {
                    std::vector<std::string> requiredParameters;
                    if (parameters.contains("required")) {
                        try { parameters.at("required").get_to(requiredParameters); }
                        catch (const std::runtime_error&) {
                            LOG_WRN("Invalid function required parameters, ignoring: %s", function.at("required").dump(2).c_str());
                        }
                    }
                    sort_uniq(requiredParameters);
                    for (const auto & [key, value] : parameters.at("properties").items()) {
                        std::string quoted_key = key;
                        bool required = std::binary_search(requiredParameters.begin(), requiredParameters.end(), key);
                        // When the key is emitted inside double quotes (JSON-ish
                        // templates), escape it; strip the surrounding quotes
                        // added by gbnf_format_literal since the template
                        // already provides them.
                        if (form.key_start.back() == '"' && key_val_sep[0] == '"') {
                            quoted_key = gbnf_format_literal(key);
                            quoted_key = quoted_key.substr(1, quoted_key.size() - 2);
                        }
                        arg_rules.push_back(parameter_rule {builder.add_rule("func-" + name + "-kv-" + key,
                            gbnf_format_literal(form.key_start) + " " +
                            gbnf_format_literal(quoted_key) + " " +
                            gbnf_format_literal(key_val_sep) + " " +
                            // String-typed values may be raw text (when allowed)
                            // and/or JSON-schema constrained; other types always
                            // use the schema rule.
                            ((value.contains("type") && value["type"].is_string() && value["type"] == "string" && (!form.raw_argval || *form.raw_argval)) ?
                                (form.raw_argval ?
                                    string_arg_val :
                                    "( " + string_arg_val + " | " + builder.add_schema(name + "-arg-" + key, value) + " )"
                                ) :
                                builder.add_schema(name + "-arg-" + key, value)
                            )
                        ), required});
                    }
                }

                // Build the argument sequence right-to-left so each rule can
                // either include the current argument or (if optional) skip to
                // the continuation. `next_arg_with_sep` is the variant that
                // still needs a val_end terminator before the next argument.
                auto next_arg_with_sep = builder.add_rule(name + "-last-arg-end", form.last_val_end ? gbnf_format_literal(*form.last_val_end) : gbnf_format_literal(form.val_end));
                decltype(next_arg_with_sep) next_arg = "\"\"";
                for (auto i = arg_rules.size() - 1; /* i >= 0 && */ i < arg_rules.size(); --i) {
                    std::string include_this_arg = arg_rules[i].symbol_name + " " + next_arg_with_sep;
                    next_arg = builder.add_rule(name + "-arg-after-" + std::to_string(i), arg_rules[i].is_required ?
                        include_this_arg : "( " + include_this_arg + " ) | " + next_arg
                    );
                    include_this_arg = gbnf_format_literal(form.val_end) + " " + include_this_arg;
                    next_arg_with_sep = builder.add_rule(name + "-arg-after-" + std::to_string(i) + "-with-sep", arg_rules[i].is_required ?
                        include_this_arg : "( " + include_this_arg + " ) | " + next_arg_with_sep
                    );
                }

                std::string quoted_name = name;
                // Same quote-stripping hack as for keys, for JSON-ish templates.
                if (form.tool_start.back() == '"' && form.tool_sep[0] == '"') {
                    quoted_name = gbnf_format_literal(name);
                    quoted_name = quoted_name.substr(1, quoted_name.size() - 2);
                }
                quoted_name = gbnf_format_literal(quoted_name);
                // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
                if (data.format == COMMON_CHAT_FORMAT_KIMI_K2) {
                    quoted_name = "\"functions.\" " + quoted_name + " \":\" [0-9]+";
                }
                tool_rules.push_back(builder.add_rule(name + "-call",
                    gbnf_format_literal(form.tool_start) + " " +
                    quoted_name + " " +
                    gbnf_format_literal(form.tool_sep) + " " +
                    next_arg
                ));
            }

            // Root: optional scope wrapper around one-or-more tool calls
            // separated by tool_end, closed by tool_end (or last_tool_end).
            auto tool_call_once = builder.add_rule("root-tool-call-once", string_join(tool_rules, " | "));
            auto tool_call_more = builder.add_rule("root-tool-call-more", gbnf_format_literal(form.tool_end) + " " + tool_call_once);
            auto call_end = builder.add_rule("root-call-end", form.last_tool_end ? gbnf_format_literal(*form.last_tool_end) : gbnf_format_literal(form.tool_end));
            auto tool_call_multiple_with_end = builder.add_rule("root-tool-call-multiple-with-end", tool_call_once + " " + tool_call_more + "* " + call_end);
            builder.add_rule("root",
                (form.scope_start.empty() ? "" : gbnf_format_literal(form.scope_start) + " ") +
                tool_call_multiple_with_end + "?" +
                (form.scope_end.empty() ? "" : " " + gbnf_format_literal(form.scope_end))
            );
        });

        // grammar trigger for tool call
        data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, form.scope_start + form.tool_start });
    }
}
|
| 292 |
+
|
| 293 |
+
/**
 * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
 * Throws xml_toolcall_syntax_exception if there is invalid syntax and cannot recover the original status for common_chat_msg_parser.
 * form.scope_start, form.tool_sep and form.scope_end can be empty.
 *
 * Streaming-aware: on truncated input this throws common_chat_msg_partial_exception
 * after emitting the best partial tool call it can (so clients see incremental
 * arguments). The `recovery` flag tracks whether the builder state can still be
 * rolled back; once partial results have been committed it is cleared and
 * errors become xml_toolcall_syntax_exception instead of a clean `false`.
 */
inline bool parse_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form) {
    GGML_ASSERT(!form.tool_start.empty());
    GGML_ASSERT(!form.key_start.empty());
    GGML_ASSERT(!form.key_val_sep.empty());
    GGML_ASSERT(!form.val_end.empty());
    GGML_ASSERT(!form.tool_end.empty());

    // Helper to choose return false or throw error
    constexpr auto return_error = [](common_chat_msg_parser & builder, auto &start_pos, const bool &recovery) {
        LOG_DBG("Failed to parse XML-Style tool call at position: %s\n", gbnf_format_literal(builder.consume_rest().substr(0, 20)).c_str());
        if (recovery) {
            builder.move_to(start_pos);
            return false;
        } else throw xml_toolcall_syntax_exception("Tool call parsing failed with unrecoverable errors. Try using a grammar to constrain the model’s output.");
    };
    // Drop substring from needle to end from a JSON
    // (the needle is a sentinel injected into `arguments` to mark where the
    // partial/incomplete data begins; everything after it must be JSON
    // scaffolding only, otherwise the dump is rejected)
    constexpr auto partial_json = [](std::string &json_str, std::string_view needle = "XML_TOOL_CALL_PARTIAL_FLAG") {
        auto pos = json_str.rfind(needle);
        if (pos == std::string::npos) {
            return false;
        }
        for (auto i = pos + needle.size(); i < json_str.size(); ++i) {
            unsigned char ch = static_cast<unsigned char>(json_str[i]);
            if (ch != '\'' && ch != '"' && ch != '}' && ch != ':' && !std::isspace(ch)) {
                return false;
            }
        }
        // Also drop the opening quote of the string the sentinel lives in.
        if (pos != 0 && json_str[pos - 1] == '"') {
            --pos;
        }
        json_str.resize(pos);
        return true;
    };
    // Helper to generate a partial argument JSON
    constexpr auto gen_partial_json = [partial_json](auto set_partial_arg, auto &arguments, auto &builder, auto &function_name) {
        auto rest = builder.consume_rest();
        // Never cut a UTF-8 sequence in half when truncating streamed bytes.
        utf8_truncate_safe_resize(rest);
        set_partial_arg(rest, "XML_TOOL_CALL_PARTIAL_FLAG");
        auto tool_str = arguments.dump();
        if (partial_json(tool_str)) {
            if (builder.add_tool_call(function_name, "", tool_str)) {
                return;
            }
        }
        LOG_DBG("Failed to parse partial XML-Style tool call, fallback to non-partial: %s\n", tool_str.c_str());
    };
    // Helper to find a close (because there may be form.last_val_end or form.last_tool_end)
    // Returns {matched pattern size, match result}; prefers whichever of the
    // primary/alternate terminator appears earliest in the input.
    constexpr auto try_find_close = [](
        common_chat_msg_parser & builder,
        const std::string & end,
        const std::optional<std::string> & alt_end,
        const std::string & end_next,
        const std::optional<std::string> & alt_end_next
    ) {
        auto saved_pos = builder.pos();
        auto tc = builder.try_find_literal(end);
        auto val_end_size = end.size();
        if (alt_end) {
            auto pos_1 = builder.pos();
            builder.move_to(saved_pos);
            // The alternate terminator only counts when followed (over
            // whitespace) by the expected next token.
            auto tc2 = try_find_2_literal_splited_by_spaces(builder, *alt_end, end_next);
            if (alt_end_next) {
                builder.move_to(saved_pos);
                auto tc3 = try_find_2_literal_splited_by_spaces(builder, *alt_end, *alt_end_next);
                if (tc3 && (!tc2 || tc2->prelude.size() > tc3->prelude.size())) {
                    tc2 = tc3;
                }
            }
            if (tc2 && (!tc || tc->prelude.size() > tc2->prelude.size())) {
                tc = tc2;
                // Shrink the match back to just the alt_end literal itself.
                tc->groups[0].end = std::min(builder.input().size(), tc->groups[0].begin + alt_end->size());
                builder.move_to(tc->groups[0].end);
                val_end_size = alt_end->size();
            } else {
                builder.move_to(pos_1);
            }
        }
        return std::make_pair(val_end_size, tc);
    };
    // Helper to find a val_end or last_val_end, returns matched pattern size
    const auto try_find_val_end = [try_find_close, &builder, &form]() {
        return try_find_close(builder, form.val_end, form.last_val_end, form.tool_end, form.last_tool_end);
    };
    // Helper to find a tool_end or last_tool_end, returns matched pattern size
    const auto try_find_tool_end = [try_find_close, &builder, &form]() {
        return try_find_close(builder, form.tool_end, form.last_tool_end, form.scope_end, std::nullopt);
    };

    bool recovery = true;
    const auto start_pos = builder.pos();
    // Consume the optional scope opener; anything non-whitespace before it
    // means this is not a tool call at all.
    if (!all_space(form.scope_start)) {
        if (auto tc = builder.try_find_literal(form.scope_start)) {
            if (all_space(tc->prelude)) {
                if (form.scope_start.size() != tc->groups[0].end - tc->groups[0].begin)
                    throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.scope_start));
            } else {
                builder.move_to(start_pos);
                return false;
            }
        } else return false;
    }
    // One iteration per tool call in the scope.
    while (auto tc = builder.try_find_literal(form.tool_start)) {
        if (!all_space(tc->prelude)) {
            LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
                gbnf_format_literal(form.tool_start).c_str(),
                gbnf_format_literal(tc->prelude).c_str()
            );
            builder.move_to(tc->groups[0].begin - tc->prelude.size());
            break;
        }

        // Find tool name
        auto func_name = builder.try_find_literal(all_space(form.tool_sep) ? form.key_start : form.tool_sep);
        if (!func_name) {
            // No separator/key: the name may run straight into the tool close
            // (zero-argument call).
            auto [sz, tc] = try_find_tool_end();
            func_name = tc;
        }
        if (!func_name) {
            // Partial tool name not supported
            throw common_chat_msg_partial_exception("incomplete tool_call");
        }
        // If the model generate multiple tool call and the first tool call has no argument
        if (func_name->prelude.find(form.tool_end) != std::string::npos || (form.last_tool_end ? func_name->prelude.find(*form.last_tool_end) != std::string::npos : false)) {
            builder.move_to(func_name->groups[0].begin - func_name->prelude.size());
            auto [sz, tc] = try_find_tool_end();
            func_name = tc;
        }

        // Parse tool name
        builder.move_to(all_space(form.tool_sep) ? func_name->groups[0].begin : func_name->groups[0].end);
        std::string function_name = string_strip(func_name->prelude);
        // Kimi-K2 uses functions.{{ tool_call['function']['name'] }}:{{ loop.index }} as function name
        if (builder.syntax().format == COMMON_CHAT_FORMAT_KIMI_K2) {
            if (string_starts_with(function_name, "functions.")) {
                static const std::regex re(":\\d+$");
                if (std::regex_search(function_name, re)) {
                    function_name = function_name.substr(10, function_name.rfind(":") - 10);
                }
            }
        }

        // Argument JSON
        json arguments = json::object();

        // Helper to generate a partial argument JSON
        const auto gen_partial_args = [&](auto set_partial_arg) {
            gen_partial_json(set_partial_arg, arguments, builder, function_name);
        };

        // Parse all arg_key/arg_value pairs
        while (auto tc = builder.try_find_literal(form.key_start)) {
            if (!all_space(tc->prelude)) {
                LOG_DBG("XML-Style tool call: Expected %s, but found %s, trying to match next pattern\n",
                    gbnf_format_literal(form.key_start).c_str(),
                    gbnf_format_literal(tc->prelude).c_str()
                );
                builder.move_to(tc->groups[0].begin - tc->prelude.size());
                break;
            }
            // key_start itself is truncated at end of input: emit what we have
            // (arguments dump minus the closing brace) and wait for more tokens.
            if (tc->groups[0].end - tc->groups[0].begin != form.key_start.size()) {
                auto tool_call_arg = arguments.dump();
                if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
                    tool_call_arg.resize(tool_call_arg.size() - 1);
                }
                builder.add_tool_call(function_name, "", tool_call_arg);
                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_start));
            }

            // Parse arg_key
            auto key_res = builder.try_find_literal(form.key_val_sep);
            if (!key_res) {
                gen_partial_args([&](auto &rest, auto &needle) {arguments[rest + needle] = "";});
                throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.key_val_sep) + " after " + gbnf_format_literal(form.key_start));
            }
            if (key_res->groups[0].end - key_res->groups[0].begin != form.key_val_sep.size()) {
                gen_partial_args([&](auto &, auto &needle) {arguments[key_res->prelude + needle] = "";});
                throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.key_val_sep));
            }
            auto &key = key_res->prelude;
            // A key has been committed: state can no longer be cleanly rolled back.
            recovery = false;

            // Parse arg_value
            if (form.key_val_sep2) {
                if (auto tc = builder.try_find_literal(*form.key_val_sep2)) {
                    if (!all_space(tc->prelude)) {
                        LOG_DBG("Failed to parse XML-Style tool call: Unexcepted %s between %s and %s\n",
                            gbnf_format_literal(tc->prelude).c_str(),
                            gbnf_format_literal(form.key_val_sep).c_str(),
                            gbnf_format_literal(*form.key_val_sep2).c_str()
                        );
                        return return_error(builder, start_pos, false);
                    }
                    if (tc->groups[0].end - tc->groups[0].begin != form.key_val_sep2->size()) {
                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(*form.key_val_sep2));
                    }
                } else {
                    gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
                    throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(*form.key_val_sep2) + " after " + gbnf_format_literal(form.key_val_sep));
                }
            }
            auto val_start = builder.pos();

            // Test if arg_val is a partial JSON
            std::optional<common_json> value_json = std::nullopt;
            if (!form.raw_argval || !*form.raw_argval) {
                try { value_json = builder.try_consume_json(); }
                catch (const std::runtime_error&) { builder.move_to(val_start); }
                // TODO: Delete this when json_partial adds top-level support for null/true/false
                if (builder.pos() == val_start) {
                    const static std::regex number_regex(R"([0-9-][0-9]*(\.\d*)?([eE][+-]?\d*)?)");
                    builder.consume_spaces();
                    std::string_view sv = utf8_truncate_safe_view(builder.input());
                    sv.remove_prefix(builder.pos());
                    std::string rest = "a";
                    if (sv.size() < 6) rest = sv;
                    if (string_starts_with("null", rest) || string_starts_with("true", rest) || string_starts_with("false", rest) || std::regex_match(sv.begin(), sv.end(), number_regex)) {
                        // Placeholder common_json; only its presence matters below.
                        value_json = {123, {"123", "123"}};
                        builder.consume_rest();
                    } else {
                        builder.move_to(val_start);
                    }
                }
            }

            // If it is a JSON and followed by </arg_value>, parse as json
            // cannot support streaming because it may be a plain text starting with JSON
            if (value_json) {
                auto json_end = builder.pos();
                builder.consume_spaces();
                if (builder.pos() == builder.input().size()) {
                    // Input ends right after the JSON candidate: stream a partial
                    // result but keep waiting — the value may still continue.
                    if (form.raw_argval && !*form.raw_argval && (value_json->json.is_string() || value_json->json.is_object() || value_json->json.is_array())) {
                        arguments[key] = value_json->json;
                        auto json_str = arguments.dump();
                        if (!value_json->healing_marker.json_dump_marker.empty()) {
                            GGML_ASSERT(std::string::npos != json_str.rfind(value_json->healing_marker.json_dump_marker));
                            json_str.resize(json_str.rfind(value_json->healing_marker.json_dump_marker));
                        } else {
                            GGML_ASSERT(json_str.back() == '}');
                            json_str.resize(json_str.size() - 1);
                        }
                        builder.add_tool_call(function_name, "", json_str);
                    } else {
                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
                    }
                    LOG_DBG("Possible JSON arg_value: %s\n", value_json->json.dump().c_str());
                    throw common_chat_msg_partial_exception("JSON arg_value detected. Waiting for more tokens for validations.");
                }
                builder.move_to(json_end);
                auto [val_end_size, tc] = try_find_val_end();
                // Only accept as JSON when a terminator directly follows a
                // complete (non-healed) JSON value; otherwise fall back to text.
                if (tc && all_space(tc->prelude) && value_json->healing_marker.marker.empty()) {
                    if (tc->groups[0].end - tc->groups[0].begin != val_end_size) {
                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = needle;});
                        LOG_DBG("Possible terminated JSON arg_value: %s\n", value_json->json.dump().c_str());
                        throw common_chat_msg_partial_exception("Partial literal: " + gbnf_format_literal(form.val_end) + (form.last_val_end ? gbnf_format_literal(*form.last_val_end) : ""));
                    } else arguments[key] = value_json->json;
                } else builder.move_to(val_start);
            }

            // If not, parse as plain text
            if (val_start == builder.pos()) {
                if (auto [val_end_size, value_plain] = try_find_val_end(); value_plain) {
                    auto &value_str = value_plain->prelude;
                    if (form.trim_raw_argval) value_str = string_strip(value_str);
                    if (value_plain->groups[0].end - value_plain->groups[0].begin != val_end_size) {
                        gen_partial_args([&](auto &, auto &needle) {arguments[key] = value_str + needle;});
                        throw common_chat_msg_partial_exception(
                            "Expected " + gbnf_format_literal(form.val_end) +
                            " after " + gbnf_format_literal(form.key_val_sep) +
                            (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
                        );
                    }
                    arguments[key] = value_str;
                } else {
                    if (form.trim_raw_argval) {
                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = string_strip(rest) + needle;});
                    } else {
                        gen_partial_args([&](auto &rest, auto &needle) {arguments[key] = rest + needle;});
                    }
                    throw common_chat_msg_partial_exception(
                        "Expected " + gbnf_format_literal(form.val_end) +
                        " after " + gbnf_format_literal(form.key_val_sep) +
                        (form.key_val_sep2 ? " " + gbnf_format_literal(*form.key_val_sep2) : "")
                    );
                }
            }
        }

        // Consume closing tag
        if (auto [tool_end_size, tc] = try_find_tool_end(); tc) {
            if (!all_space(tc->prelude)) {
                LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
                    gbnf_format_literal(form.tool_end).c_str(),
                    gbnf_format_literal(tc->prelude).c_str()
                );
                return return_error(builder, start_pos, recovery);
            }
            if (tc->groups[0].end - tc->groups[0].begin == tool_end_size) {
                // Add the parsed tool call
                if (!builder.add_tool_call(function_name, "", arguments.dump())) {
                    throw common_chat_msg_partial_exception("Failed to add XML-Style tool call");
                }
                recovery = false;
                continue;
            }
        }

        // No (complete) closing tag: emit the arguments so far (dump minus the
        // closing brace) as a partial tool call and wait for more tokens.
        auto tool_call_arg = arguments.dump();
        if (tool_call_arg.size() != 0 && tool_call_arg[tool_call_arg.size() - 1] == '}') {
            tool_call_arg.resize(tool_call_arg.size() - 1);
        }
        builder.add_tool_call(function_name, "", tool_call_arg);
        throw common_chat_msg_partial_exception("Expected " + gbnf_format_literal(form.tool_end) + " after " + gbnf_format_literal(form.val_end));
    }
    // Consume the optional scope closer.
    if (auto tc = builder.try_find_literal(form.scope_end)) {
        if (!all_space(tc->prelude)) {
            LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
                gbnf_format_literal(form.scope_end).c_str(),
                gbnf_format_literal(tc->prelude).c_str()
            );
            return return_error(builder, start_pos, recovery);
        }
    } else {
        if (all_space(form.scope_end)) return true;
        builder.consume_spaces();
        if (builder.pos() == builder.input().size())
            throw common_chat_msg_partial_exception("incomplete tool calls");
        LOG_DBG("Failed to parse XML-Style tool call: Expected %s, but found %s\n",
            gbnf_format_literal(form.scope_end).c_str(),
            gbnf_format_literal(builder.consume_rest()).c_str()
        );
        return return_error(builder, start_pos, recovery);
    }

    return true;
}
|
| 634 |
+
|
| 635 |
+
/**
|
| 636 |
+
* Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
|
| 637 |
+
* May cause std::runtime_error if there is invalid syntax because partial valid tool call is already sent out to client.
|
| 638 |
+
* form.scope_start, form.tool_sep and form.scope_end can be empty.
|
| 639 |
+
*/
|
| 640 |
+
bool common_chat_msg_parser::try_consume_xml_tool_calls(const struct xml_tool_call_format & form) {
|
| 641 |
+
auto pos = pos_;
|
| 642 |
+
auto tsize = result_.tool_calls.size();
|
| 643 |
+
try { return parse_xml_tool_calls(*this, form); }
|
| 644 |
+
catch (const xml_toolcall_syntax_exception&) {}
|
| 645 |
+
move_to(pos);
|
| 646 |
+
result_.tool_calls.resize(tsize);
|
| 647 |
+
return false;
|
| 648 |
+
}
|
| 649 |
+
|
| 650 |
+
/**
 * Parse a message that may contain reasoning ("think") blocks and XML-style
 * tool calls, appending content / reasoning / tool calls to `builder`.
 * TODO: Note that form.allow_toolcall_in_think is not tested yet. If anyone confirms it works, this comment can be removed.
 */
inline void parse_msg_with_xml_tool_calls(common_chat_msg_parser & builder, const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>") {
    // Remove trailing whitespace in place.
    constexpr auto rstrip = [](std::string &s) {
        s.resize(std::distance(s.begin(), std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base()));
    };
    // Erase substring from l to r (inclusive), along with additional spaces nearby.
    // NOTE: `--l < str.size()` relies on size_t wrap-around to stop at index 0
    // (the commented-out `l > -1` shows the intended signed condition).
    constexpr auto erase_spaces = [](auto &str, size_t l, size_t r) {
        while (/* l > -1 && */ --l < str.size() && std::isspace(static_cast<unsigned char>(str[l])));
        ++l;
        while (++r < str.size() && std::isspace(static_cast<unsigned char>(str[r])));
        // Replace the erased region with at most two newlines (paragraph break),
        // unless the region starts at the very beginning of the string.
        if (l < r) str[l] = '\n';
        if (l + 1 < r) str[l + 1] = '\n';
        if (l != 0) l += 2;
        str.erase(l, r - l);
        return l;
    };
    // Trim from `content` the longest suffix that is a prefix of any pattern in
    // `list` (used to avoid emitting a partially-streamed keyword as content).
    constexpr auto trim_suffix = [](std::string &content, std::initializer_list<std::string_view> list) {
        auto best_match = content.size();
        for (auto pattern: list) {
            if (pattern.size() == 0) continue;
            for (auto match_idx = content.size() - std::min(pattern.size(), content.size()); content.size() > match_idx; match_idx++) {
                auto match_len = content.size() - match_idx;
                if (content.compare(match_idx, match_len, pattern.data(), match_len) == 0 && best_match > match_idx) {
                    best_match = match_idx;
                }
            }
        }
        if (content.size() > best_match) {
            content.erase(best_match);
        }
    };
    // Trim any suffix that could be the beginning of one of the format's markers.
    const auto trim_potential_partial_word = [&start_think, &end_think, &form, trim_suffix](std::string &content) {
        return trim_suffix(content, {
            start_think, end_think, form.scope_start, form.tool_start, form.tool_sep, form.key_start,
            form.key_val_sep, form.key_val_sep2 ? form.key_val_sep2->c_str() : "",
            form.val_end, form.last_val_end ? form.last_val_end->c_str() : "",
            form.tool_end, form.last_tool_end ? form.last_tool_end->c_str() : "",
            form.scope_end
        });
    };


    // Trim leading spaces without affecting keyword matching
    static const common_regex spaces_regex("\\s*");
    {
        auto tc = builder.consume_regex(spaces_regex);
        auto spaces = builder.str(tc.groups[0]);
        auto s1 = spaces.size();
        trim_potential_partial_word(spaces);
        auto s2 = spaces.size();
        // Step back over the part of the whitespace that may belong to a marker.
        builder.move_to(builder.pos() - (s1 - s2));
    }

    // Parse content
    bool reasoning_unclosed = builder.syntax().thinking_forced_open;
    std::string unclosed_reasoning_content("");
    for (;;) {
        // Find the next tool-call start marker (scope_start then tool_start,
        // possibly separated by whitespace).
        auto tc = try_find_2_literal_splited_by_spaces(builder, form.scope_start, form.tool_start);
        std::string content;
        std::string tool_call_start;

        if (tc) {
            content = std::move(tc->prelude);
            tool_call_start = builder.str(tc->groups[0]);
            LOG_DBG("Matched tool start: %s\n", gbnf_format_literal(tool_call_start).c_str());
        } else {
            content = builder.consume_rest();
            utf8_truncate_safe_resize(content);
        }

        // Handle unclosed think block
        if (reasoning_unclosed) {
            if (auto pos = content.find(end_think); pos == std::string::npos && builder.pos() != builder.input().size()) {
                // Still inside the think block: accumulate and keep scanning.
                unclosed_reasoning_content += content;
                if (!(form.allow_toolcall_in_think && tc)) {
                    unclosed_reasoning_content += tool_call_start;
                    continue;
                }
            } else {
                reasoning_unclosed = false;
                std::string reasoning_content;
                if (pos == std::string::npos) {
                    reasoning_content = std::move(content);
                } else {
                    reasoning_content = content.substr(0, pos);
                    content.erase(0, pos + end_think.size());
                }
                if (builder.pos() == builder.input().size() && all_space(content)) {
                    // End of (partial) input: strip possibly-incomplete markers.
                    rstrip(reasoning_content);
                    trim_potential_partial_word(reasoning_content);
                    rstrip(reasoning_content);
                    if (reasoning_content.empty()) {
                        rstrip(unclosed_reasoning_content);
                        trim_potential_partial_word(unclosed_reasoning_content);
                        rstrip(unclosed_reasoning_content);
                        if (unclosed_reasoning_content.empty()) continue;
                    }
                }
                if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
                    // Re-emit the reasoning inline, wrapped in think tags.
                    builder.add_content(start_think);
                    builder.add_content(unclosed_reasoning_content);
                    builder.add_content(reasoning_content);
                    if (builder.pos() != builder.input().size() || !all_space(content))
                        builder.add_content(end_think);
                } else {
                    builder.add_reasoning_content(unclosed_reasoning_content);
                    builder.add_reasoning_content(reasoning_content);
                }
                unclosed_reasoning_content.clear();
            }
        }

        // Handle multiple think block
        bool toolcall_in_think = false;
        for (auto think_start = content.find(start_think); think_start != std::string::npos; think_start = content.find(start_think, think_start)) {
            if (auto think_end = content.find(end_think, think_start + start_think.size()); think_end != std::string::npos) {
                if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
                    auto reasoning_content = content.substr(think_start + start_think.size(), think_end - think_start - start_think.size());
                    builder.add_reasoning_content(reasoning_content);
                    think_start = erase_spaces(content, think_start, think_end + end_think.size() - 1);
                } else {
                    think_start = think_end + end_think.size() - 1;
                }
            } else {
                // The matched tool-call start lies inside an unterminated think block.
                if (form.allow_toolcall_in_think) {
                    unclosed_reasoning_content = content.substr(think_start + start_think.size());
                } else {
                    unclosed_reasoning_content = content.substr(think_start + start_think.size()) + tool_call_start;
                }
                reasoning_unclosed = true;
                content.resize(think_start);
                toolcall_in_think = true;
            }
        }

        if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
            rstrip(content);
            // Handle unclosed </think> token from content: delete all </think> token
            if (auto pos = content.rfind(end_think); pos != std::string::npos) {
                while (pos != std::string::npos) {
                    pos = erase_spaces(content, pos, pos + end_think.size() - 1);
                    pos = content.rfind(end_think, pos);
                }
            }
            // Strip if needed
            if (content.size() > 0 && std::isspace(static_cast<unsigned char>(content[0]))) {
                content = string_strip(content);
            }
        }

        // remove potential partial suffix
        if (builder.pos() == builder.input().size() && builder.is_partial()) {
            if (unclosed_reasoning_content.empty()) {
                rstrip(content);
                trim_potential_partial_word(content);
                rstrip(content);
            } else {
                rstrip(unclosed_reasoning_content);
                trim_potential_partial_word(unclosed_reasoning_content);
                rstrip(unclosed_reasoning_content);
            }
        }

        // consume unclosed_reasoning_content if allow_toolcall_in_think is set
        if (form.allow_toolcall_in_think && !unclosed_reasoning_content.empty()) {
            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content) {
                builder.add_reasoning_content(unclosed_reasoning_content);
            } else {
                if (content.empty()) {
                    content = start_think + unclosed_reasoning_content;
                } else {
                    content += "\n\n" + start_think;
                    content += unclosed_reasoning_content;
                }
            }
            unclosed_reasoning_content.clear();
        }

        // Add content
        if (!content.empty()) {
            // If there are multiple content blocks, separate them with a blank line.
            if (builder.syntax().reasoning_format != COMMON_REASONING_FORMAT_NONE && !builder.syntax().reasoning_in_content && builder.result().content.size() != 0) {
                builder.add_content("\n\n");
            }
            builder.add_content(content);
        }

        // The tool-call start was inside a think block and tool calls are not
        // allowed there: skip this tool call.
        if (toolcall_in_think && !form.allow_toolcall_in_think) {
            continue;
        }

        // There is no tool call and all content is parsed
        if (!tc) {
            GGML_ASSERT(builder.pos() == builder.input().size());
            GGML_ASSERT(unclosed_reasoning_content.empty());
            if (!form.allow_toolcall_in_think) GGML_ASSERT(!reasoning_unclosed);
            break;
        }

        builder.move_to(tc->groups[0].begin);
        if (builder.try_consume_xml_tool_calls(form)) {
            auto end_of_tool = builder.pos();
            builder.consume_spaces();
            if (builder.pos() != builder.input().size()) {
                // More input follows the tool call: keep parsing after it.
                builder.move_to(end_of_tool);
                if (!builder.result().content.empty()) {
                    builder.add_content("\n\n");
                }
            }
        } else {
            // Not actually a tool call: emit one character as content and rescan.
            static const common_regex next_char_regex(".");
            auto c = builder.str(builder.consume_regex(next_char_regex).groups[0]);
            rstrip(c);
            builder.add_content(c);
        }
    }
}
|
| 873 |
+
|
| 874 |
+
/**
 * Parse content that uses reasoning and XML-style tool calls.
 * Thin public wrapper around parse_msg_with_xml_tool_calls().
 */
void common_chat_msg_parser::consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think, const std::string & end_think) {
    parse_msg_with_xml_tool_calls(*this, form, start_think, end_think);
}
|
llama.cpp/common/chat-parser-xml-toolcall.h
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "chat.h"
|
| 4 |
+
|
| 5 |
+
#include <nlohmann/json.hpp>
|
| 6 |
+
|
| 7 |
+
#include <optional>
|
| 8 |
+
#include <string>
|
| 9 |
+
#include <vector>
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
// Describes the concrete markers of an XML-style tool-call dialect.
// Sample config:
// MiniMax-M2 (left): <minimax:tool_call>\n<invoke name="tool-name">\n<parameter name="key">value</parameter>\n...</invoke>\n...</minimax:tool_call>
// GLM 4.5 (right): <tool_call>function_name\n<arg_key>key</arg_key>\n<arg_value>value</arg_value>\n</tool_call>
struct xml_tool_call_format {
    // Markers, illustrated for MiniMax-M2 (left) and GLM 4.5 (right):
    std::string scope_start; // <minimax:tool_call>\n // \n // can be empty
    std::string tool_start; // <invoke name=\" // <tool_call>
    std::string tool_sep; // \">\n // \n // can be empty only for parse_xml_tool_calls
    std::string key_start; // <parameter name=\" // <arg_key>
    std::string key_val_sep; // \"> // </arg_key>\n<arg_value>
    std::string val_end; // </parameter>\n // </arg_value>\n
    std::string tool_end; // </invoke>\n // </tool_call>\n
    std::string scope_end; // </minimax:tool_call> // // can be empty
    // Set this if there can be dynamic spaces inside key_val_sep.
    // e.g. key_val_sep=</arg_key> key_val_sep2=<arg_value> for GLM4.5
    std::optional<std::string> key_val_sep2 = std::nullopt;
    // Set true if argval should only be raw string. e.g. Hello "world" hi
    // Set false if argval should only be json string. e.g. "Hello \"world\" hi"
    // Defaults to std::nullopt, both will be allowed.
    std::optional<bool> raw_argval = std::nullopt;
    // Alternative closing markers for the last value / last tool call;
    // presumably std::nullopt means val_end / tool_end apply — TODO confirm.
    std::optional<std::string> last_val_end = std::nullopt;
    std::optional<std::string> last_tool_end = std::nullopt;
    // Whether surrounding whitespace is trimmed from raw argument values.
    bool trim_raw_argval = false;
    // Whether a tool call may legally appear inside a <think> block.
    bool allow_toolcall_in_think = false;
};

// make a GBNF that accepts any string except those containing any of the forbidden strings.
std::string make_gbnf_excluding(std::vector<std::string> forbids);

/**
 * Build grammar for xml-style tool call
 * form.scope_start and form.scope_end can be empty.
 * Requires data.format for model-specific hacks.
 */
void build_grammar_xml_tool_call(common_chat_params & data, const nlohmann::ordered_json & tools, const struct xml_tool_call_format & form);
|
llama.cpp/common/chat-parser.cpp
ADDED
|
@@ -0,0 +1,1649 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "chat-parser.h"
|
| 2 |
+
#include "chat-peg-parser.h"
|
| 3 |
+
#include "common.h"
|
| 4 |
+
#include "log.h"
|
| 5 |
+
#include "peg-parser.h"
|
| 6 |
+
#include "regex-partial.h"
|
| 7 |
+
|
| 8 |
+
#include <algorithm>
|
| 9 |
+
#include <cctype>
|
| 10 |
+
#include <optional>
|
| 11 |
+
#include <stdexcept>
|
| 12 |
+
#include <string>
|
| 13 |
+
#include <string_view>
|
| 14 |
+
#include <vector>
|
| 15 |
+
|
| 16 |
+
using json = nlohmann::ordered_json;
|
| 17 |
+
|
| 18 |
+
// Look for `prefix` in the remaining input; when found, parse the JSON
// tool-call array that follows it (optionally stepping back `rstrip_prefix`
// characters first). When absent, the rest of the input is plain content.
static void parse_prefixed_json_tool_call_array(common_chat_msg_parser & builder,
                                                const common_regex & prefix,
                                                size_t rstrip_prefix = 0) {
    static const std::vector<std::vector<std::string>> args_paths = { { "arguments" } };
    if (!builder.try_find_regex(prefix)) {
        builder.add_content(builder.consume_rest());
        return;
    }
    builder.move_back(rstrip_prefix);
    auto tool_calls = builder.consume_json_with_dumped_args(args_paths);
    if (!builder.add_tool_calls(tool_calls.value) || tool_calls.is_partial) {
        throw common_chat_msg_partial_exception("incomplete tool call array");
    }
}
|
| 32 |
+
|
| 33 |
+
// Wrap a raw code string into a JSON arguments object: {"code": <code>}.
// During a partial parse the healing marker is appended before serializing,
// and the dumped string is then truncated at the marker so the returned
// arguments remain a valid prefix of the final JSON.
static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
    const bool partial = builder.is_partial();

    std::string payload = code;
    if (partial) {
        payload += builder.healing_marker();
    }

    std::string arguments = (json{
        { "code", payload }
    }).dump();

    if (partial) {
        const auto marker_idx = arguments.find(builder.healing_marker());
        if (marker_idx != std::string::npos) {
            arguments.resize(marker_idx);
        }
    }
    return arguments;
}
|
| 52 |
+
|
| 53 |
+
/**
 * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between.
 * Aggregates the prefix, suffix and in-between text into the content.
 *
 * @param block_open                optional regex opening the tool-call block; if absent, calls are parsed from the current position
 * @param function_regex_start_only optional regex matched only for the first call, anchored at the current position
 * @param function_regex            optional regex locating each (subsequent) function header
 * @param close_regex               regex consumed after each JSON arguments object
 * @param block_close               optional regex closing the tool-call block
 * @param allow_raw_python          when true, a "python" call may carry raw code instead of JSON
 * @param get_function_name         optional extractor for the function name; returning "" skips the match as content
 */
static void parse_json_tool_calls(
    common_chat_msg_parser & builder,
    const std::optional<common_regex> & block_open,
    const std::optional<common_regex> & function_regex_start_only,
    const std::optional<common_regex> & function_regex,
    const common_regex & close_regex,
    const std::optional<common_regex> & block_close,
    bool allow_raw_python = false,
    const std::function<std::string(const common_chat_msg_parser::find_regex_result & fres)> & get_function_name =
        nullptr) {
    auto parse_tool_calls = [&]() {
        size_t from = std::string::npos;
        auto first = true;
        while (true) {
            auto start_pos = builder.pos();
            // Pick the matcher: the start-anchored regex for the first call,
            // otherwise search with function_regex from `from`.
            auto res = function_regex_start_only && first ? builder.try_consume_regex(*function_regex_start_only) :
                       function_regex ? builder.try_find_regex(*function_regex, from) :
                       std::nullopt;

            if (res) {
                std::string name;
                if (get_function_name) {
                    name = get_function_name(*res);
                } else {
                    GGML_ASSERT(res->groups.size() == 2);
                    name = builder.str(res->groups[1]);
                }
                first = false;
                if (name.empty()) {
                    // get_function_name signalled us that we should skip this match and treat it as content.
                    from = res->groups[0].begin + 1;
                    continue;
                }
                from = std::string::npos;

                auto maybe_raw_python = name == "python" && allow_raw_python;
                if (builder.input()[builder.pos()] == '{' || !maybe_raw_python) {
                    // Normal case: JSON arguments object followed by the close marker.
                    if (auto arguments = builder.try_consume_json_with_dumped_args({ {} })) {
                        if (!builder.add_tool_call(name, "", arguments->value) || arguments->is_partial) {
                            throw common_chat_msg_partial_exception("incomplete tool call");
                        }
                        builder.consume_regex(close_regex);
                    }
                    continue;
                }
                if (maybe_raw_python) {
                    // "python" tool with raw code: wrap the rest of the input as {"code": ...}.
                    auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
                    if (!builder.add_tool_call(name, "", arguments)) {
                        throw common_chat_msg_partial_exception("incomplete tool call");
                    }
                    return;
                }
                throw common_chat_msg_partial_exception("incomplete tool call");
            } else {
                builder.move_to(start_pos);
            }
            break;
        }
        if (block_close) {
            builder.consume_regex(*block_close);
        }
        builder.consume_spaces();
        builder.add_content(builder.consume_rest());
    };
    if (block_open) {
        if (auto res = builder.try_find_regex(*block_open)) {
            parse_tool_calls();
        } else {
            // No opening marker: everything is plain content.
            builder.add_content(builder.consume_rest());
        }
    } else {
        parse_tool_calls();
    }
}
|
| 131 |
+
|
| 132 |
+
// Construct a parser over `input`. `is_partial` marks a still-streaming
// message; `syntax` selects the dialect-specific parsing rules.
common_chat_msg_parser::common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax)
    : input_(input), is_partial_(is_partial), syntax_(syntax)
{
    result_.role = "assistant";

    // Pick a "healing marker": a random digit string guaranteed not to occur
    // in the input, used to patch up truncated JSON during partial parsing.
    while (true) {
        std::string id = std::to_string(std::rand());
        if (input.find(id) == std::string::npos) {
            healing_marker_ = id;
            break;
        }
    }
}
|
| 145 |
+
|
| 146 |
+
std::string common_chat_msg_parser::str(const common_string_range & rng) const {
|
| 147 |
+
GGML_ASSERT(rng.begin <= rng.end);
|
| 148 |
+
return input_.substr(rng.begin, rng.end - rng.begin);
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
// Append `content` to the accumulated message content.
void common_chat_msg_parser::add_content(const std::string &content) {
    result_.content += content;
}
|
| 154 |
+
|
| 155 |
+
// Append `reasoning_content` to the accumulated reasoning text.
void common_chat_msg_parser::add_reasoning_content(const std::string &reasoning_content) {
    result_.reasoning_content += reasoning_content;
}
|
| 158 |
+
|
| 159 |
+
bool common_chat_msg_parser::add_tool_call(const std::string & name, const std::string & id, const std::string & arguments) {
|
| 160 |
+
if (name.empty()) {
|
| 161 |
+
return false;
|
| 162 |
+
}
|
| 163 |
+
|
| 164 |
+
common_chat_tool_call tool_call;
|
| 165 |
+
tool_call.name = name;
|
| 166 |
+
tool_call.arguments = arguments;
|
| 167 |
+
tool_call.id = id;
|
| 168 |
+
|
| 169 |
+
// LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
|
| 170 |
+
result_.tool_calls.emplace_back(tool_call);
|
| 171 |
+
|
| 172 |
+
return true;
|
| 173 |
+
}
|
| 174 |
+
bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
|
| 175 |
+
std::string name = tool_call.contains("name") ? tool_call.at("name") : "";
|
| 176 |
+
std::string id = tool_call.contains("id") ? tool_call.at("id") : "";
|
| 177 |
+
std::string arguments = "";
|
| 178 |
+
if (tool_call.contains("arguments")) {
|
| 179 |
+
if (tool_call.at("arguments").is_object()) {
|
| 180 |
+
arguments = tool_call.at("arguments").dump();
|
| 181 |
+
} else {
|
| 182 |
+
arguments = tool_call.at("arguments");
|
| 183 |
+
}
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
return add_tool_call(name, id, arguments);
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
bool common_chat_msg_parser::add_tool_calls(const json & arr) {
|
| 190 |
+
for (const auto & item : arr) {
|
| 191 |
+
if (!add_tool_call(item)) {
|
| 192 |
+
return false;
|
| 193 |
+
}
|
| 194 |
+
}
|
| 195 |
+
return true;
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
bool common_chat_msg_parser::add_tool_call_short_form(const json & tool_call) {
|
| 199 |
+
if (!tool_call.is_object() || tool_call.size() != 1) {
|
| 200 |
+
return false;
|
| 201 |
+
}
|
| 202 |
+
|
| 203 |
+
// Get the tool name (the single key in the object)
|
| 204 |
+
auto it = tool_call.begin();
|
| 205 |
+
std::string name = it.key();
|
| 206 |
+
|
| 207 |
+
if (name.empty()) {
|
| 208 |
+
return false;
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
// Get the arguments (the nested object)
|
| 212 |
+
const json & args_json = it.value();
|
| 213 |
+
std::string arguments = "";
|
| 214 |
+
|
| 215 |
+
if (args_json.is_object()) {
|
| 216 |
+
arguments = args_json.dump();
|
| 217 |
+
} else if (args_json.is_string()) {
|
| 218 |
+
arguments = args_json;
|
| 219 |
+
} else if (!args_json.is_null()) {
|
| 220 |
+
// For other types, convert to string representation
|
| 221 |
+
arguments = args_json.dump();
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
return add_tool_call(name, "", arguments);
|
| 225 |
+
}
|
| 226 |
+
// Validate that a complete (non-partial) parse consumed the whole input;
// leftover bytes indicate malformed output from the model.
void common_chat_msg_parser::finish() {
    if (!is_partial_ && pos_ != input_.size()) {
        throw std::runtime_error("Unexpected content at end of input");// + input_.substr(pos_));
    }
}
|
| 231 |
+
|
| 232 |
+
bool common_chat_msg_parser::consume_spaces() {
|
| 233 |
+
const auto length = input_.size();
|
| 234 |
+
auto consumed = false;
|
| 235 |
+
while (pos_ < length && std::isspace(input_[pos_])) {
|
| 236 |
+
++pos_;
|
| 237 |
+
consumed = true;
|
| 238 |
+
}
|
| 239 |
+
return consumed;
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
bool common_chat_msg_parser::try_consume_literal(const std::string & literal) {
|
| 243 |
+
auto pos = pos_;
|
| 244 |
+
for (auto i = 0u; i < literal.size(); ++i) {
|
| 245 |
+
if (pos >= input_.size()) {
|
| 246 |
+
return false;
|
| 247 |
+
}
|
| 248 |
+
if (input_[pos] != literal[i]) {
|
| 249 |
+
return false;
|
| 250 |
+
}
|
| 251 |
+
++pos;
|
| 252 |
+
}
|
| 253 |
+
pos_ = pos;
|
| 254 |
+
return true;
|
| 255 |
+
}
|
| 256 |
+
|
| 257 |
+
// Searches for `literal` at or after pos_. On a full match, returns the text
// before it (prelude) plus the match range and consumes up to the end of the
// literal. When parsing partial input, a literal truncated at the end of the
// input also counts as a match (consuming to end of input).
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_literal(const std::string & literal) {
    auto idx = input_.find(literal, pos_);
    if (idx != std::string::npos) {
        find_regex_result res;
        res.prelude = input_.substr(pos_, idx - pos_);
        auto end = idx + literal.size();
        res.groups.emplace_back(common_string_range{idx, end});
        move_to(end);
        return res;
    }
    if (is_partial_) {
        // A prefix of the literal at the very end of the input may still grow
        // into a full match on the next chunk.
        idx = string_find_partial_stop(input_, literal);
        if (idx != std::string::npos && idx >= pos_) {
            find_regex_result res;
            res.prelude = input_.substr(pos_, idx - pos_);
            auto end = input_.size();
            res.groups.emplace_back(common_string_range{idx, end});
            move_to(end);
            return res;
        }
    }
    return std::nullopt;
}
|
| 280 |
+
|
| 281 |
+
void common_chat_msg_parser::consume_literal(const std::string & literal) {
|
| 282 |
+
if (!try_consume_literal(literal)) {
|
| 283 |
+
throw common_chat_msg_partial_exception(literal);
|
| 284 |
+
}
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
// Extracts reasoning ("thinking") sections delimited by start_think/end_think
// (e.g. "<think>"/"</think>") starting at pos_. Depending on syntax_, the
// reasoning is routed to reasoning_content or kept inline in content.
// Returns true if a reasoning section was consumed (pos_ advanced), false if
// the input does not start with one (state fully restored).
bool common_chat_msg_parser::try_parse_reasoning(const std::string & start_think, const std::string & end_think) {
    std::string pending_reasoning_prefix;

    if (syntax_.reasoning_format == COMMON_REASONING_FORMAT_NONE) {
        return false;
    }

    auto set_reasoning_prefix = [&](size_t prefix_pos) {
        if (!syntax_.thinking_forced_open || syntax_.reasoning_in_content) {
            return;
        }
        if (prefix_pos + start_think.size() > input_.size()) {
            pending_reasoning_prefix.clear();
            return;
        }
        // Capture the exact literal that opened the reasoning section so we can
        // surface it back to callers. This ensures formats that force the
        // reasoning tag open (e.g. DeepSeek R1) retain their original prefix
        // instead of dropping it during parsing.
        pending_reasoning_prefix = input_.substr(prefix_pos, start_think.size());
    };

    // Emits a chunk of reasoning text, either into content (wrapped in think
    // tags) or into reasoning_content, stripping surrounding whitespace.
    auto handle_reasoning = [&](const std::string & reasoning, bool closed) {
        auto stripped_reasoning = string_strip(reasoning);
        if (stripped_reasoning.empty()) {
            return;
        }
        if (syntax_.reasoning_in_content) {
            add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "<think>" : start_think);
            add_content(stripped_reasoning);
            if (closed) {
                add_content(syntax_.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK ? "</think>" : end_think);
            }
        } else {
            if (!pending_reasoning_prefix.empty()) {
                add_reasoning_content(pending_reasoning_prefix);
                pending_reasoning_prefix.clear();
            }
            add_reasoning_content(stripped_reasoning);
        }
    };

    // Snapshot parser state so we can roll back if no reasoning is found.
    const size_t saved_pos = pos_;
    const size_t saved_content_size = result_.content.size();
    const size_t saved_reasoning_size = result_.reasoning_content.size();

    auto restore_state = [&]() {
        move_to(saved_pos);
        result_.content.resize(saved_content_size);
        result_.reasoning_content.resize(saved_reasoning_size);
    };

    // Allow leading whitespace to be preserved as content when reasoning is present at the start
    size_t cursor = pos_;
    size_t whitespace_end = cursor;
    while (whitespace_end < input_.size() && std::isspace(static_cast<unsigned char>(input_[whitespace_end]))) {
        ++whitespace_end;
    }

    if (whitespace_end >= input_.size()) {
        // Only whitespace remains. If the template forced the think tag open,
        // treat the remainder as (possibly still-open) reasoning.
        restore_state();
        if (syntax_.thinking_forced_open) {
            auto rest = input_.substr(saved_pos);
            if (!rest.empty()) {
                handle_reasoning(rest, /* closed */ !is_partial());
            }
            move_to(input_.size());
            return true;
        }
        return false;
    }

    cursor = whitespace_end;
    const size_t remaining = input_.size() - cursor;
    const size_t start_prefix = std::min(start_think.size(), remaining);
    const bool has_start_tag = input_.compare(cursor, start_prefix, start_think, 0, start_prefix) == 0;

    // A truncated start tag at the end of the input: consume everything and
    // wait for more data.
    if (has_start_tag && start_prefix < start_think.size()) {
        move_to(input_.size());
        return true;
    }

    if (has_start_tag) {
        if (whitespace_end > pos_) {
            add_content(input_.substr(pos_, whitespace_end - pos_));
        }
        set_reasoning_prefix(cursor);
        cursor += start_think.size();
    } else if (syntax_.thinking_forced_open) {
        cursor = whitespace_end;
    } else {
        restore_state();
        return false;
    }
    // Consume one or more reasoning sections (some formats emit several).
    while (true) {
        if (cursor >= input_.size()) {
            move_to(input_.size());
            return true;
        }

        size_t end_pos = input_.find(end_think, cursor);
        if (end_pos == std::string::npos) {
            // No closing tag: emit everything up to a possible partial closing
            // tag at the end of the input, then stop.
            std::string_view remaining_view(input_.data() + cursor, input_.size() - cursor);
            size_t partial_off = string_find_partial_stop(remaining_view, end_think);
            size_t reasoning_end = partial_off == std::string::npos ? input_.size() : cursor + partial_off;
            if (reasoning_end > cursor) {
                handle_reasoning(input_.substr(cursor, reasoning_end - cursor), /* closed */ partial_off == std::string::npos && !is_partial());
            }
            move_to(input_.size());
            return true;
        }

        if (end_pos > cursor) {
            handle_reasoning(input_.substr(cursor, end_pos - cursor), /* closed */ true);
        } else {
            handle_reasoning("", /* closed */ true);
        }

        cursor = end_pos + end_think.size();

        // Skip whitespace between a closing tag and a potential re-opening tag.
        while (cursor < input_.size() && std::isspace(static_cast<unsigned char>(input_[cursor]))) {
            ++cursor;
        }

        const size_t next_remaining = input_.size() - cursor;
        if (next_remaining == 0) {
            move_to(cursor);
            return true;
        }

        const size_t next_prefix = std::min(start_think.size(), next_remaining);
        if (input_.compare(cursor, next_prefix, start_think, 0, next_prefix) == 0) {
            if (next_prefix < start_think.size()) {
                // Truncated re-opening tag: consume the rest and wait.
                move_to(input_.size());
                return true;
            }
            set_reasoning_prefix(cursor);
            cursor += start_think.size();
            continue;
        }

        move_to(cursor);
        return true;
    }
}
|
| 432 |
+
|
| 433 |
+
// Returns everything from the current position to the end of the input and
// marks it consumed.
std::string common_chat_msg_parser::consume_rest() {
    const auto tail_start = pos_;
    pos_ = input_.size();
    return input_.substr(tail_start);
}
|
| 438 |
+
|
| 439 |
+
// Tries to find the regex, consumes it (pos right after it) and gives the prelude (right before it) and the groups to the callback.
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_find_regex(const common_regex & regex, size_t from, bool add_prelude_to_content) {
    auto m = regex.search(input_, from == std::string::npos ? pos_ : from);
    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
        return std::nullopt;
    }
    // Everything between pos_ and the match start is the prelude; pos_ is
    // advanced past the match even for partial matches (see below).
    auto prelude = input_.substr(pos_, m.groups[0].begin - pos_);
    pos_ = m.groups[0].end;

    if (add_prelude_to_content) {
        add_content(prelude);
    }
    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
        // Partial match at the end of a partial input: signal "need more data".
        if (is_partial()) {
            throw common_chat_msg_partial_exception(regex.str());
        }
        return std::nullopt;
    }
    return find_regex_result{prelude, m.groups};
}
|
| 459 |
+
|
| 460 |
+
// Like try_consume_regex, but a non-match is reported as a partial-parse
// exception instead of an empty optional.
common_chat_msg_parser::find_regex_result common_chat_msg_parser::consume_regex(const common_regex & regex) {
    auto result = try_consume_regex(regex);
    if (!result) {
        throw common_chat_msg_partial_exception(regex.str());
    }
    return *result;
}
|
| 466 |
+
|
| 467 |
+
// Matches `regex` anchored at the current position (no prelude allowed).
// Returns the match groups on success; nullopt when the regex does not match
// here; throws when only a partial match is possible on partial input.
std::optional<common_chat_msg_parser::find_regex_result> common_chat_msg_parser::try_consume_regex(const common_regex & regex) {
    auto m = regex.search(input_, pos_);
    if (m.type == COMMON_REGEX_MATCH_TYPE_NONE) {
        return std::nullopt;
    }
    if (m.type == COMMON_REGEX_MATCH_TYPE_PARTIAL) {
        if (is_partial()) {
            throw common_chat_msg_partial_exception(regex.str());
        }
        return std::nullopt;
    }
    if (m.groups[0].begin != pos_) {
        // Didn't match at the current position.
        return std::nullopt;
    }
    pos_ = m.groups[0].end;

    return find_regex_result {
        /* .prelude = */ "",
        m.groups,
    };
}
|
| 489 |
+
|
| 490 |
+
// Attempts to parse JSON starting at pos_. The underlying parser can "heal"
// truncated JSON by inserting a marker; a healed result is only acceptable
// while parsing partial input.
std::optional<common_json> common_chat_msg_parser::try_consume_json() {
    auto it = input_.cbegin() + pos_;
    const auto end = input_.cend();
    common_json result;
    if (!common_json_parse(it, end, healing_marker_, result)) {
        return std::nullopt;
    }
    pos_ = std::distance(input_.cbegin(), it);
    if (result.healing_marker.marker.empty()) {
        // No healing marker, just return the parsed json
        return result;
    }
    // Healed (incomplete) JSON in a supposedly complete input is an error.
    if (!is_partial()) {
        throw common_chat_msg_partial_exception("JSON");
    }
    return result;
}
|
| 507 |
+
|
| 508 |
+
// Like try_consume_json, but failure to parse is reported as a partial-parse
// exception.
common_json common_chat_msg_parser::consume_json() {
    auto parsed = try_consume_json();
    if (!parsed) {
        throw common_chat_msg_partial_exception("JSON");
    }
    return *parsed;
}
|
| 514 |
+
|
| 515 |
+
// Like try_consume_json_with_dumped_args, but failure to parse is reported as
// a partial-parse exception.
common_chat_msg_parser::consume_json_result common_chat_msg_parser::consume_json_with_dumped_args(
    const std::vector<std::vector<std::string>> & args_paths,
    const std::vector<std::vector<std::string>> & content_paths
) {
    auto parsed = try_consume_json_with_dumped_args(args_paths, content_paths);
    if (!parsed) {
        throw common_chat_msg_partial_exception("JSON");
    }
    return *parsed;
}
|
| 524 |
+
|
| 525 |
+
// Parses (possibly partial, healed) JSON and post-processes it:
//  - values at `args_paths` are re-serialized to strings (tool-call arguments
//    are passed around as JSON text);
//  - values at `content_paths` are treated as plain strings;
//  - any healing marker that leaked into keys/values outside those paths is
//    stripped, ditching the affected key/value pairs.
// Returns the cleaned value plus whether the JSON was only partially parsed.
std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parser::try_consume_json_with_dumped_args(
    const std::vector<std::vector<std::string>> & args_paths,
    const std::vector<std::vector<std::string>> & content_paths
) {
    auto partial = try_consume_json();
    if (!partial) {
        return std::nullopt;
    }
    auto is_arguments_path = [&](const std::vector<std::string> & path) {
        return std::find(args_paths.begin(), args_paths.end(), path) != args_paths.end();
    };
    auto is_content_path = [&](const std::vector<std::string> & path) {
        return std::find(content_paths.begin(), content_paths.end(), path) != content_paths.end();
    };

    if (partial->healing_marker.marker.empty()) {
        if (args_paths.empty()) {
            // No arguments to dump, and JSON was parsed fully.
            return consume_json_result {
                partial->json,
                /* .is_partial = */ false,
            };
        }
        if (is_arguments_path({})) {
            // Entire JSON is the arguments and was parsed fully.
            return consume_json_result {
                partial->json.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true),
                /* .is_partial = */ false,
            };
        }
    }

    LOG_DBG("Parsed partial JSON: %s (json_healing_marker: %s)\n", partial->json.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());

    auto found_healing_marker = false;
    std::vector<std::string> path;
    // Recursive walk over the parsed JSON; `path` tracks the current key path.
    std::function<json(const json &)> remove_unsupported_healings_and_dump_args = [&](const json & j) -> json {
        if (is_arguments_path(path)) {
            // Arguments subtree: serialize it, truncating at the healing marker.
            auto arguments = j.dump(/* indent */ -1, /* indent_char */ ' ', /* ensure_ascii */ true);
            if (is_partial() && !partial->healing_marker.marker.empty()) {
                auto idx = arguments.find(partial->healing_marker.json_dump_marker);
                if (idx != std::string::npos) {
                    arguments.resize(idx);
                    found_healing_marker = true;
                }
                if (arguments == "\"") {
                    // This happens because of completing `:"$magic` after `"arguments"`
                    arguments = "";
                }
            }
            return arguments;
        }
        if (is_content_path(path)) {
            if (!j.is_string()) {
                throw std::runtime_error("Content path must be a string");
            }
            std::string str = j;
            auto idx = str.find(partial->healing_marker.marker); // not using json_dump_marker as we're inside a string
            if (idx != std::string::npos) {
                str.resize(idx);
                found_healing_marker = true;
            }
            return str;
        }
        if (j.is_object()) {
            auto obj = json::object();
            for (const auto & p : j.items()) {
                const auto & key = p.key();
                const auto & value = p.value();
                const std::string key_str = key; // NOLINT
                auto idx = key_str.find(healing_marker_);
                if (idx != std::string::npos) {
                    // Healing marker in a key: drop this and all further pairs.
                    found_healing_marker = true;
                    break;
                }
                path.push_back(key_str);
                if (value.is_string()) {
                    const std::string value_str = value;
                    if (value_str.find(healing_marker_) != std::string::npos) {
                        found_healing_marker = true;
                        if (is_content_path(path)) {
                            if (partial->healing_marker.marker == partial->healing_marker.json_dump_marker) {
                                // The healing occurred inside the string: good. Otherwise we just ditch the entire key/value pair.
                                obj[key] = remove_unsupported_healings_and_dump_args(value);
                            }
                        }
                        break;
                    }
                    obj[key] = value;
                } else {
                    obj[key] = remove_unsupported_healings_and_dump_args(value);
                }
                path.pop_back();
            }
            return obj;
        }
        if (j.is_array()) {
            auto arr = json::array();
            for (const auto & value : j) {
                if (value.is_string()) {
                    std::string str = value;
                    auto idx = str.find(healing_marker_);
                    if (idx != std::string::npos) {
                        // Don't heal array values that aren't in the arguments.
                        found_healing_marker = true;
                        break;
                    }
                }
                arr.push_back(remove_unsupported_healings_and_dump_args(value));
            }
            return arr;
        }
        return j;
    };

    auto cleaned = remove_unsupported_healings_and_dump_args(partial->json);
    LOG_DBG("Cleaned up JSON %s to %s (json_healing_marker : '%s')\n", partial->json.dump().c_str(), cleaned.dump().c_str(), partial->healing_marker.json_dump_marker.c_str());
    return consume_json_result {
        cleaned,
        /* .is_partial = */ found_healing_marker,
    };
}
|
| 647 |
+
|
| 648 |
+
// Discards any tool calls accumulated in the result message so far.
void common_chat_msg_parser::clear_tools() {
    result_.tool_calls.clear();
}
|
| 651 |
+
|
| 652 |
+
/**
 * All common_chat_parse_* handlers were moved from chat.cpp to chat-parser.cpp
 * (below) so that parser-only changes recompile a smaller translation unit.
 */
|
| 656 |
+
// Parses the "generic" tool-call format: a single JSON object containing one
// of "tool_calls" (array), "tool_call" (object) or "response" (content).
static void common_chat_parse_generic(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }
    static const std::vector<std::vector<std::string>> content_paths = {
        {"response"},
    };
    static const std::vector<std::vector<std::string>> args_paths = {
        {"tool_call", "arguments"},
        {"tool_calls", "arguments"},
    };
    auto data = builder.consume_json_with_dumped_args(args_paths, content_paths);
    if (data.value.contains("tool_calls")) {
        if (!builder.add_tool_calls(data.value.at("tool_calls")) || data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool calls");
        }
    } else if (data.value.contains("tool_call")) {
        if (!builder.add_tool_call(data.value.at("tool_call")) || data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
    } else if (data.value.contains("response")) {
        const auto & response = data.value.at("response");
        // Non-string responses are pretty-printed as content.
        builder.add_content(response.is_string() ? response.template get<std::string>() : response.dump(2));
        if (data.is_partial) {
            throw common_chat_msg_partial_exception("incomplete response");
        }
    } else {
        throw common_chat_msg_partial_exception("Expected 'tool_call', 'tool_calls' or 'response' in JSON");
    }
}
|
| 687 |
+
|
| 688 |
+
// Mistral Nemo emits tool calls as a JSON array prefixed by "[TOOL_CALLS]".
static void common_chat_parse_mistral_nemo(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, prefix);
        return;
    }
    builder.add_content(builder.consume_rest());
}
|
| 697 |
+
|
| 698 |
+
// Magistral: reasoning wrapped in [THINK]/[/THINK], then optionally a
// "[TOOL_CALLS]"-prefixed JSON array of tool calls.
static void common_chat_parse_magistral(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("[THINK]", "[/THINK]");

    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape("[TOOL_CALLS]"));
        parse_prefixed_json_tool_call_array(builder, prefix);
        return;
    }
    builder.add_content(builder.consume_rest());
}
|
| 709 |
+
|
| 710 |
+
// Command R7B: thinking in <|START_THINKING|>..<|END_THINKING|>, tool calls as
// a JSON array inside <|START_ACTION|>..<|END_ACTION|>, plain responses inside
// <|START_RESPONSE|>..<|END_RESPONSE|>.
static void common_chat_parse_command_r7b(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<|START_THINKING|>", "<|END_THINKING|>");

    static const common_regex start_action_regex("<\\|START_ACTION\\|>");
    static const common_regex end_action_regex("<\\|END_ACTION\\|>");
    static const common_regex start_response_regex("<\\|START_RESPONSE\\|>");
    static const common_regex end_response_regex("<\\|END_RESPONSE\\|>");

    if (auto res = builder.try_find_regex(start_action_regex)) {
        // If we didn't extract thoughts, prelude includes them.
        auto tool_calls = builder.consume_json_with_dumped_args({{"parameters"}});
        for (const auto & tool_call : tool_calls.value) {
            std::string name = tool_call.contains("tool_name") ? tool_call.at("tool_name") : "";
            std::string id = tool_call.contains("tool_call_id") ? tool_call.at("tool_call_id") : "";
            std::string arguments = tool_call.contains("parameters") ? tool_call.at("parameters") : "";
            if (!builder.add_tool_call(name, id, arguments) || tool_calls.is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
        }
        // NOTE(review): redundant with the per-iteration check above, but kept
        // to also cover an empty tool_calls array that is still partial.
        if (tool_calls.is_partial) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
        builder.consume_regex(end_action_regex);
    } else if (auto res = builder.try_find_regex(start_response_regex)) {
        if (!builder.try_find_regex(end_response_regex)) {
            // Response still open: surface what we have, then signal partial.
            builder.add_content(builder.consume_rest());
            throw common_chat_msg_partial_exception(end_response_regex.str());
        }
    } else {
        builder.add_content(builder.consume_rest());
    }
}
|
| 742 |
+
|
| 743 |
+
// Llama 3.1: JSON tool calls of the form {"name": ..., "parameters": ...};
// with builtin tools enabled, also the Python-ish "<|python_tag|>tool.call(
// arg=value, ...)" form.
static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool with_builtin_tools = false) {
    builder.try_parse_reasoning("<think>", "</think>");

    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex function_regex(
        "\\s*\\{\\s*(?:\"type\"\\s*:\\s*\"function\"\\s*,\\s*)?\"name\"\\s*:\\s*\"([^\"]+)\"\\s*,\\s*\"parameters\"\\s*: ");
    static const common_regex close_regex("\\}\\s*");

    static const common_regex function_name_regex("\\s*(\\w+)\\s*\\.\\s*call\\(");
    static const common_regex arg_name_regex("\\s*(\\w+)\\s*=\\s*");

    if (with_builtin_tools) {
        static const common_regex builtin_call_regex("<\\|python_tag\\|>");
        if (auto res = builder.try_find_regex(builtin_call_regex)) {
            auto fun_res = builder.consume_regex(function_name_regex);
            auto function_name = builder.str(fun_res.groups[1]);

            common_healing_marker healing_marker;
            json args = json::object();
            // Collect comma-separated `name=json_value` arguments until the
            // closing parenthesis.
            while (true) {
                if (auto arg_res = builder.try_consume_regex(arg_name_regex)) {
                    auto arg_name = builder.str(arg_res->groups[1]);
                    auto partial = builder.consume_json();
                    args[arg_name] = partial.json;
                    healing_marker.marker = partial.healing_marker.marker;
                    healing_marker.json_dump_marker = partial.healing_marker.json_dump_marker;
                    builder.consume_spaces();
                    if (!builder.try_consume_literal(",")) {
                        break;
                    }
                } else {
                    break;
                }
            }
            builder.consume_literal(")");
            builder.consume_spaces();

            auto arguments = args.dump();
            if (!builder.add_tool_call(function_name, "", arguments)) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
            return;
        }
    }
    parse_json_tool_calls(
        builder,
        /* block_open= */ std::nullopt,
        /* function_regex_start_only= */ function_regex,
        /* function_regex= */ std::nullopt,
        close_regex,
        std::nullopt);

}
|
| 800 |
+
|
| 801 |
+
// DeepSeek R1: <think> reasoning followed by tool calls delimited by the
// model's <|tool▁...|> special tokens, each carrying a ```json``` body.
// NOTE(review): the '|' characters inside these literals look like ASCII
// pipes here, which would act as regex alternation; upstream uses fullwidth
// '｜' in these tokens — confirm the original bytes were not mangled.
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
    static const common_regex tool_calls_end("<|tool▁calls▁end|>");
    static const common_regex function_regex("(?:<|tool▁call▁begin|>)?function<|tool▁sep|>([^\n]+)\n```json\n");
    static const common_regex close_regex("```[\\s\\r\\n]*<|tool▁call▁end|>");

    parse_json_tool_calls(
        builder,
        /* block_open= */ tool_calls_begin,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        tool_calls_end);
}
|
| 821 |
+
|
| 822 |
+
// DeepSeek V3.1 content/tool-call section: NAME<|tool▁sep|>JSON per call,
// wrapped in the tool-calls begin/end special tokens.
// NOTE(review): as in the R1 parser above, confirm the '|' characters in
// these literals are the intended bytes (upstream uses fullwidth '｜').
static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
    static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");

    static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
    static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
    static const common_regex tool_calls_end("<|tool▁calls▁end|>");

    if (!builder.syntax().parse_tool_calls) {
        LOG_DBG("%s: not parse_tool_calls\n", __func__);
        builder.add_content(builder.consume_rest());
        return;
    }

    LOG_DBG("%s: parse_tool_calls\n", __func__);

    parse_json_tool_calls(
        builder,
        /* block_open= */ tool_calls_begin,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        tool_calls_end);
}
|
| 845 |
+
|
| 846 |
+
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
    // DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
    // First try to parse using the standard reasoning parsing method
    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

    // Probe (without consuming) whether a closing </think> exists downstream.
    auto start_pos = builder.pos();
    auto found_end_think = builder.try_find_literal("</think>");
    builder.move_to(start_pos);

    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
        common_chat_parse_deepseek_v3_1_content(builder);
    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
        // If reasoning was parsed successfully, the remaining content is regular content
        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
        // </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
        common_chat_parse_deepseek_v3_1_content(builder);
    } else {
        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
            LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
            common_chat_parse_deepseek_v3_1_content(builder);
            return;
        }
        // If no reasoning tags found, check if we should treat everything as reasoning
        if (builder.syntax().thinking_forced_open) {
            // If thinking is forced open but no tags found, treat everything as reasoning
            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
            builder.add_reasoning_content(builder.consume_rest());
        } else {
            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
            // <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
            common_chat_parse_deepseek_v3_1_content(builder);
        }
    }
}
|
| 881 |
+
|
| 882 |
+
// MiniMax M2: XML-style tool calls — <minimax:tool_call><invoke name="...">
// <parameter name="...">value</parameter>...</invoke></minimax:tool_call> —
// with reasoning in <think>/</think>.
static void common_chat_parse_minimax_m2(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        f.scope_start = "<minimax:tool_call>";
        f.tool_start  = "<invoke name=\"";
        f.tool_sep    = "\">";
        f.key_start   = "<parameter name=\"";
        f.key_val_sep = "\">";
        f.val_end     = "</parameter>";
        f.tool_end    = "</invoke>";
        f.scope_end   = "</minimax:tool_call>";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
| 895 |
+
|
| 896 |
+
// Kimi K2: tool calls delimited by <|tool_calls_section_begin|>/..._end|>
// special tokens, with JSON-object argument bodies; tool calls may appear
// inside the <think> section.
static void common_chat_parse_kimi_k2(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "<|tool_calls_section_begin|>";
        f.tool_start   = "<|tool_call_begin|>";
        f.tool_sep     = "<|tool_call_argument_begin|>{";
        f.key_start    = "\"";
        f.key_val_sep  = "\":";
        f.val_end      = ",";
        f.tool_end     = "}<|tool_call_end|>";
        f.scope_end    = "<|tool_calls_section_end|>";
        f.raw_argval   = false;
        f.last_val_end = "";
        f.allow_toolcall_in_think = true;
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
| 914 |
+
|
| 915 |
+
// Apriel 1.5: tool calls as a JSON-like array
// <tool_calls>[{"name": "...", "arguments": {...}}, ...]</tool_calls>,
// with reasoning in <thinking>/</thinking>.
static void common_chat_parse_apriel_1_5(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        f.scope_start   = "<tool_calls>[";
        f.tool_start    = "{\"name\": \"";
        f.tool_sep      = "\", \"arguments\": {";
        f.key_start     = "\"";
        f.key_val_sep   = "\": ";
        f.val_end       = ", ";
        f.tool_end      = "}, ";
        f.scope_end     = "]</tool_calls>";
        f.raw_argval    = false;
        f.last_val_end  = "";
        f.last_tool_end = "}";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<thinking>", "</thinking>");
}
|
| 933 |
+
|
| 934 |
+
// Xiaomi MiMo: each tool call is a standalone
// <tool_call>\n{"name": "...", "arguments": {...}}\n</tool_call> block with
// no enclosing scope markers.
static void common_chat_parse_xiaomi_mimo(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        f.scope_start  = "";
        f.tool_start   = "<tool_call>\n{\"name\": \"";
        f.tool_sep     = "\", \"arguments\": {";
        f.key_start    = "\"";
        f.key_val_sep  = "\": ";
        f.val_end      = ", ";
        f.tool_end     = "}\n</tool_call>";
        f.scope_end    = "";
        f.raw_argval   = false;
        f.last_val_end = "";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form);
}
|
| 951 |
+
|
| 952 |
+
// GPT-OSS ("harmony") parser: the output is a sequence of messages of the form
//   <|start|>assistant<|channel|>CHANNEL[ to=functions.NAME][ <|constrain|>TYPE]<|message|>BODY<|end|>
// Channels: "analysis" (reasoning), "final"/"commentary" (content), and either
// channel with a "to=functions.*" recipient (tool call). Each loop iteration
// consumes one header + body; unknown headers are skipped by rolling back and
// searching for the next <|start|>assistant.
static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
    // Optional " <|constrain|>type" suffix of a header (groups: constrain tag, type).
    static const std::string constraint = "(?: (<\\|constrain\\|>)?([a-zA-Z0-9_-]+))";
    // " to=functions.NAME" recipient (group: function name).
    static const std::string recipient("(?: to=functions\\.([^<\\s]+))");

    static const common_regex start_regex("<\\|start\\|>assistant");
    static const common_regex analysis_regex("<\\|channel\\|>analysis");
    static const common_regex final_regex("<\\|channel\\|>final" + constraint + "?");
    static const common_regex preamble_regex("<\\|channel\\|>commentary");
    // Recipient may appear before or after the channel tag; both orders are valid.
    static const common_regex tool_call1_regex(recipient + "<\\|channel\\|>(analysis|commentary)" + constraint + "?");
    static const common_regex tool_call2_regex("<\\|channel\\|>(analysis|commentary)" + recipient + constraint + "?");

    // Consume the body up to <|end|> (or the rest of the input if the message
    // is still streaming); optionally keep the <|end|> marker in the result.
    auto consume_end = [&](bool include_end = false) {
        if (auto res = builder.try_find_literal("<|end|>")) {
            return res->prelude + (include_end ? builder.str(res->groups[0]) : "");
        }
        return builder.consume_rest();
    };

    // Parse the JSON arguments following a tool-call header and register the call.
    auto handle_tool_call = [&](const std::string & name) {
        if (auto args = builder.try_consume_json_with_dumped_args({{}})) {
            if (builder.syntax().parse_tool_calls) {
                if (!builder.add_tool_call(name, "", args->value) || args->is_partial) {
                    throw common_chat_msg_partial_exception("incomplete tool call");
                }
            } else if (args->is_partial) {
                // Even when not emitting tool calls, a truncated JSON body means
                // the stream is incomplete.
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
        }
    };

    // Anchored full match of `regex` against the already-extracted header string.
    auto regex_match = [](const common_regex & regex, const std::string & input) -> std::optional<common_regex_match> {
        auto match = regex.search(input, 0, true);
        if (match.type == COMMON_REGEX_MATCH_TYPE_FULL) {
            return match;
        }
        return std::nullopt;
    };

    do {
        auto header_start_pos = builder.pos();
        auto content_start = builder.try_find_literal("<|message|>");
        if (!content_start) {
            throw common_chat_msg_partial_exception("incomplete header");
        }

        // Everything between <|start|>assistant and <|message|> is the header.
        auto header = content_start->prelude;

        if (auto match = regex_match(tool_call1_regex, header)) {
            auto group = match->groups[1];
            auto name = header.substr(group.begin, group.end - group.begin);
            handle_tool_call(name);
            continue;
        }

        if (auto match = regex_match(tool_call2_regex, header)) {
            auto group = match->groups[2];
            auto name = header.substr(group.begin, group.end - group.begin);
            handle_tool_call(name);
            continue;
        }

        if (regex_match(analysis_regex, header)) {
            // Rewind so try_parse_reasoning() can re-consume the full tag itself.
            builder.move_to(header_start_pos);
            if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
                builder.add_content(consume_end(true));
            } else {
                builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|end|>");
            }
            continue;
        }

        if(regex_match(final_regex, header) || regex_match(preamble_regex, header)) {
            builder.add_content(consume_end());
            continue;
        }

        // Possibly a malformed message, attempt to recover by rolling
        // back to pick up the next <|start|>
        LOG_DBG("%s: unknown header from message: %s\n", __func__, header.c_str());
        builder.move_to(header_start_pos);
    } while (builder.try_find_regex(start_regex, std::string::npos, false));

    auto remaining = builder.consume_rest();
    if (!remaining.empty()) {
        // Trailing text after the last <|end|> is unexpected; log and drop it.
        LOG_DBG("%s: content after last message: %s\n", __func__, remaining.c_str());
    }
}
|
| 1039 |
+
|
| 1040 |
+
// GLM 4.5: XML-tag tool calls of the form
//   <tool_call>NAME<arg_key>K</arg_key><arg_value>V</arg_value>...</tool_call>
// with reasoning wrapped in <think>...</think>.
static void common_chat_parse_glm_4_5(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        // No wrapper around the list of tool calls.
        f.scope_start  = "";
        f.scope_end    = "";
        // The tool name follows <tool_call> directly (no separator).
        f.tool_start   = "<tool_call>";
        f.tool_sep     = "";
        f.tool_end     = "</tool_call>";
        // Each argument is <arg_key>K</arg_key><arg_value>V</arg_value>.
        f.key_start    = "<arg_key>";
        f.key_val_sep  = "</arg_key>";
        f.key_val_sep2 = "<arg_value>";
        f.val_end      = "</arg_value>";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<think>", "</think>");
}
|
| 1054 |
+
|
| 1055 |
+
// FireFunction v2: tool calls are emitted as a JSON array prefixed by " functools[".
static void common_chat_parse_firefunction_v2(common_chat_msg_parser & builder) {
    if (builder.syntax().parse_tool_calls) {
        static const common_regex prefix(regex_escape(" functools["));
        // rstrip_prefix=1 keeps the trailing '[' so the array parses as JSON.
        parse_prefixed_json_tool_call_array(builder, prefix, /* rstrip_prefix= */ 1);
    } else {
        // Tool-call parsing disabled: treat everything as plain content.
        builder.add_content(builder.consume_rest());
    }
}
|
| 1063 |
+
|
| 1064 |
+
// Functionary v3.2: tool calls are ">>>NAME\n{json args}"; the very first call
// may omit the ">>>" prefix. "python" may be followed by raw code instead of
// JSON, and "all" at the start of the output marks plain content.
static void common_chat_parse_functionary_v3_2(common_chat_msg_parser & builder) {
    // First call at position 0 has no ">>>" prefix.
    static const common_regex function_regex_start_only(R"((\w+\n\{|python\n|all\n))");
    static const common_regex function_regex(R"(>>>(\w+\n\{|python\n|all\n))");
    static const common_regex close_regex(R"(\s*)");

    parse_json_tool_calls(
        builder,
        std::nullopt,
        function_regex_start_only,
        function_regex,
        close_regex,
        std::nullopt,
        /* allow_raw_python= */ true,
        /* get_function_name= */ [&](const auto & res) -> std::string {
            auto at_start = res.groups[0].begin == 0;
            auto name = builder.str(res.groups[1]);
            if (!name.empty() && name.back() == '{') {
                // Unconsume the opening brace '{' to ensure the JSON parsing goes well.
                builder.move_back(1);
            }
            // Strip the trailing "\n" (and "{", if matched) from the captured name.
            auto idx = name.find_last_not_of("\n{");
            name = name.substr(0, idx + 1);
            if (at_start && name == "all") {
                // "all" at the start means content, not a tool call.
                return "";
            }
            return name;
        });
}
|
| 1092 |
+
|
| 1093 |
+
// Functionary v3.1 (llama 3.1 style): tool calls look like
//   <function=NAME>{json args}</function>
// plus the legacy llama 3.1 python escape "<|python_tag|>raw code".
static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }
    // This version of Functionary still supports the llama 3.1 tool call format for the python tool.
    static const common_regex python_tag_regex(regex_escape("<|python_tag|>"));

    static const common_regex function_regex(R"(<function=(\w+)>)");
    static const common_regex close_regex(R"(</function>)");

    parse_json_tool_calls(
        builder,
        /* block_open= */ std::nullopt,
        /* function_regex_start_only= */ std::nullopt,
        function_regex,
        close_regex,
        std::nullopt);

    // Legacy python escape: everything after <|python_tag|> is raw code that
    // gets wrapped into a "python" tool call.
    // (The original bound the regex result to an unused variable and ended with
    // a redundant `return;`; both removed.)
    if (builder.try_find_regex(python_tag_regex)) {
        auto arguments = wrap_code_as_arguments(builder, builder.consume_rest());
        builder.add_tool_call("python", "", arguments);
    }
}
|
| 1118 |
+
|
| 1119 |
+
// Hermes 2 Pro: tolerant tool-call parser. Accepts calls either as JSON objects
// {"name": ..., "arguments": ...} wrapped in one of many optional tags (or a
// markdown code fence), or as <function=NAME>{args}</function> /
// <function name="NAME">{args}</function>.
static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    static const common_regex open_regex(
        "(?:"
            "(```(?:xml|json)?\\n\\s*)?" // match 1 (block_start)
            "(" // match 2 (open_tag)
                "<tool_call>"
                "|<function_call>"
                "|<tool>"
                "|<tools>"
                "|<response>"
                "|<json>"
                "|<xml>"
                "|<JSON>"
            ")?"
            "(\\s*\\{\\s*\"name\")" // match 3 (named tool call)
        ")"
        "|<function=([^>]+)>" // match 4 (function name)
        "|<function name=\"([^\"]+)\">" // match 5 (function name again)
    );

    while (auto res = builder.try_find_regex(open_regex)) {
        const auto & block_start = res->groups[1];
        // If the call opened with a code fence, a matching "```" must close it.
        std::string block_end = block_start.empty() ? "" : "```";

        const auto & open_tag = res->groups[2];
        std::string close_tag;

        if (!res->groups[3].empty()) {
            // JSON-object form: rewind to the '{' so the JSON parser sees the whole object.
            builder.move_to(res->groups[3].begin);
            // Derive e.g. "</tool_call>" from "<tool_call>".
            close_tag = open_tag.empty() ? "" : "</" + builder.str(open_tag).substr(1);

            if (auto tool_call = builder.try_consume_json_with_dumped_args({{"arguments"}})) {
                if (!builder.add_tool_call(tool_call->value) || tool_call->is_partial) {
                    throw common_chat_msg_partial_exception("incomplete tool call");
                }
                builder.consume_spaces();
                builder.consume_literal(close_tag);
                builder.consume_spaces();
                if (!block_end.empty()) {
                    builder.consume_literal(block_end);
                    builder.consume_spaces();
                }
            } else {
                throw common_chat_msg_partial_exception("failed to parse tool call");
            }
        } else {
            // <function=...> / <function name="..."> form; group 4 or 5 holds the name.
            auto function_name = builder.str(res->groups[4]);
            if (function_name.empty()) {
                function_name = builder.str(res->groups[5]);
            }
            GGML_ASSERT(!function_name.empty());

            close_tag = "</function>";

            if (auto arguments = builder.try_consume_json_with_dumped_args({{}})) {
                if (!builder.add_tool_call(function_name, "", arguments->value) || arguments->is_partial) {
                    throw common_chat_msg_partial_exception("incomplete tool call");
                }
                builder.consume_spaces();
                builder.consume_literal(close_tag);
                builder.consume_spaces();
                if (!block_end.empty()) {
                    builder.consume_literal(block_end);
                    builder.consume_spaces();
                }
            }
            // NOTE(review): unlike the JSON-object branch above, a failed JSON
            // parse here falls through silently rather than throwing — presumably
            // intentional tolerance, but worth confirming.
        }
    }

    // Anything after the last tool call is plain content.
    builder.add_content(builder.consume_rest());
}
|
| 1196 |
+
|
| 1197 |
+
// Granite: optional <think>...</think> reasoning and <response>...</response>
// content, followed by an optional "<|tool_call|>" JSON array of tool calls.
// Streaming quirk: the model emits partial tokens like "<" / "<think", so the
// regex helpers are used as probes that throw common_chat_msg_partial_exception
// on partial matches, preventing half-tokens from leaking into the content.
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
    // Parse thinking tags
    static const common_regex start_think_regex(regex_escape("<think>"));
    static const common_regex end_think_regex(regex_escape("</think>"));
    // Granite models output partial tokens such as "<" and "<think".
    // By leveraging try_consume_regex()/try_find_regex() throwing
    // common_chat_msg_partial_exception for these partial tokens,
    // processing is interrupted and the tokens are not passed to add_content().
    if (auto res = builder.try_consume_regex(start_think_regex)) {
        // Restore position for try_parse_reasoning()
        builder.move_to(res->groups[0].begin);
        // Probe for the closing tag (throws if it is only partially present).
        builder.try_find_regex(end_think_regex, std::string::npos, false);
        // Restore position for try_parse_reasoning()
        builder.move_to(res->groups[0].begin);
    }
    builder.try_parse_reasoning("<think>", "</think>");

    // Parse response tags
    static const common_regex start_response_regex(regex_escape("<response>"));
    static const common_regex end_response_regex(regex_escape("</response>"));
    // Granite models output partial tokens such as "<" and "<response".
    // Same hack as reasoning parsing.
    if (builder.try_consume_regex(start_response_regex)) {
        builder.try_find_regex(end_response_regex);
    }

    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // Look for tool calls
    static const common_regex tool_call_regex(regex_escape("<|tool_call|>"))
;
    if (auto res = builder.try_find_regex(tool_call_regex)) {
        builder.move_to(res->groups[0].end);

        // Expect JSON array of tool calls
        if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
            if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
                throw common_chat_msg_partial_exception("incomplete tool call");
            }
        }
    } else {
        // No tool-call marker: the remainder is plain content.
        builder.add_content(builder.consume_rest());
    }
}
|
| 1243 |
+
|
| 1244 |
+
// Nemotron v2: optional <think>...</think> reasoning, then an optional
// <TOOLCALL>[json array]</TOOLCALL> block, then plain content.
static void common_chat_parse_nemotron_v2(common_chat_msg_parser & builder) {
    // Parse thinking tags
    builder.try_parse_reasoning("<think>", "</think>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // Look for tool calls
    static const common_regex tool_call_regex(regex_escape("<TOOLCALL>"));
    if (auto res = builder.try_find_regex(tool_call_regex)) {
        builder.move_to(res->groups[0].end);

        // Expect JSON array of tool calls
        auto tool_calls_data = builder.consume_json();
        if (tool_calls_data.json.is_array()) {
            if (!builder.try_consume_literal("</TOOLCALL>")) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
            // Fix: the return value was previously ignored, silently dropping
            // malformed tool calls; now treated as a partial parse (consistent
            // with the Granite parser above).
            if (!builder.add_tool_calls(tool_calls_data.json)) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
        } else {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }
    }
    // Anything after the tool-call block (or all of it, if none) is content.
    builder.add_content(builder.consume_rest());
}
|
| 1270 |
+
|
| 1271 |
+
// Apertus: reasoning between <|inner_prefix|> and <|inner_suffix|>, then an
// optional tool-call block "<|tools_prefix|>[json array]<|tools_suffix|>".
static void common_chat_parse_apertus(common_chat_msg_parser & builder) {
    // Parse thinking tags
    builder.try_parse_reasoning("<|inner_prefix|>", "<|inner_suffix|>");
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // Look for tool calls
    static const common_regex tool_call_regex(regex_escape("<|tools_prefix|>"));
    if (auto res = builder.try_find_regex(tool_call_regex)) {
        builder.move_to(res->groups[0].end);

        auto tool_calls_data = builder.consume_json();
        if (tool_calls_data.json.is_array()) {
            builder.consume_spaces();
            if (!builder.try_consume_literal("<|tools_suffix|>")) {
                throw common_chat_msg_partial_exception("Incomplete tool call");
            }
            // Each array element is {"tool_name": {...args...}} — short form.
            for (const auto & value : tool_calls_data.json) {
                if (value.is_object()) {
                    builder.add_tool_call_short_form(value);
                }
            }
        } else {
            throw common_chat_msg_partial_exception("Incomplete tool call");
        }
    }
    // Remainder after the tool-call block is plain content.
    builder.add_content(builder.consume_rest());
}
|
| 1301 |
+
|
| 1302 |
+
|
| 1303 |
+
// LFM2: tool calls are a JSON array between <|tool_call_start|> and
// <|tool_call_end|>; multiple such blocks may appear, interleaved with content.
static void common_chat_parse_lfm2(common_chat_msg_parser & builder) {
    if (!builder.syntax().parse_tool_calls) {
        builder.add_content(builder.consume_rest());
        return;
    }

    // LFM2 format: <|tool_call_start|>[{"name": "get_current_time", "arguments": {"location": "Paris"}}]<|tool_call_end|>
    static const common_regex tool_call_start_regex(regex_escape("<|tool_call_start|>"));
    static const common_regex tool_call_end_regex(regex_escape("<|tool_call_end|>"));

    // Loop through all tool calls
    while (auto res = builder.try_find_regex(tool_call_start_regex, std::string::npos, /* add_prelude_to_content= */ true)) {
        builder.move_to(res->groups[0].end);

        // Parse JSON array format: [{"name": "...", "arguments": {...}}]
        auto tool_calls_data = builder.consume_json();

        // Consume end marker
        builder.consume_spaces();
        if (!builder.try_consume_regex(tool_call_end_regex)) {
            throw common_chat_msg_partial_exception("Expected <|tool_call_end|>");
        }

        // Process each tool call in the array
        if (tool_calls_data.json.is_array()) {
            for (const auto & tool_call : tool_calls_data.json) {
                if (!tool_call.is_object()) {
                    throw common_chat_msg_partial_exception("Tool call must be an object");
                }

                if (!tool_call.contains("name")) {
                    throw common_chat_msg_partial_exception("Tool call missing 'name' field");
                }
                // Fix: a non-string "name" previously raised an uncaught
                // nlohmann::json type_error on the implicit string conversion;
                // report it as a partial-parse failure instead.
                if (!tool_call.at("name").is_string()) {
                    throw common_chat_msg_partial_exception("Tool call 'name' must be a string");
                }

                std::string function_name = tool_call.at("name");
                std::string arguments = "{}";

                // Arguments may be an object (re-serialized) or a pre-dumped string.
                if (tool_call.contains("arguments")) {
                    if (tool_call.at("arguments").is_object()) {
                        arguments = tool_call.at("arguments").dump();
                    } else if (tool_call.at("arguments").is_string()) {
                        arguments = tool_call.at("arguments");
                    }
                }

                if (!builder.add_tool_call(function_name, "", arguments)) {
                    throw common_chat_msg_partial_exception("Incomplete tool call");
                }
            }
        } else {
            throw common_chat_msg_partial_exception("Expected JSON array for tool calls");
        }

        // Consume any trailing whitespace after this tool call
        builder.consume_spaces();
    }

    // Consume any remaining content after all tool calls
    auto remaining = builder.consume_rest();
    if (!string_strip(remaining).empty()) {
        builder.add_content(remaining);
    }
}
|
| 1366 |
+
|
| 1367 |
+
// Seed-OSS: reasoning in <seed:think>...</seed:think>; tool calls are
//   <seed:tool_call><function=NAME><parameter=K>V</parameter>...</function></seed:tool_call>
static void common_chat_parse_seed_oss(common_chat_msg_parser & builder) {
    static const xml_tool_call_format form = [] {
        xml_tool_call_format f {};
        // Wrapper around the tool-call block.
        f.scope_start = "<seed:tool_call>";
        f.scope_end   = "</seed:tool_call>";
        // Tool name is embedded in the <function=...> tag.
        f.tool_start  = "<function=";
        f.tool_sep    = ">";
        f.tool_end    = "</function>";
        // Each argument is <parameter=K>V</parameter>.
        f.key_start   = "<parameter=";
        f.key_val_sep = ">";
        f.val_end     = "</parameter>";
        return f;
    }();
    builder.consume_reasoning_with_xml_tool_calls(form, "<seed:think>", "</seed:think>");
}
|
| 1380 |
+
|
| 1381 |
+
// Solar (open): reasoning starts at <|think|> and ends where the assistant's
// content turn begins; everything after is plain content.
static void common_chat_parse_solar_open(common_chat_msg_parser & builder) {
    // The end-of-reasoning marker is the full turn transition, not a single tag.
    static const char * reasoning_start = "<|think|>";
    static const char * reasoning_end   = "<|end|><|begin|>assistant<|content|>";
    builder.try_parse_reasoning(reasoning_start, reasoning_end);

    // TODO: Tool calling

    builder.add_content(builder.consume_rest());
}
|
| 1388 |
+
|
| 1389 |
+
// EXAONE MoE content parser: extracts <tool_call>...</tool_call> blocks, each
// containing a JSON object in one of two schemas (flat or OpenAI-style nested
// under "function"), optionally wrapped in a ```json code fence.
static void common_chat_parse_exaone_moe_content(common_chat_msg_parser & builder) {
    // 1) <tool_call>{ "name": "...", "arguments": {...} }</tool_call>
    // 2) <tool_call>{ "id": "...", "type": "function", "function": { "name": "...", "arguments": {...} } }</tool_call>
    static const common_regex tool_call_open(R"(<tool_call[^>]*>)");

    if (!builder.syntax().parse_tool_calls) {
        LOG_DBG("%s: not parse_tool_calls\n", __func__);
        builder.add_content(builder.consume_rest());
        return;
    }

    LOG_DBG("%s: parse_tool_calls\n", __func__);

    // Find all <tool_call></tool_call> blocks
    while (auto first = builder.try_find_regex(tool_call_open, std::string::npos, /* add_prelude_to_content= */ true)) {
        builder.move_to(first->groups[0].end);
        builder.consume_spaces();

        // Tolerate an optional markdown code fence around the JSON.
        builder.try_consume_literal("```json");
        builder.try_consume_literal("```");
        builder.consume_spaces();

        // Consume JSON object
        auto data = builder.consume_json();

        builder.consume_spaces();
        builder.try_consume_literal("```");
        builder.consume_spaces();

        if (!builder.try_consume_literal("</tool_call>")) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
        builder.consume_spaces();

        // Extract name and arguments
        std::string name;
        std::string id;
        nlohmann::ordered_json arguments;

        // Try the flat schema; returns false if "name"/"arguments" are absent.
        const auto extract_args = [&](const nlohmann::ordered_json & obj) -> bool {
            if (!obj.contains("name") || !obj.contains("arguments")) {
                return false;
            }
            name = obj.at("name").get<std::string>();
            arguments = obj.at("arguments");
            if (obj.contains("id") && obj.at("id").is_string()) {
                id = obj.at("id").get<std::string>();
            }
            return true;
        };

        if (!extract_args(data.json)) {
            // Fall back to the OpenAI-style nested schema: {"function": {...}}.
            if (data.json.contains("function") && data.json.at("function").is_object()) {
                auto fn = data.json.at("function");
                extract_args(fn);
                // The "id" may live at the top level rather than inside "function".
                if (id.empty() && data.json.contains("id") && data.json.at("id").is_string()) {
                    id = data.json.at("id").get<std::string>();
                }
            }
        }

        // If name is empty, treat the JSON object as content
        if (name.empty()) {
            LOG_DBG("%s: tool call missing name, treating as content\n", __func__);
            builder.add_content(data.json.dump());
            continue;
        }

        std::string args_str = arguments.dump();
        if (!builder.add_tool_call(name, id, args_str)) {
            throw common_chat_msg_partial_exception("incomplete tool call");
        }
    }

    // Remainder after the last tool call is plain content.
    builder.add_content(builder.consume_rest());
}
|
| 1465 |
+
|
| 1466 |
+
// EXAONE MoE top-level parser: decides how to split reasoning from content
// before delegating to common_chat_parse_exaone_moe_content(). The tricky case
// is thinking_forced_open (the template already emitted "<think>"), where a
// missing "</think>" means either "all reasoning so far" (streaming) or
// "no reasoning after all" (complete output).
static void common_chat_parse_exaone_moe(common_chat_msg_parser & builder) {
    LOG_DBG("%s: parsing exaone_moe\n", __func__);
    // EXAONE MoE outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
    // First try to parse using the standard reasoning parsing method
    LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());

    // Non-destructive probe: remember whether "</think>" appears at all.
    auto start_pos = builder.pos();
    auto found_end_think = builder.try_find_literal("</think>");
    builder.move_to(start_pos);

    if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
        // Complete output with no closing tag: the model produced no reasoning.
        LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
        common_chat_parse_exaone_moe_content(builder);
    } else if (builder.try_parse_reasoning("<think>", "</think>")) {
        // If reasoning was parsed successfully, the remaining content is regular content
        LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
        common_chat_parse_exaone_moe_content(builder);
    } else {
        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
            LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
            common_chat_parse_exaone_moe_content(builder);
            return;
        }
        // If no reasoning tags found, check if we should treat everything as reasoning
        if (builder.syntax().thinking_forced_open) {
            // If thinking is forced open but no tags found, treat everything as reasoning
            LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
            builder.add_reasoning_content(builder.consume_rest());
        } else {
            LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
            common_chat_parse_exaone_moe_content(builder);
        }
    }
}
|
| 1500 |
+
|
| 1501 |
+
// Fallback parser: optional <think>...</think> reasoning, then everything
// else is plain content. Also used to recover from failed format parses.
static void common_chat_parse_content_only(common_chat_msg_parser & builder) {
    builder.try_parse_reasoning("<think>", "</think>");
    auto rest = builder.consume_rest();
    builder.add_content(std::move(rest));
}
|
| 1505 |
+
|
| 1506 |
+
// Dispatch to the format-specific parser based on the detected chat format,
// then finalize the builder. Throws std::runtime_error for unknown formats;
// format-specific parsers throw common_chat_msg_partial_exception on truncated
// (streaming) input.
static void common_chat_parse(common_chat_msg_parser & builder) {
    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(builder.syntax().format), builder.input().c_str());

    switch (builder.syntax().format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
            common_chat_parse_content_only(builder);
            break;
        case COMMON_CHAT_FORMAT_GENERIC:
            common_chat_parse_generic(builder);
            break;
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
            common_chat_parse_mistral_nemo(builder);
            break;
        case COMMON_CHAT_FORMAT_MAGISTRAL:
            common_chat_parse_magistral(builder);
            break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X:
            common_chat_parse_llama_3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
            common_chat_parse_llama_3_1(builder, /* with_builtin_tools= */ true);
            break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
            common_chat_parse_deepseek_r1(builder);
            break;
        case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
            common_chat_parse_deepseek_v3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
            common_chat_parse_functionary_v3_2(builder);
            break;
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
            common_chat_parse_functionary_v3_1_llama_3_1(builder);
            break;
        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
            common_chat_parse_hermes_2_pro(builder);
            break;
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
            common_chat_parse_firefunction_v2(builder);
            break;
        case COMMON_CHAT_FORMAT_COMMAND_R7B:
            common_chat_parse_command_r7b(builder);
            break;
        case COMMON_CHAT_FORMAT_GRANITE:
            common_chat_parse_granite(builder);
            break;
        case COMMON_CHAT_FORMAT_GPT_OSS:
            common_chat_parse_gpt_oss(builder);
            break;
        case COMMON_CHAT_FORMAT_SEED_OSS:
            common_chat_parse_seed_oss(builder);
            break;
        case COMMON_CHAT_FORMAT_NEMOTRON_V2:
            common_chat_parse_nemotron_v2(builder);
            break;
        case COMMON_CHAT_FORMAT_APERTUS:
            common_chat_parse_apertus(builder);
            break;
        case COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS:
            common_chat_parse_lfm2(builder);
            break;
        case COMMON_CHAT_FORMAT_MINIMAX_M2:
            common_chat_parse_minimax_m2(builder);
            break;
        case COMMON_CHAT_FORMAT_GLM_4_5:
            common_chat_parse_glm_4_5(builder);
            break;
        case COMMON_CHAT_FORMAT_KIMI_K2:
            common_chat_parse_kimi_k2(builder);
            break;
        case COMMON_CHAT_FORMAT_APRIEL_1_5:
            common_chat_parse_apriel_1_5(builder);
            break;
        case COMMON_CHAT_FORMAT_XIAOMI_MIMO:
            common_chat_parse_xiaomi_mimo(builder);
            break;
        case COMMON_CHAT_FORMAT_SOLAR_OPEN:
            common_chat_parse_solar_open(builder);
            break;
        case COMMON_CHAT_FORMAT_EXAONE_MOE:
            common_chat_parse_exaone_moe(builder);
            break;
        default:
            throw std::runtime_error(std::string("Unsupported format: ") + common_chat_format_name(builder.syntax().format));
    }
    // Flush any pending partial state into the result message.
    builder.finish();
}
|
| 1593 |
+
|
| 1594 |
+
// Public entry point: parse a (possibly partial) model output into a
// common_chat_msg. PEG-based formats are routed to common_chat_peg_parse();
// all others go through the format dispatcher above. If a complete (non-partial)
// input fails with a partial-parse exception, the parse is retried as
// content-only so the raw text is not lost.
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
    if (syntax.format == COMMON_CHAT_FORMAT_PEG_SIMPLE ||
        syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE ||
        syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
        return common_chat_peg_parse(syntax.parser, input, is_partial, syntax);
    }
    common_chat_msg_parser builder(input, is_partial, syntax);
    try {
        common_chat_parse(builder);
    } catch (const common_chat_msg_partial_exception & ex) {
        LOG_DBG("Partial parse: %s\n", ex.what());
        if (!is_partial) {
            // Complete input that still failed: discard any half-parsed tool
            // calls and fall back to treating the whole input as content.
            builder.clear_tools();
            builder.move_to(0);
            common_chat_parse_content_only(builder);
        }
        // For partial input the exception is expected; keep what was parsed so far.
    }
    auto msg = builder.result();
    if (!is_partial) {
        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
    }
    return msg;
}
|
| 1617 |
+
|
| 1618 |
+
// Parse model output using a pre-built PEG parser arena, then map the resulting
// AST into a common_chat_msg with the mapper matching the format variant.
// Throws std::runtime_error if the parser is missing or the input does not match.
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax) {
    if (parser.empty()) {
        throw std::runtime_error("Failed to parse due to missing parser definition.");
    }

    LOG_DBG("Parsing input with format %s: %s\n", common_chat_format_name(syntax.format), input.c_str());

    common_peg_parse_context ctx(input, is_partial);
    auto result = parser.parse(ctx);
    if (result.fail()) {
        throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end));
    }

    common_chat_msg msg;
    msg.role = "assistant";

    // Select the AST->message mapper for this PEG format variant.
    if (syntax.format == COMMON_CHAT_FORMAT_PEG_NATIVE) {
        auto mapper = common_chat_peg_native_mapper(msg);
        mapper.from_ast(ctx.ast, result);
    } else if (syntax.format == COMMON_CHAT_FORMAT_PEG_CONSTRUCTED) {
        auto mapper = common_chat_peg_constructed_mapper(msg);
        mapper.from_ast(ctx.ast, result);
    } else {
        // Generic mapper
        auto mapper = common_chat_peg_mapper(msg);
        mapper.from_ast(ctx.ast, result);
    }
    if (!is_partial) {
        LOG_DBG("Parsed message: %s\n", common_chat_msgs_to_json_oaicompat({msg}).at(0).dump().c_str());
    }
    return msg;
}
|
llama.cpp/common/chat-parser.h
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "chat.h"
|
| 4 |
+
#include "chat-parser-xml-toolcall.h"
|
| 5 |
+
#include "json-partial.h"
|
| 6 |
+
#include "regex-partial.h"
|
| 7 |
+
|
| 8 |
+
#include <nlohmann/json_fwd.hpp>
|
| 9 |
+
|
| 10 |
+
#include <optional>
|
| 11 |
+
#include <string>
|
| 12 |
+
#include <vector>
|
| 13 |
+
|
| 14 |
+
// Thrown while parsing a streamed assistant message when the input stops
// mid-structure, i.e. the message is incomplete rather than malformed.
class common_chat_msg_partial_exception : public std::runtime_error {
  public:
    common_chat_msg_partial_exception(const std::string & msg) : std::runtime_error(msg) {}
};
|
| 18 |
+
|
| 19 |
+
// Cursor-based parser for a single (possibly partial) assistant message.
// Walks `input_` from `pos_`, accumulating content / reasoning / tool calls
// into `result_`. When `is_partial_` is true, helpers tolerate truncated
// input (e.g. half-written JSON) instead of failing.
class common_chat_msg_parser {
    std::string input_;
    bool is_partial_;
    common_chat_parser_params syntax_; // TODO: rename to params
    // Marker string injected to "heal" truncated JSON so it can be parsed;
    // consumers strip anything at/after this marker.
    std::string healing_marker_;

    size_t pos_ = 0;              // current read position into input_
    common_chat_msg result_;      // message being accumulated

  public:
    common_chat_msg_parser(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
    const std::string & input() const { return input_; }
    size_t pos() const { return pos_; }
    const std::string & healing_marker() const { return healing_marker_; }
    const bool & is_partial() const { return is_partial_; }
    const common_chat_msg & result() const { return result_; }
    const common_chat_parser_params & syntax() const { return syntax_; }

    // Jump to an absolute position; throws if past the end of input.
    void move_to(size_t pos) {
        if (pos > input_.size()) {
            throw std::runtime_error("Invalid position!");
        }
        pos_ = pos;
    }
    // Rewind by n characters; throws if that would go before the start.
    void move_back(size_t n) {
        if (pos_ < n) {
            throw std::runtime_error("Can't move back that far!");
        }
        pos_ -= n;
    }

    // Get the substring of the input at the given range
    std::string str(const common_string_range & rng) const;

    // Appends to the result.content field
    void add_content(const std::string & content);

    // Appends to the result.reasoning_content field
    void add_reasoning_content(const std::string & reasoning_content);

    // Adds a tool call to the result. If the tool call is too incomplete (e.g. name empty), it won't add anything.
    bool add_tool_call(const std::string & name, const std::string & id, const std::string & arguments);

    // Adds a tool call using the "name", "id" and "arguments" fields of the json object
    bool add_tool_call(const nlohmann::ordered_json & tool_call);

    // Adds an array of tool calls using their "name", "id" and "arguments" fields.
    bool add_tool_calls(const nlohmann::ordered_json & arr);

    // Adds a tool call using the short form: { "tool_name": { "arg1": val, "arg2": val } }
    bool add_tool_call_short_form(const nlohmann::ordered_json & tool_call);

    // Finalize parsing (e.g. flush remaining input); call once when done.
    void finish();

    // Skip whitespace at the cursor; returns whether any was consumed.
    bool consume_spaces();

    // Consume an exact literal at the cursor; throws/fails if absent.
    void consume_literal(const std::string & literal);

    // Parse an optional <think>...</think>-style reasoning block delimited by
    // the given markers into result.reasoning_content.
    bool try_parse_reasoning(const std::string & start_think, const std::string & end_think);

    // Consume and return everything from the cursor to the end of input.
    std::string consume_rest();

    struct find_regex_result {
        std::string prelude;                      // text before the match
        std::vector<common_string_range> groups;  // capture-group ranges
    };

    std::optional<find_regex_result> try_find_regex(const common_regex & regex, size_t from = std::string::npos, bool add_prelude_to_content = true);

    bool try_consume_literal(const std::string & literal);

    std::optional<find_regex_result> try_find_literal(const std::string & literal);

    find_regex_result consume_regex(const common_regex & regex);

    std::optional<find_regex_result> try_consume_regex(const common_regex & regex);

    std::optional<common_json> try_consume_json();
    common_json consume_json();

    struct consume_json_result {
        nlohmann::ordered_json value;
        bool is_partial;  // true if the JSON was truncated and healed
    };

    /*
        Consume (possibly partial) json and converts specific subtrees to (possibly truncated) JSON strings.

        By default, object keys can't be truncated, nor can string values (their corresponding key is removed,
        e.g. `{"foo": "bar", "baz": "b` -> `{"foo": "bar"}`

        But one can allow subpaths to be kept truncated, and possibly json-dumped to truncated json strings
        - with `content_paths={{"foo"}}` -> `{"foo": "b` -> {"foo": "b"}`
        - with `args_paths={{"foo"}}` -> `{"foo": {"b` -> `{"foo": "{b"}`
    */
    consume_json_result consume_json_with_dumped_args(
        const std::vector<std::vector<std::string>> & args_paths = {},
        const std::vector<std::vector<std::string>> & content_paths = {}
    );
    std::optional<consume_json_result> try_consume_json_with_dumped_args(
        const std::vector<std::vector<std::string>> & args_paths = {},
        const std::vector<std::vector<std::string>> & content_paths = {}
    );

    /**
     * Parse XML-Style tool call for given xml_tool_call_format. Return false for invalid syntax and get the position untouched.
     * form.scope_start, form.tool_sep and form.scope_end can be empty.
     */
    bool try_consume_xml_tool_calls(const struct xml_tool_call_format & form);

    // Parse content uses reasoning and XML-Style tool call
    void consume_reasoning_with_xml_tool_calls(const struct xml_tool_call_format & form, const std::string & start_think = "<think>", const std::string & end_think = "</think>");

    // Drop any tool calls accumulated so far in the result.
    void clear_tools();
};
|
llama.cpp/common/chat-peg-parser.cpp
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "chat-peg-parser.h"
|
| 2 |
+
|
| 3 |
+
#include <nlohmann/json.hpp>
|
| 4 |
+
|
| 5 |
+
using json = nlohmann::json;
|
| 6 |
+
|
| 7 |
+
// Strip trailing ASCII whitespace from a string view.
// If `max` is >= 0, at most `max` trailing whitespace characters are removed;
// max == -1 (the default) removes all of them.
//
// Bug fix: the original condition `count <= max` broke out of the loop before
// removing anything whenever max >= 0, making the cap a no-op; `count >= max`
// correctly limits how many characters get stripped. All current call sites
// use the default (-1), so their behavior is unchanged.
static std::string_view trim_trailing_space(std::string_view sv, int max = -1) {
    int count = 0;
    // Cast to unsigned char: passing a negative char to isspace is UB.
    while (!sv.empty() && std::isspace(static_cast<unsigned char>(sv.back()))) {
        if (max != -1 && count >= max) {
            break;
        }
        sv.remove_suffix(1);
        count++;
    }
    return sv;
}
|
| 18 |
+
|
| 19 |
+
// Walk every AST node produced by the parse (in the arena's visit order) and
// fold each node into the message via the virtual map() hook, so derived
// mappers customize per-node handling without reimplementing the traversal.
void common_chat_peg_mapper::from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result) {
    arena.visit(result, [this](const common_peg_ast_node & node) {
        map(node);
    });
}
|
| 24 |
+
|
| 25 |
+
void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
| 26 |
+
bool is_reasoning = node.tag == common_chat_peg_builder::REASONING;
|
| 27 |
+
bool is_content = node.tag == common_chat_peg_builder::CONTENT;
|
| 28 |
+
|
| 29 |
+
if (is_reasoning) {
|
| 30 |
+
result.reasoning_content = std::string(trim_trailing_space(node.text));
|
| 31 |
+
}
|
| 32 |
+
|
| 33 |
+
if (is_content) {
|
| 34 |
+
result.content = std::string(trim_trailing_space(node.text));
|
| 35 |
+
}
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
void common_chat_peg_native_mapper::map(const common_peg_ast_node & node) {
|
| 39 |
+
common_chat_peg_mapper::map(node);
|
| 40 |
+
|
| 41 |
+
bool is_tool_open = node.tag == common_chat_peg_native_builder::TOOL_OPEN;
|
| 42 |
+
bool is_tool_name = node.tag == common_chat_peg_native_builder::TOOL_NAME;
|
| 43 |
+
bool is_tool_id = node.tag == common_chat_peg_native_builder::TOOL_ID;
|
| 44 |
+
bool is_tool_args = node.tag == common_chat_peg_native_builder::TOOL_ARGS;
|
| 45 |
+
|
| 46 |
+
if (is_tool_open) {
|
| 47 |
+
result.tool_calls.emplace_back();
|
| 48 |
+
current_tool = &result.tool_calls.back();
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
if (is_tool_id && current_tool) {
|
| 52 |
+
current_tool->id = std::string(trim_trailing_space(node.text));
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
if (is_tool_name && current_tool) {
|
| 56 |
+
current_tool->name = std::string(trim_trailing_space(node.text));
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
if (is_tool_args && current_tool) {
|
| 60 |
+
current_tool->arguments = std::string(trim_trailing_space(node.text));
|
| 61 |
+
}
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
void common_chat_peg_constructed_mapper::map(const common_peg_ast_node & node) {
|
| 65 |
+
common_chat_peg_mapper::map(node);
|
| 66 |
+
|
| 67 |
+
bool is_tool_open = node.tag == common_chat_peg_constructed_builder::TOOL_OPEN;
|
| 68 |
+
bool is_tool_name = node.tag == common_chat_peg_constructed_builder::TOOL_NAME;
|
| 69 |
+
bool is_tool_close = node.tag == common_chat_peg_constructed_builder::TOOL_CLOSE;
|
| 70 |
+
bool is_arg_open = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_OPEN;
|
| 71 |
+
bool is_arg_close = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_CLOSE;
|
| 72 |
+
bool is_arg_name = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_NAME;
|
| 73 |
+
bool is_arg_string = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_STRING_VALUE;
|
| 74 |
+
bool is_arg_json = node.tag == common_chat_peg_constructed_builder::TOOL_ARG_JSON_VALUE;
|
| 75 |
+
|
| 76 |
+
if (is_tool_open) {
|
| 77 |
+
result.tool_calls.emplace_back();
|
| 78 |
+
current_tool = &result.tool_calls.back();
|
| 79 |
+
arg_count = 0;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
if (is_tool_name) {
|
| 83 |
+
current_tool->name = std::string(node.text);
|
| 84 |
+
current_tool->arguments = "{";
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
if (is_arg_open) {
|
| 88 |
+
needs_closing_quote = false;
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
if (is_arg_name && current_tool) {
|
| 92 |
+
if (arg_count > 0) {
|
| 93 |
+
current_tool->arguments += ",";
|
| 94 |
+
}
|
| 95 |
+
current_tool->arguments += json(trim_trailing_space(node.text)).dump() + ":";
|
| 96 |
+
++arg_count;
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
if (is_arg_string && current_tool) {
|
| 100 |
+
// Serialize to JSON, but exclude the end quote
|
| 101 |
+
std::string dumped = json(trim_trailing_space(node.text)).dump();
|
| 102 |
+
current_tool->arguments += dumped.substr(0, dumped.size() - 1);
|
| 103 |
+
needs_closing_quote = true;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
if (is_arg_close && current_tool) {
|
| 107 |
+
if (needs_closing_quote) {
|
| 108 |
+
current_tool->arguments += "\"";
|
| 109 |
+
needs_closing_quote = false;
|
| 110 |
+
}
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
if (is_arg_json && current_tool) {
|
| 114 |
+
current_tool->arguments += std::string(trim_trailing_space(node.text));
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
if (is_tool_close && current_tool) {
|
| 118 |
+
if (needs_closing_quote) {
|
| 119 |
+
current_tool->arguments += "\"";
|
| 120 |
+
needs_closing_quote = false;
|
| 121 |
+
}
|
| 122 |
+
current_tool->arguments += "}";
|
| 123 |
+
}
|
| 124 |
+
}
|
llama.cpp/common/chat-peg-parser.h
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
|
| 3 |
+
#include "chat.h"
|
| 4 |
+
#include "peg-parser.h"
|
| 5 |
+
|
| 6 |
+
// Base builder for chat PEG grammars: adds tag helpers for the node kinds
// every chat format shares (reasoning block, reasoning text, plain content).
// Tags are string constants matched against node.tag by the mappers.
class common_chat_peg_builder : public common_peg_parser_builder {
  public:
    static constexpr const char * REASONING_BLOCK = "reasoning-block";
    static constexpr const char * REASONING = "reasoning";
    static constexpr const char * CONTENT = "content";

    // Tag wrappers: mark sub-parser `p` so its AST node carries the tag.
    common_peg_parser reasoning_block(const common_peg_parser & p) { return tag(REASONING_BLOCK, p); }
    common_peg_parser reasoning(const common_peg_parser & p) { return tag(REASONING, p); }
    common_peg_parser content(const common_peg_parser & p) { return tag(CONTENT, p); }
};
|
| 16 |
+
|
| 17 |
+
// Convenience helper: create a chat PEG builder, let `fn` define the root
// parser with it, and return the compiled grammar arena.
inline common_peg_arena build_chat_peg_parser(const std::function<common_peg_parser(common_chat_peg_builder & builder)> & fn) {
    common_chat_peg_builder b;
    auto root = fn(b);
    b.set_root(root);
    return b.build();
}
|
| 22 |
+
|
| 23 |
+
class common_chat_peg_mapper {
|
| 24 |
+
public:
|
| 25 |
+
common_chat_msg & result;
|
| 26 |
+
|
| 27 |
+
common_chat_peg_mapper(common_chat_msg & msg) : result(msg) {}
|
| 28 |
+
|
| 29 |
+
virtual void from_ast(const common_peg_ast_arena & arena, const common_peg_parse_result & result);
|
| 30 |
+
virtual void map(const common_peg_ast_node & node);
|
| 31 |
+
};
|
| 32 |
+
|
| 33 |
+
// Builder for "native" tool-call grammars, where the model emits the tool
// name, id and a single arguments payload as distinct spans. Adds tag
// helpers for each span; `atomic` marks spans that must match whole.
class common_chat_peg_native_builder : public common_chat_peg_builder {
  public:
    static constexpr const char * TOOL = "tool";
    static constexpr const char * TOOL_OPEN = "tool-open";
    static constexpr const char * TOOL_CLOSE = "tool-close";
    static constexpr const char * TOOL_ID = "tool-id";
    static constexpr const char * TOOL_NAME = "tool-name";
    static constexpr const char * TOOL_ARGS = "tool-args";

    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
    common_peg_parser tool_id(const common_peg_parser & p) { return atomic(tag(TOOL_ID, p)); }
    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
    common_peg_parser tool_args(const common_peg_parser & p) { return tag(TOOL_ARGS, p); }
};
|
| 49 |
+
|
| 50 |
+
class common_chat_peg_native_mapper : public common_chat_peg_mapper {
|
| 51 |
+
common_chat_tool_call * current_tool;
|
| 52 |
+
|
| 53 |
+
public:
|
| 54 |
+
common_chat_peg_native_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
| 55 |
+
|
| 56 |
+
void map(const common_peg_ast_node & node) override;
|
| 57 |
+
};
|
| 58 |
+
|
| 59 |
+
// Convenience helper: create a native tool-call builder, let `fn` define the
// root parser with it, and return the compiled grammar arena.
inline common_peg_arena build_chat_peg_native_parser(const std::function<common_peg_parser(common_chat_peg_native_builder & builder)> & fn) {
    common_chat_peg_native_builder b;
    auto root = fn(b);
    b.set_root(root);
    return b.build();
}
|
| 64 |
+
|
| 65 |
+
// Builder for "constructed" tool-call grammars, where arguments arrive as
// individual name/value pairs (e.g. XML-ish per-argument tags) and the JSON
// arguments object is assembled by the mapper rather than emitted verbatim.
class common_chat_peg_constructed_builder : public common_chat_peg_builder {
  public:
    static constexpr const char * TOOL = "tool";
    static constexpr const char * TOOL_OPEN = "tool-open";
    static constexpr const char * TOOL_CLOSE = "tool-close";
    static constexpr const char * TOOL_NAME = "tool-name";
    static constexpr const char * TOOL_ARG = "tool-arg";
    static constexpr const char * TOOL_ARG_OPEN = "tool-arg-open";
    static constexpr const char * TOOL_ARG_CLOSE = "tool-arg-close";
    static constexpr const char * TOOL_ARG_NAME = "tool-arg-name";
    static constexpr const char * TOOL_ARG_STRING_VALUE = "tool-arg-string-value";
    static constexpr const char * TOOL_ARG_JSON_VALUE = "tool-arg-json-value";

    // Tag wrappers; `atomic` spans must match as a unit (no partial match).
    common_peg_parser tool(const common_peg_parser & p) { return tag(TOOL, p); }
    common_peg_parser tool_open(const common_peg_parser & p) { return atomic(tag(TOOL_OPEN, p)); }
    common_peg_parser tool_close(const common_peg_parser & p) { return atomic(tag(TOOL_CLOSE, p)); }
    common_peg_parser tool_name(const common_peg_parser & p) { return atomic(tag(TOOL_NAME, p)); }
    common_peg_parser tool_arg(const common_peg_parser & p) { return tag(TOOL_ARG, p); }
    common_peg_parser tool_arg_open(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_OPEN, p)); }
    common_peg_parser tool_arg_close(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_CLOSE, p)); }
    common_peg_parser tool_arg_name(const common_peg_parser & p) { return atomic(tag(TOOL_ARG_NAME, p)); }
    common_peg_parser tool_arg_string_value(const common_peg_parser & p) { return tag(TOOL_ARG_STRING_VALUE, p); }
    common_peg_parser tool_arg_json_value(const common_peg_parser & p) { return tag(TOOL_ARG_JSON_VALUE, p); }
};
|
| 89 |
+
|
| 90 |
+
class common_chat_peg_constructed_mapper : public common_chat_peg_mapper {
|
| 91 |
+
common_chat_tool_call * current_tool;
|
| 92 |
+
int arg_count = 0;
|
| 93 |
+
bool needs_closing_quote = false;
|
| 94 |
+
|
| 95 |
+
public:
|
| 96 |
+
common_chat_peg_constructed_mapper(common_chat_msg & msg) : common_chat_peg_mapper(msg) {}
|
| 97 |
+
|
| 98 |
+
void map(const common_peg_ast_node & node) override;
|
| 99 |
+
};
|
| 100 |
+
|
| 101 |
+
// Convenience helper: create a constructed tool-call builder, let `fn`
// define the root parser with it, and return the compiled grammar arena.
inline common_peg_arena build_chat_peg_constructed_parser(const std::function<common_peg_parser(common_chat_peg_constructed_builder & builder)> & fn) {
    common_chat_peg_constructed_builder b;
    auto root = fn(b);
    b.set_root(root);
    return b.build();
}
|
llama.cpp/common/chat.cpp
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
llama.cpp/common/chat.h
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
|
| 2 |
+
|
| 3 |
+
#pragma once
|
| 4 |
+
|
| 5 |
+
#include "common.h"
|
| 6 |
+
#include "peg-parser.h"
|
| 7 |
+
#include <functional>
|
| 8 |
+
#include <chrono>
|
| 9 |
+
#include <string>
|
| 10 |
+
#include <vector>
|
| 11 |
+
#include <map>
|
| 12 |
+
|
| 13 |
+
#include <nlohmann/json_fwd.hpp>
|
| 14 |
+
|
| 15 |
+
struct common_chat_templates;
|
| 16 |
+
|
| 17 |
+
// One tool/function call requested by the model: function name, its
// arguments serialized as a JSON string, and an (optional) call id.
struct common_chat_tool_call {
    std::string name;
    std::string arguments;
    std::string id;

    // Field-wise equality (all three members must match).
    bool operator==(const common_chat_tool_call & other) const {
        if (id != other.id) {
            return false;
        }
        if (name != other.name) {
            return false;
        }
        return arguments == other.arguments;
    }
};
|
| 26 |
+
|
| 27 |
+
// One typed part of a multi-part message content (OpenAI-style content
// parts), e.g. { "type": "text", "text": "..." }.
struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    // TODO @ngxson : no known chat templates support reasoning_content in content parts yet
    // this can be useful for models with interleaved thinking (like Kimi-K2)
    // if you see any templates explicitly support this, please ping me
    // std::string reasoning_content;

    // Equality compares type and text (the commented-out field is excluded).
    bool operator==(const common_chat_msg_content_part & other) const {
        return type == other.type && text == other.text;
    }
};
|
| 40 |
+
|
| 41 |
+
// A single chat message (role + content and/or typed content parts), plus
// any tool calls the assistant requested and, for tool-result messages, the
// tool name / call id being answered.
struct common_chat_msg {
    std::string role;
    std::string content;
    std::vector<common_chat_msg_content_part> content_parts;
    std::vector<common_chat_tool_call> tool_calls;
    std::string reasoning_content;
    std::string tool_name;      // set on tool-result messages
    std::string tool_call_id;   // id of the call this message answers

    // Serialize to OpenAI-compatible JSON; when concat_typed_text is true,
    // typed text parts are concatenated into a single content string.
    nlohmann::ordered_json to_json_oaicompat(bool concat_typed_text = false) const;

    // True when every payload field is empty (role alone doesn't count).
    bool empty() const {
        return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
    }
    // Give each tool call a stable id across repeated (streaming) re-parses:
    // previously assigned ids are reused from ids_cache; new calls keep their
    // own id or get one from gen_tool_call_id if empty.
    void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
        for (auto i = 0u; i < tool_calls.size(); i++) {
            if (ids_cache.size() <= i) {
                auto id = tool_calls[i].id;
                if (id.empty()) {
                    id = gen_tool_call_id();
                }
                ids_cache.push_back(id);
            }
            tool_calls[i].id = ids_cache[i];
        }
    }
    bool operator==(const common_chat_msg & other) const {
        return role == other.role
            && content == other.content
            && content_parts == other.content_parts
            && tool_calls == other.tool_calls
            && reasoning_content == other.reasoning_content
            && tool_name == other.tool_name
            && tool_call_id == other.tool_call_id;
    }
    bool operator!=(const common_chat_msg & other) const {
        return !(*this == other);
    }
};
|
| 80 |
+
|
| 81 |
+
struct common_chat_msg_diff {
|
| 82 |
+
std::string reasoning_content_delta;
|
| 83 |
+
std::string content_delta;
|
| 84 |
+
size_t tool_call_index = std::string::npos;
|
| 85 |
+
common_chat_tool_call tool_call_delta;
|
| 86 |
+
|
| 87 |
+
static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new);
|
| 88 |
+
|
| 89 |
+
bool operator==(const common_chat_msg_diff & other) const {
|
| 90 |
+
return content_delta == other.content_delta
|
| 91 |
+
&& tool_call_index == other.tool_call_index
|
| 92 |
+
&& tool_call_delta == other.tool_call_delta;
|
| 93 |
+
}
|
| 94 |
+
};
|
| 95 |
+
|
| 96 |
+
// A tool made available to the model: name, human-readable description, and
// its parameters as a JSON-schema string.
struct common_chat_tool {
    std::string name;
    std::string description;
    std::string parameters;  // JSON schema, serialized
};
|
| 101 |
+
|
| 102 |
+
// OpenAI-style tool_choice: model decides / must call a tool / never calls.
enum common_chat_tool_choice {
    COMMON_CHAT_TOOL_CHOICE_AUTO,
    COMMON_CHAT_TOOL_CHOICE_REQUIRED,
    COMMON_CHAT_TOOL_CHOICE_NONE,
};
|
| 107 |
+
|
| 108 |
+
// Output format a chat template produces, used to select the matching
// parser for the model's generated text (tool calls, reasoning, etc.).
enum common_chat_format {
    COMMON_CHAT_FORMAT_CONTENT_ONLY,
    COMMON_CHAT_FORMAT_GENERIC,
    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
    COMMON_CHAT_FORMAT_MAGISTRAL,
    COMMON_CHAT_FORMAT_LLAMA_3_X,
    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
    COMMON_CHAT_FORMAT_HERMES_2_PRO,
    COMMON_CHAT_FORMAT_COMMAND_R7B,
    COMMON_CHAT_FORMAT_GRANITE,
    COMMON_CHAT_FORMAT_GPT_OSS,
    COMMON_CHAT_FORMAT_SEED_OSS,
    COMMON_CHAT_FORMAT_NEMOTRON_V2,
    COMMON_CHAT_FORMAT_APERTUS,
    COMMON_CHAT_FORMAT_LFM2_WITH_JSON_TOOLS,
    COMMON_CHAT_FORMAT_GLM_4_5,
    COMMON_CHAT_FORMAT_MINIMAX_M2,
    COMMON_CHAT_FORMAT_KIMI_K2,
    COMMON_CHAT_FORMAT_APRIEL_1_5,
    COMMON_CHAT_FORMAT_XIAOMI_MIMO,
    COMMON_CHAT_FORMAT_SOLAR_OPEN,
    COMMON_CHAT_FORMAT_EXAONE_MOE,

    // These are intended to be parsed by the PEG parser
    COMMON_CHAT_FORMAT_PEG_SIMPLE,
    COMMON_CHAT_FORMAT_PEG_NATIVE,
    COMMON_CHAT_FORMAT_PEG_CONSTRUCTED,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
|
| 143 |
+
|
| 144 |
+
// Everything needed to render a chat prompt from a template: the messages,
// optional grammar/schema constraints, and tool / reasoning settings.
struct common_chat_templates_inputs {
    std::vector<common_chat_msg> messages;
    std::string grammar;
    std::string json_schema;
    bool add_generation_prompt = true;
    bool use_jinja = true;
    // Parameters below only supported when use_jinja is true
    std::vector<common_chat_tool> tools;
    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
    bool parallel_tool_calls = false;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool enable_thinking"
    bool enable_thinking = true;
    // Injected "current time" so templates that print dates are testable.
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
    std::map<std::string, std::string> chat_template_kwargs;
    bool add_bos = false;
    bool add_eos = false;
};
|
| 161 |
+
|
| 162 |
+
// Result of applying a chat template: the rendered prompt plus everything
// sampling/parsing needs (grammar constraints, triggers, stop strings, and
// the detected output format).
struct common_chat_params {
    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    std::string prompt;
    std::string grammar;
    // When true, the grammar only activates after one of grammar_triggers fires.
    bool grammar_lazy = false;
    // True when the template opens a thinking block the model must close.
    bool thinking_forced_open = false;
    std::vector<common_grammar_trigger> grammar_triggers;
    std::vector<std::string> preserved_tokens;
    std::vector<std::string> additional_stops;
    // Serialized PEG parser definition for this format, if any.
    std::string parser;
};
|
| 173 |
+
|
| 174 |
+
// per-message parsing syntax
|
| 175 |
+
// should be derived from common_chat_params
|
| 176 |
+
struct common_chat_parser_params {
    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; // TODO: refactor this to "bool parse_reasoning"
    // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
    bool reasoning_in_content = false;
    bool thinking_forced_open = false;
    bool parse_tool_calls = true;
    // Compiled PEG grammar used by the PEG-based formats.
    common_peg_arena parser = {};
    common_chat_parser_params() = default;
    // Converting constructor (intentionally implicit, so a common_chat_params
    // can be passed where parser params are expected). NOTE(review): it
    // copies only format and thinking_forced_open — reasoning_format,
    // reasoning_in_content and parser are presumably set by the caller
    // afterwards; confirm that's intended.
    common_chat_parser_params(const common_chat_params & chat_params) {
        format = chat_params.format;
        thinking_forced_open = chat_params.thinking_forced_open;
    }
};
|
| 190 |
+
|
| 191 |
+
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);

void common_chat_templates_free(struct common_chat_templates * tmpls);

// Deleter so common_chat_templates can be held in a unique_ptr (below).
struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };

typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;

// Load templates from the model (or the override string, if non-empty).
common_chat_templates_ptr common_chat_templates_init(
    const struct llama_model * model,
    const std::string & chat_template_override,
    const std::string & bos_token_override = "",
    const std::string & eos_token_override = "");

bool common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
std::string common_chat_templates_source(const struct common_chat_templates * tmpls, const std::string & variant = "");

// Render a full prompt (and parsing metadata) from the given inputs.
struct common_chat_params common_chat_templates_apply(
    const struct common_chat_templates * tmpls,
    const struct common_chat_templates_inputs & inputs);

// Format single message, while taking into account the position of that message in chat history
std::string common_chat_format_single(
    const struct common_chat_templates * tmpls,
    const std::vector<common_chat_msg> & past_msg,
    const common_chat_msg & new_msg,
    bool add_ass,
    bool use_jinja);

// Returns an example of formatted chat
std::string common_chat_format_example(
    const struct common_chat_templates * tmpls,
    bool use_jinja,
    const std::map<std::string, std::string> & chat_template_kwargs);

const char* common_chat_format_name(common_chat_format format);
// Parse (possibly partial) model output into a message, per `syntax`.
common_chat_msg common_chat_parse(const std::string & input, bool is_partial, const common_chat_parser_params & syntax);
// Same, but using an explicit pre-built PEG parser.
common_chat_msg common_chat_peg_parse(const common_peg_arena & parser, const std::string & input, bool is_partial, const common_chat_parser_params & syntax);

// used by arg and server
const char * common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format common_reasoning_format_from_name(const std::string & format);

common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);

// Parses a JSON array of messages in OpenAI's chat completion API format.
std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const nlohmann::ordered_json & messages);

// DEPRECATED: only used in tests
nlohmann::ordered_json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);

std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const nlohmann::ordered_json & tools);
nlohmann::ordered_json common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);

nlohmann::ordered_json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);

// get template caps, useful for reporting to server /props endpoint
std::map<std::string, bool> common_chat_templates_get_caps(const common_chat_templates * chat_templates);
|
llama.cpp/common/common.cpp
ADDED
|
@@ -0,0 +1,1824 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "ggml.h"
|
| 2 |
+
#include "gguf.h"
|
| 3 |
+
|
| 4 |
+
#include "common.h"
|
| 5 |
+
#include "log.h"
|
| 6 |
+
#include "llama.h"
|
| 7 |
+
#include "sampling.h"
|
| 8 |
+
#include "unicode.h"
|
| 9 |
+
|
| 10 |
+
#include <algorithm>
|
| 11 |
+
#include <cinttypes>
|
| 12 |
+
#include <climits>
|
| 13 |
+
#include <cmath>
|
| 14 |
+
#include <chrono>
|
| 15 |
+
#include <cstdarg>
|
| 16 |
+
#include <cstring>
|
| 17 |
+
#include <ctime>
|
| 18 |
+
#include <filesystem>
|
| 19 |
+
#include <fstream>
|
| 20 |
+
#include <iostream>
|
| 21 |
+
#include <iterator>
|
| 22 |
+
#include <regex>
|
| 23 |
+
#include <sstream>
|
| 24 |
+
#include <string>
|
| 25 |
+
#include <thread>
|
| 26 |
+
#include <unordered_set>
|
| 27 |
+
#include <vector>
|
| 28 |
+
|
| 29 |
+
#if defined(__APPLE__) && defined(__MACH__)
|
| 30 |
+
#include <sys/types.h>
|
| 31 |
+
#include <sys/sysctl.h>
|
| 32 |
+
#endif
|
| 33 |
+
|
| 34 |
+
#if defined(_WIN32)
|
| 35 |
+
#define WIN32_LEAN_AND_MEAN
|
| 36 |
+
#ifndef NOMINMAX
|
| 37 |
+
# define NOMINMAX
|
| 38 |
+
#endif
|
| 39 |
+
#include <locale>
|
| 40 |
+
#include <windows.h>
|
| 41 |
+
#include <string.h>
|
| 42 |
+
#include <fcntl.h>
|
| 43 |
+
#include <io.h>
|
| 44 |
+
#else
|
| 45 |
+
#include <sys/ioctl.h>
|
| 46 |
+
#include <sys/stat.h>
|
| 47 |
+
#include <unistd.h>
|
| 48 |
+
#endif
|
| 49 |
+
|
| 50 |
+
#if defined(__linux__)
|
| 51 |
+
#include <sys/types.h>
|
| 52 |
+
#include <pwd.h>
|
| 53 |
+
#endif
|
| 54 |
+
|
| 55 |
+
#if defined(_MSC_VER)
|
| 56 |
+
#pragma warning(disable: 4244 4267) // possible loss of data
|
| 57 |
+
#endif
|
| 58 |
+
|
| 59 |
+
// Scoped timer (RAII): the constructor records the current time via ggml_time_us();
// the destructor adds the elapsed microseconds to the referenced accumulator `t_acc`.
// When `disable` is true, t_start_us is set to -1 and the destructor is a no-op.
common_time_meas::common_time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}

common_time_meas::~common_time_meas() {
    if (t_start_us >= 0) { // -1 means measurement was disabled
        t_acc += ggml_time_us() - t_start_us;
    }
}
|
| 66 |
+
|
| 67 |
+
//
|
| 68 |
+
// CPU utils
|
| 69 |
+
//
|
| 70 |
+
|
| 71 |
+
// Best-effort count of physical (not logical) CPU cores.
//  - Linux:   counts distinct thread_siblings sets under sysfs (one set per physical core).
//  - macOS:   sysctl hw.perflevel0.physicalcpu (performance cores), falling back to hw.physicalcpu.
//  - Windows: GetLogicalProcessorInformationEx over RelationProcessorCore records.
// Generic fallback: a heuristic based on std::thread::hardware_concurrency().
int32_t cpu_get_num_physical_cores() {
#ifdef __linux__
    // enumerate the set of thread siblings, num entries is num cores
    std::unordered_set<std::string> siblings;
    for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) {
        std::ifstream thread_siblings("/sys/devices/system/cpu/cpu"
            + std::to_string(cpu) + "/topology/thread_siblings");
        if (!thread_siblings.is_open()) {
            break; // no more cpus
        }
        std::string line;
        if (std::getline(thread_siblings, line)) {
            siblings.insert(line); // identical sibling masks collapse into one entry
        }
    }
    if (!siblings.empty()) {
        return static_cast<int32_t>(siblings.size());
    }
#elif defined(__APPLE__) && defined(__MACH__)
    int32_t num_physical_cores;
    size_t len = sizeof(num_physical_cores);
    // prefer the performance-core count on Apple silicon
    int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
    result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0);
    if (result == 0) {
        return num_physical_cores;
    }
#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
    // TODO: windows + arm64 + mingw64
    unsigned int n_threads_win = std::thread::hardware_concurrency();
    unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4;

    // first call sizes the buffer; it is expected to fail with ERROR_INSUFFICIENT_BUFFER
    DWORD buffer_size = 0;
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) {
        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
            return default_threads;
        }
    }

    std::vector<char> buffer(buffer_size);
    if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data()), &buffer_size)) {
        return default_threads;
    }

    // walk the variable-sized records and count one core per RelationProcessorCore entry
    int32_t num_physical_cores = 0;
    PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(buffer.data());
    while (buffer_size > 0) {
        if (info->Relationship == RelationProcessorCore) {
            num_physical_cores += info->Processor.GroupCount;
        }
        buffer_size -= info->Size;
        info = reinterpret_cast<PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX>(reinterpret_cast<char*>(info) + info->Size);
    }

    return num_physical_cores > 0 ? num_physical_cores : default_threads;
#endif
    // generic fallback: half the logical threads (beyond 4) approximates physical cores
    unsigned int n_threads = std::thread::hardware_concurrency();
    return n_threads > 0 ? (n_threads <= 4 ? n_threads : n_threads / 2) : 4;
}
|
| 132 |
+
|
| 133 |
+
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
#include <pthread.h>

// Execute the CPUID instruction for the given leaf/subleaf.
// rbx is shuffled through rsi around the instruction so that rbx is preserved
// (it may be reserved, e.g. as the PIC base register).
static void cpuid(unsigned leaf, unsigned subleaf,
                  unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) {
    __asm__("movq\t%%rbx,%%rsi\n\t"
            "cpuid\n\t"
            "xchgq\t%%rbx,%%rsi"
            : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx)
            : "0"(leaf), "2"(subleaf));
}

// Pin the calling thread to a single CPU. Returns 0 on success (pthread convention).
static int pin_cpu(int cpu) {
    cpu_set_t mask;
    CPU_ZERO(&mask);
    CPU_SET(cpu, &mask);
    return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask);
}

// Hybrid (P-core/E-core) topology detection: CPUID leaf 7 subleaf 0, EDX bit 15.
static bool is_hybrid_cpu(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(7, 0, &eax, &ebx, &ecx, &edx);
    return !!(edx & (1u << 15));
}

// Core-type query for the CURRENT cpu (so the thread must be pinned first):
// CPUID leaf 0x1a reports the core type in EAX[31:24]; 0x20 denotes Intel Atom (E-core).
static bool is_running_on_efficiency_core(void) {
    unsigned eax, ebx, ecx, edx;
    cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx);
    int intel_atom = 0x20;
    int core_type = (eax & 0xff000000u) >> 24;
    return core_type == intel_atom;
}

// Count cpus suited for heavy math by pinning to each one in turn and probing its type.
// Skips efficiency cores, and counts only one logical cpu per physical core by
// advancing the loop index an extra step after each counted cpu (assumes sibling
// hyperthreads are numbered adjacently — TODO confirm for all topologies).
// Returns -1 if pinning fails. NOTE: mutates this thread's affinity; the caller is
// expected to save and restore it (see cpu_get_num_math).
static int cpu_count_math_cpus(int n_cpu) {
    int result = 0;
    for (int cpu = 0; cpu < n_cpu; ++cpu) {
        if (pin_cpu(cpu)) {
            return -1;
        }
        if (is_running_on_efficiency_core()) {
            continue; // efficiency cores harm lockstep threading
        }
        ++cpu; // hyperthreading isn't useful for linear algebra
        ++result;
    }
    return result;
}

#endif // __x86_64__ && __linux__
|
| 182 |
+
|
| 183 |
+
/**
 * Returns number of CPUs on system that are useful for math.
 * On hybrid x86-64 Linux systems this probes each core (excluding efficiency
 * cores and hyperthread siblings); everywhere else it falls back to the
 * physical-core count.
 */
int32_t cpu_get_num_math() {
#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__)
    int n_cpu = sysconf(_SC_NPROCESSORS_ONLN);
    if (n_cpu < 1) {
        return cpu_get_num_physical_cores();
    }
    if (is_hybrid_cpu()) {
        cpu_set_t affinity;
        // save the current affinity so the pinning done by cpu_count_math_cpus can be undone
        if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) {
            int result = cpu_count_math_cpus(n_cpu);
            pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity); // restore affinity
            if (result > 0) {
                return result;
            }
        }
    }
#endif
    return cpu_get_num_physical_cores();
}
|
| 205 |
+
|
| 206 |
+
// Helper for setting process priority
|
| 207 |
+
|
| 208 |
+
#if defined(_WIN32)

// Apply the requested scheduling priority to the current process by mapping the
// ggml priority enum onto a Windows priority class. GGML_SCHED_PRIO_NORMAL is a
// no-op; returns false (with a warning logged) when SetPriorityClass fails.
bool set_process_priority(enum ggml_sched_priority prio) {
    if (prio == GGML_SCHED_PRIO_NORMAL) {
        return true;
    }

    DWORD p = NORMAL_PRIORITY_CLASS;
    switch (prio) {
        case GGML_SCHED_PRIO_LOW:      p = BELOW_NORMAL_PRIORITY_CLASS; break;
        case GGML_SCHED_PRIO_NORMAL:   p = NORMAL_PRIORITY_CLASS;       break;
        case GGML_SCHED_PRIO_MEDIUM:   p = ABOVE_NORMAL_PRIORITY_CLASS; break;
        case GGML_SCHED_PRIO_HIGH:     p = HIGH_PRIORITY_CLASS;         break;
        case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS;     break;
    }

    if (!SetPriorityClass(GetCurrentProcess(), p)) {
        LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError());
        return false;
    }

    return true;
}

#else // MacOS and POSIX
#include <sys/types.h>
#include <sys/resource.h>

// POSIX variant: map the ggml priority enum onto a nice value (negative = higher
// priority; raising priority typically requires elevated privileges).
// GGML_SCHED_PRIO_NORMAL is a no-op; returns false (with a warning) on failure.
bool set_process_priority(enum ggml_sched_priority prio) {
    if (prio == GGML_SCHED_PRIO_NORMAL) {
        return true;
    }

    int p = 0;
    switch (prio) {
        case GGML_SCHED_PRIO_LOW:      p =   5; break;
        case GGML_SCHED_PRIO_NORMAL:   p =   0; break;
        case GGML_SCHED_PRIO_MEDIUM:   p =  -5; break;
        case GGML_SCHED_PRIO_HIGH:     p = -10; break;
        case GGML_SCHED_PRIO_REALTIME: p = -20; break;
    }

    if (setpriority(PRIO_PROCESS, 0, p) != 0) {
        LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno);
        return false;
    }
    return true;
}

#endif
|
| 258 |
+
|
| 259 |
+
//
|
| 260 |
+
// CLI argument parsing
|
| 261 |
+
//
|
| 262 |
+
|
| 263 |
+
|
| 264 |
+
// Fill in defaults for a cpu_params struct: when n_threads is negative the whole
// struct is treated as unset and is either copied from `role_model` (if provided)
// or gets n_threads from cpu_get_num_math(). Also warns when the explicit cpumask
// has fewer set bits than the requested thread count.
void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) {
    int32_t n_set = 0;

    if (cpuparams.n_threads < 0) {
        // Assuming everything about cpuparams is invalid
        if (role_model != nullptr) {
            cpuparams = *role_model;
        } else {
            cpuparams.n_threads = cpu_get_num_math();
        }
    }

    // count the set bits of the affinity mask
    for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) {
        if (cpuparams.cpumask[i]) {
            n_set++;
        }
    }

    if (n_set && n_set < cpuparams.n_threads) {
        // Not enough set bits, may experience performance issues.
        LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
    }
}
|
| 287 |
+
|
| 288 |
+
bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) {
|
| 289 |
+
size_t dash_loc = range.find('-');
|
| 290 |
+
if (dash_loc == std::string::npos) {
|
| 291 |
+
LOG_ERR("Format of CPU range is invalid! Expected [<start>]-[<end>].\n");
|
| 292 |
+
return false;
|
| 293 |
+
}
|
| 294 |
+
|
| 295 |
+
size_t start_i;
|
| 296 |
+
size_t end_i;
|
| 297 |
+
|
| 298 |
+
if (dash_loc == 0) {
|
| 299 |
+
start_i = 0;
|
| 300 |
+
} else {
|
| 301 |
+
start_i = std::stoull(range.substr(0, dash_loc));
|
| 302 |
+
if (start_i >= GGML_MAX_N_THREADS) {
|
| 303 |
+
LOG_ERR("Start index out of bounds!\n");
|
| 304 |
+
return false;
|
| 305 |
+
}
|
| 306 |
+
}
|
| 307 |
+
|
| 308 |
+
if (dash_loc == range.length() - 1) {
|
| 309 |
+
end_i = GGML_MAX_N_THREADS - 1;
|
| 310 |
+
} else {
|
| 311 |
+
end_i = std::stoull(range.substr(dash_loc + 1));
|
| 312 |
+
if (end_i >= GGML_MAX_N_THREADS) {
|
| 313 |
+
LOG_ERR("End index out of bounds!\n");
|
| 314 |
+
return false;
|
| 315 |
+
}
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
for (size_t i = start_i; i <= end_i; i++) {
|
| 319 |
+
boolmask[i] = true;
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
return true;
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
// Parse a hex CPU-affinity mask (optionally prefixed with "0x") into `boolmask`.
// The leftmost hex digit maps to the highest CPU indices: digit i (from the left)
// covers bit indices [num_digits*4-1-4i .. num_digits*4-4-4i]. At most 128 digits
// are consumed (presumably GGML_MAX_N_THREADS/4 — confirm). Bits are OR-ed into
// the existing mask, so repeated calls accumulate. Returns false on a non-hex
// character.
bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) {
    // Discard potential 0x prefix
    size_t start_i = 0;
    if (mask.length() >= 2 && mask.substr(0, 2) == "0x") {
        start_i = 2;
    }

    size_t num_digits = mask.length() - start_i;
    if (num_digits > 128) num_digits = 128;

    size_t end_i = num_digits + start_i;

    // n tracks the bit index of the current digit's most-significant bit
    for (size_t i = start_i, n = (num_digits*4 - 1); i < end_i; i++, n-=4) {
        char c = mask.at(i);
        int8_t id = c;

        // convert hex digit character to its numeric value 0..15
        if ((c >= '0' && c <= '9')) {
            id -= '0';
        } else if (c >= 'a' && c <= 'f') {
            id -= 'a' - 10;
        } else if (c >= 'A' && c <= 'F') {
            id -= 'A' - 10;
        } else {
            LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i));
            return false;
        }

        // spread the digit's four bits onto the mask (MSB first)
        boolmask[ n ] = boolmask[ n ] || ((id & 8) != 0);
        boolmask[n - 1] = boolmask[n - 1] || ((id & 4) != 0);
        boolmask[n - 2] = boolmask[n - 2] || ((id & 2) != 0);
        boolmask[n - 3] = boolmask[n - 3] || ((id & 1) != 0);
    }

    return true;
}
|
| 360 |
+
|
| 361 |
+
// One-time process initialization for the common library: routes llama.cpp
// logging through the common default log callback and prints build information.
void common_init() {
    llama_log_set(common_log_default_callback, NULL);

#ifdef NDEBUG
    const char * build_type = "";
#else
    const char * build_type = " (debug)"; // tag non-release builds in the banner
#endif

    LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type);
}
|
| 372 |
+
|
| 373 |
+
// Build a one-line "system_info" summary string: thread counts (incl. the batch
// thread count when explicitly set), total logical processors, and the llama
// backend feature string.
std::string common_params_get_system_info(const common_params & params) {
    std::ostringstream os;

    os << "system_info: n_threads = " << params.cpuparams.n_threads;
    if (params.cpuparams_batch.n_threads != -1) { // -1 means "same as n_threads"
        os << " (n_threads_batch = " << params.cpuparams_batch.n_threads << ")";
    }
#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later
    // TODO: windows + arm64 + mingw64
    // GetActiveProcessorCount counts across all processor groups (>64 cpus)
    DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
    os << " / " << logicalProcessorCount << " | " << llama_print_system_info();
#else
    os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info();
#endif

    return os.str();
}
|
| 390 |
+
|
| 391 |
+
//
|
| 392 |
+
// String utils
|
| 393 |
+
//
|
| 394 |
+
|
| 395 |
+
// printf-style formatting into a std::string.
// Two va_list copies are needed because the first vsnprintf (the sizing pass)
// consumes its va_list; the second pass writes into the sized buffer.
std::string string_format(const char * fmt, ...) {
    va_list ap;
    va_list ap2;
    va_start(ap, fmt);
    va_copy(ap2, ap);
    int size = vsnprintf(NULL, 0, fmt, ap); // sizing pass: required length excluding the NUL
    GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT
    std::vector<char> buf(size + 1);
    int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2);
    GGML_ASSERT(size2 == size); // both passes must agree
    va_end(ap2);
    va_end(ap);
    return std::string(buf.data(), size);
}
|
| 409 |
+
|
| 410 |
+
// Return `str` with leading and trailing whitespace removed.
// Characters are cast to unsigned char before calling std::isspace: passing a
// plain char with a negative value (e.g. UTF-8 continuation bytes on platforms
// where char is signed) to std::isspace is undefined behavior.
std::string string_strip(const std::string & str) {
    size_t start = 0;
    size_t end = str.size();
    while (start < end && std::isspace(static_cast<unsigned char>(str[start]))) {
        start++;
    }
    while (end > start && std::isspace(static_cast<unsigned char>(str[end - 1]))) {
        end--;
    }
    return str.substr(start, end - start);
}
|
| 421 |
+
|
| 422 |
+
// Current local time formatted as "YYYY_MM_DD-HH_MM_SS.nnnnnnnnn".
// Lexicographic order matches chronological order, so the result is suitable
// for sortable filenames (e.g. logs).
std::string string_get_sortable_timestamp() {
    using clock = std::chrono::system_clock;

    const clock::time_point current_time = clock::now();
    const time_t as_time_t = clock::to_time_t(current_time);
    char timestamp_no_ns[100];
    std::strftime(timestamp_no_ns, 100, "%Y_%m_%d-%H_%M_%S", std::localtime(&as_time_t));

    // sub-second part: nanoseconds within the current second, zero-padded to 9 digits
    const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
        current_time.time_since_epoch() % 1000000000).count();
    char timestamp_ns[11];
    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

    return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
}
|
| 437 |
+
|
| 438 |
+
// Replace every occurrence of `search` in `s` with `replace`, in a single
// left-to-right pass (occurrences introduced by `replace` are not re-scanned).
// An empty `search` leaves the string untouched.
void string_replace_all(std::string & s, const std::string & search, const std::string & replace) {
    if (search.empty()) {
        return;
    }
    std::string out;
    out.reserve(s.length());
    size_t from = 0;
    for (size_t hit = s.find(search); hit != std::string::npos; hit = s.find(search, from)) {
        out.append(s, from, hit - from); // copy the unmatched prefix
        out += replace;
        from = hit + search.length();
    }
    out.append(s, from, std::string::npos); // copy the tail after the last match
    s = std::move(out);
}
|
| 454 |
+
|
| 455 |
+
// Return `s` with every regex metacharacter ( . ^ $ | ( ) * + ? [ ] { } \ )
// prefixed by a backslash, so the result matches `s` literally in a regex.
std::string regex_escape(const std::string & s) {
    static const std::string metachars = ".^$|()*+?[]{}\\";
    std::string escaped;
    escaped.reserve(s.size());
    for (const char ch : s) {
        if (metachars.find(ch) != std::string::npos) {
            escaped += '\\';
        }
        escaped += ch;
    }
    return escaped;
}
|
| 459 |
+
|
| 460 |
+
// Concatenate `values` into one string, inserting `separator` between
// consecutive elements. An empty vector yields an empty string.
std::string string_join(const std::vector<std::string> & values, const std::string & separator) {
    std::string joined;
    for (size_t idx = 0; idx < values.size(); ++idx) {
        if (idx > 0) {
            joined += separator;
        }
        joined += values[idx];
    }
    return joined;
}
|
| 470 |
+
|
| 471 |
+
// Split `str` on every occurrence of `delimiter`, keeping empty fields, so the
// result always has (number of matches + 1) elements ("" splits to {""}).
// NOTE: an empty `delimiter` makes find() match at every position and the loop
// never terminates — callers must pass a non-empty delimiter.
std::vector<std::string> string_split(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> fields;
    size_t from = 0;
    for (size_t hit = str.find(delimiter); hit != std::string::npos; hit = str.find(delimiter, from)) {
        fields.push_back(str.substr(from, hit - from));
        from = hit + delimiter.length();
    }
    fields.push_back(str.substr(from)); // trailing field (possibly empty)
    return fields;
}
|
| 486 |
+
|
| 487 |
+
// Return `str` concatenated with itself `n` times ("" for n == 0).
// Reserves the full output size up front to avoid reallocation.
std::string string_repeat(const std::string & str, size_t n) {
    if (n == 0) {
        return "";
    }

    std::string out;
    out.reserve(str.length() * n);
    while (n--) {
        out.append(str);
    }
    return out;
}
|
| 501 |
+
|
| 502 |
+
// Render a boolean as the literal text "true" or "false".
std::string string_from(bool value) {
    if (value) {
        return "true";
    }
    return "false";
}
|
| 505 |
+
|
| 506 |
+
// Render an int vector as "[ 1, 2, 3 ]" (an empty vector yields "[  ]") —
// debugging/logging helper.
std::string string_from(const std::vector<int> & values) {
    std::stringstream out;

    out << "[ ";
    for (size_t idx = 0; idx < values.size(); ++idx) {
        if (idx > 0) {
            out << ", ";
        }
        out << values[idx];
    }
    out << " ]";

    return out.str();
}
|
| 523 |
+
|
| 524 |
+
// Render a token list as "[ 'piece':id, ... ]", detokenizing each token via
// common_token_to_piece — debugging/logging helper.
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
    std::stringstream buf;

    buf << "[ ";

    bool first = true;
    for (const auto & token : tokens) {
        if (!first) {
            buf << ", ";
        } else {
            first = false;
        }

        auto detokenized = common_token_to_piece(ctx, token);

        buf << "'" << detokenized << "'"
            << ":" << std::to_string(token);
    }

    buf << " ]";

    return buf.str();
}
|
| 547 |
+
|
| 548 |
+
// Render a llama_batch as a multi-line "[ i, token 'piece', pos, n_seq_id,
// seq_id, logits ... ]" dump. Only the first seq_id of each token is shown —
// debugging/logging helper.
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) {
    std::stringstream buf;

    buf << "[ ";

    bool first = true;
    for (int i = 0; i < batch.n_tokens; ++i) {
        if (!first) {
            buf << ", ";
        } else {
            first = false;
        }

        auto detokenized = common_token_to_piece(ctx, batch.token[i]);

        buf << "\n" << std::to_string(i)
            << ", token '" << detokenized << "'"
            << ", pos " << std::to_string(batch.pos[i])
            << ", n_seq_id " << std::to_string(batch.n_seq_id[i])
            << ", seq_id " << std::to_string(batch.seq_id[i][0]) // first sequence only
            << ", logits " << std::to_string(batch.logits[i]);
    }

    buf << " ]";

    return buf.str();
}
|
| 575 |
+
|
| 576 |
+
void string_process_escapes(std::string & input) {
    // Decode C-style escape sequences (\n \r \t \' \" \\ \xHH) in place.
    // The decoded string is never longer than the input, so the same buffer
    // can be compacted with separate read/write cursors.
    const std::size_t n = input.length();
    std::size_t w = 0; // write cursor

    std::size_t r = 0; // read cursor
    while (r < n) {
        const char c = input[r];
        if (c != '\\' || r + 1 >= n) {
            // ordinary character, or a lone trailing backslash: copy verbatim
            input[w++] = c;
            ++r;
            continue;
        }

        // consume the backslash and look at the escape character
        ++r;
        const char esc = input[r];
        switch (esc) {
            case 'n':  input[w++] = '\n'; break;
            case 'r':  input[w++] = '\r'; break;
            case 't':  input[w++] = '\t'; break;
            case '\'': input[w++] = '\''; break;
            case '\"': input[w++] = '\"'; break;
            case '\\': input[w++] = '\\'; break;
            case 'x': {
                // \xHH - exactly two valid hex digits must follow
                if (r + 2 < n) {
                    const char hex[3] = { input[r + 1], input[r + 2], 0 };
                    char * end = nullptr;
                    const long val = std::strtol(hex, &end, 16);
                    if (end == hex + 2) {
                        r += 2;
                        input[w++] = char(val);
                        break;
                    }
                }
                // malformed \x escape: keep it literally
                input[w++] = '\\';
                input[w++] = esc;
                break;
            }
            default:
                // unknown escape: keep the backslash and the character
                input[w++] = '\\';
                input[w++] = esc;
                break;
        }
        ++r;
    }

    input.resize(w);
}
|
| 612 |
+
|
| 613 |
+
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides) {
|
| 614 |
+
const char * sep = strchr(data, '=');
|
| 615 |
+
if (sep == nullptr || sep - data >= 128) {
|
| 616 |
+
LOG_ERR("%s: malformed KV override '%s'\n", __func__, data);
|
| 617 |
+
return false;
|
| 618 |
+
}
|
| 619 |
+
llama_model_kv_override kvo;
|
| 620 |
+
std::strncpy(kvo.key, data, sep - data);
|
| 621 |
+
kvo.key[sep - data] = 0;
|
| 622 |
+
sep++;
|
| 623 |
+
if (strncmp(sep, "int:", 4) == 0) {
|
| 624 |
+
sep += 4;
|
| 625 |
+
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
|
| 626 |
+
kvo.val_i64 = std::atol(sep);
|
| 627 |
+
} else if (strncmp(sep, "float:", 6) == 0) {
|
| 628 |
+
sep += 6;
|
| 629 |
+
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
|
| 630 |
+
kvo.val_f64 = std::atof(sep);
|
| 631 |
+
} else if (strncmp(sep, "bool:", 5) == 0) {
|
| 632 |
+
sep += 5;
|
| 633 |
+
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
|
| 634 |
+
if (std::strcmp(sep, "true") == 0) {
|
| 635 |
+
kvo.val_bool = true;
|
| 636 |
+
} else if (std::strcmp(sep, "false") == 0) {
|
| 637 |
+
kvo.val_bool = false;
|
| 638 |
+
} else {
|
| 639 |
+
LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data);
|
| 640 |
+
return false;
|
| 641 |
+
}
|
| 642 |
+
} else if (strncmp(sep, "str:", 4) == 0) {
|
| 643 |
+
sep += 4;
|
| 644 |
+
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR;
|
| 645 |
+
if (strlen(sep) > 127) {
|
| 646 |
+
LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data);
|
| 647 |
+
return false;
|
| 648 |
+
}
|
| 649 |
+
strncpy(kvo.val_str, sep, 127);
|
| 650 |
+
kvo.val_str[127] = '\0';
|
| 651 |
+
} else {
|
| 652 |
+
LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data);
|
| 653 |
+
return false;
|
| 654 |
+
}
|
| 655 |
+
overrides.emplace_back(std::move(kvo));
|
| 656 |
+
return true;
|
| 657 |
+
}
|
| 658 |
+
|
| 659 |
+
//
|
| 660 |
+
// Filesystem utils
|
| 661 |
+
//
|
| 662 |
+
|
| 663 |
+
// Validate if a filename is safe to use.
// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function.
// Rejects: empty / over-long names, invalid or overlong UTF-8, control characters,
// Unicode lookalikes of illegal path characters, '..' sequences, and names Windows
// would silently rewrite (leading/trailing space, trailing dot).
// `allow_subdirs` permits '/' and '\\' so that relative sub-paths can pass.
bool fs_validate_filename(const std::string & filename, bool allow_subdirs) {
    if (!filename.length()) {
        // Empty filename invalid
        return false;
    }
    if (filename.length() > 255) {
        // Limit at common largest possible filename on Linux filesystems
        // to avoid unnecessary further validation
        // (On systems with smaller limits it will be caught by the OS)
        return false;
    }

    // decode the name codepoint-by-codepoint, validating each one
    size_t offset = 0;
    while (offset < filename.size()) {
        utf8_parse_result result = parse_utf8_codepoint(filename, offset);

        if (result.status != utf8_parse_result::SUCCESS) {
            // malformed UTF-8 byte sequence
            return false;
        }
        uint32_t c = result.codepoint;

        // reject overlong encodings: a codepoint must be encoded with the
        // minimal number of bytes (e.g. a 2-byte sequence must encode >= 0x80)
        if ((result.bytes_consumed == 2 && c < 0x80) ||
            (result.bytes_consumed == 3 && c < 0x800) ||
            (result.bytes_consumed == 4 && c < 0x10000)) {
            return false;
        }

        // Check for forbidden codepoints:
        // - Control characters
        // - Unicode equivalents of illegal characters
        // - UTF-16 surrogate pairs
        // - UTF-8 replacement character
        // - Byte order mark (BOM)
        // - Illegal characters: / \ : * ? " < > |
        if (c <= 0x1F // Control characters (C0)
            || c == 0x7F // Control characters (DEL)
            || (c >= 0x80 && c <= 0x9F) // Control characters (C1)
            || c == 0xFF0E // Fullwidth Full Stop (period equivalent)
            || c == 0x2215 // Division Slash (forward slash equivalent)
            || c == 0x2216 // Set Minus (backslash equivalent)
            || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
            || c > 0x10FFFF // Max Unicode limit
            || c == 0xFFFD // Replacement Character (UTF-8)
            || c == 0xFEFF // Byte Order Mark (BOM)
            || c == ':' || c == '*' // Illegal characters
            || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
            return false;
        }
        if (!allow_subdirs && (c == '/' || c == '\\')) {
            // Subdirectories not allowed, reject path separators
            return false;
        }
        offset += result.bytes_consumed;
    }

    // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename
    // Unicode and other whitespace is not affected, only 0x20 space
    if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') {
        return false;
    }

    // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead)
    if (filename.find("..") != std::string::npos) {
        return false;
    }

    // Reject "."
    if (filename == ".") {
        return false;
    }

    return true;
}
|
| 738 |
+
|
| 739 |
+
#include <iostream>
|
| 740 |
+
|
| 741 |
+
|
| 742 |
+
#ifdef _WIN32
|
| 743 |
+
static std::wstring utf8_to_wstring(const std::string & str) {
|
| 744 |
+
if (str.empty()) {
|
| 745 |
+
return std::wstring();
|
| 746 |
+
}
|
| 747 |
+
|
| 748 |
+
int size = MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), NULL, 0);
|
| 749 |
+
|
| 750 |
+
if (size <= 0) {
|
| 751 |
+
return std::wstring();
|
| 752 |
+
}
|
| 753 |
+
|
| 754 |
+
std::wstring wstr(size, 0);
|
| 755 |
+
MultiByteToWideChar(CP_UTF8, 0, str.c_str(), (int)str.size(), &wstr[0], size);
|
| 756 |
+
|
| 757 |
+
return wstr;
|
| 758 |
+
}
|
| 759 |
+
#endif
|
| 760 |
+
|
| 761 |
+
// Create a directory and any missing parent directories (like `mkdir -p`).
// Returns true if the directory exists afterwards, false otherwise.
// Note: only components up to the last path separator are created, so pass a
// trailing separator if the final component should be created too.
bool fs_create_directory_with_parents(const std::string & path) {
#ifdef _WIN32
    std::wstring wpath = utf8_to_wstring(path);

    // if the path already exists, check whether it's a directory
    const DWORD attributes = GetFileAttributesW(wpath.c_str());
    if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) {
        return true;
    }

    size_t pos_slash = 0;

    // process path from front to back, procedurally creating directories
    // BUGFIX: search the *wide* string - UTF-8 byte offsets and UTF-16 unit
    // offsets differ for non-ASCII paths, so using positions found in `path`
    // to slice `wpath` would cut the wide string at the wrong place
    while ((pos_slash = wpath.find(L'\\', pos_slash)) != std::wstring::npos) {
        const std::wstring subpath = wpath.substr(0, pos_slash);

        pos_slash += 1;

        // skip the drive letter, in some systems it can return an access denied error
        if (subpath.length() == 2 && subpath[1] == ':') {
            continue;
        }

        const bool success = CreateDirectoryW(subpath.c_str(), NULL);

        if (!success) {
            const DWORD error = GetLastError();

            // if the path already exists, ensure that it's a directory
            if (error == ERROR_ALREADY_EXISTS) {
                const DWORD attributes = GetFileAttributesW(subpath.c_str());
                if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) {
                    return false;
                }
            } else {
                return false;
            }
        }
    }

    return true;
#else
    // if the path already exists, check whether it's a directory
    struct stat info;
    if (stat(path.c_str(), &info) == 0) {
        return S_ISDIR(info.st_mode);
    }

    size_t pos_slash = 1; // skip leading slashes for directory creation

    // process path from front to back, procedurally creating directories
    while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) {
        const std::string subpath = path.substr(0, pos_slash);
        struct stat info;

        // if the path already exists, ensure that it's a directory
        if (stat(subpath.c_str(), &info) == 0) {
            if (!S_ISDIR(info.st_mode)) {
                return false;
            }
        } else {
            // create parent directories
            const int ret = mkdir(subpath.c_str(), 0755);
            if (ret != 0) {
                return false;
            }
        }

        pos_slash += 1;
    }

    return true;
#endif // _WIN32
}
|
| 836 |
+
|
| 837 |
+
bool fs_is_directory(const std::string & path) {
    // True only when the path exists and refers to a directory.
    namespace stdfs = std::filesystem;
    const stdfs::path p{path};
    return stdfs::exists(p) && stdfs::is_directory(p);
}
|
| 841 |
+
|
| 842 |
+
// Resolve the cache directory for downloaded models/files, always with a
// trailing path separator. Resolution order:
//   1. $LLAMA_CACHE (used verbatim)
//   2. platform default ($XDG_CACHE_HOME or ~/.cache on Linux/BSD,
//      ~/Library/Caches on macOS, %LOCALAPPDATA% on Windows) + "llama.cpp"
// Throws std::runtime_error when no home directory can be determined.
std::string fs_get_cache_directory() {
    std::string cache_directory = "";
    auto ensure_trailing_slash = [](std::string p) {
        // Make sure to add trailing slash
        // NOTE(review): p.back() assumes p is non-empty - an empty LLAMA_CACHE/
        // LOCALAPPDATA value would be undefined behavior here; verify callers/env.
        if (p.back() != DIRECTORY_SEPARATOR) {
            p += DIRECTORY_SEPARATOR;
        }
        return p;
    };
    if (getenv("LLAMA_CACHE")) {
        // explicit override wins and is used as-is (no "llama.cpp" suffix)
        cache_directory = std::getenv("LLAMA_CACHE");
    } else {
#if defined(__linux__) || defined(__FreeBSD__) || defined(_AIX) || \
    defined(__OpenBSD__) || defined(__NetBSD__)
        if (std::getenv("XDG_CACHE_HOME")) {
            cache_directory = std::getenv("XDG_CACHE_HOME");
        } else if (std::getenv("HOME")) {
            cache_directory = std::getenv("HOME") + std::string("/.cache/");
        } else {
#if defined(__linux__)
            /* no $HOME is defined, fallback to getpwuid */
            struct passwd *pw = getpwuid(getuid());
            if ((!pw) || (!pw->pw_dir)) {
                throw std::runtime_error("Failed to find $HOME directory");
            }

            cache_directory = std::string(pw->pw_dir) + std::string("/.cache/");
#else /* defined(__linux__) */
            throw std::runtime_error("Failed to find $HOME directory");
#endif /* defined(__linux__) */
        }
#elif defined(__APPLE__)
        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
#elif defined(_WIN32)
        cache_directory = std::getenv("LOCALAPPDATA");
#elif defined(__EMSCRIPTEN__)
        GGML_ABORT("not implemented on this platform");
#else
#  error Unknown architecture
#endif
        // platform default gets an app-specific subdirectory
        cache_directory = ensure_trailing_slash(cache_directory);
        cache_directory += "llama.cpp";
    }
    return ensure_trailing_slash(cache_directory);
}
|
| 887 |
+
|
| 888 |
+
std::string fs_get_cache_file(const std::string & filename) {
|
| 889 |
+
GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos);
|
| 890 |
+
std::string cache_directory = fs_get_cache_directory();
|
| 891 |
+
const bool success = fs_create_directory_with_parents(cache_directory);
|
| 892 |
+
if (!success) {
|
| 893 |
+
throw std::runtime_error("failed to create cache directory: " + cache_directory);
|
| 894 |
+
}
|
| 895 |
+
return cache_directory + filename;
|
| 896 |
+
}
|
| 897 |
+
|
| 898 |
+
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories) {
|
| 899 |
+
std::vector<common_file_info> files;
|
| 900 |
+
if (path.empty()) return files;
|
| 901 |
+
|
| 902 |
+
std::filesystem::path dir(path);
|
| 903 |
+
if (!std::filesystem::exists(dir) || !std::filesystem::is_directory(dir)) {
|
| 904 |
+
return files;
|
| 905 |
+
}
|
| 906 |
+
|
| 907 |
+
for (const auto & entry : std::filesystem::directory_iterator(dir)) {
|
| 908 |
+
try {
|
| 909 |
+
// Only include regular files (skip directories)
|
| 910 |
+
const auto & p = entry.path();
|
| 911 |
+
if (std::filesystem::is_regular_file(p)) {
|
| 912 |
+
common_file_info info;
|
| 913 |
+
info.path = p.string();
|
| 914 |
+
info.name = p.filename().string();
|
| 915 |
+
info.is_dir = false;
|
| 916 |
+
try {
|
| 917 |
+
info.size = static_cast<size_t>(std::filesystem::file_size(p));
|
| 918 |
+
} catch (const std::filesystem::filesystem_error &) {
|
| 919 |
+
info.size = 0;
|
| 920 |
+
}
|
| 921 |
+
files.push_back(std::move(info));
|
| 922 |
+
} else if (include_directories && std::filesystem::is_directory(p)) {
|
| 923 |
+
common_file_info info;
|
| 924 |
+
info.path = p.string();
|
| 925 |
+
info.name = p.filename().string();
|
| 926 |
+
info.size = 0; // Directories have no size
|
| 927 |
+
info.is_dir = true;
|
| 928 |
+
files.push_back(std::move(info));
|
| 929 |
+
}
|
| 930 |
+
} catch (const std::filesystem::filesystem_error &) {
|
| 931 |
+
// skip entries we cannot inspect
|
| 932 |
+
continue;
|
| 933 |
+
}
|
| 934 |
+
}
|
| 935 |
+
|
| 936 |
+
return files;
|
| 937 |
+
}
|
| 938 |
+
|
| 939 |
+
//
|
| 940 |
+
// TTY utils
|
| 941 |
+
//
|
| 942 |
+
|
| 943 |
+
bool tty_can_use_colors() {
    // Honor the NO_COLOR convention (https://no-color.org/):
    // any non-empty value disables colors unconditionally.
    const char * no_color = std::getenv("NO_COLOR");
    if (no_color != nullptr && no_color[0] != '\0') {
        return false;
    }

    // A "dumb" terminal cannot render escape sequences.
    const char * term = std::getenv("TERM");
    if (term != nullptr && std::strcmp(term, "dumb") == 0) {
        return false;
    }

    // Colors are usable if either stream that log output may go to is a terminal.
    return isatty(fileno(stdout)) || isatty(fileno(stderr));
}
|
| 965 |
+
|
| 966 |
+
//
|
| 967 |
+
// Model utils
|
| 968 |
+
//
|
| 969 |
+
|
| 970 |
+
// TODO: move to common/sampling
// Seed sampling parameters from metadata stored in the model, but only for
// fields the user did not set explicitly (tracked by the bit mask in
// sparams.user_sampling_config). Unparseable or absent metadata leaves the
// corresponding field untouched.
static void common_init_sampler_from_model(
        const llama_model * model,
        common_params_sampling & sparams) {

    // bit mask of fields the user configured explicitly; those are never overridden
    const uint64_t config = sparams.user_sampling_config;

    // read an int32 metadata value into dst, unless the user already set the field
    auto get_int32 = [&](const char * key, int32_t & dst, uint64_t user_config) {
        if (config & user_config) {
            return;
        }

        char buf[64] = {0};
        if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
            char * end = nullptr;
            int32_t v = strtol(buf, &end, 10);
            if (end && end != buf) {
                // at least one digit was parsed
                dst = v;
            }
        }
    };

    // read a float metadata value into dst, unless the user already set the field
    auto get_float = [&](const char * key, float & dst, uint64_t user_config) {
        if (config & user_config) {
            return;
        }

        char buf[128] = {0};
        if (llama_model_meta_val_str(model, key, buf, sizeof(buf)) > 0) {
            char * end = nullptr;
            float v = strtof(buf, &end);
            if (end && end != buf) {
                dst = v;
            }
        }
    };

    // Sampling sequence
    if (!(config & common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS)) {
        char buf[512] = {0};
        if (llama_model_meta_val_str(model, llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_SEQUENCE), buf, sizeof(buf)) > 0) {
            // the sequence is stored as ';'-separated sampler names
            const std::vector<std::string> sampler_names = string_split<std::string>(std::string(buf), ';');
            if (!sampler_names.empty()) {
                sparams.samplers = common_sampler_types_from_names(sampler_names, true);
            }
        }
    }

    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_K), sparams.top_k, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_K);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TOP_P), sparams.top_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TOP_P);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIN_P), sparams.min_p, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIN_P);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_PROBABILITY), sparams.xtc_probability, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_XTC_THRESHOLD), sparams.xtc_threshold, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_TEMP), sparams.temp, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_TEMP);
    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_LAST_N), sparams.penalty_last_n, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_PENALTY_REPEAT), sparams.penalty_repeat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT);
    get_int32(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT), sparams.mirostat, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_TAU), sparams.mirostat_tau, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU);
    get_float(llama_model_meta_key_str(LLAMA_MODEL_META_KEY_SAMPLING_MIROSTAT_ETA), sparams.mirostat_eta, common_params_sampling_config::COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA);
}
|
| 1030 |
+
|
| 1031 |
+
// Private state of common_init_result (pimpl): owns the model, the context,
// the loaded LoRA adapters and the per-sequence samplers.
struct common_init_result::impl {
    impl() = default;
    ~impl() = default;

    // note: the order in which model, context, etc. are declared matters because their destructors will be called bottom-to-top

    llama_model_ptr model;     // owning handle; null when loading failed
    llama_context_ptr context; // owning handle; destroyed before the model (declared after it)

    std::vector<llama_adapter_lora_ptr> lora; // loaded LoRA adapters (owned)

    std::vector<common_sampler_ptr> samplers;                  // one sampler per sequence
    std::vector<llama_sampler_seq_config> samplers_seq_config; // seq_id -> sampler mapping handed to the context
};
|
| 1045 |
+
|
| 1046 |
+
// Load the model, the LoRA adapters, and create the context together with the
// per-sequence samplers. On failure the relevant pimpl members remain null,
// which callers detect through model()/context() returning nullptr.
// Note: mutates `params` (sampling defaults from model metadata, EOG logit
// biases, lora adapter metadata).
common_init_result::common_init_result(common_params & params) :
    pimpl(new impl{}) {
    auto mparams = common_model_params_to_llama(params);
    auto cparams = common_context_params_to_llama(params);

    if (params.fit_params) {
        LOG_INF("%s: fitting params to device memory, for bugs during this step try to reproduce them with -fit off, or provide --verbose logs if the bug only occurs with -fit on\n", __func__);
        llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
                params.tensor_split,
                params.tensor_buft_overrides.data(),
                params.fit_params_target.data(),
                params.fit_params_min_ctx,
                params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
    }

    llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams);
    if (model == NULL) {
        return;
    }

    pimpl->model.reset(model);

    const llama_vocab * vocab = llama_model_get_vocab(model);

    // load and optionally apply lora adapters (must be loaded before context creation)
    for (auto & la : params.lora_adapters) {
        llama_adapter_lora_ptr lora;
        lora.reset(llama_adapter_lora_init(model, la.path.c_str()));
        if (lora == nullptr) {
            LOG_ERR("%s: failed to load lora adapter '%s'\n", __func__, la.path.c_str());
            // BUGFIX: release the model and leave the result empty to signal failure.
            // The previous code called pimpl->model.reset(model), i.e. reset with the
            // pointer the unique_ptr already owns - that frees the model and then
            // re-adopts the dangling pointer, causing a double free on destruction.
            pimpl->model.reset();
            return;
        }

        char buf[1024];
        la.ptr = lora.get();
        llama_adapter_meta_val_str(la.ptr, "adapter.lora.task_name", buf, sizeof(buf));
        la.task_name = buf;
        llama_adapter_meta_val_str(la.ptr, "adapter.lora.prompt_prefix", buf, sizeof(buf));
        la.prompt_prefix = buf;
        pimpl->lora.emplace_back(std::move(lora)); // copy to list of loaded adapters
    }

    // updates params.sampling
    // TODO: fix naming
    common_init_sampler_from_model(model, params.sampling);

    if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
        LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__);
        params.sampling.ignore_eos = false;
    }

    // initialize once: collect -inf biases for every end-of-generation token
    for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) {
        if (llama_vocab_is_eog(vocab, i)) {
            LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(vocab, i).c_str(), -INFINITY);
            params.sampling.logit_bias_eog.push_back({i, -INFINITY});
        }
    }

    if (params.sampling.ignore_eos) {
        // add EOG biases to the active set of logit biases
        params.sampling.logit_bias.insert(
                params.sampling.logit_bias.end(),
                params.sampling.logit_bias_eog.begin(), params.sampling.logit_bias_eog.end());
    }

    //if (params.sampling.penalty_last_n == -1) {
    //    LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
    //    params.sampling.penalty_last_n = llama_n_ctx(lctx);
    //}

    //if (params.sampling.dry_penalty_last_n == -1) {
    //    LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx));
    //    params.sampling.dry_penalty_last_n = llama_n_ctx(lctx);
    //}

    // init the backend samplers as part of the context creation
    pimpl->samplers.resize(cparams.n_seq_max);
    pimpl->samplers_seq_config.resize(cparams.n_seq_max);

    for (int i = 0; i < (int) cparams.n_seq_max; ++i) {
        pimpl->samplers[i].reset(common_sampler_init(model, params.sampling));
        pimpl->samplers_seq_config[i] = { i, common_sampler_get(pimpl->samplers[i].get()) };
    }

    if (params.sampling.backend_sampling) {
        cparams.samplers   = pimpl->samplers_seq_config.data();
        cparams.n_samplers = pimpl->samplers_seq_config.size();
    }

    llama_context * lctx = llama_init_from_model(model, cparams);
    if (lctx == NULL) {
        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
        return; // model stays loaded; callers detect failure via context() == nullptr
    }

    pimpl->context.reset(lctx);
}
|
| 1145 |
+
|
| 1146 |
+
// Non-owning accessor; returns nullptr if model loading failed.
llama_model * common_init_result::model() {
    return pimpl->model.get();
}
|
| 1149 |
+
|
| 1150 |
+
// Non-owning accessor; returns nullptr if context creation failed.
llama_context * common_init_result::context() {
    return pimpl->context.get();
}
|
| 1153 |
+
|
| 1154 |
+
// Non-owning accessor for the per-sequence sampler.
// NOTE(review): no bounds check - seq_id is assumed to be < n_seq_max; verify at call sites.
common_sampler * common_init_result::sampler(llama_seq_id seq_id) {
    return pimpl->samplers[seq_id].get();
}
|
| 1157 |
+
|
| 1158 |
+
void common_init_result::reset_samplers() {
|
| 1159 |
+
for (int i = 0; i < (int) pimpl->samplers.size(); ++i) {
|
| 1160 |
+
llama_sampler_reset(common_sampler_get(pimpl->samplers[i].get()));
|
| 1161 |
+
}
|
| 1162 |
+
}
|
| 1163 |
+
|
| 1164 |
+
// Mutable reference to the owning container of loaded LoRA adapters.
std::vector<llama_adapter_lora_ptr> & common_init_result::lora() {
    return pimpl->lora;
}
|
| 1167 |
+
|
| 1168 |
+
// Convenience wrapper around common_init_result: loads model + context, then
// applies post-init setup (control vectors, LoRA, reranking sanity checks and
// an optional warmup decode). Always returns a non-null result object; failure
// is signaled by res->model() / res->context() being nullptr, or by an early
// return before the optional steps completed.
common_init_result_ptr common_init_from_params(common_params & params) {
    common_init_result_ptr res(new common_init_result(params));

    llama_model * model = res->model();
    if (model == NULL) {
        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str());
        return res;
    }

    llama_context * lctx = res->context();
    if (lctx == NULL) {
        LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.path.c_str());
        return res;
    }

    const llama_vocab * vocab = llama_model_get_vocab(model);

    // context shifting only works when the memory backend supports it
    if (params.ctx_shift && !llama_memory_can_shift(llama_get_memory(lctx))) {
        LOG_WRN("%s: KV cache shifting is not supported for this context, disabling KV cache shifting\n", __func__);
        params.ctx_shift = false;
    }

    if (!params.control_vectors.empty()) {
        // default layer range: [1, n_layer]
        if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
        if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_model_n_layer(model);

        const auto cvec = common_control_vector_load(params.control_vectors);
        if (cvec.n_embd == -1) {
            // loading the control vector failed
            return res;
        }

        int err = llama_set_adapter_cvec(
                lctx,
                cvec.data.data(),
                cvec.data.size(),
                cvec.n_embd,
                params.control_vector_layer_start,
                params.control_vector_layer_end);
        if (err) {
            return res;
        }
    }

    // sanity checks for reranking models: they need BOS plus EOS/SEP or a rerank template
    if (llama_pooling_type(lctx) == LLAMA_POOLING_TYPE_RANK) {
        bool ok = true;

        if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
            LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
            ok = false;
        }

        bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
        bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
        bool has_rerank_prompt = llama_model_chat_template(model, "rerank") != NULL;

        if (!has_eos && !has_sep && !has_rerank_prompt) {
            LOG_WRN("%s: warning: vocab does not have an EOS token, SEP token, or rerank prompt. Reranking will not work\n", __func__);
            ok = false;
        } else if (!has_eos) {
            LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
        }

        if (!ok) {
            return res;
        }
    }

    if (!params.lora_init_without_apply) {
        common_set_adapter_lora(lctx, params.lora_adapters);
    }

    if (params.warmup) {
        LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__);

        llama_set_warmup(lctx, true);

        std::vector<llama_token> tmp;
        llama_token bos = llama_vocab_bos(vocab);
        llama_token eos = llama_vocab_eos(vocab);

        // some models (e.g. T5) don't have a BOS token
        if (bos != LLAMA_TOKEN_NULL) {
            tmp.push_back(bos);
        }
        if (eos != LLAMA_TOKEN_NULL) {
            tmp.push_back(eos);
        }
        if (tmp.empty()) {
            // fall back to an arbitrary token so the warmup batch is never empty
            tmp.push_back(0);
        }

        if (llama_model_has_encoder(model)) {
            llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size()));
            llama_token decoder_start_token_id = llama_model_decoder_start_token(model);
            if (decoder_start_token_id == LLAMA_TOKEN_NULL) {
                decoder_start_token_id = bos;
            }
            // the decoder warmup uses only the decoder-start token
            tmp.clear();
            tmp.push_back(decoder_start_token_id);
        }
        if (llama_model_has_decoder(model)) {
            llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch)));
        }
        // wipe all traces of the warmup run
        llama_memory_clear(llama_get_memory(lctx), true);
        llama_synchronize(lctx);
        llama_perf_context_reset(lctx);
        llama_set_warmup(lctx, false);

        // reset samplers to reset RNG state after warmup to the seeded state
        res->reset_samplers();
    }

    return res;
}
|
| 1282 |
+
|
| 1283 |
+
// out-of-line on purpose: impl is a complete type here, so unique_ptr<impl> can destroy it
common_init_result::~common_init_result() = default;
|
| 1284 |
+
|
| 1285 |
+
// Return the base URL for model downloads, always with a trailing '/'.
// Resolution order: $MODEL_ENDPOINT, then $HF_ENDPOINT (legacy), then the
// default "https://huggingface.co/". Empty variables are treated as unset.
std::string get_model_endpoint() {
    const char * model_endpoint_env = getenv("MODEL_ENDPOINT");
    // We still respect the use of environment-variable "HF_ENDPOINT" for backward-compatibility.
    const char * hf_endpoint_env = getenv("HF_ENDPOINT");
    const char * endpoint_env = model_endpoint_env ? model_endpoint_env : hf_endpoint_env;
    std::string model_endpoint = "https://huggingface.co/";
    // BUGFIX: skip empty values - the previous code called .back() on an empty
    // string when the variable was set but empty, which is undefined behavior
    if (endpoint_env && endpoint_env[0] != '\0') {
        model_endpoint = endpoint_env;
        if (model_endpoint.back() != '/') {
            model_endpoint += '/';
        }
    }
    return model_endpoint;
}
|
| 1299 |
+
|
| 1300 |
+
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora) {
|
| 1301 |
+
std::vector<llama_adapter_lora *> loras;
|
| 1302 |
+
std::vector<float> scales;
|
| 1303 |
+
|
| 1304 |
+
for (auto & la: lora) {
|
| 1305 |
+
loras.push_back(la.ptr);
|
| 1306 |
+
scales.push_back(la.scale);
|
| 1307 |
+
}
|
| 1308 |
+
|
| 1309 |
+
llama_set_adapters_lora(ctx, loras.data(), loras.size(), scales.data());
|
| 1310 |
+
}
|
| 1311 |
+
|
| 1312 |
+
// Translate common_params into llama_model_params for llama_model_load_from_file.
// NOTE: the returned struct stores raw pointers into `params` (devices,
// kv_overrides, tensor_buft_overrides) - `params` must outlive its use.
struct llama_model_params common_model_params_to_llama(common_params & params) {
    auto mparams = llama_model_default_params();

    if (!params.devices.empty()) {
        mparams.devices = params.devices.data();
    }

    mparams.n_gpu_layers = params.n_gpu_layers;
    mparams.main_gpu = params.main_gpu;
    mparams.split_mode = params.split_mode;
    mparams.tensor_split = params.tensor_split;
    mparams.use_mmap = params.use_mmap;
    mparams.use_direct_io = params.use_direct_io;
    mparams.use_mlock = params.use_mlock;
    mparams.check_tensors = params.check_tensors;
    mparams.use_extra_bufts = !params.no_extra_bufts; // note: inverted flag
    mparams.no_host = params.no_host;

    // the override arrays are sentinel-terminated; enforce that before handing them over
    if (params.kv_overrides.empty()) {
        mparams.kv_overrides = NULL;
    } else {
        GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key");
        mparams.kv_overrides = params.kv_overrides.data();
    }

    if (params.tensor_buft_overrides.empty()) {
        mparams.tensor_buft_overrides = NULL;
    } else {
        GGML_ASSERT(params.tensor_buft_overrides.back().pattern == nullptr && "Tensor buffer overrides not terminated with empty pattern");
        mparams.tensor_buft_overrides = params.tensor_buft_overrides.data();
    }

    mparams.progress_callback = params.load_progress_callback;
    mparams.progress_callback_user_data = params.load_progress_callback_user_data;

    return mparams;
}
|
| 1349 |
+
|
| 1350 |
+
struct llama_context_params common_context_params_to_llama(const common_params & params) {
|
| 1351 |
+
auto cparams = llama_context_default_params();
|
| 1352 |
+
|
| 1353 |
+
cparams.n_ctx = params.n_ctx;
|
| 1354 |
+
cparams.n_seq_max = params.n_parallel;
|
| 1355 |
+
cparams.n_batch = params.n_batch;
|
| 1356 |
+
cparams.n_ubatch = params.n_ubatch;
|
| 1357 |
+
cparams.n_threads = params.cpuparams.n_threads;
|
| 1358 |
+
cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ?
|
| 1359 |
+
params.cpuparams.n_threads : params.cpuparams_batch.n_threads;
|
| 1360 |
+
cparams.embeddings = params.embedding;
|
| 1361 |
+
cparams.rope_scaling_type = params.rope_scaling_type;
|
| 1362 |
+
cparams.rope_freq_base = params.rope_freq_base;
|
| 1363 |
+
cparams.rope_freq_scale = params.rope_freq_scale;
|
| 1364 |
+
cparams.yarn_ext_factor = params.yarn_ext_factor;
|
| 1365 |
+
cparams.yarn_attn_factor = params.yarn_attn_factor;
|
| 1366 |
+
cparams.yarn_beta_fast = params.yarn_beta_fast;
|
| 1367 |
+
cparams.yarn_beta_slow = params.yarn_beta_slow;
|
| 1368 |
+
cparams.yarn_orig_ctx = params.yarn_orig_ctx;
|
| 1369 |
+
cparams.pooling_type = params.pooling_type;
|
| 1370 |
+
cparams.attention_type = params.attention_type;
|
| 1371 |
+
cparams.flash_attn_type = params.flash_attn_type;
|
| 1372 |
+
cparams.cb_eval = params.cb_eval;
|
| 1373 |
+
cparams.cb_eval_user_data = params.cb_eval_user_data;
|
| 1374 |
+
cparams.offload_kqv = !params.no_kv_offload;
|
| 1375 |
+
cparams.no_perf = params.no_perf;
|
| 1376 |
+
cparams.op_offload = !params.no_op_offload;
|
| 1377 |
+
cparams.swa_full = params.swa_full;
|
| 1378 |
+
cparams.kv_unified = params.kv_unified;
|
| 1379 |
+
|
| 1380 |
+
cparams.type_k = params.cache_type_k;
|
| 1381 |
+
cparams.type_v = params.cache_type_v;
|
| 1382 |
+
|
| 1383 |
+
return cparams;
|
| 1384 |
+
}
|
| 1385 |
+
|
| 1386 |
+
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) {
|
| 1387 |
+
struct ggml_threadpool_params tpp;
|
| 1388 |
+
|
| 1389 |
+
ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults
|
| 1390 |
+
|
| 1391 |
+
if (params.mask_valid) {
|
| 1392 |
+
std::memcpy(&tpp.cpumask, ¶ms.cpumask, GGML_MAX_N_THREADS);
|
| 1393 |
+
}
|
| 1394 |
+
|
| 1395 |
+
tpp.prio = params.priority;
|
| 1396 |
+
tpp.poll = params.poll;
|
| 1397 |
+
tpp.strict_cpu = params.strict_cpu;
|
| 1398 |
+
|
| 1399 |
+
return tpp;
|
| 1400 |
+
}
|
| 1401 |
+
|
| 1402 |
+
//
|
| 1403 |
+
// Batch utils
|
| 1404 |
+
//
|
| 1405 |
+
|
| 1406 |
+
// Empties the batch. Resetting the count is sufficient: common_batch_add
// overwrites the per-token arrays starting from index 0.
void common_batch_clear(struct llama_batch & batch) {
    batch.n_tokens = 0;
}
|
| 1409 |
+
|
| 1410 |
+
void common_batch_add(
|
| 1411 |
+
struct llama_batch & batch,
|
| 1412 |
+
llama_token id,
|
| 1413 |
+
llama_pos pos,
|
| 1414 |
+
const std::vector<llama_seq_id> & seq_ids,
|
| 1415 |
+
bool logits) {
|
| 1416 |
+
GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded");
|
| 1417 |
+
|
| 1418 |
+
batch.token [batch.n_tokens] = id;
|
| 1419 |
+
batch.pos [batch.n_tokens] = pos;
|
| 1420 |
+
batch.n_seq_id[batch.n_tokens] = seq_ids.size();
|
| 1421 |
+
for (size_t i = 0; i < seq_ids.size(); ++i) {
|
| 1422 |
+
batch.seq_id[batch.n_tokens][i] = seq_ids[i];
|
| 1423 |
+
}
|
| 1424 |
+
batch.logits [batch.n_tokens] = logits;
|
| 1425 |
+
|
| 1426 |
+
batch.n_tokens++;
|
| 1427 |
+
}
|
| 1428 |
+
|
| 1429 |
+
//
|
| 1430 |
+
// Vocab utils
|
| 1431 |
+
//
|
| 1432 |
+
|
| 1433 |
+
std::vector<llama_token> common_tokenize(
|
| 1434 |
+
const struct llama_context * ctx,
|
| 1435 |
+
const std::string & text,
|
| 1436 |
+
bool add_special,
|
| 1437 |
+
bool parse_special) {
|
| 1438 |
+
const llama_model * model = llama_get_model(ctx);
|
| 1439 |
+
const llama_vocab * vocab = llama_model_get_vocab(model);
|
| 1440 |
+
return common_tokenize(vocab, text, add_special, parse_special);
|
| 1441 |
+
}
|
| 1442 |
+
|
| 1443 |
+
std::vector<llama_token> common_tokenize(
|
| 1444 |
+
const struct llama_vocab * vocab,
|
| 1445 |
+
const std::string & text,
|
| 1446 |
+
bool add_special,
|
| 1447 |
+
bool parse_special) {
|
| 1448 |
+
// upper limit for the number of tokens
|
| 1449 |
+
int n_tokens = text.length() + 2 * add_special;
|
| 1450 |
+
std::vector<llama_token> result(n_tokens);
|
| 1451 |
+
n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
|
| 1452 |
+
if (n_tokens == std::numeric_limits<int32_t>::min()) {
|
| 1453 |
+
throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit");
|
| 1454 |
+
}
|
| 1455 |
+
if (n_tokens < 0) {
|
| 1456 |
+
result.resize(-n_tokens);
|
| 1457 |
+
int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special);
|
| 1458 |
+
GGML_ASSERT(check == -n_tokens);
|
| 1459 |
+
} else {
|
| 1460 |
+
result.resize(n_tokens);
|
| 1461 |
+
}
|
| 1462 |
+
return result;
|
| 1463 |
+
}
|
| 1464 |
+
|
| 1465 |
+
std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) {
|
| 1466 |
+
const llama_model * model = llama_get_model(ctx);
|
| 1467 |
+
const llama_vocab * vocab = llama_model_get_vocab(model);
|
| 1468 |
+
return common_token_to_piece(vocab, token, special);
|
| 1469 |
+
}
|
| 1470 |
+
|
| 1471 |
+
std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token token, bool special) {
|
| 1472 |
+
std::string piece;
|
| 1473 |
+
piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n'
|
| 1474 |
+
const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
|
| 1475 |
+
if (n_chars < 0) {
|
| 1476 |
+
piece.resize(-n_chars);
|
| 1477 |
+
int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special);
|
| 1478 |
+
GGML_ASSERT(check == -n_chars);
|
| 1479 |
+
}
|
| 1480 |
+
else {
|
| 1481 |
+
piece.resize(n_chars);
|
| 1482 |
+
}
|
| 1483 |
+
|
| 1484 |
+
return piece;
|
| 1485 |
+
}
|
| 1486 |
+
|
| 1487 |
+
std::string common_detokenize(const struct llama_context * ctx, const std::vector<llama_token> & tokens, bool special) {
|
| 1488 |
+
const llama_model * model = llama_get_model(ctx);
|
| 1489 |
+
const llama_vocab * vocab = llama_model_get_vocab(model);
|
| 1490 |
+
return common_detokenize(vocab, tokens, special);
|
| 1491 |
+
}
|
| 1492 |
+
|
| 1493 |
+
std::string common_detokenize(const struct llama_vocab * vocab, const std::vector<llama_token> & tokens, bool special) {
|
| 1494 |
+
std::string text;
|
| 1495 |
+
text.resize(std::max(text.capacity(), tokens.size()));
|
| 1496 |
+
int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
|
| 1497 |
+
if (n_chars < 0) {
|
| 1498 |
+
text.resize(-n_chars);
|
| 1499 |
+
n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special);
|
| 1500 |
+
GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization
|
| 1501 |
+
}
|
| 1502 |
+
|
| 1503 |
+
text.resize(n_chars);
|
| 1504 |
+
|
| 1505 |
+
// NOTE: the original tokenizer decodes bytes after collecting the pieces.
|
| 1506 |
+
return text;
|
| 1507 |
+
}
|
| 1508 |
+
|
| 1509 |
+
//
|
| 1510 |
+
// Embedding utils
|
| 1511 |
+
//
|
| 1512 |
+
|
| 1513 |
+
// Normalizes n floats from inp into out.
//   embd_norm == -1 : no normalisation (copy)
//   embd_norm ==  0 : scale so the max absolute value maps to ~32760 (int16 range)
//   embd_norm ==  2 : euclidean (L2) normalisation
//   otherwise       : p-norm with p = embd_norm
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) {
    double sum = 0.0;

    switch (embd_norm) {
        case -1: // no normalisation
            sum = 1.0;
            break;
        case 0: // max absolute
            for (int i = 0; i < n; i++) {
                sum = std::max<double>(sum, std::abs(inp[i]));
            }
            sum /= 32760.0; // make an int16 range
            break;
        case 2: // euclidean
            for (int i = 0; i < n; i++) {
                sum += inp[i] * inp[i];
            }
            sum = std::sqrt(sum);
            break;
        default: // p-norm (euclidean is p-norm p=2)
            for (int i = 0; i < n; i++) {
                sum += std::pow(std::abs(inp[i]), embd_norm);
            }
            sum = std::pow(sum, 1.0 / embd_norm);
            break;
    }

    // guard against division by zero (all-zero input)
    const float norm = sum > 0.0 ? 1.0 / sum : 0.0f;

    for (int i = 0; i < n; i++) {
        out[i] = inp[i] * norm;
    }
}
|
| 1548 |
+
|
| 1549 |
+
// Cosine similarity of two n-dimensional embeddings.
// Two zero vectors are defined as perfectly similar (1.0);
// a zero vector against a non-zero one is dissimilar (0.0).
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){
    double dot   = 0.0;
    double norm1 = 0.0;
    double norm2 = 0.0;

    for (int i = 0; i < n; i++) {
        dot   += embd1[i] * embd2[i];
        norm1 += embd1[i] * embd1[i];
        norm2 += embd2[i] * embd2[i];
    }

    if (norm1 == 0.0 || norm2 == 0.0) {
        return (norm1 == 0.0 && norm2 == 0.0) ? 1.0f : 0.0f;
    }

    return dot / (sqrt(norm1) * sqrt(norm2));
}
|
| 1570 |
+
|
| 1571 |
+
//
|
| 1572 |
+
// Control vector utils
|
| 1573 |
+
//
|
| 1574 |
+
|
| 1575 |
+
// Loads a single control-vector GGUF file. Tensors must be named
// "direction.<layer>" (layer >= 1), be 1-D F32, and share one embedding size.
// On any invalid tensor, n_embd is set to -1 and the data is cleared.
static common_control_vector_data common_control_vector_load_one(const common_control_vector_load_info & load_info) {
    common_control_vector_data result = { -1, {} };

    // load the GGUF file, keeping tensor data resident so it can be read below
    ggml_context * ctx = nullptr;
    struct gguf_init_params meta_gguf_params = {
        /* .no_alloc = */ false,
        /* .ctx      = */ &ctx,
    };
    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
    if (!ctx_gguf) {
        LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str());
        return result;
    }

    const int32_t n_tensors = gguf_get_n_tensors(ctx_gguf);
    if (n_tensors == 0) {
        LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
    }

    for (int i = 0; i < n_tensors; i++) {
        const std::string name = gguf_get_tensor_name(ctx_gguf, i);

        // parse "direction.<layer_idx>"; anything else leaves layer_idx at -1
        int layer_idx = -1;
        const size_t dotpos = name.find('.');
        if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
            try {
                layer_idx = std::stoi(name.substr(dotpos + 1));
            } catch (...) {
                layer_idx = -1;
            }
        }

        if (layer_idx < 0) {
            LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }
        if (layer_idx == 0) {
            LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
        if (tensor->type != GGML_TYPE_F32) {
            LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }
        if (ggml_n_dims(tensor) != 1) {
            LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // the first tensor fixes the embedding size; later ones must match it
        if (result.n_embd == -1) {
            result.n_embd = ggml_nelements(tensor);
        } else if (ggml_nelements(tensor) != result.n_embd) {
            LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        // extend if necessary - do not store data for layer 0 (it's not used)
        result.data.resize(std::max(result.data.size(), static_cast<size_t>(result.n_embd * layer_idx)), 0.0f);

        // accumulate scaled values; layer 1 is stored at offset 0
        const float * src = (const float *) tensor->data;
        float       * dst = result.data.data() + result.n_embd * (layer_idx - 1);
        for (int j = 0; j < result.n_embd; j++) {
            dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file
        }
    }

    if (result.n_embd == -1) {
        LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str());
        result.data.clear();
    }

    gguf_free(ctx_gguf);
    ggml_free(ctx);

    return result;
}
|
| 1659 |
+
|
| 1660 |
+
// Loads and element-wise sums several control vector files.
// All files must share one embedding size; on any failure n_embd is -1
// and the data is cleared.
common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos) {
    common_control_vector_data result = { -1, {} };

    for (const auto & info : load_infos) {
        auto cur = common_control_vector_load_one(info);

        if (cur.n_embd == -1) {
            result.n_embd = -1;
            break;
        }
        if (result.n_embd != -1 && result.n_embd != cur.n_embd) {
            LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str());
            result.n_embd = -1;
            break;
        }

        if (result.n_embd == -1) {
            // first valid file: adopt it wholesale
            result = std::move(cur);
        } else {
            // grow the buffer when a file covers more layers, then accumulate
            result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f);
            for (size_t i = 0; i < cur.data.size(); i++) {
                result.data[i] += cur.data[i];
            }
        }
    }

    if (result.n_embd == -1) {
        LOG_ERR("%s: no valid control vector files passed\n", __func__);
        result.data.clear();
    }

    return result;
}
|
| 1693 |
+
|
| 1694 |
+
// Builds a next-token-prediction dataset from a token stream: datapoint i is
// tokens[i*stride .. i*stride + n_ctx) and its labels are the same window
// shifted right by one token.
ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride) {
    const int64_t ne_datapoint = llama_n_ctx(ctx);

    // guard against unsigned wrap-around: tokens.size() is size_t, so the
    // original expression tokens.size() - ne_datapoint - 1 would silently
    // produce a huge value when fewer than n_ctx + 1 tokens are provided
    GGML_ASSERT(stride > 0);
    GGML_ASSERT((int64_t) tokens.size() > ne_datapoint);

    const int64_t ndata = ((int64_t) tokens.size() - ne_datapoint - 1) / stride;

    ggml_opt_dataset_t result = ggml_opt_dataset_init(
        GGML_TYPE_I32, GGML_TYPE_I32, ne_datapoint, ne_datapoint, ndata, /*ndata_shard =*/ 1);

    llama_token * data   = (llama_token *) ggml_opt_dataset_data(result)->data;
    llama_token * labels = (llama_token *) ggml_opt_dataset_labels(result)->data;

    for (int64_t idata = 0; idata < ndata; ++idata) {
        memcpy(data   + idata*ne_datapoint, tokens.data() + idata*stride + 0, ne_datapoint*sizeof(llama_token));
        memcpy(labels + idata*ne_datapoint, tokens.data() + idata*stride + 1, ne_datapoint*sizeof(llama_token));
    }

    return result;
}
|
| 1710 |
+
|
| 1711 |
+
// ggml_opt callback: derives optimizer parameters from an lr_opt schedule
// passed via userdata. AdamW and SGD are kept in sync (same lr and wd).
ggml_opt_optimizer_params common_opt_lr_pars(void * userdata) {
    const lr_opt & sched = *(const lr_opt *) userdata;

    ggml_opt_optimizer_params result = ggml_opt_get_default_optimizer_params(nullptr);

    const float lr = sched.get_lr(sched.epoch);
    result.adamw.alpha = lr;
    result.sgd.alpha   = lr;
    result.adamw.wd    = sched.wd;
    result.sgd.wd      = sched.wd;

    return result;
}
|
| 1718 |
+
|
| 1719 |
+
// TODO make all command line args case-insensitive
// Case-insensitive C-string equality (MSVC and POSIX spellings).
static inline bool eq_case_insensitive(char const* a, char const* b) {
#if defined(_MSC_VER)
    return _stricmp(a, b) == 0;
#else
    return strcasecmp(a, b) == 0;
#endif // defined(_MSC_VER)
}
|
| 1729 |
+
|
| 1730 |
+
enum ggml_opt_optimizer_type common_opt_get_optimizer(const char * n) {
|
| 1731 |
+
if (eq_case_insensitive("adamw", n)) {
|
| 1732 |
+
return GGML_OPT_OPTIMIZER_TYPE_ADAMW;
|
| 1733 |
+
}
|
| 1734 |
+
if (eq_case_insensitive("sgd", n)) {
|
| 1735 |
+
return GGML_OPT_OPTIMIZER_TYPE_SGD;
|
| 1736 |
+
}
|
| 1737 |
+
return GGML_OPT_OPTIMIZER_TYPE_COUNT;
|
| 1738 |
+
}
|
| 1739 |
+
|
| 1740 |
+
// TODO simplify to use just log and exp
|
| 1741 |
+
// ln(2); used by lr_opt to express an lr ratio as a number of halvings
static float const k_log_2 = std::log(2.f);
|
| 1742 |
+
|
| 1743 |
+
void lr_opt::init() {
|
| 1744 |
+
if (lr_min > 0 && lr_min < lr0) {
|
| 1745 |
+
float nhalf = std::log(lr0 / lr_min) / k_log_2;
|
| 1746 |
+
float e = epochs;
|
| 1747 |
+
if (decay_epochs > 0 && decay_epochs < e) {
|
| 1748 |
+
e = decay_epochs;
|
| 1749 |
+
} else {
|
| 1750 |
+
decay_epochs = e;
|
| 1751 |
+
}
|
| 1752 |
+
scale_epoch = nhalf / e;
|
| 1753 |
+
}
|
| 1754 |
+
}
|
| 1755 |
+
|
| 1756 |
+
float lr_opt::get_lr(float epoch) const {
|
| 1757 |
+
float r = lr_min <= 0 ? lr0 :
|
| 1758 |
+
epoch >= decay_epochs ? lr_min :
|
| 1759 |
+
lr0 * std::pow(0.5f, epoch * scale_epoch);
|
| 1760 |
+
LOG_INF("epoch %.2g lr=%.2g\n", epoch, r);
|
| 1761 |
+
return r;
|
| 1762 |
+
}
|
| 1763 |
+
|
| 1764 |
+
bool common_replay_last_token(struct llama_context * ctx, llama_token last_token, int32_t pos) {
|
| 1765 |
+
llama_batch batch = llama_batch_get_one(&last_token, 1);
|
| 1766 |
+
batch.pos = &pos;
|
| 1767 |
+
if (llama_decode(ctx, batch)) {
|
| 1768 |
+
LOG_ERR("%s: failed to replay last token\n", __func__);
|
| 1769 |
+
return false;
|
| 1770 |
+
}
|
| 1771 |
+
return true;
|
| 1772 |
+
}
|
| 1773 |
+
|
| 1774 |
+
bool common_prompt_batch_decode(
|
| 1775 |
+
struct llama_context * ctx,
|
| 1776 |
+
const std::vector<llama_token> & tokens,
|
| 1777 |
+
int & n_past,
|
| 1778 |
+
int n_batch,
|
| 1779 |
+
std::string_view state_path,
|
| 1780 |
+
bool save_state) {
|
| 1781 |
+
const int n_eval = tokens.size();
|
| 1782 |
+
if (n_eval == 0) {
|
| 1783 |
+
return true;
|
| 1784 |
+
}
|
| 1785 |
+
|
| 1786 |
+
if (save_state && n_eval > 1) {
|
| 1787 |
+
const int n_tokens_before_last = n_eval - 1;
|
| 1788 |
+
|
| 1789 |
+
GGML_ASSERT(n_eval <= n_batch);
|
| 1790 |
+
|
| 1791 |
+
// Decode all but the last token so we can save the memory state before decoding the last token.
|
| 1792 |
+
// This is done so we can restore the session state later and replay the last token.
|
| 1793 |
+
// Memory implementations in recurrent/hybrid models don't support removing tokens from their
|
| 1794 |
+
// memory, so we can't just remove the last token from the memory and replay the last token which
|
| 1795 |
+
// is the reason for this logic.
|
| 1796 |
+
if (llama_decode(ctx, llama_batch_get_one(const_cast<llama_token*>(tokens.data()), n_tokens_before_last))) {
|
| 1797 |
+
LOG_ERR("%s : failed to eval\n", __func__);
|
| 1798 |
+
return false;
|
| 1799 |
+
}
|
| 1800 |
+
n_past += n_tokens_before_last;
|
| 1801 |
+
|
| 1802 |
+
llama_state_save_file(ctx, state_path.data(), tokens.data(), n_tokens_before_last);
|
| 1803 |
+
LOG_INF("saved session before last token to %s, n_tokens = %d\n", state_path.data(), n_tokens_before_last);
|
| 1804 |
+
|
| 1805 |
+
llama_token last_token = tokens.back();
|
| 1806 |
+
llama_batch batch = llama_batch_get_one(&last_token, 1);
|
| 1807 |
+
int32_t pos = n_past;
|
| 1808 |
+
batch.pos = &pos;
|
| 1809 |
+
|
| 1810 |
+
if (llama_decode(ctx, batch)) {
|
| 1811 |
+
LOG_ERR("%s : failed to eval last token\n", __func__);
|
| 1812 |
+
return false;
|
| 1813 |
+
}
|
| 1814 |
+
n_past++;
|
| 1815 |
+
} else {
|
| 1816 |
+
if (llama_decode(ctx, llama_batch_get_one(const_cast<llama_token*>(tokens.data()), n_eval))) {
|
| 1817 |
+
LOG_ERR("%s : failed to eval\n", __func__);
|
| 1818 |
+
return false;
|
| 1819 |
+
}
|
| 1820 |
+
n_past += n_eval;
|
| 1821 |
+
}
|
| 1822 |
+
|
| 1823 |
+
return true;
|
| 1824 |
+
}
|
llama.cpp/common/common.h
ADDED
|
@@ -0,0 +1,931 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Various helper functions and utilities
|
| 2 |
+
|
| 3 |
+
#pragma once
|
| 4 |
+
|
| 5 |
+
#include "ggml-opt.h"
|
| 6 |
+
#include "llama-cpp.h"
|
| 7 |
+
|
| 8 |
+
#include <set>
|
| 9 |
+
#include <sstream>
|
| 10 |
+
#include <string>
|
| 11 |
+
#include <string_view>
|
| 12 |
+
#include <vector>
|
| 13 |
+
#include <map>
|
| 14 |
+
|
| 15 |
+
#if defined(_WIN32) && !defined(_WIN32_WINNT)
|
| 16 |
+
#define _WIN32_WINNT 0x0A00
|
| 17 |
+
#endif
|
| 18 |
+
|
| 19 |
+
#ifdef _WIN32
|
| 20 |
+
#define DIRECTORY_SEPARATOR '\\'
|
| 21 |
+
#else
|
| 22 |
+
#define DIRECTORY_SEPARATOR '/'
|
| 23 |
+
#endif // _WIN32
|
| 24 |
+
|
| 25 |
+
#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0)
|
| 26 |
+
#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0)
|
| 27 |
+
|
| 28 |
+
#define print_build_info() do { \
|
| 29 |
+
fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \
|
| 30 |
+
fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \
|
| 31 |
+
} while(0)
|
| 32 |
+
|
| 33 |
+
// RAII stopwatch: records the start time on construction and, on
// destruction, adds the elapsed time to the referenced accumulator
// (behavior when `disable` is set is defined in the .cpp).
struct common_time_meas {
    common_time_meas(int64_t & t_acc, bool disable = false);
    ~common_time_meas();

    const int64_t t_start_us; // construction timestamp, microseconds

    int64_t & t_acc;          // accumulator the elapsed time is added to
};
|
| 41 |
+
|
| 42 |
+
// Description of one LoRA adapter requested on the command line.
struct common_adapter_lora_info {
    std::string path;          // adapter file path
    float scale;               // blending strength

    std::string task_name;     // populated from adapter metadata
    std::string prompt_prefix; // populated from adapter metadata

    struct llama_adapter_lora * ptr; // non-owning handle to the loaded adapter
};
|
| 51 |
+
|
| 52 |
+
using llama_tokens = std::vector<llama_token>;
|
| 53 |
+
|
| 54 |
+
// build info
|
| 55 |
+
extern int LLAMA_BUILD_NUMBER;
|
| 56 |
+
extern const char * LLAMA_COMMIT;
|
| 57 |
+
extern const char * LLAMA_COMPILER;
|
| 58 |
+
extern const char * LLAMA_BUILD_TARGET;
|
| 59 |
+
|
| 60 |
+
const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
|
| 61 |
+
|
| 62 |
+
struct common_control_vector_load_info;
|
| 63 |
+
|
| 64 |
+
//
|
| 65 |
+
// CPU utils
|
| 66 |
+
//
|
| 67 |
+
|
| 68 |
+
struct cpu_params {
|
| 69 |
+
int n_threads = -1;
|
| 70 |
+
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
| 71 |
+
bool mask_valid = false; // Default: any CPU
|
| 72 |
+
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
| 73 |
+
bool strict_cpu = false; // Use strict CPU placement
|
| 74 |
+
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
| 75 |
+
};
|
| 76 |
+
|
| 77 |
+
int32_t cpu_get_num_physical_cores();
|
| 78 |
+
int32_t cpu_get_num_math();
|
| 79 |
+
|
| 80 |
+
//
|
| 81 |
+
// Common params
|
| 82 |
+
//
|
| 83 |
+
|
| 84 |
+
// Identifies which example/tool is parsing arguments, so that common_params
// can expose only the options relevant to it. COUNT must stay last.
enum llama_example {
    LLAMA_EXAMPLE_BATCHED,
    LLAMA_EXAMPLE_DEBUG,
    LLAMA_EXAMPLE_COMMON,
    LLAMA_EXAMPLE_SPECULATIVE,
    LLAMA_EXAMPLE_COMPLETION,
    LLAMA_EXAMPLE_CLI,
    LLAMA_EXAMPLE_EMBEDDING,
    LLAMA_EXAMPLE_PERPLEXITY,
    LLAMA_EXAMPLE_RETRIEVAL,
    LLAMA_EXAMPLE_PASSKEY,
    LLAMA_EXAMPLE_IMATRIX,
    LLAMA_EXAMPLE_BENCH,
    LLAMA_EXAMPLE_SERVER,
    LLAMA_EXAMPLE_CVECTOR_GENERATOR,
    LLAMA_EXAMPLE_EXPORT_LORA,
    LLAMA_EXAMPLE_MTMD,
    LLAMA_EXAMPLE_LOOKUP,
    LLAMA_EXAMPLE_PARALLEL,
    LLAMA_EXAMPLE_TTS,
    LLAMA_EXAMPLE_DIFFUSION,
    LLAMA_EXAMPLE_FINETUNE,
    LLAMA_EXAMPLE_FIT_PARAMS,

    LLAMA_EXAMPLE_COUNT,
};
|
| 110 |
+
|
| 111 |
+
// Sampler chain elements. Values are explicit and stable because they are
// (de)serialized; 5 is the retired TFS_Z slot and must not be reused.
enum common_sampler_type {
    COMMON_SAMPLER_TYPE_NONE        = 0,
    COMMON_SAMPLER_TYPE_DRY         = 1,
    COMMON_SAMPLER_TYPE_TOP_K       = 2,
    COMMON_SAMPLER_TYPE_TOP_P       = 3,
    COMMON_SAMPLER_TYPE_MIN_P       = 4,
    //COMMON_SAMPLER_TYPE_TFS_Z     = 5,
    COMMON_SAMPLER_TYPE_TYPICAL_P   = 6,
    COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
    COMMON_SAMPLER_TYPE_XTC         = 8,
    COMMON_SAMPLER_TYPE_INFILL      = 9,
    COMMON_SAMPLER_TYPE_PENALTIES   = 10,
    COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
    COMMON_SAMPLER_TYPE_ADAPTIVE_P  = 12,
};
|
| 126 |
+
|
| 127 |
+
// dimensionality reduction methods, used by cvector-generator
enum dimre_method {
    DIMRE_METHOD_PCA,  // principal component analysis
    DIMRE_METHOD_MEAN, // mean of the difference vectors
};
// whether the CLI runs in chat (conversation) mode
enum common_conversation_mode {
    COMMON_CONVERSATION_MODE_DISABLED = 0,
    COMMON_CONVERSATION_MODE_ENABLED  = 1,
    COMMON_CONVERSATION_MODE_AUTO     = 2, // decide based on model/template
};
// how a lazy-grammar trigger is matched against the generated output
enum common_grammar_trigger_type {
    COMMON_GRAMMAR_TRIGGER_TYPE_TOKEN,        // match a specific token id
    COMMON_GRAMMAR_TRIGGER_TYPE_WORD,         // match a literal word
    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,      // match a regex pattern anywhere
    COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL, // regex must match the full output
};
struct common_grammar_trigger {
|
| 147 |
+
common_grammar_trigger_type type;
|
| 148 |
+
std::string value;
|
| 149 |
+
llama_token token = LLAMA_TOKEN_NULL;
|
| 150 |
+
};
|
| 151 |
+
|
| 152 |
+
// bitfield flags recording which sampling parameters were explicitly set by
// the user (stored in common_params_sampling::user_sampling_config)
enum common_params_sampling_config : uint64_t {
    COMMON_PARAMS_SAMPLING_CONFIG_SAMPLERS        = 1 << 0,
    COMMON_PARAMS_SAMPLING_CONFIG_TOP_K           = 1 << 1,
    COMMON_PARAMS_SAMPLING_CONFIG_TOP_P           = 1 << 2,
    COMMON_PARAMS_SAMPLING_CONFIG_MIN_P           = 1 << 3,
    COMMON_PARAMS_SAMPLING_CONFIG_XTC_PROBABILITY = 1 << 4,
    COMMON_PARAMS_SAMPLING_CONFIG_XTC_THRESHOLD   = 1 << 5,
    COMMON_PARAMS_SAMPLING_CONFIG_TEMP            = 1 << 6,
    COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_LAST_N  = 1 << 7,
    COMMON_PARAMS_SAMPLING_CONFIG_PENALTY_REPEAT  = 1 << 8,
    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT        = 1 << 9,
    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_TAU    = 1 << 10,
    COMMON_PARAMS_SAMPLING_CONFIG_MIROSTAT_ETA    = 1 << 11,
};
// which speculative-decoding strategy to use
enum common_speculative_type {
    COMMON_SPECULATIVE_TYPE_NONE,          // no speculative decoding
    COMMON_SPECULATIVE_TYPE_DRAFT,         // draft model
    COMMON_SPECULATIVE_TYPE_EAGLE3,        // eagle draft model
    COMMON_SPECULATIVE_TYPE_NGRAM_SIMPLE,  // simple self-speculative decoding
    COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K,   // self-speculative decoding with n-gram keys only
    COMMON_SPECULATIVE_TYPE_NGRAM_MAP_K4V, // self-speculative decoding with n-gram keys and 4 m-gram values
    COMMON_SPECULATIVE_TYPE_NGRAM_MOD,
    COMMON_SPECULATIVE_TYPE_NGRAM_CACHE,   // self-speculative decoding with 3-level n-gram cache
    COMMON_SPECULATIVE_TYPE_COUNT          // number of types, unknown type
};
// sampling parameters
|
| 180 |
+
struct common_params_sampling {
|
| 181 |
+
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
| 182 |
+
|
| 183 |
+
int32_t n_prev = 64; // number of previous tokens to remember
|
| 184 |
+
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
| 185 |
+
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
| 186 |
+
int32_t top_k = 40; // <= 0 to use vocab size
|
| 187 |
+
float top_p = 0.95f; // 1.0 = disabled
|
| 188 |
+
float min_p = 0.05f; // 0.0 = disabled
|
| 189 |
+
float xtc_probability = 0.00f; // 0.0 = disabled
|
| 190 |
+
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
| 191 |
+
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
| 192 |
+
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
| 193 |
+
float dynatemp_range = 0.00f; // 0.0 = disabled
|
| 194 |
+
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
| 195 |
+
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
| 196 |
+
float penalty_repeat = 1.00f; // 1.0 = disabled
|
| 197 |
+
float penalty_freq = 0.00f; // 0.0 = disabled
|
| 198 |
+
float penalty_present = 0.00f; // 0.0 = disabled
|
| 199 |
+
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
| 200 |
+
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
| 201 |
+
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
| 202 |
+
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
| 203 |
+
float adaptive_target = -1.0f; // select tokens near this probability (valid range 0.0 to 1.0; negative = disabled)
|
| 204 |
+
float adaptive_decay = 0.90f; // EMA decay for adaptation; history ≈ 1/(1-decay) tokens (0.0 - 0.99)
|
| 205 |
+
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
| 206 |
+
float top_n_sigma = -1.00f; // -1.0 = disabled
|
| 207 |
+
float mirostat_tau = 5.00f; // target entropy
|
| 208 |
+
float mirostat_eta = 0.10f; // learning rate
|
| 209 |
+
bool ignore_eos = false;
|
| 210 |
+
bool no_perf = false; // disable performance metrics
|
| 211 |
+
bool timing_per_token = false;
|
| 212 |
+
|
| 213 |
+
uint64_t user_sampling_config = 0; // bitfield to track user-specified samplers
|
| 214 |
+
|
| 215 |
+
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
| 216 |
+
|
| 217 |
+
std::vector<enum common_sampler_type> samplers = {
|
| 218 |
+
COMMON_SAMPLER_TYPE_PENALTIES,
|
| 219 |
+
COMMON_SAMPLER_TYPE_DRY,
|
| 220 |
+
COMMON_SAMPLER_TYPE_TOP_N_SIGMA,
|
| 221 |
+
COMMON_SAMPLER_TYPE_TOP_K,
|
| 222 |
+
COMMON_SAMPLER_TYPE_TYPICAL_P,
|
| 223 |
+
COMMON_SAMPLER_TYPE_TOP_P,
|
| 224 |
+
COMMON_SAMPLER_TYPE_MIN_P,
|
| 225 |
+
COMMON_SAMPLER_TYPE_XTC,
|
| 226 |
+
COMMON_SAMPLER_TYPE_TEMPERATURE,
|
| 227 |
+
};
|
| 228 |
+
|
| 229 |
+
std::string grammar; // optional BNF-like grammar to constrain sampling
|
| 230 |
+
bool grammar_lazy = false;
|
| 231 |
+
std::vector<common_grammar_trigger> grammar_triggers; // optional triggers (for lazy grammars)
|
| 232 |
+
std::set<llama_token> preserved_tokens;
|
| 233 |
+
|
| 234 |
+
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
|
| 235 |
+
std::vector<llama_logit_bias> logit_bias_eog; // pre-calculated logit biases for EOG tokens
|
| 236 |
+
|
| 237 |
+
bool backend_sampling = false;
|
| 238 |
+
|
| 239 |
+
bool has_logit_bias() const {
|
| 240 |
+
return !logit_bias.empty();
|
| 241 |
+
}
|
| 242 |
+
|
| 243 |
+
// print the parameters into a string
|
| 244 |
+
std::string print() const;
|
| 245 |
+
};
|
| 246 |
+
|
| 247 |
+
// describes where a model comes from (local path, URL, HF repo, or Docker)
struct common_params_model {
    std::string path        = ""; // model local path                                          // NOLINT
    std::string url         = ""; // model url to download                                     // NOLINT
    std::string hf_repo     = ""; // HF repo                                                   // NOLINT
    std::string hf_file     = ""; // HF file                                                   // NOLINT
    std::string docker_repo = ""; // Docker repo                                               // NOLINT
    std::string name        = ""; // in format <user>/<model>[:<tag>] (tag is optional)        // NOLINT
};
struct common_ngram_mod;
|
| 257 |
+
|
| 258 |
+
struct common_params_speculative {
|
| 259 |
+
common_speculative_type type = COMMON_SPECULATIVE_TYPE_NONE; // type of speculative decoding
|
| 260 |
+
|
| 261 |
+
// general-purpose speculative decoding parameters
|
| 262 |
+
|
| 263 |
+
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
|
| 264 |
+
int32_t n_min = 0; // minimum number of draft tokens to use for speculative decoding
|
| 265 |
+
float p_split = 0.1f; // speculative decoding split probability
|
| 266 |
+
float p_min = 0.75f; // minimum speculative decoding probability (greedy)
|
| 267 |
+
|
| 268 |
+
// ngram-based speculative decoding
|
| 269 |
+
|
| 270 |
+
uint16_t ngram_size_n = 12; // ngram size for lookup
|
| 271 |
+
uint16_t ngram_size_m = 48; // mgram size for speculative tokens
|
| 272 |
+
uint16_t ngram_min_hits = 1; // minimum hits at ngram/mgram lookup for mgram to be proposed
|
| 273 |
+
|
| 274 |
+
std::shared_ptr<common_ngram_mod> ngram_mod;
|
| 275 |
+
|
| 276 |
+
std::string lookup_cache_static; // path of static ngram cache file for lookup decoding // NOLINT
|
| 277 |
+
std::string lookup_cache_dynamic; // path of dynamic ngram cache file for lookup decoding // NOLINT
|
| 278 |
+
|
| 279 |
+
// draft-model speculative decoding
|
| 280 |
+
|
| 281 |
+
struct common_params_model mparams_dft;
|
| 282 |
+
|
| 283 |
+
llama_model * model_dft = nullptr; // a llama_model that can be shared by multiple speculative contexts
|
| 284 |
+
|
| 285 |
+
llama_context_params cparams_dft; // these are the parameters for the draft llama_context
|
| 286 |
+
|
| 287 |
+
int32_t n_ctx = 0; // draft context size
|
| 288 |
+
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
| 289 |
+
|
| 290 |
+
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
| 291 |
+
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
| 292 |
+
|
| 293 |
+
struct cpu_params cpuparams;
|
| 294 |
+
struct cpu_params cpuparams_batch;
|
| 295 |
+
|
| 296 |
+
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
| 297 |
+
|
| 298 |
+
std::vector<std::pair<std::string, std::string>> replacements; // main to speculative model replacements
|
| 299 |
+
std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
|
| 300 |
+
|
| 301 |
+
bool has_dft() const {
|
| 302 |
+
return !mparams_dft.path.empty() || !mparams_dft.hf_repo.empty();
|
| 303 |
+
}
|
| 304 |
+
};
|
| 305 |
+
|
| 306 |
+
struct common_params_vocoder {
|
| 307 |
+
struct common_params_model model;
|
| 308 |
+
|
| 309 |
+
std::string speaker_file = ""; // speaker file path // NOLINT
|
| 310 |
+
|
| 311 |
+
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
|
| 312 |
+
};
|
| 313 |
+
|
| 314 |
+
// parameters for diffusion-based text generation
struct common_params_diffusion {
    int32_t steps       = 128;   // number of diffusion steps
    bool    visual_mode = false; // show intermediate generations

    float   eps          = 0; // epsilon for timesteps
    int32_t block_length = 0; // block length for generation

    int32_t algorithm = 4;    // default algorithm: low-confidence
    float   alg_temp  = 0.0f; // algorithm temperature

    float cfg_scale        = 0;     // classifier-free guidance scale
    bool  add_gumbel_noise = false; // add gumbel noise to the logits if temp > 0.0
};
// reasoning API response format (not to be confused as chat template's reasoning format)
// only used by server
enum common_reasoning_format {
    COMMON_REASONING_FORMAT_NONE,
    COMMON_REASONING_FORMAT_AUTO,            // Same as deepseek, using `message.reasoning_content`
    COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in <think> tags in stream mode
    COMMON_REASONING_FORMAT_DEEPSEEK,        // Extract thinking tag contents and return as `message.reasoning_content`, including in streaming deltas.
    // do not extend this enum unless you absolutely have to
    // in most cases, use COMMON_REASONING_FORMAT_AUTO
    // see: https://github.com/ggml-org/llama.cpp/pull/15408
};
// learning-rate schedule used by the finetune example
struct lr_opt {
    float    lr0          = 1e-5; // learning rate at first epoch
    float    lr_min       = -1;   // floor for the decayed learning rate (<0 = unset)
    float    decay_epochs = -1;   // if >0, the learning rate starts at lr0 and decays to lr_min after this many epochs
    float    scale_epoch  = 0;    // epoch scaling factor
    float    wd           = 0;    // weight decay
    unsigned epochs       = 2;    // total number of training epochs

    unsigned epoch; // set by optimizer outer (epochs) loop
    // learning rate decay - constant LR per epoch only for now
    float get_lr(float e) const;
    float get_lr() const { return get_lr(epoch); }
    // must call after arg parse, before get_lr
    void init();
};

// ggml optimizer callback that derives per-step parameters from an lr_opt
// passed via userdata
struct ggml_opt_optimizer_params common_opt_lr_pars(void * userdata);
struct common_params {
|
| 360 |
+
int32_t n_predict = -1; // max. number of new tokens to predict, -1 == no limit
|
| 361 |
+
int32_t n_ctx = 0; // context size, 0 == context the model was trained with
|
| 362 |
+
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
| 363 |
+
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
| 364 |
+
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
| 365 |
+
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
| 366 |
+
int32_t n_parallel = 1; // number of parallel sequences to decode
|
| 367 |
+
int32_t n_sequences = 1; // number of sequences to decode
|
| 368 |
+
int32_t grp_attn_n = 1; // group-attention factor
|
| 369 |
+
int32_t grp_attn_w = 512; // group-attention width
|
| 370 |
+
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
| 371 |
+
float rope_freq_base = 0.0f; // RoPE base frequency
|
| 372 |
+
float rope_freq_scale = 0.0f; // RoPE frequency scaling factor
|
| 373 |
+
float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor
|
| 374 |
+
float yarn_attn_factor = -1.0f; // YaRN magnitude scaling factor
|
| 375 |
+
float yarn_beta_fast = -1.0f; // YaRN low correction dim
|
| 376 |
+
float yarn_beta_slow = -1.0f; // YaRN high correction dim
|
| 377 |
+
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
| 378 |
+
|
| 379 |
+
// offload params
|
| 380 |
+
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
| 381 |
+
|
| 382 |
+
int32_t n_gpu_layers = -1; // number of layers to store in VRAM, -1 is auto, <= -2 is all
|
| 383 |
+
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
| 384 |
+
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
| 385 |
+
bool fit_params = true; // whether to fit unset model/context parameters to free device memory
|
| 386 |
+
int32_t fit_params_min_ctx = 4096; // minimum context size to set when trying to reduce memory use
|
| 387 |
+
|
| 388 |
+
// margin per device in bytes for fitting parameters to free memory:
|
| 389 |
+
std::vector<size_t> fit_params_target = std::vector<size_t>(llama_max_devices(), 1024 * 1024*1024);
|
| 390 |
+
|
| 391 |
+
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
| 392 |
+
|
| 393 |
+
struct cpu_params cpuparams;
|
| 394 |
+
struct cpu_params cpuparams_batch;
|
| 395 |
+
|
| 396 |
+
ggml_backend_sched_eval_callback cb_eval = nullptr;
|
| 397 |
+
void * cb_eval_user_data = nullptr;
|
| 398 |
+
|
| 399 |
+
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
| 400 |
+
|
| 401 |
+
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
| 402 |
+
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
| 403 |
+
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
|
| 404 |
+
enum llama_flash_attn_type flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO; // whether to use Flash Attention
|
| 405 |
+
|
| 406 |
+
struct common_params_sampling sampling;
|
| 407 |
+
struct common_params_speculative speculative;
|
| 408 |
+
struct common_params_vocoder vocoder;
|
| 409 |
+
struct common_params_diffusion diffusion;
|
| 410 |
+
|
| 411 |
+
struct common_params_model model;
|
| 412 |
+
|
| 413 |
+
std::set<std::string> model_alias; // model aliases // NOLINT
|
| 414 |
+
std::set<std::string> model_tags; // model tags (informational, not used for routing) // NOLINT
|
| 415 |
+
std::string hf_token = ""; // HF token // NOLINT
|
| 416 |
+
std::string prompt = ""; // NOLINT
|
| 417 |
+
std::string system_prompt = ""; // NOLINT
|
| 418 |
+
std::string prompt_file = ""; // store the external prompt file name // NOLINT
|
| 419 |
+
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
|
| 420 |
+
std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
|
| 421 |
+
std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
|
| 422 |
+
std::string logits_file = ""; // file for saving *all* logits // NOLINT
|
| 423 |
+
|
| 424 |
+
// llama-debug specific options
|
| 425 |
+
std::string logits_output_dir = "data"; // directory for saving logits output files // NOLINT
|
| 426 |
+
bool save_logits = false; // whether to save logits to files // NOLINT
|
| 427 |
+
std::vector<std::string> tensor_filter; // filter tensor names for debug output (regex) // NOLINT
|
| 428 |
+
|
| 429 |
+
std::vector<std::string> in_files; // all input files
|
| 430 |
+
std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
|
| 431 |
+
std::vector<llama_model_kv_override> kv_overrides;
|
| 432 |
+
std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
|
| 433 |
+
|
| 434 |
+
bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
|
| 435 |
+
std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
|
| 436 |
+
|
| 437 |
+
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
| 438 |
+
|
| 439 |
+
int32_t verbosity = 3; // LOG_LEVEL_INFO
|
| 440 |
+
int32_t control_vector_layer_start = -1; // layer range for control vector
|
| 441 |
+
int32_t control_vector_layer_end = -1; // layer range for control vector
|
| 442 |
+
bool offline = false;
|
| 443 |
+
|
| 444 |
+
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
|
| 445 |
+
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line
|
| 446 |
+
// (which is more convenient to use for plotting)
|
| 447 |
+
//
|
| 448 |
+
bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt
|
| 449 |
+
size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score
|
| 450 |
+
|
| 451 |
+
bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt
|
| 452 |
+
size_t winogrande_tasks = 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed
|
| 453 |
+
|
| 454 |
+
bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt
|
| 455 |
+
size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. If 0, all tasks will be computed
|
| 456 |
+
|
| 457 |
+
bool kl_divergence = false; // compute KL divergence
|
| 458 |
+
|
| 459 |
+
bool usage = false; // print usage
|
| 460 |
+
bool completion = false; // print source-able completion script
|
| 461 |
+
bool use_color = false; // use color to distinguish generations and inputs
|
| 462 |
+
bool special = false; // enable special token output
|
| 463 |
+
bool interactive = false; // interactive mode
|
| 464 |
+
bool interactive_first = false; // wait for user input immediately
|
| 465 |
+
bool prompt_cache_all = false; // save user input and generations to prompt cache
|
| 466 |
+
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
|
| 467 |
+
|
| 468 |
+
bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
|
| 469 |
+
bool multiline_input = false; // reverse the usage of `\`
|
| 470 |
+
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
| 471 |
+
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
| 472 |
+
bool no_perf = false; // disable performance metrics
|
| 473 |
+
bool show_timings = true; // show timing information on CLI
|
| 474 |
+
bool ctx_shift = false; // context shift on infinite text generation
|
| 475 |
+
bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)
|
| 476 |
+
bool kv_unified = false; // enable unified KV cache
|
| 477 |
+
|
| 478 |
+
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
| 479 |
+
bool use_mmap = true; // enable mmap to use filesystem cache
|
| 480 |
+
bool use_direct_io = false; // read from disk without buffering
|
| 481 |
+
bool use_mlock = false; // use mlock to keep model in memory
|
| 482 |
+
bool verbose_prompt = false; // print prompt tokens before generation
|
| 483 |
+
bool display_prompt = true; // print prompt before generation
|
| 484 |
+
bool no_kv_offload = false; // disable KV offloading
|
| 485 |
+
bool warmup = true; // warmup run
|
| 486 |
+
bool check_tensors = false; // validate tensor data
|
| 487 |
+
bool no_op_offload = false; // globally disable offload host tensor operations to device
|
| 488 |
+
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
|
| 489 |
+
bool no_host = false; // bypass host buffer allowing extra buffers to be used
|
| 490 |
+
|
| 491 |
+
bool single_turn = false; // single turn chat conversation
|
| 492 |
+
|
| 493 |
+
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
| 494 |
+
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
| 495 |
+
|
| 496 |
+
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
|
| 497 |
+
|
| 498 |
+
// multimodal models (see tools/mtmd)
|
| 499 |
+
struct common_params_model mmproj;
|
| 500 |
+
bool mmproj_use_gpu = true; // use GPU for multimodal model
|
| 501 |
+
bool no_mmproj = false; // explicitly disable multimodal model
|
| 502 |
+
std::vector<std::string> image; // path to image file(s)
|
| 503 |
+
int image_min_tokens = -1;
|
| 504 |
+
int image_max_tokens = -1;
|
| 505 |
+
|
| 506 |
+
// finetune
|
| 507 |
+
struct lr_opt lr;
|
| 508 |
+
enum ggml_opt_optimizer_type optimizer = GGML_OPT_OPTIMIZER_TYPE_ADAMW;
|
| 509 |
+
float val_split = 0.05f; // fraction of the data used for the validation set
|
| 510 |
+
|
| 511 |
+
// embedding
|
| 512 |
+
bool embedding = false; // get only sentence embedding
|
| 513 |
+
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
| 514 |
+
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
| 515 |
+
std::string embd_sep = "\n"; // separator of embeddings
|
| 516 |
+
std::string cls_sep = "\t"; // separator of classification sequences
|
| 517 |
+
|
| 518 |
+
// server params
|
| 519 |
+
int32_t port = 8080; // server listens on this network port
|
| 520 |
+
int32_t timeout_read = 600; // http read timeout in seconds
|
| 521 |
+
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
| 522 |
+
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
|
| 523 |
+
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
|
| 524 |
+
bool cache_prompt = true; // whether to enable prompt caching
|
| 525 |
+
int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot
|
| 526 |
+
int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc.
|
| 527 |
+
|
| 528 |
+
std::string hostname = "127.0.0.1";
|
| 529 |
+
std::string public_path = ""; // NOLINT
|
| 530 |
+
std::string api_prefix = ""; // NOLINT
|
| 531 |
+
std::string chat_template = ""; // NOLINT
|
| 532 |
+
bool use_jinja = true; // NOLINT
|
| 533 |
+
bool enable_chat_template = true;
|
| 534 |
+
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
| 535 |
+
int reasoning_budget = -1;
|
| 536 |
+
bool prefill_assistant = true; // if true, any trailing assistant message will be prefilled into the response
|
| 537 |
+
int sleep_idle_seconds = -1; // if >0, server will sleep after this many seconds of idle time
|
| 538 |
+
|
| 539 |
+
std::vector<std::string> api_keys;
|
| 540 |
+
|
| 541 |
+
std::string ssl_file_key = ""; // NOLINT
|
| 542 |
+
std::string ssl_file_cert = ""; // NOLINT
|
| 543 |
+
|
| 544 |
+
std::map<std::string, std::string> default_template_kwargs;
|
| 545 |
+
|
| 546 |
+
// webui configs
|
| 547 |
+
bool webui = true;
|
| 548 |
+
std::string webui_config_json;
|
| 549 |
+
|
| 550 |
+
// "advanced" endpoints are disabled by default for better security
|
| 551 |
+
bool endpoint_slots = true;
|
| 552 |
+
bool endpoint_props = false; // only control POST requests, not GET
|
| 553 |
+
bool endpoint_metrics = false;
|
| 554 |
+
|
| 555 |
+
// router server configs
|
| 556 |
+
std::string models_dir = ""; // directory containing models for the router server
|
| 557 |
+
std::string models_preset = ""; // directory containing model presets for the router server
|
| 558 |
+
int models_max = 4; // maximum number of models to load simultaneously
|
| 559 |
+
bool models_autoload = true; // automatically load models when requested via the router server
|
| 560 |
+
|
| 561 |
+
bool log_json = false;
|
| 562 |
+
|
| 563 |
+
std::string slot_save_path;
|
| 564 |
+
std::string media_path; // path to directory for loading media files
|
| 565 |
+
|
| 566 |
+
float slot_prompt_similarity = 0.1f;
|
| 567 |
+
|
| 568 |
+
// batched-bench params
|
| 569 |
+
bool is_pp_shared = false;
|
| 570 |
+
bool is_tg_separate = false;
|
| 571 |
+
|
| 572 |
+
std::vector<int32_t> n_pp;
|
| 573 |
+
std::vector<int32_t> n_tg;
|
| 574 |
+
std::vector<int32_t> n_pl;
|
| 575 |
+
|
| 576 |
+
// retrieval params
|
| 577 |
+
std::vector<std::string> context_files; // context files to embed
|
| 578 |
+
|
| 579 |
+
int32_t chunk_size = 64; // chunk size for context embedding
|
| 580 |
+
|
| 581 |
+
std::string chunk_separator = "\n"; // chunk separator for context embedding
|
| 582 |
+
|
| 583 |
+
// passkey params
|
| 584 |
+
int32_t n_junk = 250; // number of times to repeat the junk text
|
| 585 |
+
int32_t i_pos = -1; // position of the passkey in the junk text
|
| 586 |
+
|
| 587 |
+
// imatrix params
|
| 588 |
+
int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations
|
| 589 |
+
int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations
|
| 590 |
+
int32_t i_chunk = 0; // start processing from this chunk
|
| 591 |
+
int8_t imat_dat = 0; // whether the legacy imatrix.dat format should be output (gguf <= 0 < dat)
|
| 592 |
+
|
| 593 |
+
bool process_output = false; // collect data for the output tensor
|
| 594 |
+
bool compute_ppl = true; // whether to compute perplexity
|
| 595 |
+
bool show_statistics = false; // show imatrix statistics per tensor
|
| 596 |
+
bool parse_special = false; // whether to parse special tokens during imatrix tokenization
|
| 597 |
+
|
| 598 |
+
// cvector-generator params
|
| 599 |
+
int n_pca_batch = 100;
|
| 600 |
+
int n_pca_iterations = 1000;
|
| 601 |
+
dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
|
| 602 |
+
std::string cvector_positive_file = "tools/cvector-generator/positive.txt";
|
| 603 |
+
std::string cvector_negative_file = "tools/cvector-generator/negative.txt";
|
| 604 |
+
|
| 605 |
+
bool spm_infill = false; // suffix/prefix/middle pattern for infill
|
| 606 |
+
|
| 607 |
+
// batched-bench params
|
| 608 |
+
bool batched_bench_output_jsonl = false;
|
| 609 |
+
|
| 610 |
+
// common params
|
| 611 |
+
std::string out_file; // output filename for all example programs
|
| 612 |
+
// optional callback for model loading progress and cancellation:
|
| 613 |
+
// called with a progress value between 0.0 and 1.0.
|
| 614 |
+
// return false from callback to abort model loading or true to continue
|
| 615 |
+
llama_progress_callback load_progress_callback = NULL;
|
| 616 |
+
void * load_progress_callback_user_data = NULL;
|
| 617 |
+
};
|
| 618 |
+
|
| 619 |
+
// call once at the start of a program if it uses libcommon
|
| 620 |
+
// initializes the logging system and prints info about the build
|
| 621 |
+
void common_init();
|
| 622 |
+
|
| 623 |
+
std::string common_params_get_system_info(const common_params & params);
|
| 624 |
+
|
| 625 |
+
bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
| 626 |
+
bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
| 627 |
+
void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
|
| 628 |
+
bool set_process_priority(enum ggml_sched_priority prio);
|
| 629 |
+
|
| 630 |
+
//
// String utils
//

// enable printf-style format-string checking on GCC/Clang
// (MinGW without clang uses the gnu_printf archetype to match its CRT)
#ifdef __GNUC__
#    if defined(__MINGW32__) && !defined(__clang__)
#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#    else
#        define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#    endif
#else
#    define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
#endif

// printf-style formatting into a std::string
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
std::string string_format(const char * fmt, ...);
// trim leading and trailing whitespace
std::string string_strip(const std::string & str);
// timestamp suitable for use in filenames (sorts chronologically)
std::string string_get_sortable_timestamp();

// join values with separator between each pair
std::string string_join(const std::vector<std::string> & values, const std::string & separator);
// split str on every occurrence of delimiter
std::vector<std::string> string_split(const std::string & str, const std::string & delimiter);
// concatenate n copies of str
std::string string_repeat(const std::string & str, size_t n);

// replace every occurrence of search in s with replace (in place)
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);

// escape regex metacharacters so s matches literally
std::string regex_escape(const std::string & s);
// split str on delim and parse each token as T via stream extraction.
// NOTE: tokens that fail to parse yield a value-initialized T (stream
// extraction leaves `value` zeroed on failure) — callers relying on strict
// parsing should validate input first.
template<class T>
static std::vector<T> string_split(const std::string & str, char delim) {
    static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
    std::vector<T> values;
    std::istringstream str_stream(str);
    std::string token;
    while (std::getline(str_stream, token, delim)) {
        T value;
        std::istringstream token_stream(token);
        token_stream >> value;
        values.push_back(value);
    }
    return values;
}

// specialization for std::string: keeps empty tokens and does not trim,
// so "a,,b" -> {"a", "", "b"} and "" -> {""}
template<>
inline std::vector<std::string> string_split<std::string>(const std::string & str, char delim)
{
    std::vector<std::string> parts;
    size_t begin_pos = 0;
    size_t delim_pos = str.find(delim);
    while (delim_pos != std::string::npos) {
        std::string part = str.substr(begin_pos, delim_pos - begin_pos);
        parts.emplace_back(part);
        begin_pos = delim_pos + 1;
        delim_pos = str.find(delim, begin_pos);
    }
    parts.emplace_back(str.substr(begin_pos));
    return parts;
}
// remove when moving to c++20
// true if str begins with prefix (an empty prefix always matches)
inline bool string_starts_with(std::string_view str, std::string_view prefix) {
    return str.size() >= prefix.size() &&
           str.compare(0, prefix.size(), prefix) == 0;
}
// remove when moving to c++20 (std::string_view::ends_with)
inline bool string_ends_with(std::string_view str, std::string_view suffix) {
    if (suffix.size() > str.size()) {
        return false;
    }
    return str.substr(str.size() - suffix.size()) == suffix;
}
+
// If `str` ends with `suffix`, chop the suffix off in place and return true;
// otherwise leave `str` untouched and return false. An empty suffix always
// matches (and removes nothing), same as before.
inline bool string_remove_suffix(std::string & str, std::string_view suffix) {
    const bool matches = str.size() >= suffix.size() &&
        std::string_view(str).substr(str.size() - suffix.size()) == suffix;
    if (!matches) {
        return false;
    }
    str.resize(str.size() - suffix.size());
    return true;
}
+
// Find the longest suffix of `str` that is a prefix of `stop` and return the
// index where that partial stop sequence begins, or std::string::npos when no
// suffix of `str` starts `stop`. Longer matches are preferred (checked first),
// and the last character is compared before the full suffix as a fast filter.
inline size_t string_find_partial_stop(std::string_view str, std::string_view stop) {
    if (!str.empty() && !stop.empty()) {
        const char tail = str.back();
        for (size_t len = std::min(str.size(), stop.size()); len > 0; --len) {
            if (stop[len - 1] != tail) {
                continue;
            }
            if (str.substr(str.size() - len) == stop.substr(0, len)) {
                return str.size() - len;
            }
        }
    }
    return std::string::npos;
}
+
// parse a model KV override specification from `data` into `overrides`;
// returns false on malformed input
// NOTE(review): the accepted "key=type:value" syntax is defined by the
// implementation — confirm there before documenting it further
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
// process escape sequences (e.g. "\n", "\t") in `input`, in place
void string_process_escapes(std::string & input);

// human-readable renderings used for logging/debugging
std::string string_from(bool value);
std::string string_from(const std::vector<int> & values);
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);

//
// Filesystem utils
//

bool fs_validate_filename(const std::string & filename, bool allow_subdirs = false);
bool fs_create_directory_with_parents(const std::string & path);
bool fs_is_directory(const std::string & path);

std::string fs_get_cache_directory();
std::string fs_get_cache_file(const std::string & filename);

// metadata for a single entry returned by fs_list()
struct common_file_info {
    std::string path;    // full path of the entry
    std::string name;    // entry name only (no directory components)
    size_t size = 0; // in bytes
    bool is_dir = false; // true when the entry is a directory
};
// list the entries of `path`; directories are included only when
// `include_directories` is true
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);

//
// TTY utils
//

// Auto-detect if colors can be enabled based on terminal and environment
bool tty_can_use_colors();

//
// Model utils
//

struct common_sampler;
+
// note: defines the model, context, samplers, etc. lifetimes
// RAII owner for everything created from a common_params: the loaded model,
// its context, per-sequence samplers and any LoRA adapters. Implementation
// details are hidden behind a pimpl.
struct common_init_result {
    common_init_result(common_params & params);
    ~common_init_result();

    // accessors for the owned model/context (lifetime tied to this object)
    llama_model * model();
    llama_context * context();

    // per-sequence sampler lookup and reset
    common_sampler * sampler(llama_seq_id seq_id);
    void reset_samplers();

    // LoRA adapters owned by this init result
    std::vector<llama_adapter_lora_ptr> & lora();

private:
    struct impl;
    std::unique_ptr<impl> pimpl;
};
|
| 782 |
+
using common_init_result_ptr = std::unique_ptr<common_init_result>;

// load model + create context/samplers from `params`; returns the owning handle
common_init_result_ptr common_init_from_params(common_params & params);

// translate common_params into the llama.cpp / ggml parameter structs
struct llama_model_params common_model_params_to_llama ( common_params & params);
struct llama_context_params common_context_params_to_llama(const common_params & params);
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);

// clear LoRA adapters from context, then apply new list of adapters
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);

// resolve the model download endpoint (environment-dependent)
std::string get_model_endpoint();

//
// Batch utils
//

void common_batch_clear(struct llama_batch & batch);

// append one token at position `pos` for all `seq_ids` to `batch`;
// `logits` marks whether logits should be computed for this token
void common_batch_add(
    struct llama_batch & batch,
    llama_token id,
    llama_pos pos,
    const std::vector<llama_seq_id> & seq_ids,
    bool logits);

// decodes a single batch of tokens for a prompt and manages session tokens
//
// Note: We save state before the last token so that we can replay it to ensure
// compatibility with all memory types. Recurrent/hybrid models cannot remove
// tokens from memory, so this approach works across all model architectures.
bool common_prompt_batch_decode(
    struct llama_context * ctx,
    const std::vector<llama_token> & embd,
    int & n_past,
    int n_batch,
    std::string_view state_path,
    bool save_state);

// replays the last token after loading state to regenerate logits
// used after loading session state to ensure the sampling context has valid logits
bool common_replay_last_token(struct llama_context * ctx, llama_token last_token, int32_t pos);
//
// Vocab utils
//

// tokenizes a string into a vector of tokens
// should work similar to Python's `tokenizer.encode`
std::vector<llama_token> common_tokenize(
    const struct llama_context * ctx,
    const std::string & text,
    bool add_special,
    bool parse_special = false);

std::vector<llama_token> common_tokenize(
    const struct llama_vocab * vocab,
    const std::string & text,
    bool add_special,
    bool parse_special = false);

// tokenizes a token into a piece, optionally renders special/control tokens
// should work similar to Python's `tokenizer.id_to_piece`
std::string common_token_to_piece(
    const struct llama_context * ctx,
    llama_token token,
    bool special = true);

std::string common_token_to_piece(
    const struct llama_vocab * vocab,
    llama_token token,
    bool special = true);

// detokenizes a vector of tokens into a string
// should work similar to Python's `tokenizer.decode`
// optionally renders special/control tokens
std::string common_detokenize(
    const struct llama_context * ctx,
    const std::vector<llama_token> & tokens,
    bool special = true);

std::string common_detokenize(
    const struct llama_vocab * vocab,
    const std::vector<llama_token> & tokens,
    bool special = true);

//
// Embedding utils
//

// TODO: replace embd_norm with an enum
// normalize `n` floats from `inp` into `out`; `embd_norm` selects the
// normalization mode (see the implementation for the accepted values)
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm);

// cosine similarity between two `n`-dimensional embeddings
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);

//
// Control vector utils
//

struct common_control_vector_data {
    int n_embd; // embedding dimension of each per-layer vector

    // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
    std::vector<float> data;
};

// one control-vector file to load, with its blending strength
struct common_control_vector_load_info {
    float strength;

    std::string fname;
};

// Load control vectors, scale each by strength, and add them together.
// On error, returns {-1, empty}
common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
+
//
// Split utils
//

// GGUF metadata keys used by split (sharded) model files.
// NOTE(review): an unnamed namespace in a header gives every including TU its
// own copy of these constants; `inline constexpr` would be the conventional
// C++17 form — confirm this file is only included where that duplication is
// acceptable before changing linkage.
namespace {

const char * const LLM_KV_SPLIT_NO            = "split.no";
const char * const LLM_KV_SPLIT_COUNT         = "split.count";
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

}
+
//
// MoE utils
//

// regex fragment matching the per-layer expert FFN tensors
// (ffn_up/ffn_down/ffn_gate "exps" and "chexps" variants)
const char * const LLM_FFN_EXPS_REGEX = "\\.ffn_(up|down|gate)_(ch|)exps";

// build a regex matching only the expert FFN tensors of block `idx`
inline std::string llm_ffn_exps_block_regex(int idx) {
    return string_format("blk\\.%d%s", idx, LLM_FFN_EXPS_REGEX);
}
| 920 |
+
// buffer-type override that pins all expert FFN tensors to the CPU buffer type
// (commonly used to keep MoE expert weights in host memory)
inline llama_model_tensor_buft_override llm_ffn_exps_cpu_override() {
    return { LLM_FFN_EXPS_REGEX, ggml_backend_cpu_buffer_type() };
}
+
//
// training utils
//

// build a ggml-opt dataset from `tokens`, taking windows every `stride` tokens
// NOTE(review): exact windowing/labeling semantics live in the implementation
ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);

// "adamw" or "sgd" (case insensitive)
enum ggml_opt_optimizer_type common_opt_get_optimizer(const char *);
|
llama.cpp/common/console.cpp
ADDED
|
@@ -0,0 +1,1137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "console.h"
|
| 2 |
+
#include "log.h"
|
| 3 |
+
#include <vector>
|
| 4 |
+
#include <iostream>
|
| 5 |
+
#include <cassert>
|
| 6 |
+
#include <cstddef>
|
| 7 |
+
#include <cctype>
|
| 8 |
+
#include <cwctype>
|
| 9 |
+
#include <cstdint>
|
| 10 |
+
#include <condition_variable>
|
| 11 |
+
#include <mutex>
|
| 12 |
+
#include <thread>
|
| 13 |
+
#include <stdarg.h>
|
| 14 |
+
|
| 15 |
+
#if defined(_WIN32)
|
| 16 |
+
#define WIN32_LEAN_AND_MEAN
|
| 17 |
+
#ifndef NOMINMAX
|
| 18 |
+
#define NOMINMAX
|
| 19 |
+
#endif
|
| 20 |
+
#include <windows.h>
|
| 21 |
+
#include <fcntl.h>
|
| 22 |
+
#include <io.h>
|
| 23 |
+
#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING
|
| 24 |
+
#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004
|
| 25 |
+
#endif
|
| 26 |
+
#else
|
| 27 |
+
#include <climits>
|
| 28 |
+
#include <sys/ioctl.h>
|
| 29 |
+
#include <unistd.h>
|
| 30 |
+
#include <wchar.h>
|
| 31 |
+
#include <stdio.h>
|
| 32 |
+
#include <stdlib.h>
|
| 33 |
+
#include <signal.h>
|
| 34 |
+
#include <termios.h>
|
| 35 |
+
#endif
|
| 36 |
+
|
| 37 |
+
#define ANSI_COLOR_RED "\x1b[31m"
|
| 38 |
+
#define ANSI_COLOR_GREEN "\x1b[32m"
|
| 39 |
+
#define ANSI_COLOR_YELLOW "\x1b[33m"
|
| 40 |
+
#define ANSI_COLOR_BLUE "\x1b[34m"
|
| 41 |
+
#define ANSI_COLOR_MAGENTA "\x1b[35m"
|
| 42 |
+
#define ANSI_COLOR_CYAN "\x1b[36m"
|
| 43 |
+
#define ANSI_COLOR_GRAY "\x1b[90m"
|
| 44 |
+
#define ANSI_COLOR_RESET "\x1b[0m"
|
| 45 |
+
#define ANSI_BOLD "\x1b[1m"
|
| 46 |
+
|
| 47 |
+
namespace console {
|
| 48 |
+
|
| 49 |
+
#if defined (_WIN32)
namespace {
    // Use private-use unicode values to represent special keys that are not reported
    // as characters (e.g. arrows on Windows). These values should never clash with
    // real input and let the rest of the code handle navigation uniformly.
    static constexpr char32_t KEY_ARROW_LEFT       = 0xE000;
    static constexpr char32_t KEY_ARROW_RIGHT      = 0xE001;
    static constexpr char32_t KEY_ARROW_UP         = 0xE002;
    static constexpr char32_t KEY_ARROW_DOWN       = 0xE003;
    static constexpr char32_t KEY_HOME             = 0xE004;
    static constexpr char32_t KEY_END              = 0xE005;
    static constexpr char32_t KEY_CTRL_ARROW_LEFT  = 0xE006;
    static constexpr char32_t KEY_CTRL_ARROW_RIGHT = 0xE007;
    static constexpr char32_t KEY_DELETE           = 0xE008;
}

//
// Console state
//
#endif

// whether ANSI colors/advanced rendering are enabled (set by init())
static bool advanced_display = false;
// line-buffered/echoing fallback mode; true until init() proves otherwise
static bool simple_io = true;
// last display type emitted, to avoid re-sending identical ANSI codes
static display_type current_display = DISPLAY_TYPE_RESET;

// output stream; redirected to /dev/tty on POSIX when available (see init())
static FILE* out = stdout;

#if defined (_WIN32)
// Win32 console handle (stored as void* to avoid pulling HANDLE into the header scope)
static void* hConsole;
#else
// direct handle to the controlling terminal, or nullptr when unavailable
static FILE* tty = nullptr;
// termios state saved by init() and restored by cleanup()
static termios initial_state;
#endif
+
//
// Init and cleanup
//

// Configure the console for interactive use.
//   use_simple_io       - request basic line-based input (no raw key handling)
//   use_advanced_display - request ANSI color / VT processing
// Either request may be downgraded if the underlying console does not support it.
void init(bool use_simple_io, bool use_advanced_display) {
    advanced_display = use_advanced_display;
    simple_io = use_simple_io;
#if defined(_WIN32)
    // Windows-specific console initialization
    DWORD dwMode = 0;
    hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
    if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
        // stdout is not a console (e.g. redirected) — try stderr instead
        hConsole = GetStdHandle(STD_ERROR_HANDLE);
        if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) {
            // neither handle is a console: fall back to simple IO with no handle
            hConsole = nullptr;
            simple_io = true;
        }
    }
    if (hConsole) {
        // Check conditions combined to reduce nesting
        // Enable VT processing for ANSI colors; drop advanced display if it fails
        if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) &&
            !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
            advanced_display = false;
        }
        // Set console output codepage to UTF8
        SetConsoleOutputCP(CP_UTF8);
    }
    HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
    if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
        // Set console input codepage to UTF16
        _setmode(_fileno(stdin), _O_WTEXT);

        // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
        if (simple_io) {
            dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
        } else {
            dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
        }
        if (!SetConsoleMode(hConIn, dwMode)) {
            // raw mode unavailable — degrade to simple IO
            simple_io = true;
        }
    }
    if (simple_io) {
        // simple IO reads UTF-8 text from stdin
        _setmode(_fileno(stdin), _O_U8TEXT);
    }
#else
    // POSIX-specific console initialization
    if (!simple_io) {
        // switch the terminal to raw-ish mode: no canonical buffering, no echo,
        // blocking single-byte reads; the original state is saved for cleanup()
        struct termios new_termios;
        tcgetattr(STDIN_FILENO, &initial_state);
        new_termios = initial_state;
        new_termios.c_lflag &= ~(ICANON | ECHO);
        new_termios.c_cc[VMIN] = 1;
        new_termios.c_cc[VTIME] = 0;
        tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);

        // write directly to the controlling terminal when possible so output
        // is visible even if stdout is redirected
        tty = fopen("/dev/tty", "w+");
        if (tty != nullptr) {
            out = tty;
        }
    }

    // enable the user's locale so wide-character IO (getwchar/wcwidth) works
    setlocale(LC_ALL, "");
#endif
}
+
|
| 149 |
+
// Undo everything init() changed: reset the display color, restore the saved
// terminal mode and point output back at stdout.
void cleanup() {
    // Reset console display
    set_display(DISPLAY_TYPE_RESET);

#if !defined(_WIN32)
    // Restore settings on POSIX systems
    if (!simple_io) {
        if (tty != nullptr) {
            // redirect output back to stdout *before* closing the tty handle
            out = stdout;
            fclose(tty);
            tty = nullptr;
        }
        // restore the termios state captured in init()
        tcsetattr(STDIN_FILENO, TCSANOW, &initial_state);
    }
#endif
}
|
| 166 |
+
//
|
| 167 |
+
// Display and IO
|
| 168 |
+
//
|
| 169 |
+
|
| 170 |
+
// Keep track of current display and only emit ANSI code if it changes
|
| 171 |
+
void set_display(display_type display) {
|
| 172 |
+
if (advanced_display && current_display != display) {
|
| 173 |
+
common_log_flush(common_log_main());
|
| 174 |
+
switch(display) {
|
| 175 |
+
case DISPLAY_TYPE_RESET:
|
| 176 |
+
fprintf(out, ANSI_COLOR_RESET);
|
| 177 |
+
break;
|
| 178 |
+
case DISPLAY_TYPE_INFO:
|
| 179 |
+
fprintf(out, ANSI_COLOR_MAGENTA);
|
| 180 |
+
break;
|
| 181 |
+
case DISPLAY_TYPE_PROMPT:
|
| 182 |
+
fprintf(out, ANSI_COLOR_YELLOW);
|
| 183 |
+
break;
|
| 184 |
+
case DISPLAY_TYPE_REASONING:
|
| 185 |
+
fprintf(out, ANSI_COLOR_GRAY);
|
| 186 |
+
break;
|
| 187 |
+
case DISPLAY_TYPE_USER_INPUT:
|
| 188 |
+
fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN);
|
| 189 |
+
break;
|
| 190 |
+
case DISPLAY_TYPE_ERROR:
|
| 191 |
+
fprintf(out, ANSI_BOLD ANSI_COLOR_RED);
|
| 192 |
+
}
|
| 193 |
+
current_display = display;
|
| 194 |
+
fflush(out);
|
| 195 |
+
}
|
| 196 |
+
}
|
| 197 |
+
|
| 198 |
+
// Read one Unicode code point from the console.
// On Windows this reads raw key events (translating navigation keys into the
// private-use KEY_* constants and combining UTF-16 surrogate pairs); on POSIX
// it reads via getwchar(), pairing surrogates only on platforms where wchar_t
// is 16 bits. Returns WEOF on end of input / read failure.
static char32_t getchar32() {
#if defined(_WIN32)
    HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
    wchar_t high_surrogate = 0;

    while (true) {
        INPUT_RECORD record;
        DWORD count;
        if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
            return WEOF;
        }

        // only key-down events carry input; everything else is skipped
        if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
            wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
            if (wc == 0) {
                // non-character key: map navigation keys to private-use values
                const DWORD ctrl_mask = LEFT_CTRL_PRESSED | RIGHT_CTRL_PRESSED;
                const bool ctrl_pressed = (record.Event.KeyEvent.dwControlKeyState & ctrl_mask) != 0;
                switch (record.Event.KeyEvent.wVirtualKeyCode) {
                    case VK_LEFT:   return ctrl_pressed ? KEY_CTRL_ARROW_LEFT  : KEY_ARROW_LEFT;
                    case VK_RIGHT:  return ctrl_pressed ? KEY_CTRL_ARROW_RIGHT : KEY_ARROW_RIGHT;
                    case VK_UP:     return KEY_ARROW_UP;
                    case VK_DOWN:   return KEY_ARROW_DOWN;
                    case VK_HOME:   return KEY_HOME;
                    case VK_END:    return KEY_END;
                    case VK_DELETE: return KEY_DELETE;
                    default: continue; // ignore other non-character keys
                }
            }

            if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
                high_surrogate = wc;
                continue;
            }
            if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
                if (high_surrogate != 0) { // Check if we have a high surrogate
                    // combine the surrogate pair into a supplementary code point
                    return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
                }
            }

            high_surrogate = 0; // Reset the high surrogate
            return static_cast<char32_t>(wc);
        }
    }
#else
    wchar_t wc = getwchar();
    if (static_cast<wint_t>(wc) == WEOF) {
        return WEOF;
    }

#if WCHAR_MAX == 0xFFFF
    // 16-bit wchar_t (e.g. some embedded platforms): handle UTF-16 surrogates
    if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
        wchar_t low_surrogate = getwchar();
        if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
            return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
        }
    }
    if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
        return 0xFFFD; // Return the replacement character U+FFFD
    }
#endif

    return static_cast<char32_t>(wc);
#endif
}
+
|
| 263 |
+
// Move the cursor one cell to the left, wrapping to the end of the previous
// line when it sits in the first column. Uses the Win32 console API when a
// console handle is available; otherwise falls back to a plain backspace.
static void pop_cursor() {
#if defined(_WIN32)
    if (hConsole != NULL) {
        CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
        GetConsoleScreenBufferInfo(hConsole, &bufferInfo);

        COORD newCursorPosition = bufferInfo.dwCursorPosition;
        if (newCursorPosition.X == 0) {
            // first column: wrap to the last column of the previous line
            newCursorPosition.X = bufferInfo.dwSize.X - 1;
            newCursorPosition.Y -= 1;
        } else {
            newCursorPosition.X -= 1;
        }

        SetConsoleCursorPosition(hConsole, newCursorPosition);
        return;
    }
#endif
    putc('\b', out);
}
|
| 283 |
+
|
| 284 |
+
// Estimate how many terminal columns `codepoint` occupies.
// On Windows the real width is measured after writing (see put_codepoint), so
// a placeholder of 1 is returned; on POSIX wcwidth() is consulted (may return
// -1 for non-printable code points, which put_codepoint treats as "unknown").
static int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
    (void)codepoint;
    return 1;
#else
    return wcwidth(codepoint);
#endif
}
| 293 |
+
// Write one UTF-8 encoded code point (`length` bytes) to the console and
// return the number of columns it occupied. `expectedWidth` is the caller's
// estimate; it is returned unchanged whenever the real width cannot be
// measured. A negative `expectedWidth` means "unknown" and triggers
// measurement via cursor-position queries on POSIX.
static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
#if defined(_WIN32)
    CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
    if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
        // go with the default
        return expectedWidth;
    }
    COORD initialPosition = bufferInfo.dwCursorPosition;
    DWORD nNumberOfChars = length;
    WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);

    CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
    GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);

    // Figure out our real position if we're in the last column
    // (write space+backspace to force the pending line wrap; skipped for tab)
    if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
        DWORD nNumberOfChars;
        WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL);
        GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
    }

    int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
    if (width < 0) {
        // the cursor wrapped to the next line — add back the row width
        width += newBufferInfo.dwSize.X;
    }
    return width;
#else
    // We can trust expectedWidth if we've got one
    if (expectedWidth >= 0 || tty == nullptr) {
        fwrite(utf8_codepoint, length, 1, out);
        return expectedWidth;
    }

    // Unknown width: query the cursor position (DSR, ESC[6n) before and after
    // writing the code point and take the column difference.
    fputs("\033[6n", tty); // Query cursor position
    int x1;
    int y1;
    int x2;
    int y2;
    int results = 0;
    results = fscanf(tty, "\033[%d;%dR", &y1, &x1);

    fwrite(utf8_codepoint, length, 1, tty);

    fputs("\033[6n", tty); // Query cursor position
    results += fscanf(tty, "\033[%d;%dR", &y2, &x2);

    if (results != 4) {
        // one of the two cursor-position reports failed to parse
        return expectedWidth;
    }

    int width = x2 - x1;
    if (width < 0) {
        // Calculate the width considering text wrapping
        struct winsize w;
        ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
        width += w.ws_col;
    }
    return width;
#endif
}
|
| 353 |
+
|
| 354 |
+
// Overwrite the single-width character cell immediately left of the cursor
// with `ch` (used to repaint the last typed character in a new style).
static void replace_last(char ch) {
#if defined(_WIN32)
    pop_cursor();
    put_codepoint(&ch, 1, 1);
#else
    // backspace then rewrite in place
    fprintf(out, "\b%c", ch);
#endif
}
|
| 362 |
+
|
| 363 |
+
// Decode the UTF-8 sequence starting at input[pos].
// On success returns the code point and sets `advance` to the sequence length
// (1-4 bytes). On an invalid lead byte, a bad continuation byte or a sequence
// truncated by the end of the string, returns U+FFFD and sets `advance` to 1
// so the caller consumes exactly one byte and can resynchronize.
static char32_t decode_utf8(const std::string & input, size_t pos, size_t & advance) {
    const unsigned char lead = static_cast<unsigned char>(input[pos]);

    // ASCII fast path
    if ((lead & 0x80u) == 0u) {
        advance = 1;
        return lead;
    }

    // determine the sequence length and initial payload bits from the lead byte
    size_t len = 0;
    char32_t cp = 0;
    if ((lead & 0xE0u) == 0xC0u) {
        len = 2;
        cp  = lead & 0x1Fu;
    } else if ((lead & 0xF0u) == 0xE0u) {
        len = 3;
        cp  = lead & 0x0Fu;
    } else if ((lead & 0xF8u) == 0xF0u) {
        len = 4;
        cp  = lead & 0x07u;
    }

    // invalid lead byte, or the sequence would run past the end of the string
    if (len == 0 || pos + (len - 1) >= input.size()) {
        advance = 1;
        return 0xFFFD; // replacement character for invalid input
    }

    // fold in the continuation bytes (each must be 10xxxxxx)
    for (size_t i = 1; i < len; ++i) {
        const unsigned char cont = static_cast<unsigned char>(input[pos + i]);
        if ((cont & 0xC0u) != 0x80u) {
            advance = 1;
            return 0xFFFD;
        }
        cp = (cp << 6) | (cont & 0x3Fu);
    }

    advance = len;
    return cp;
}
|
| 408 |
+
|
| 409 |
+
// Encode a single Unicode code point as UTF-8 and append it to `out`.
// Code points above U+10FFFF are not valid Unicode and are silently dropped.
static void append_utf8(char32_t ch, std::string & out) {
    if (ch > 0x10FFFF) {
        return; // invalid Unicode code point - append nothing
    }
    if (ch <= 0x7F) {
        out.push_back(static_cast<char>(ch)); // ASCII fast path
        return;
    }
    int           n_cont; // number of continuation bytes
    unsigned char lead;   // prefix bits of the leading byte
    if (ch <= 0x7FF) {
        n_cont = 1; lead = 0xC0;
    } else if (ch <= 0xFFFF) {
        n_cont = 2; lead = 0xE0;
    } else {
        n_cont = 3; lead = 0xF0;
    }
    out.push_back(static_cast<char>(lead | (ch >> (6 * n_cont))));
    for (int i = n_cont - 1; i >= 0; --i) {
        out.push_back(static_cast<char>(0x80 | ((ch >> (6 * i)) & 0x3F)));
    }
}
|
| 428 |
+
|
| 429 |
+
// Return the byte index where the UTF-8 character preceding `pos` starts.
// Returns 0 when `pos` is already at the beginning of the string.
static size_t prev_utf8_char_pos(const std::string & line, size_t pos) {
    if (pos == 0) {
        return 0;
    }
    // step backwards over continuation bytes (0b10xxxxxx) to the lead byte
    do {
        --pos;
    } while (pos > 0 && (line[pos] & 0xC0) == 0x80);
    return pos;
}
|
| 438 |
+
|
| 439 |
+
// Return the byte index where the UTF-8 character following the one at `pos`
// starts. Returns line.length() when already at (or past) the end.
static size_t next_utf8_char_pos(const std::string & line, size_t pos) {
    const size_t len = line.length();
    if (pos >= len) {
        return len;
    }
    // advance past the current lead byte and any continuation bytes
    do {
        ++pos;
    } while (pos < len && (line[pos] & 0xC0) == 0x80);
    return pos;
}
|
| 447 |
+
|
| 448 |
+
// Forward declarations of the cursor-movement helpers defined further below;
// needed because delete_at_cursor/set_line_contents (defined first) call them.
static void move_cursor(int delta);
static void move_word_left(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line);
static void move_word_right(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line);
static void move_to_line_start(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths);
static void move_to_line_end(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line);
|
| 453 |
+
|
| 454 |
+
// Delete the character under the cursor (Delete key behavior) and redraw the
// rest of the line in place.
//   line     - UTF-8 line buffer (modified)
//   widths   - per-character display widths, parallel to the characters of line (modified)
//   char_pos - cursor position in characters (stays put on delete)
//   byte_pos - cursor position in bytes (stays put on delete)
static void delete_at_cursor(std::string & line, std::vector<int> & widths, size_t & char_pos, size_t & byte_pos) {
    if (char_pos >= widths.size()) {
        return; // cursor at end of line - nothing to delete
    }

    size_t next_pos = next_utf8_char_pos(line, byte_pos);
    int w = widths[char_pos];              // display width of the deleted character
    size_t char_len = next_pos - byte_pos; // byte length of the deleted character

    line.erase(byte_pos, char_len);
    widths.erase(widths.begin() + char_pos);

    // re-print everything after the cursor, shifted left by one character
    size_t p = byte_pos;
    int tail_width = 0;
    for (size_t i = char_pos; i < widths.size(); ++i) {
        size_t following = next_utf8_char_pos(line, p);
        put_codepoint(line.c_str() + p, following - p, widths[i]);
        tail_width += widths[i];
        p = following;
    }

    // blank out the now-stale trailing cells left on screen
    for (int i = 0; i < w; ++i) {
        fputc(' ', out);
    }

    // move the terminal cursor back to the logical cursor position
    move_cursor(-(tail_width + w));
}
|
| 481 |
+
|
| 482 |
+
// Visually erase the displayed input line by overwriting it with spaces and
// moving the cursor back to where it started. Assumes the cursor is at the
// start of the line; counts at least one cell per character.
static void clear_current_line(const std::vector<int> & widths) {
    int total_width = 0;
    for (int w : widths) {
        total_width += (w > 0 ? w : 1); // treat zero/negative widths as one cell
    }

    if (total_width > 0) {
        std::string spaces(total_width, ' ');
        fwrite(spaces.c_str(), 1, total_width, out);
        move_cursor(-total_width);
    }
}
|
| 494 |
+
|
| 495 |
+
// Replace the currently displayed input line with `new_line`: clear the old
// line from the screen, print the new one character by character, and rebuild
// the per-character width table. Leaves the cursor at the end of the new line.
static void set_line_contents(std::string new_line, std::string & line, std::vector<int> & widths, size_t & char_pos,
                              size_t & byte_pos) {
    move_to_line_start(char_pos, byte_pos, widths);
    clear_current_line(widths);

    line = std::move(new_line);
    widths.clear();
    byte_pos = 0;
    char_pos = 0;

    // print one UTF-8 character at a time, recording the real display width
    // reported by put_codepoint (which may differ from the estimate)
    size_t idx = 0;
    while (idx < line.size()) {
        size_t advance = 0;
        char32_t cp = decode_utf8(line, idx, advance);
        int expected_width = estimateWidth(cp);
        int real_width = put_codepoint(line.c_str() + idx, advance, expected_width);
        if (real_width < 0) real_width = 0; // clamp errors to zero width
        widths.push_back(real_width);
        idx += advance;
        ++char_pos;
        byte_pos = idx;
    }
}
|
| 518 |
+
|
| 519 |
+
// Move the cursor to the beginning of the input line (Home key).
static void move_to_line_start(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths) {
    // total display width of everything left of the cursor
    int back_width = 0;
    for (size_t i = 0; i < char_pos; ++i) {
        back_width += widths[i];
    }
    move_cursor(-back_width);
    char_pos = 0;
    byte_pos = 0;
}
|
| 528 |
+
|
| 529 |
+
// Move the cursor to the end of the input line (End key).
static void move_to_line_end(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line) {
    // total display width of everything right of the cursor
    int forward_width = 0;
    for (size_t i = char_pos; i < widths.size(); ++i) {
        forward_width += widths[i];
    }
    move_cursor(forward_width);
    char_pos = widths.size();
    byte_pos = line.length();
}
|
| 538 |
+
|
| 539 |
+
// Return true if an ANSI escape parameter list (';'-separated decimal fields,
// e.g. "1;5" from "\033[1;5C") contains the value 5, which xterm-style
// terminals use to encode the Ctrl modifier. Fields containing non-digit
// characters are ignored.
static bool has_ctrl_modifier(const std::string & params) {
    size_t start = 0;
    for (;;) {
        if (start >= params.size()) {
            return false;
        }
        const size_t end = params.find(';', start);
        const size_t len = (end == std::string::npos) ? params.size() - start : end - start;
        if (len > 0) {
            bool all_digits = true;
            int  value      = 0;
            for (size_t i = 0; i < len; ++i) {
                const char ch = params[start + i];
                if (!std::isdigit(static_cast<unsigned char>(ch))) {
                    all_digits = false;
                    break;
                }
                value = value * 10 + (ch - '0');
            }
            if (all_digits && value == 5) {
                return true;
            }
        }
        if (end == std::string::npos) {
            return false;
        }
        start = end + 1;
    }
}
|
| 566 |
+
|
| 567 |
+
// True when the code point is whitespace per the current locale's wide-char
// classification.
static bool is_space_codepoint(char32_t cp) {
    const wint_t wc = static_cast<wint_t>(cp);
    return 0 != std::iswspace(wc);
}
|
| 570 |
+
|
| 571 |
+
// Move the cursor one word to the left (Ctrl+Left): first skip whitespace
// immediately before the cursor, then skip the non-whitespace word itself.
static void move_word_left(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line) {
    if (char_pos == 0) {
        return;
    }

    size_t new_char_pos = char_pos;
    size_t new_byte_pos = byte_pos;
    int move_width = 0; // accumulated display width to move the cursor by

    // skip whitespace to the left of the cursor
    while (new_char_pos > 0) {
        size_t prev_byte = prev_utf8_char_pos(line, new_byte_pos);
        size_t advance = 0;
        char32_t cp = decode_utf8(line, prev_byte, advance);
        if (!is_space_codepoint(cp)) {
            break;
        }
        move_width += widths[new_char_pos - 1];
        new_char_pos--;
        new_byte_pos = prev_byte;
    }

    // skip the word (run of non-whitespace characters)
    while (new_char_pos > 0) {
        size_t prev_byte = prev_utf8_char_pos(line, new_byte_pos);
        size_t advance = 0;
        char32_t cp = decode_utf8(line, prev_byte, advance);
        if (is_space_codepoint(cp)) {
            break;
        }
        move_width += widths[new_char_pos - 1];
        new_char_pos--;
        new_byte_pos = prev_byte;
    }

    move_cursor(-move_width);
    char_pos = new_char_pos;
    byte_pos = new_byte_pos;
}
|
| 608 |
+
|
| 609 |
+
// Move the cursor one word to the right (Ctrl+Right): skip whitespace under
// the cursor, then the next word, then the whitespace after it - landing at
// the start of the following word (or the end of the line).
static void move_word_right(size_t & char_pos, size_t & byte_pos, const std::vector<int> & widths, const std::string & line) {
    if (char_pos >= widths.size()) {
        return;
    }

    size_t new_char_pos = char_pos;
    size_t new_byte_pos = byte_pos;
    int move_width = 0; // accumulated display width to move the cursor by

    // skip whitespace at/after the cursor
    while (new_char_pos < widths.size()) {
        size_t advance = 0;
        char32_t cp = decode_utf8(line, new_byte_pos, advance);
        if (!is_space_codepoint(cp)) {
            break;
        }
        move_width += widths[new_char_pos];
        new_char_pos++;
        new_byte_pos += advance;
    }

    // skip the word (run of non-whitespace characters)
    while (new_char_pos < widths.size()) {
        size_t advance = 0;
        char32_t cp = decode_utf8(line, new_byte_pos, advance);
        if (is_space_codepoint(cp)) {
            break;
        }
        move_width += widths[new_char_pos];
        new_char_pos++;
        new_byte_pos += advance;
    }

    // skip trailing whitespace to land on the start of the next word
    while (new_char_pos < widths.size()) {
        size_t advance = 0;
        char32_t cp = decode_utf8(line, new_byte_pos, advance);
        if (!is_space_codepoint(cp)) {
            break;
        }
        move_width += widths[new_char_pos];
        new_char_pos++;
        new_byte_pos += advance;
    }

    move_cursor(move_width);
    char_pos = new_char_pos;
    byte_pos = new_byte_pos;
}
|
| 655 |
+
|
| 656 |
+
// Move the terminal cursor horizontally by `delta` display columns (negative
// = left). On Windows this sets the console cursor position directly,
// wrapping across line boundaries; on POSIX it emits backspaces for leftward
// moves and the "cursor forward" escape sequence for rightward moves.
static void move_cursor(int delta) {
    if (delta == 0) return;
#if defined(_WIN32)
    if (hConsole != NULL) {
        CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
        GetConsoleScreenBufferInfo(hConsole, &bufferInfo);
        COORD newCursorPosition = bufferInfo.dwCursorPosition;
        int width = bufferInfo.dwSize.X;
        int newX = newCursorPosition.X + delta;
        int newY = newCursorPosition.Y;

        // wrap horizontally across console rows
        while (newX >= width) {
            newX -= width;
            newY++;
        }
        while (newX < 0) {
            newX += width;
            newY--;
        }

        newCursorPosition.X = newX;
        newCursorPosition.Y = newY;
        SetConsoleCursorPosition(hConsole, newCursorPosition);
    }
#else
    if (delta < 0) {
        for (int i = 0; i < -delta; i++) fprintf(out, "\b");
    } else {
        for (int i = 0; i < delta; i++) fprintf(out, "\033[C"); // CSI cursor forward
    }
#endif
}
|
| 688 |
+
|
| 689 |
+
// Input-line history with up/down browsing.
// viewing_idx == SIZE_MAX means "not currently browsing"; while browsing,
// backup_line holds the in-progress line so it can be restored.
struct history_t {
    std::vector<std::string> entries;
    size_t viewing_idx = SIZE_MAX;
    std::string backup_line; // current line before viewing history
    // Record a finished line. Empty lines and consecutive duplicates are not
    // stored. Always resets any active browsing state.
    void add(const std::string & line) {
        if (line.empty()) {
            return;
        }
        const bool dup_of_last = !entries.empty() && entries.back() == line;
        if (!dup_of_last) {
            entries.push_back(line);
        }
        end_viewing(); // also clear viewing state
    }
    // Step to the older entry, writing it into cur_line. Clamps at the oldest
    // entry. Returns false when there is no history or browsing is inactive.
    bool prev(std::string & cur_line) {
        if (entries.empty() || viewing_idx == SIZE_MAX) {
            return false;
        }
        if (viewing_idx > 0) {
            --viewing_idx;
        }
        cur_line = entries[viewing_idx];
        return true;
    }
    // Step to the newer entry; stepping past the most recent entry restores
    // the backed-up line and ends browsing. Returns false when inactive.
    bool next(std::string & cur_line) {
        if (entries.empty() || viewing_idx == SIZE_MAX) {
            return false;
        }
        ++viewing_idx;
        if (viewing_idx < entries.size()) {
            cur_line = entries[viewing_idx];
        } else {
            cur_line = backup_line;
            end_viewing();
        }
        return true;
    }
    // Start browsing: remember the in-progress line and point one past the
    // newest entry (so the first prev() shows the newest entry).
    void begin_viewing(const std::string & line) {
        backup_line = line;
        viewing_idx = entries.size();
    }
    void end_viewing() {
        viewing_idx = SIZE_MAX;
        backup_line.clear();
    }
    bool is_viewing() const {
        return viewing_idx != SIZE_MAX;
    }
} history;
|
| 742 |
+
|
| 743 |
+
// Interactive line editor used when the console supports raw input: handles
// UTF-8 insertion at the cursor, backspace/delete, arrow-key cursor movement
// (with Ctrl for word jumps), Home/End, and history browsing via up/down.
// Maintains three pieces of parallel state: the UTF-8 buffer `line`, the
// per-character display widths `widths`, and the cursor as both a character
// index (char_pos) and a byte index (byte_pos).
// Returns true when more input is expected (multiline continuation).
static bool readline_advanced(std::string & line, bool multiline_input) {
    if (out != stdout) {
        fflush(stdout);
    }

    line.clear();
    std::vector<int> widths;
    bool is_special_char = false; // last char is a highlighted '\' or '/'
    bool end_of_stream = false;

    size_t byte_pos = 0; // current byte index
    size_t char_pos = 0; // current character index (one char can be multiple bytes)

    char32_t input_char;
    while (true) {
        // invariants: a character is at least one byte, and the cursor never
        // goes past the end of the width table
        assert(char_pos <= byte_pos);
        assert(char_pos <= widths.size());
        // replace the current line with the previous history entry
        auto history_prev = [&]() {
            if (!history.is_viewing()) {
                history.begin_viewing(line);
            }
            std::string new_line;
            if (!history.prev(new_line)) {
                return;
            }
            set_line_contents(new_line, line, widths, char_pos, byte_pos);
        };
        // replace the current line with the next history entry (or the backup)
        auto history_next = [&]() {
            if (history.is_viewing()) {
                std::string new_line;
                if (!history.next(new_line)) {
                    return;
                }
                set_line_contents(new_line, line, widths, char_pos, byte_pos);
            }
        };

        fflush(out); // Ensure all output is displayed before waiting for input
        input_char = getchar32();

        if (input_char == '\r' || input_char == '\n') {
            break;
        }

        if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D */) {
            end_of_stream = true;
            break;
        }

        // un-highlight the trailing special char before processing new input
        if (is_special_char) {
            replace_last(line.back());
            is_special_char = false;
        }

        if (input_char == '\033') { // Escape sequence
            char32_t code = getchar32();
            if (code == '[') { // CSI sequence: collect parameters up to the final byte
                std::string params;
                while (true) {
                    code = getchar32();
                    if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~' || code == (char32_t) WEOF) {
                        break;
                    }
                    params.push_back(static_cast<char>(code));
                }

                const bool ctrl_modifier = has_ctrl_modifier(params);

                if (code == 'D') { // left
                    if (ctrl_modifier) {
                        move_word_left(char_pos, byte_pos, widths, line);
                    } else if (char_pos > 0) {
                        int w = widths[char_pos - 1];
                        move_cursor(-w);
                        char_pos--;
                        byte_pos = prev_utf8_char_pos(line, byte_pos);
                    }
                } else if (code == 'C') { // right
                    if (ctrl_modifier) {
                        move_word_right(char_pos, byte_pos, widths, line);
                    } else if (char_pos < widths.size()) {
                        int w = widths[char_pos];
                        move_cursor(w);
                        char_pos++;
                        byte_pos = next_utf8_char_pos(line, byte_pos);
                    }
                } else if (code == 'H') { // home
                    move_to_line_start(char_pos, byte_pos, widths);
                } else if (code == 'F') { // end
                    move_to_line_end(char_pos, byte_pos, widths, line);
                } else if (code == 'A' || code == 'B') {
                    // up/down
                    if (code == 'A') {
                        history_prev();
                        is_special_char = false;
                    } else if (code == 'B') {
                        history_next();
                        is_special_char = false;
                    }
                } else if ((code == '~' || (code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z')) && !params.empty()) {
                    // numbered keys (e.g. "\033[3~" = delete) - take the first
                    // parameter field before any ';'
                    std::string digits;
                    for (char ch : params) {
                        if (ch == ';') {
                            break;
                        }
                        if (std::isdigit(static_cast<unsigned char>(ch))) {
                            digits.push_back(ch);
                        }
                    }

                    if (code == '~') {
                        if (digits == "1" || digits == "7") { // home
                            move_to_line_start(char_pos, byte_pos, widths);
                        } else if (digits == "4" || digits == "8") { // end
                            move_to_line_end(char_pos, byte_pos, widths, line);
                        } else if (digits == "3") { // delete
                            delete_at_cursor(line, widths, char_pos, byte_pos);
                        }
                    }
                }
            } else if (code == 0x1B) {
                // Discard the rest of the escape sequence
                while ((code = getchar32()) != (char32_t) WEOF) {
                    if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
                        break;
                    }
                }
            }
#if defined(_WIN32)
        // on Windows the special keys arrive as pre-decoded virtual codes
        } else if (input_char == KEY_ARROW_LEFT) {
            if (char_pos > 0) {
                int w = widths[char_pos - 1];
                move_cursor(-w);
                char_pos--;
                byte_pos = prev_utf8_char_pos(line, byte_pos);
            }
        } else if (input_char == KEY_ARROW_RIGHT) {
            if (char_pos < widths.size()) {
                int w = widths[char_pos];
                move_cursor(w);
                char_pos++;
                byte_pos = next_utf8_char_pos(line, byte_pos);
            }
        } else if (input_char == KEY_CTRL_ARROW_LEFT) {
            move_word_left(char_pos, byte_pos, widths, line);
        } else if (input_char == KEY_CTRL_ARROW_RIGHT) {
            move_word_right(char_pos, byte_pos, widths, line);
        } else if (input_char == KEY_HOME) {
            move_to_line_start(char_pos, byte_pos, widths);
        } else if (input_char == KEY_END) {
            move_to_line_end(char_pos, byte_pos, widths, line);
        } else if (input_char == KEY_DELETE) {
            delete_at_cursor(line, widths, char_pos, byte_pos);
        } else if (input_char == KEY_ARROW_UP || input_char == KEY_ARROW_DOWN) {
            if (input_char == KEY_ARROW_UP) {
                history_prev();
                is_special_char = false;
            } else if (input_char == KEY_ARROW_DOWN) {
                history_next();
                is_special_char = false;
            }
#endif
        } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
            if (char_pos > 0) {
                int w = widths[char_pos - 1];
                move_cursor(-w);
                char_pos--;
                size_t prev_pos = prev_utf8_char_pos(line, byte_pos);
                size_t char_len = byte_pos - prev_pos;
                byte_pos = prev_pos;

                // remove the character
                line.erase(byte_pos, char_len);
                widths.erase(widths.begin() + char_pos);

                // redraw tail
                size_t p = byte_pos;
                int tail_width = 0;
                for (size_t i = char_pos; i < widths.size(); ++i) {
                    size_t next_p = next_utf8_char_pos(line, p);
                    put_codepoint(line.c_str() + p, next_p - p, widths[i]);
                    tail_width += widths[i];
                    p = next_p;
                }

                // clear display
                for (int i = 0; i < w; ++i) {
                    fputc(' ', out);
                }
                move_cursor(-(tail_width + w));
            }
        } else {
            // insert character
            std::string new_char_str;
            append_utf8(input_char, new_char_str);
            int w = estimateWidth(input_char);

            if (char_pos == widths.size()) {
                // insert at the end
                line += new_char_str;
                int real_w = put_codepoint(new_char_str.c_str(), new_char_str.length(), w);
                if (real_w < 0) real_w = 0;
                widths.push_back(real_w);
                byte_pos += new_char_str.length();
                char_pos++;
            } else {
                // insert in middle
                line.insert(byte_pos, new_char_str);

                int real_w = put_codepoint(new_char_str.c_str(), new_char_str.length(), w);
                if (real_w < 0) real_w = 0;

                widths.insert(widths.begin() + char_pos, real_w);

                // print the tail
                size_t p = byte_pos + new_char_str.length();
                int tail_width = 0;
                for (size_t i = char_pos + 1; i < widths.size(); ++i) {
                    size_t next_p = next_utf8_char_pos(line, p);
                    put_codepoint(line.c_str() + p, next_p - p, widths[i]);
                    tail_width += widths[i];
                    p = next_p;
                }

                move_cursor(-tail_width);

                byte_pos += new_char_str.length();
                char_pos++;
            }
        }

        // highlight a trailing '\' or '/' - they change the submit behavior
        if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
            replace_last(line.back());
            is_special_char = true;
        }
    }

    bool has_more = multiline_input;
    if (is_special_char) {
        // erase the highlighted trailing special char from the display
        replace_last(' ');
        pop_cursor();

        char last = line.back();
        line.pop_back();
        if (last == '\\') {
            // trailing backslash toggles multiline continuation
            line += '\n';
            fputc('\n', out);
            has_more = !has_more;
        } else {
            // llama will just eat the single space, it won't act as a space
            if (line.length() == 1 && line.back() == ' ') {
                line.clear();
                pop_cursor();
            }
            has_more = false;
        }
    } else {
        if (end_of_stream) {
            has_more = false;
        } else {
            line += '\n';
            fputc('\n', out);
        }
    }

    if (!end_of_stream && !line.empty()) {
        // remove the trailing newline for history storage
        // NOTE(review): this also strips the newline from the `line` returned
        // to the caller - confirm callers expect that
        if (!line.empty() && line.back() == '\n') {
            line.pop_back();
        }
        // TODO: maybe support multiline history entries?
        history.add(line);
    }

    fflush(out);
    return has_more;
}
|
| 1020 |
+
|
| 1021 |
+
// Fallback line reader used when advanced console handling is disabled.
// Reads a full line with std::getline (wide input converted to UTF-8 on
// Windows). A trailing '/' returns control immediately; a trailing '\\'
// toggles multiline mode. Returns true when more input lines are expected.
static bool readline_simple(std::string & line, bool multiline_input) {
#if defined(_WIN32)
    std::wstring wline;
    if (!std::getline(std::wcin, wline)) {
        // Input stream is bad or EOF received
        line.clear();
        GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
        return false;
    }

    // convert the UTF-16 console input to UTF-8
    int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL);
    line.resize(size_needed);
    WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL);
#else
    if (!std::getline(std::cin, line)) {
        // Input stream is bad or EOF received
        line.clear();
        return false;
    }
#endif
    if (!line.empty()) {
        char last = line.back();
        if (last == '/') { // Always return control on '/' symbol
            line.pop_back();
            return false;
        }
        if (last == '\\') { // '\\' changes the default action
            line.pop_back();
            multiline_input = !multiline_input;
        }
    }
    line += '\n';

    // By default, continue input if multiline_input is set
    return multiline_input;
}
|
| 1057 |
+
|
| 1058 |
+
bool readline(std::string & line, bool multiline_input) {
|
| 1059 |
+
if (simple_io) {
|
| 1060 |
+
return readline_simple(line, multiline_input);
|
| 1061 |
+
}
|
| 1062 |
+
return readline_advanced(line, multiline_input);
|
| 1063 |
+
}
|
| 1064 |
+
|
| 1065 |
+
// Animated "busy" spinner drawn by a background thread. All entry points are
// no-ops when simple_io is enabled. start()/stop() are protected by `mtx`;
// the worker thread wakes every `wait_time` to advance the animation and
// exits promptly when `running` is cleared (signalled via `cv_stop`).
namespace spinner {
static const char LOADING_CHARS[] = {'|', '/', '-', '\\'};
static std::condition_variable cv_stop;
static std::thread th;
static size_t frame = 0; // only modified by one thread
static bool running = false;
static std::mutex mtx;
static auto wait_time = std::chrono::milliseconds(100);
// advance the animation by one frame, overwriting the last printed char
static void draw_next_frame() {
    // don't need lock because only one thread modifies running
    frame = (frame + 1) % sizeof(LOADING_CHARS);
    replace_last(LOADING_CHARS[frame]);
    fflush(out);
}
// Print the first spinner char and launch the animation thread.
// Safe to call when already running (no-op).
void start() {
    std::unique_lock<std::mutex> lock(mtx);
    if (simple_io || running) {
        return;
    }
    common_log_flush(common_log_main());
    fprintf(out, "%c", LOADING_CHARS[0]);
    fflush(out);
    frame = 1;
    running = true;
    th = std::thread([]() {
        std::unique_lock<std::mutex> lock(mtx);
        while (true) {
            // wait_for returns true when the predicate (!running) holds -> stop
            if (cv_stop.wait_for(lock, wait_time, []{ return !running; })) {
                break;
            }
            draw_next_frame();
        }
    });
}
// Stop and join the animation thread, then erase the spinner character.
// Safe to call when not running (no-op).
void stop() {
    {
        std::unique_lock<std::mutex> lock(mtx);
        if (simple_io || !running) {
            return;
        }
        running = false;
        cv_stop.notify_all();
    }
    if (th.joinable()) {
        th.join();
    }
    replace_last(' ');
    pop_cursor();
    fflush(out);
}
}
|
| 1116 |
+
|
| 1117 |
+
// printf-style message written to the console output stream, using the
// current display style.
void log(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    vfprintf(out, fmt, args);
    va_end(args);
}
|
| 1123 |
+
|
| 1124 |
+
// printf-style message written with the error display style; the previously
// active display style is restored afterwards.
void error(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    display_type cur = current_display;
    set_display(DISPLAY_TYPE_ERROR);
    vfprintf(out, fmt, args);
    set_display(cur); // restore previous color
    va_end(args);
}
|
| 1133 |
+
|
| 1134 |
+
// Flush any buffered console output.
void flush() {
    fflush(out);
}
|
| 1137 |
+
}
|
llama.cpp/common/console.h
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// Console functions
|
| 2 |
+
|
| 3 |
+
#pragma once
|
| 4 |
+
|
| 5 |
+
#include "common.h"
|
| 6 |
+
|
| 7 |
+
#include <string>
|
| 8 |
+
|
| 9 |
+
// Display styles for console output, selected via console::set_display().
enum display_type {
    DISPLAY_TYPE_RESET = 0, // default style
    DISPLAY_TYPE_INFO,
    DISPLAY_TYPE_PROMPT,
    DISPLAY_TYPE_REASONING,
    DISPLAY_TYPE_USER_INPUT,
    DISPLAY_TYPE_ERROR
};
|
| 17 |
+
|
| 18 |
+
namespace console {
    // init/cleanup must bracket all other console usage
    void init(bool use_simple_io, bool use_advanced_display);
    void cleanup();
    // switch the active display style for subsequent output
    void set_display(display_type display);
    // read one line of user input; returns true when more lines are expected
    bool readline(std::string & line, bool multiline_input);

    // animated "busy" indicator drawn on the console
    namespace spinner {
        void start();
        void stop();
    }

    // note: the logging API below outputs directly to stdout
    // it can negatively impact performance if used on inference thread
    // only use it in a dedicated CLI thread
    // for logging in inference thread, use log.h instead

    LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
    void log(const char * fmt, ...);

    LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
    void error(const char * fmt, ...);

    void flush();
}
|
llama.cpp/common/debug.cpp
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#include "debug.h"
|
| 2 |
+
|
| 3 |
+
#include "log.h"
|
| 4 |
+
|
| 5 |
+
#include <cmath>
|
| 6 |
+
#include <string>
|
| 7 |
+
|
| 8 |
+
static std::string common_ggml_ne_string(const ggml_tensor * t) {
|
| 9 |
+
std::string str;
|
| 10 |
+
for (int i = 0; i < GGML_MAX_DIMS; ++i) {
|
| 11 |
+
str += std::to_string(t->ne[i]);
|
| 12 |
+
if (i + 1 < GGML_MAX_DIMS) {
|
| 13 |
+
str += ", ";
|
| 14 |
+
}
|
| 15 |
+
}
|
| 16 |
+
return str;
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
static float common_ggml_get_float_value(const uint8_t * data,
|
| 20 |
+
ggml_type type,
|
| 21 |
+
const size_t * nb,
|
| 22 |
+
size_t i0,
|
| 23 |
+
size_t i1,
|
| 24 |
+
size_t i2,
|
| 25 |
+
size_t i3) {
|
| 26 |
+
size_t i = i3 * nb[3] + i2 * nb[2] + i1 * nb[1] + i0 * nb[0];
|
| 27 |
+
float v;
|
| 28 |
+
if (type == GGML_TYPE_F16) {
|
| 29 |
+
v = ggml_fp16_to_fp32(*(const ggml_fp16_t *) &data[i]);
|
| 30 |
+
} else if (type == GGML_TYPE_F32) {
|
| 31 |
+
v = *(const float *) &data[i];
|
| 32 |
+
} else if (type == GGML_TYPE_I64) {
|
| 33 |
+
v = (float) *(const int64_t *) &data[i];
|
| 34 |
+
} else if (type == GGML_TYPE_I32) {
|
| 35 |
+
v = (float) *(const int32_t *) &data[i];
|
| 36 |
+
} else if (type == GGML_TYPE_I16) {
|
| 37 |
+
v = (float) *(const int16_t *) &data[i];
|
| 38 |
+
} else if (type == GGML_TYPE_I8) {
|
| 39 |
+
v = (float) *(const int8_t *) &data[i];
|
| 40 |
+
} else if (type == GGML_TYPE_BF16) {
|
| 41 |
+
v = ggml_bf16_to_fp32(*(const ggml_bf16_t *) &data[i]);
|
| 42 |
+
} else {
|
| 43 |
+
GGML_ABORT("fatal error");
|
| 44 |
+
}
|
| 45 |
+
return v;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
#define INDENT " "
|
| 49 |
+
|
| 50 |
+
// Pretty-print tensor contents to the log in a numpy-like nested-bracket
// format. Any dimension longer than 2*n is elided in the middle, showing only
// the first and last n entries. A full-tensor sum is computed first (over ALL
// elements, not just the displayed ones) and printed after each outermost
// slab; when `abort` is true, the process exits if that sum is NaN.
template <bool abort>
void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n) {
    GGML_ASSERT(n > 0);
    // first pass: accumulate the sum of every element (used for the NaN check)
    float sum = 0;
    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
                    sum += v;
                }
            }
        }
    }
    // second pass: print a truncated view of the data
    for (int64_t i3 = 0; i3 < ne[3]; i3++) {
        LOG(INDENT "[\n");
        for (int64_t i2 = 0; i2 < ne[2]; i2++) {
            if (i2 == n && ne[2] > 2 * n) {
                // skip the middle of this dimension
                LOG(INDENT INDENT "..., \n");
                i2 = ne[2] - n;
            }
            LOG(INDENT INDENT "[\n");
            for (int64_t i1 = 0; i1 < ne[1]; i1++) {
                if (i1 == n && ne[1] > 2 * n) {
                    LOG(INDENT INDENT INDENT "..., \n");
                    i1 = ne[1] - n;
                }
                LOG(INDENT INDENT INDENT "[");
                for (int64_t i0 = 0; i0 < ne[0]; i0++) {
                    if (i0 == n && ne[0] > 2 * n) {
                        LOG(" ..., ");
                        i0 = ne[0] - n;
                    }
                    const float v = common_ggml_get_float_value(data, type, nb, i0, i1, i2, i3);
                    LOG("%12.4f", v);
                    if (i0 < ne[0] - 1) {
                        LOG(", ");
                    }
                }
                LOG(" ],\n");
            }
            LOG(INDENT INDENT "],\n");
        }
        LOG(INDENT "]\n");
        LOG(INDENT "sum = %f\n", sum);
    }

    if constexpr (abort) {
        if (std::isnan(sum)) {
            LOG("encountered NaN - aborting\n");
            exit(0);
        }
    }
}
|
| 104 |
+
|
| 105 |
+
/**
|
| 106 |
+
* GGML operations callback during the graph execution.
|
| 107 |
+
*
|
| 108 |
+
* @param t current tensor
|
| 109 |
+
* @param ask when ask is true, the scheduler wants to know if we are interested in data from this tensor
|
| 110 |
+
* if we return true, a follow-up call will be made with ask=false in which we can do the actual collection.
|
| 111 |
+
* see ggml_backend_sched_eval_callback
|
| 112 |
+
* @param user_data user data to pass at each call back
|
| 113 |
+
* @return true to receive data or continue the graph, false otherwise
|
| 114 |
+
*/
|
| 115 |
+
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data) {
|
| 116 |
+
auto * cb_data = (base_callback_data *) user_data;
|
| 117 |
+
|
| 118 |
+
const struct ggml_tensor * src0 = t->src[0];
|
| 119 |
+
const struct ggml_tensor * src1 = t->src[1];
|
| 120 |
+
|
| 121 |
+
if (ask) {
|
| 122 |
+
return true; // Always retrieve data
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
bool matches_filter = cb_data->tensor_filters.empty();
|
| 126 |
+
|
| 127 |
+
if (!matches_filter) {
|
| 128 |
+
for (const auto & filter : cb_data->tensor_filters) {
|
| 129 |
+
if (std::regex_search(t->name, filter)) {
|
| 130 |
+
matches_filter = true;
|
| 131 |
+
break;
|
| 132 |
+
}
|
| 133 |
+
}
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
char src1_str[128] = { 0 };
|
| 137 |
+
if (src1) {
|
| 138 |
+
snprintf(src1_str, sizeof(src1_str), "%s{%s}", src1->name, common_ggml_ne_string(src1).c_str());
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
if (matches_filter) {
|
| 142 |
+
LOG("%s: %24s = (%s) %10s(%s{%s}, %s}) = {%s}\n", __func__, t->name, ggml_type_name(t->type),
|
| 143 |
+
ggml_op_desc(t), src0->name, common_ggml_ne_string(src0).c_str(), src1 ? src1_str : "",
|
| 144 |
+
common_ggml_ne_string(t).c_str());
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
const bool is_host = ggml_backend_buffer_is_host(t->buffer);
|
| 148 |
+
|
| 149 |
+
if (!is_host) {
|
| 150 |
+
auto n_bytes = ggml_nbytes(t);
|
| 151 |
+
cb_data->data.resize(n_bytes);
|
| 152 |
+
ggml_backend_tensor_get(t, cb_data->data.data(), 0, n_bytes);
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
if (!ggml_is_quantized(t->type) && matches_filter) {
|
| 156 |
+
uint8_t * data = is_host ? (uint8_t *) t->data : cb_data->data.data();
|
| 157 |
+
common_debug_print_tensor<abort_on_nan>(data, t->type, t->ne, t->nb, 3);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
return true;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
// Explicit template instantiations: the template definitions live in this
// translation unit while debug.h only declares them, so both bool variants
// must be instantiated here. <true> selects abort-on-NaN, <false> print-only.
template bool common_debug_cb_eval<false>(ggml_tensor *, bool, void *);
template bool common_debug_cb_eval<true>(ggml_tensor *, bool, void *);
template void common_debug_print_tensor<false>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
template void common_debug_print_tensor<true>(uint8_t *, ggml_type, const int64_t *, const size_t *, int64_t);
|
llama.cpp/common/debug.h
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#pragma once
|
| 2 |
+
#include "common.h"
|
| 3 |
+
#include <string>
|
| 4 |
+
#include <vector>
|
| 5 |
+
#include <regex>
|
| 6 |
+
|
| 7 |
+
// common debug functions and structs
|
| 8 |
+
|
| 9 |
+
// Print a tensor's detailed data
|
| 10 |
+
// data - the tensor's data in byte format
|
| 11 |
+
// type - the tensor's quantization type
|
| 12 |
+
// ne - the tensor dimensions array
|
| 13 |
+
// nb - the tensor strides array
|
| 14 |
+
// n - the number of rows/columns to fully print
|
| 15 |
+
template <bool abort_on_nan> void common_debug_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne, const size_t * nb, int64_t n);
|
| 16 |
+
|
| 17 |
+
// Intended to use as callback for ggml_backend_sched_eval_callback
|
| 18 |
+
// prints tensors that are processed in the computation graph
|
| 19 |
+
// by default prints all tensors, but can be configured by creating a `base_callback_data` instance with
|
| 20 |
+
// non-empty filter_patterns. See examples/debug.cpp for possible usage patterns
|
| 21 |
+
// The template parameter determines whether execution should stop whenever a NaN is encountered
|
| 22 |
+
// in a tensor (useful for stopping debug sessions on first erroneous tensor)
|
| 23 |
+
// The callback data will be passed as the third parameter (user_data)
|
| 24 |
+
template <bool abort_on_nan> bool common_debug_cb_eval(struct ggml_tensor * t, bool ask, void * user_data);
|
| 25 |
+
struct base_callback_data {
|
| 26 |
+
std::vector<uint8_t> data;
|
| 27 |
+
std::vector<std::regex> tensor_filters;
|
| 28 |
+
|
| 29 |
+
base_callback_data() = default;
|
| 30 |
+
|
| 31 |
+
base_callback_data(common_params & params, const std::vector<std::string> & filter_patterns) {
|
| 32 |
+
for (const auto & pattern : filter_patterns) {
|
| 33 |
+
try {
|
| 34 |
+
std::string anchored_pattern = "^" + pattern;
|
| 35 |
+
tensor_filters.emplace_back(anchored_pattern, std::regex::optimize);
|
| 36 |
+
} catch (const std::regex_error & e) {
|
| 37 |
+
throw std::runtime_error("Invalid regex pattern '" + pattern + "': " + e.what());
|
| 38 |
+
}
|
| 39 |
+
}
|
| 40 |
+
params.cb_eval = common_debug_cb_eval<false>;
|
| 41 |
+
params.cb_eval_user_data = this;
|
| 42 |
+
}
|
| 43 |
+
};
|