#!/usr/bin/env bash
#
# Launch llama-server for the MiMo GGUF model with overridable defaults.
#
# Every setting below is optional and read from the environment; the value
# in parentheses is the default used when the variable is unset or empty.
#
#   LLAMA_SERVER        server executable name or path (llama-server)
#   MIMO_MODEL          model file; when unset/empty, the first file matching
#                       MiMo-V2.5-coder-Q2-00001-of-*.gguf in this script's
#                       directory is used
#   MIMO_HOST           --host           (127.0.0.1)
#   MIMO_PORT           --port           (8080)
#   MIMO_CTX            --ctx-size       (100000)
#   MIMO_PARALLEL       --parallel       (1)
#   MIMO_BATCH          --batch-size     (512)
#   MIMO_UBATCH         --ubatch-size    (128)
#   MIMO_THREADS        --threads        (12)
#   MIMO_THREADS_BATCH  --threads-batch  (18)
#   MIMO_PRIO           --prio           (0)
#   MIMO_POLL           --poll           (80)
#   MIMO_FIT            --fit            (on)
#   MIMO_FIT_TARGET     --fit-target     (4096)
#   MIMO_FIT_CTX        --fit-ctx        (100000)
#   MIMO_GPU_LAYERS     --gpu-layers     (auto)
#   MIMO_CACHE_K        --cache-type-k   (f16)
#   MIMO_CACHE_V        --cache-type-v   (f16)
#   MIMO_REASONING      --reasoning      (off)
#   MIMO_CPU_MOE        adds --cpu-moe when set to exactly "1"
#   MIMO_DEVICE         adds --device <value> when non-empty
#   MIMO_TOOLS          adds --tools <value> when non-empty
#
# Any arguments given to this script are appended after the generated ones.
set -euo pipefail

# Print the given message on stderr and terminate with status 1.
die() {
  echo "$1" >&2
  exit 1
}

SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
LLAMA_SERVER=${LLAMA_SERVER:-llama-server}

# Bail out early when the server executable cannot be resolved.
command -v "$LLAMA_SERVER" >/dev/null 2>&1 ||
  die "llama-server was not found. Install llama.cpp or set LLAMA_SERVER=/path/to/llama-server."

# An explicit MIMO_MODEL wins; otherwise look for the first shard beside
# this script.
MODEL=${MIMO_MODEL:-}
if [[ -z "$MODEL" ]]; then
  # nullglob makes a non-matching pattern expand to an empty array instead
  # of the literal pattern; it is switched back off right after the glob.
  shopt -s nullglob
  shards=("$SCRIPT_DIR"/MiMo-V2.5-coder-Q2-00001-of-*.gguf)
  shopt -u nullglob

  (( ${#shards[@]} > 0 )) ||
    die "No first GGUF shard found next to run-server.sh."
  MODEL=${shards[0]}
fi

# Assemble the command line in the same order the flags are documented above.
server_args=(--model "$MODEL")
server_args+=(
  --host "${MIMO_HOST:-127.0.0.1}"
  --port "${MIMO_PORT:-8080}"
)
server_args+=(
  --ctx-size "${MIMO_CTX:-100000}"
  --parallel "${MIMO_PARALLEL:-1}"
  --batch-size "${MIMO_BATCH:-512}"
  --ubatch-size "${MIMO_UBATCH:-128}"
)
server_args+=(
  --threads "${MIMO_THREADS:-12}"
  --threads-batch "${MIMO_THREADS_BATCH:-18}"
  --prio "${MIMO_PRIO:-0}"
  --poll "${MIMO_POLL:-80}"
)
server_args+=(
  --flash-attn on
  --jinja
)
server_args+=(
  --fit "${MIMO_FIT:-on}"
  --fit-target "${MIMO_FIT_TARGET:-4096}"
  --fit-ctx "${MIMO_FIT_CTX:-100000}"
  --gpu-layers "${MIMO_GPU_LAYERS:-auto}"
)
server_args+=(
  --cache-type-k "${MIMO_CACHE_K:-f16}"
  --cache-type-v "${MIMO_CACHE_V:-f16}"
  --reasoning "${MIMO_REASONING:-off}"
)

# Opt-in switches: each is appended only when its variable asks for it.
case "${MIMO_CPU_MOE:-0}" in
  1) server_args+=(--cpu-moe) ;;
esac

if [[ "${MIMO_DEVICE:-}" ]]; then
  server_args+=(--device "$MIMO_DEVICE")
fi

if [[ "${MIMO_TOOLS:-}" ]]; then
  server_args+=(--tools "$MIMO_TOOLS")
fi

# Replace this shell with the server; caller-supplied arguments come last.
exec "$LLAMA_SERVER" "${server_args[@]}" "$@"