#!/bin/bash
# Launcher: unpack a prebuilt llama.cpp server, fetch a GGUF model from
# Hugging Face, and start llama-server on port 7860.
# Expects llama_cpp_avx512_17565.zip (containing ./llama-server) in $WORK.
set -euo pipefail

# ==========================
# Working-directory setup
# ==========================
export WORK="/home/user/app"
SERVER_BIN="$WORK/llama-server"
MODEL_FILE="LFM2.5-1.2B-Instruct-Q4_0.gguf"

cd "$WORK" || { echo "cannot cd to $WORK" >&2; exit 1; }

# -o: overwrite extracted files without prompting, so a re-run of this
# script does not hang on unzip's interactive "replace?" question.
unzip -o llama_cpp_avx512_17565.zip

# unzip does not always preserve the execute bit — make sure the server
# binary is runnable before we try to exec it.
chmod +x -- "$SERVER_BIN"

echo "PWD"
pwd

# Alternative models (kept for reference, disabled):
#wget https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q5_K_M.gguf
#wget https://huggingface.co/unsloth/Qwen3-0.6B-GGUF/resolve/main/Qwen3-0.6B-Q2_K.gguf
#wget https://huggingface.co/unsloth/LFM2-8B-A1B-GGUF/resolve/main/LFM2-8B-A1B-UD-Q4_K_XL.gguf
#wget https://huggingface.co/unsloth/ERNIE-4.5-21B-A3B-Thinking-GGUF/resolve/main/ERNIE-4.5-21B-A3B-Thinking-UD-IQ2_M.gguf

# -nc: skip the download when the model file already exists, so re-runs
# are idempotent and don't leave ".1" duplicate copies behind.
wget -nc "https://huggingface.co/unsloth/LFM2.5-1.2B-Instruct-GGUF/resolve/main/$MODEL_FILE"

echo "ls -l"
ls -l

# --models-dir $WORK \   (disabled flag kept from original)
# exec: replace this shell with the server process so signals (SIGTERM
# from the container runtime, etc.) reach llama-server directly.
exec "$SERVER_BIN" \
  --model "$MODEL_FILE" \
  --threads 2 \
  --ctx-size 4096 \
  --mlock \
  --temp 1.1 \
  --top-p 0.95 \
  --jinja \
  --chat-template "" \
  --host 0.0.0.0 \
  --port 7860 \
  --models-max 1 \
  --verbose