#!/bin/bash

# Log the working directory and its contents for debugging.
echo "$PWD"

#export PATH="llama-b6795-bin-ubuntu-x64/build/bin:$PATH"

# Fetch the quantized model from Hugging Face (uncomment if the .gguf file
# is not already present in the image):
#huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

ls

# Start the server.
echo "Starting llama-cpp-python server"
sleep 10
# --model must point at the downloaded .gguf file, not the repo name.
python3 -m llama_cpp.server --model Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860

# Alternative: run the prebuilt llama.cpp server binary instead:
#cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model /app/Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860 --temp 1.0 --top-k 64 --top-p 0.95 --min-p 0.0 --log-file llama.log & wait
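
# Optional sanity check, assuming the server came up on port 7860:
# llama-cpp-python exposes an OpenAI-compatible API, so a request like the
# one below (run from another shell; the prompt is illustrative) should
# return a chat completion:
#curl http://localhost:7860/v1/chat/completions \
#  -H "Content-Type: application/json" \
#  -d '{"messages": [{"role": "user", "content": "Hello"}]}'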