#!/bin/bash

# Log the working directory and its contents for debugging.
echo "$PWD"

#export PATH="llama-b6795-bin-ubuntu-x64/build/bin:$PATH"

# Fetch the quantized model from Hugging Face (uncomment if the .gguf file
# is not already present in the image):
#huggingface-cli download unsloth/Qwen3-4B-Thinking-2507-GGUF Qwen3-4B-Thinking-2507-Q4_K_M.gguf --local-dir . --local-dir-use-symlinks False

ls

# Start the server.
echo "Starting llama-cpp-python server"
sleep 10
# --model must point at the downloaded .gguf file, not the repo name.
python3 -m llama_cpp.server --model Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860

# Alternative: run the prebuilt llama.cpp server binary instead:
#cd llama-b6795-bin-ubuntu-x64/build/bin && chmod +x ./llama-server && ./llama-server --model /app/Qwen3-4B-Thinking-2507-Q4_K_M.gguf --host 0.0.0.0 --port 7860 --temp 1.0 --top-k 64 --top-p 0.95 --min-p 0.0 --log-file llama.log & wait
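
# Optional sanity check, assuming the server came up on port 7860:
# llama-cpp-python exposes an OpenAI-compatible API, so a request like the
# one below (run from another shell; the prompt is illustrative) should
# return a chat completion:
#curl http://localhost:7860/v1/chat/completions \
#  -H "Content-Type: application/json" \
#  -d '{"messages": [{"role": "user", "content": "Hello"}]}'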