#!/bin/bash
# Entrypoint: start the llama.cpp inference server in the background, wait
# until it accepts connections, then run the Gradio frontend in the foreground.
set -euo pipefail

echo "Starting llama.cpp server..."
# The binary is located at /app/llama-server in the ggml-org/llama.cpp image
/app/llama-server \
  -m /app/SmolLM2-1.7B-Instruct-Q4_K_M.gguf \
  --host 0.0.0.0 \
  --port 8080 \
  -t 2 \
  -c 2048 \
  --log-disable &
server_pid=$!

# Terminate the background server on any exit path (app exit, error, signal).
cleanup() {
  kill "$server_pid" 2>/dev/null || true
}
trap cleanup EXIT

# Poll the server port instead of a fixed 'sleep 10': a fixed delay is a race
# (model load can take longer) and wasted time (it may be ready sooner).
# Bash's /dev/tcp redirection is used so no curl/nc is needed in the image.
echo "Waiting for server to initialize..."
for (( i = 0; i < 60; i++ )); do
  if ! kill -0 "$server_pid" 2>/dev/null; then
    echo "llama-server exited before becoming ready" >&2
    exit 1
  fi
  # Subshell scopes fd 3; it is closed automatically when the subshell exits.
  if (exec 3<>/dev/tcp/127.0.0.1/8080) 2>/dev/null; then
    break
  fi
  sleep 1
done

echo "Starting Gradio App..."
# Explicitly use the venv's python
/opt/venv/bin/python3 /app/app.py