#!/bin/bash
# Entrypoint: start the llama.cpp inference server in the background, wait
# until it accepts connections, then run the Gradio frontend in the foreground.
set -euo pipefail

echo "Starting llama.cpp server..."
# The binary is located at /app/llama-server in the ggml-org/llama.cpp image
/app/llama-server \
  -m /app/SmolLM2-1.7B-Instruct-Q4_K_M.gguf \
  --host 0.0.0.0 \
  --port 8080 \
  -t 2 \
  -c 2048 \
  --log-disable &
server_pid=$!

# Terminate the background server on any exit path (app exit, error, signal).
cleanup() {
  kill "$server_pid" 2>/dev/null || true
}
trap cleanup EXIT

# Poll the server port instead of a fixed 'sleep 10': a fixed delay is a race
# (model load can take longer) and wasted time (it may be ready sooner).
# Bash's /dev/tcp redirection is used so no curl/nc is needed in the image.
echo "Waiting for server to initialize..."
for (( i = 0; i < 60; i++ )); do
  if ! kill -0 "$server_pid" 2>/dev/null; then
    echo "llama-server exited before becoming ready" >&2
    exit 1
  fi
  # Subshell scopes fd 3; it is closed automatically when the subshell exits.
  if (exec 3<>/dev/tcp/127.0.0.1/8080) 2>/dev/null; then
    break
  fi
  sleep 1
done

echo "Starting Gradio App..."
# Explicitly use the venv's python
/opt/venv/bin/python3 /app/app.py