#!/usr/bin/env bash
# Container entrypoint: configure the Hugging Face cache and runtime env,
# then exec gunicorn as PID 1 so signals (SIGTERM etc.) reach it directly.
set -euo pipefail

# Ensure HF cache lives on a writable path (persistent if your platform supports volumes).
# TRANSFORMERS_CACHE is kept alongside HF_HOME for older transformers versions.
export HF_HOME="${HF_HOME:-/root/.cache/huggingface}"
export TRANSFORMERS_CACHE="${TRANSFORMERS_CACHE:-$HF_HOME/transformers}"

export RUNNING_GUNICORN=1
export PYTHONUNBUFFERED=1          # flush logs immediately (no stdout buffering)
export TOKENIZERS_PARALLELISM=false
export WEB_CONCURRENCY=1           # Keep single worker so you don't load models multiple times

# Start gunicorn (threaded so /process stays responsive).
# --timeout 0 disables the worker timeout: long model loads/inference won't get the worker killed.
exec gunicorn --bind "0.0.0.0:${PORT:-8080}" \
  --workers "${WEB_CONCURRENCY}" \
  --threads 4 \
  --timeout 0 \
  app:app