madverse / scripts /start.sh
PavaniKadari's picture
Fix HF Spaces deployment issue
14fd625
#!/bin/bash
# ============================================================
# MAdVerse (AdCraft AI) container entrypoint (HF Spaces / Docker).
# On first boot it fetches the dataset from Zenodo, then it
# launches the server.
# ============================================================

# Abort on the first unhandled command failure.
set -o errexit

# --- Remote source of the dataset files ---
ZENODO_BASE="https://zenodo.org/records/10657763/files"

# --- Locations inside the container ---
DATA_DIR="/app/data/images"
ANNOTATIONS_DIR="/app/data/annotations"
# Marker file whose presence means the dataset step is already done.
MARKER_FILE="${DATA_DIR}/.download_complete"

# --- Locations that can be overridden through the environment ---
DB_FILE="${MADVERSE_DB_PATH:-/app/products_db/adcraft.db}"
HF_CACHE_DIR="${HF_HOME:-/app/.cache/huggingface}"

# Startup banner
printf '%s\n' \
  "============================================================" \
  " MAdVerse AI — Container Startup" \
  "============================================================"
# Pull variables from /app/.env into the environment when that file exists
# (handy with bind mounts). Everything the file assigns is exported because
# allexport is switched on only while it is being sourced.
if [[ ! -f /app/.env ]]; then
  echo "[STARTUP] /app/.env not found (this is normal if using --env-file)."
else
  echo "[STARTUP] Loading environment from /app/.env"
  set -o allexport
  # shellcheck disable=SC1091
  source /app/.env
  set +o allexport
fi
# Make sure every directory the container writes to exists.
mkdir -p \
  /app/outputs \
  /app/uploads \
  /app/products_db \
  /app/data/images \
  /app/data/indices
mkdir -p "${HF_CACHE_DIR}"

# Report whether the HuggingFace hub cache already holds anything.
echo "[STARTUP] HuggingFace cache directory: ${HF_CACHE_DIR}"
hf_hub_dir="${HF_CACHE_DIR}/hub"
if [[ -d "${hf_hub_dir}" && -n "$(ls -A "${hf_hub_dir}" 2>/dev/null)" ]]; then
  echo "[STARTUP] HuggingFace cache detected. Model downloads should be skipped when available."
else
  echo "[STARTUP] HuggingFace cache is empty. First run may download CLIP model weights."
fi

# Report whether at least one image-generation API key is present.
if [[ -z "${HF_TOKEN:-}" && -z "${TOGETHER_API_KEY:-}" ]]; then
  echo "[STARTUP] Image generation API key status: not configured -> local gradient fallback may produce solid color images."
else
  echo "[STARTUP] Image generation API key status: configured (HF and/or Together)."
fi
# Opt-in reset: when MADVERSE_RESET_DB_ON_START is exactly "1", delete the
# database file so stale data in a Docker volume is discarded.
case "${MADVERSE_RESET_DB_ON_START:-0}" in
  1)
    echo "[STARTUP] MADVERSE_RESET_DB_ON_START=1 -> removing DB file: ${DB_FILE}"
    rm -f "${DB_FILE}"
    ;;
esac
# ------------------------------------------------------------------
# Step 1: Download dataset images from Zenodo (only on first boot)
# ------------------------------------------------------------------
# HF Spaces: Set SKIP_DATASET_DOWNLOAD=1 in Secrets to skip this step
# The embeddings/FAISS work without images for ad generation
#
# The download is best-effort: a failed fetch or extraction is reported on
# stderr and the script still goes on to start the server. The completion
# marker is written only when every step succeeded, so a failed first boot
# is retried on the next boot instead of being recorded as complete.
if [ "${SKIP_DATASET_DOWNLOAD:-0}" = "1" ]; then
  echo "[STARTUP] Dataset download SKIPPED (SKIP_DATASET_DOWNLOAD=1)"
  echo "[STARTUP] Embeddings and FAISS index will be used for generation."
  echo "[STARTUP] Original dataset images not needed for ad generation."
  # NOTE(review): this writes the same marker a real download writes, so a
  # later boot on the same volume without SKIP_DATASET_DOWNLOAD will also
  # skip the download. Confirm that this is intended.
  touch "$MARKER_FILE"
elif [ -f "$MARKER_FILE" ]; then
  echo "[STARTUP] Dataset already downloaded. Skipping."
elif [ -d "$DATA_DIR/Advert_Gallery" ] && [ -d "$DATA_DIR/Epaper1" ] && [ -d "$DATA_DIR/Epaper2" ] && [ -d "$DATA_DIR/OnlineAds" ]; then
  # All image folders exist - consider download complete even if marker is missing
  echo "[STARTUP] Dataset folders detected. Skipping download."
  touch "$MARKER_FILE"
else
  echo "[STARTUP] Downloading MAdVerse dataset from Zenodo..."
  echo "[STARTUP] This only happens on the FIRST boot (~10-15 min)."
  echo ""

  # Becomes 0 as soon as any fetch or extraction fails.
  download_ok=1

  # Private scratch directory instead of predictable file names in /tmp.
  work_dir="$(mktemp -d)"

  # Download and extract each image archive
  for archive in Advert_Gallery Epaper1 Epaper2 OnlineAds; do
    zip_path="${work_dir}/${archive}.zip"

    echo "[DOWNLOAD] Fetching ${archive}.zip ..."
    # --fail turns HTTP errors into a non-zero exit instead of saving the
    # error page as the archive. curl is not piped, so its own exit status
    # is what gets tested here.
    if ! curl --fail --silent --show-error --location \
        --retry 3 --retry-delay 5 \
        -o "$zip_path" "${ZENODO_BASE}/${archive}.zip?download=1"; then
      echo "[ERROR] Download of ${archive}.zip failed; it will be retried on next boot." >&2
      download_ok=0
      rm -f "$zip_path"
      echo ""
      continue
    fi

    echo "[EXTRACT] Unzipping ${archive}.zip ..."
    # Info-ZIP unzip exits with 1 for warnings after a successful
    # extraction; anything above 1 is treated as a real failure.
    unzip_rc=0
    unzip -q -o "$zip_path" -d "$DATA_DIR/" || unzip_rc=$?
    if [ "$unzip_rc" -le 1 ]; then
      echo "[DONE] ${archive} extracted."
    else
      echo "[ERROR] Extraction of ${archive}.zip failed (unzip exit ${unzip_rc})." >&2
      download_ok=0
    fi
    rm -f "$zip_path"
    echo ""
  done

  # Download annotation files (if not already baked into image)
  if [ ! -f "$ANNOTATIONS_DIR/web_annot_j.json" ]; then
    echo "[DOWNLOAD] Fetching annotation files..."
    # The directory is not created anywhere else when it is not baked in.
    mkdir -p "$ANNOTATIONS_DIR"
    for annot in adgal_annot_j.json epaper1_annotation.json epaper2_annotation.json web_annot_j.json dataset_readme.md; do
      # Fetch into the scratch directory first so that a failed transfer
      # never leaves a partial file at the final location.
      if ! curl --fail --silent --show-error --location --retry 3 \
          -o "${work_dir}/${annot}" "${ZENODO_BASE}/${annot}?download=1"; then
        echo "[ERROR] Download of ${annot} failed." >&2
        download_ok=0
        rm -f "${work_dir}/${annot}"
      elif ! mv -f "${work_dir}/${annot}" "$ANNOTATIONS_DIR/$annot"; then
        echo "[ERROR] Could not store ${annot} in ${ANNOTATIONS_DIR}." >&2
        download_ok=0
      fi
    done
  fi

  # :? guards against removing "/" should work_dir ever be empty.
  rm -rf "${work_dir:?}"

  if [ "$download_ok" = "1" ]; then
    # Mark download as complete
    touch "$MARKER_FILE"
    echo ""
    echo "[STARTUP] Dataset download complete!"
  else
    echo ""
    echo "[STARTUP] Dataset download incomplete; continuing without marking it complete." >&2
  fi
fi
# ------------------------------------------------------------------
# Step 2: Start the FastAPI server
# ------------------------------------------------------------------
# Resolve the listening port once: $PORT when set and non-empty, else 7860.
listen_port="${PORT:-7860}"

printf '%s\n' \
  "" \
  "============================================================" \
  " Starting MAdVerse AI Server" \
  " Port: ${listen_port}" \
  "============================================================" \
  ""

cd /app

uvicorn_args=(
  --host 0.0.0.0
  --port "${listen_port}"
  --log-level info
  --no-access-log
)

# exec replaces this shell with the uvicorn process.
exec python -m uvicorn app.main:app "${uvicorn_args[@]}"