darshvit20's picture
Add image download step to start.sh
e7f503e
#!/bin/bash
set -e
echo "=========================================="
echo " Visual Search β€” HuggingFace Space"
echo "=========================================="
HF_USER="darshvit20"
MODEL_REPO="${HF_USER}/visual-search-clip"
DATASET_REPO="${HF_USER}/visual-search-dataset"
# ── Download ONNX models if not already present ────────────────────────────
if [ ! -f "/app/models/clip_vision_int8.onnx" ]; then
echo "[1/4] Downloading ONNX models from HuggingFace..."
python3 -c "
from huggingface_hub import hf_hub_download, list_repo_files
import os
repo = '${MODEL_REPO}'
dest = '/app/models'
os.makedirs(dest, exist_ok=True)
for f in list_repo_files(repo, repo_type='model'):
print(f' Downloading {f}...')
hf_hub_download(repo_id=repo, filename=f, repo_type='model', local_dir=dest)
print(' Models ready.')
"
else
echo "[1/4] Models already present, skipping download."
fi
# ── Download embeddings if not already present ─────────────────────────────
if [ ! -f "/app/embeddings/faiss.index" ]; then
echo "[2/4] Downloading embeddings from HuggingFace..."
python3 -c "
from huggingface_hub import hf_hub_download, list_repo_files
import os
repo = '${DATASET_REPO}'
dest = '/app/embeddings'
os.makedirs(dest, exist_ok=True)
for f in list_repo_files(repo, repo_type='dataset'):
if f.startswith('embeddings/') or f in ['faiss.index', 'metadata.pkl']:
print(f' Downloading {f}...')
hf_hub_download(repo_id=repo, filename=f, repo_type='dataset', local_dir=dest)
print(' Embeddings ready.')
"
else
echo "[2/4] Embeddings already present, skipping download."
fi
# ── Download images if not already present ─────────────────────────────────
if [ ! -d "/app/images" ] || [ -z "$(ls -A /app/images 2>/dev/null)" ]; then
echo "[3/4] Downloading images from HuggingFace..."
python3 -c "
from huggingface_hub import snapshot_download
repo = '${DATASET_REPO}'
snapshot_download(
repo_id=repo,
repo_type='dataset',
local_dir='/app',
allow_patterns=['images/*'],
)
print(' Images ready.')
"
else
echo "[3/4] Images already present, skipping download."
fi
# ── Wait for encoder to be healthy before starting API ─────────────────────
echo "[4/4] Starting services..."
supervisord -c /etc/supervisor/conf.d/supervisord.conf &
SUPER_PID=$!
echo " Waiting for encoder to load ONNX model..."
for i in $(seq 1 30); do
if python3 -c "import urllib.request; urllib.request.urlopen('http://localhost:8001/health')" 2>/dev/null; then
echo " Encoder ready!"
break
fi
sleep 2
done
wait $SUPER_PID