#!/bin/bash
#
# Container entrypoint.
#
# Steps, in order:
#   1. Run `hf_sync.py init`.
#   2. Run `hf_sync.py download` when DATASET_REPO_ID is set.
#   3. Replace database.db / output / processed / uploads in the app
#      directory with symlinks into data_repo/.
#   4. Start a background loop that runs `hf_sync.py upload` periodically
#      when both DATASET_REPO_ID and HF_TOKEN are set.
#   5. exec gunicorn serving app:app.
#
# Environment:
#   DATASET_REPO_ID   enables the initial download and (with HF_TOKEN) backups
#   HF_TOKEN          required, together with DATASET_REPO_ID, for backups
#   GUNICORN_THREADS  optional; thread count used only by the gthread fallback

# No `set -e`: the hf_sync.py steps are best-effort and must not stop the app
# from starting. Critical steps are checked explicitly instead.
set -u

readonly APP_DIR=/app
readonly DATA_DIR=data_repo
readonly BACKUP_INTERVAL=120 # seconds (every 2 minutes)
readonly PORT=7680

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Ensure we are in the app directory. This must succeed: the rm -rf below
# uses relative paths and would otherwise run in the wrong directory.
cd "$APP_DIR" || die "cannot cd to $APP_DIR"

# 1. Initialize local structure
python3 hf_sync.py init \
  || echo "warning: hf_sync.py init failed, continuing." >&2

# 2. Try to download existing data
if [[ -n "${DATASET_REPO_ID:-}" ]]; then
  python3 hf_sync.py download \
    || echo "warning: initial download failed, continuing." >&2
else
  echo "DATASET_REPO_ID not set, skipping initial download."
fi

# 3. Setup symlinks
# Remove existing if any
rm -rf -- database.db output processed uploads

# Ensure data_repo has what we need
mkdir -p -- "$DATA_DIR/output" "$DATA_DIR/processed" "$DATA_DIR/uploads" \
  || die "cannot create directories under $DATA_DIR"

# Create symlinks. database.db may dangle until something creates the
# target file. -n keeps ln from descending into an existing directory link.
for name in database.db output processed uploads; do
  ln -sfn -- "$DATA_DIR/$name" "$name" \
    || die "cannot create symlink $name -> $DATA_DIR/$name"
done

# 4. Start periodic background upload
if [[ -n "${DATASET_REPO_ID:-}" && -n "${HF_TOKEN:-}" ]]; then
  (
    while true; do
      sleep "$BACKUP_INTERVAL"
      echo "Performing scheduled backup to HF Datasets..."
      # A failed upload must not end the loop; report it and retry next tick.
      python3 hf_sync.py upload \
        || echo "warning: scheduled backup failed, will retry." >&2
    done
  ) &
else
  echo "DATASET_REPO_ID or HF_TOKEN not set, periodic backup disabled."
fi

# 5. Start the application
echo "Starting application on port ${PORT}..."

# Prefer the eventlet worker (for SocketIO support if needed). eventlet is
# not in requirements.txt, so probe for it and fall back to threaded workers
# rather than letting gunicorn fail at startup.
# NOTE(review): assumes `python3` is the interpreter gunicorn runs under.
if python3 -c 'import eventlet' >/dev/null 2>&1; then
  worker_args=(--worker-class eventlet)
else
  echo "eventlet not installed, falling back to gthread workers." >&2
  worker_args=(--worker-class gthread --threads "${GUNICORN_THREADS:-8}")
fi

# exec replaces this shell so gunicorn receives signals directly.
exec gunicorn --bind "0.0.0.0:${PORT}" "${worker_args[@]}" -w 1 app:app