DaCrow13
feat: Introduce `start_space.sh` to orchestrate DVC model pulling and service startup, and update Nginx upstream addresses to 127.0.0.1.
e4c8301
raw
history blame
3.26 kB
#!/bin/bash
# Fail on error
set -e
# Ensure DVC is initialized (in case .dvc folder was not copied)
if [ ! -d ".dvc" ]; then
echo "Initializing DVC..."
dvc init --no-scm
dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi
# Determine credentials
# Prefer specific DAGSHUB vars, fallback to MLFLOW vars (often the same for DagsHub)
USER=${DAGSHUB_USERNAME:-$MLFLOW_TRACKING_USERNAME}
PASS=${DAGSHUB_TOKEN:-$MLFLOW_TRACKING_PASSWORD}
if [ -n "$USER" ] && [ -n "$PASS" ]; then
echo "$(date) - Configuring DVC authentication for DagsHub..."
# Configure local config (not committed)
dvc remote modify origin --local auth basic
dvc remote modify origin --local user "$USER"
dvc remote modify origin --local password "$PASS"
else
echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi
echo "$(date) - Pulling models from DVC..."
# Pull only the necessary files for inference
dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
models/tfidf_vectorizer.pkl.dvc \
models/label_names.pkl.dvc || echo "DVC pull failed, but continuing..."
# Create Nginx temp directories
mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp
echo "$(date) - Checking models existence..."
ls -la models/
echo "$(date) - Starting FastAPI application in background..."
# Using 0.0.0.0 to be safe
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &
# Wait for API to start
echo "$(date) - Waiting for API to start (30s)..."
for i in {1..30}; do
if curl -s http://127.0.0.1:8000/health > /dev/null; then
echo "$(date) - API is UP!"
break
fi
echo "$(date) - Waiting... ($i/30)"
sleep 2
done
echo "$(date) - Starting Nginx reverse proxy..."
if ! command -v nginx &> /dev/null; then
echo "$(date) - ERROR: nginx not found in PATH"
exit 1
fi
nginx -c /app/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
echo "$(date) - Waiting for Nginx to initialize..."
sleep 5
# Check if Nginx is running
if ps aux | grep -v grep | grep -q "nginx"; then
echo "$(date) - Nginx is running."
else
echo "$(date) - ERROR: Nginx failed to start. Logs:"
cat /tmp/nginx_startup.log
fi
echo "$(date) - Final backend check before starting Streamlit..."
curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"
echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
--server.port 8501 \
--server.address 127.0.0.1 \
--server.enableCORS=false \
--server.enableXsrfProtection=false \
--server.headless true &
# Wait for Streamlit to start
echo "$(date) - Waiting for Streamlit to start (30s)..."
for i in {1..30}; do
if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
echo "$(date) - Streamlit is UP!"
break
fi
echo "$(date) - Waiting for Streamlit... ($i/30)"
sleep 2
done
echo "$(date) - Process started. Tailing Nginx logs for debug..."
tail -f /tmp/nginx_startup.log /tmp/fastapi.log