Spaces:
Sleeping
Sleeping
DaCrow13
Remove self-scraping from Prometheus config to prevent false positive ServiceDown alert
b22bd7a
# Abort the whole script as soon as an unguarded command exits non-zero.
set -e

# Bootstrap DVC when the working directory has no .dvc folder (e.g. it was
# not copied into the image): initialise without SCM integration and register
# the DagsHub remote as the default remote named "origin".
if [[ ! -d .dvc ]]; then
  printf '%s\n' "Initializing DVC..."
  dvc init --no-scm
  dvc remote add -d origin \
    https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi
#######################################
# Configure HTTP basic auth on the DVC remote "origin" from the environment.
# Credentials are written with --local, i.e. to DVC's local (uncommitted)
# config.
# Globals:
#   DAGSHUB_USERNAME, DAGSHUB_TOKEN                     (read, preferred)
#   MLFLOW_TRACKING_USERNAME, MLFLOW_TRACKING_PASSWORD  (read, fallback)
# Arguments:
#   None
# Outputs:
#   A progress line on stdout, or a warning on stderr when no complete
#   username/password pair is available.
# Returns:
#   0 when auth was configured or when credentials are absent; the failing
#   dvc command's status otherwise.
#######################################
configure_dvc_auth() {
  # Deliberately not named USER/PASS: USER is the conventional login-name
  # variable, and overwriting it would change it for every later command in
  # this script (and for child processes when it is exported).
  local dagshub_user=${DAGSHUB_USERNAME:-${MLFLOW_TRACKING_USERNAME:-}}
  local dagshub_pass=${DAGSHUB_TOKEN:-${MLFLOW_TRACKING_PASSWORD:-}}

  if [[ -z "$dagshub_user" || -z "$dagshub_pass" ]]; then
    echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private." >&2
    return 0
  fi

  echo "$(date) - Configuring DVC authentication for DagsHub..."
  # `|| return` propagates the first failure to the caller, so the script
  # still stops here when it runs under `set -e`.
  dvc remote modify origin --local auth basic || return
  dvc remote modify origin --local user "$dagshub_user" || return
  dvc remote modify origin --local password "$dagshub_pass" || return
}

configure_dvc_auth
printf '%s\n' "$(date) - Pulling models from DVC..."

# Fetch only the files needed for inference. A failed pull is reported but
# does not stop the script.
inference_targets=(
  models/random_forest_tfidf_gridsearch.pkl.dvc
  models/tfidf_vectorizer.pkl.dvc
  models/label_names.pkl.dvc
)
if ! dvc pull "${inference_targets[@]}"; then
  printf '%s\n' "DVC pull failed, but continuing..."
fi
# Create Nginx temp directories
mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp

echo "$(date) - Checking models existence..."
# Purely diagnostic listing: guard it so that a missing models/ directory
# cannot abort start-up when the script runs under `set -e`.
ls -la models/ \
  || echo "$(date) - WARNING: models/ directory could not be listed." >&2
echo "$(date) - Starting FastAPI application in background..."
# Using 0.0.0.0 to be safe
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &

# Wait for API to start: poll the health endpoint 30 times, sleeping 2 s
# between attempts. Any HTTP response counts as "up" (curl is run without
# --fail).
api_ready=false
echo "$(date) - Waiting for API to start (up to 30 attempts, 2s apart)..."
for i in {1..30}; do
  # --max-time keeps a stalled connection from blocking the loop forever.
  if curl -s --max-time 2 http://127.0.0.1:8000/health > /dev/null; then
    echo "$(date) - API is UP!"
    api_ready=true
    break
  fi
  echo "$(date) - Waiting... ($i/30)"
  sleep 2
done

# Start-up continues either way, but a timeout is no longer silent.
if [[ "$api_ready" != true ]]; then
  echo "$(date) - WARNING: API did not respond on /health after 30 attempts; see /tmp/fastapi.log" >&2
fi
echo "$(date) - Configuring and starting Prometheus..."
# Copy alert rules
cp monitoring/prometheus/alert_rules.yml /tmp/alert_rules.yml

# Create a config for the space.
# The delimiter is quoted so the YAML is written literally, with no shell
# expansion. Nesting matters: the interval settings live under `global`, and
# the job-level keys (including the 10s scrape_interval override) live under
# the `hopcroft-api` list item.
cat <<'EOF' > /tmp/prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "/tmp/alert_rules.yml"

scrape_configs:
  - job_name: 'hopcroft-api'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['127.0.0.1:8000']
    scrape_interval: 10s
EOF

# Start Prometheus in the background, logging to /tmp/prometheus.log.
# --web.external-url needs to match the path in Nginx
prometheus \
  --config.file=/tmp/prometheus.yml \
  --storage.tsdb.path=/tmp/prometheus_data \
  --web.listen-address=0.0.0.0:9090 \
  --web.external-url=/prometheus/ \
  --web.route-prefix=/prometheus/ \
  >> /tmp/prometheus.log 2>&1 &
echo "$(date) - Starting Nginx reverse proxy..."
if ! command -v nginx > /dev/null 2>&1; then
  echo "$(date) - ERROR: nginx not found in PATH" >&2
  exit 1
fi

# "daemon off;" keeps nginx in the foreground of its own background job, so
# $! is the PID of the nginx process that was just launched.
nginx -c /app/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
nginx_pid=$!

echo "$(date) - Waiting for Nginx to initialize..."
sleep 5

# Check if Nginx is running by probing the exact PID started above (signal 0
# only tests for existence). Grepping `ps` output would also match any
# unrelated command line that merely contains the word "nginx".
if kill -0 "$nginx_pid" 2> /dev/null; then
  echo "$(date) - Nginx is running."
else
  # Report and carry on; the remaining services are still started.
  echo "$(date) - ERROR: Nginx failed to start. Logs:" >&2
  cat /tmp/nginx_startup.log >&2
fi
echo "$(date) - Final backend check before starting Streamlit..."
# Verbose one-off probe for the logs; failure is reported but not fatal.
# --max-time bounds the probe so a stalled backend cannot hang start-up.
curl -v --max-time 5 http://127.0.0.1:8000/health || echo "FastAPI health check failed!"

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
# Exported so the Streamlit process inherits the backend address.
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
  --server.port 8501 \
  --server.address 127.0.0.1 \
  --server.enableCORS=false \
  --server.enableXsrfProtection=false \
  --server.headless true &

# Wait for Streamlit to start: 30 polls, 2 s apart. Any HTTP response counts
# as "up" (curl is run without --fail).
streamlit_ready=false
echo "$(date) - Waiting for Streamlit to start (up to 30 attempts, 2s apart)..."
for i in {1..30}; do
  # --max-time keeps a stalled connection from blocking the loop forever.
  if curl -s --max-time 2 http://127.0.0.1:8501/healthz > /dev/null; then
    echo "$(date) - Streamlit is UP!"
    streamlit_ready=true
    break
  fi
  echo "$(date) - Waiting for Streamlit... ($i/30)"
  sleep 2
done

# Start-up continues either way, but a timeout is no longer silent.
if [[ "$streamlit_ready" != true ]]; then
  echo "$(date) - WARNING: Streamlit did not respond on /healthz after 30 attempts." >&2
fi
# The services above were all launched as background jobs. Follow the Nginx
# and FastAPI log files in the foreground as the script's final command.
printf '%s\n' "$(date) - Process started. Tailing Nginx logs for debug..."
tail -f \
  /tmp/nginx_startup.log \
  /tmp/fastapi.log