#!/bin/bash
#
# Startup script. In order it: prepares DVC and pulls the model files, starts
# the FastAPI backend, rewrites and starts the Prometheus / Alertmanager /
# Pushgateway / Grafana monitoring stack, starts the Nginx reverse proxy and
# the Streamlit UI, and finally tails the service logs.
#
# The interpreter line has to be the very first line of the file, otherwise
# it is treated as an ordinary comment.

# Abort on the first unhandled command failure.
set -e

# First run only: when no .dvc directory exists yet, create a DVC workspace
# that is not tied to an SCM (--no-scm) and register the DagsHub remote as
# the default remote (-d) under the name "origin".
if [ ! -d ".dvc" ]; then
  echo "Initializing DVC..."
  dvc init --no-scm
  dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi
|
|
|
|
|
|
|
|
|
|
|
# Resolve the DagsHub credentials, falling back to the MLflow tracking
# variables when the DagsHub-specific ones are unset or empty.
# Script-private lowercase names are used on purpose: assigning to USER would
# overwrite the login-name variable that is normally exported, and every
# process started later would inherit the DagsHub user name in its place.
dagshub_user=${DAGSHUB_USERNAME:-${MLFLOW_TRACKING_USERNAME:-}}
dagshub_token=${DAGSHUB_TOKEN:-${MLFLOW_TRACKING_PASSWORD:-}}

if [ -n "$dagshub_user" ] && [ -n "$dagshub_token" ]; then
  echo "$(date) - Configuring DVC authentication for DagsHub..."
  # --local keeps these settings in DVC's local (per-checkout) configuration
  # rather than in the shared project config.
  # NOTE(review): the token is passed on the command line and is therefore
  # briefly visible in the process list.
  dvc remote modify origin --local auth basic
  dvc remote modify origin --local user "$dagshub_user"
  dvc remote modify origin --local password "$dagshub_token"
else
  # Both a user name and a token are required; with only one of them the
  # remote is left unauthenticated.
  echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi
|
|
|
|
|
echo "$(date) - Pulling models from DVC..."

# The three .dvc pointer files under models/ that are fetched from the remote.
model_targets=(
  models/random_forest_tfidf_gridsearch.pkl.dvc
  models/tfidf_vectorizer.pkl.dvc
  models/label_names.pkl.dvc
)

# A failed pull is reported but deliberately does not stop the script.
if ! dvc pull "${model_targets[@]}"; then
  echo "DVC pull failed, but continuing..."
fi
|
|
|
|
|
|
|
|
# Scratch directories under /tmp.
# NOTE(review): presumably the temp paths referenced by the Nginx
# configuration - confirm against /app/docker/nginx.conf.
mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp

echo "$(date) - Checking models existence..."
# Diagnostic listing only. The DVC pull is allowed to fail, so a missing
# models/ directory must not abort the script under `set -e`; report it and
# carry on instead.
if ! ls -la models/; then
  echo "$(date) - WARNING: models/ could not be listed; the DVC pull may have failed."
fi
|
|
|
|
|
echo "$(date) - Starting FastAPI application in background..."

# Runs in the background; stdout and stderr are appended to /tmp/fastapi.log.
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &

# Poll the health endpoint: 30 attempts spaced 2 seconds apart, i.e. roughly
# 60 seconds in total.
echo "$(date) - Waiting for API to start (up to 60s)..."
api_ready=0
for i in {1..30}; do
  if curl -s http://127.0.0.1:8000/health > /dev/null; then
    echo "$(date) - API is UP!"
    api_ready=1
    break
  fi
  echo "$(date) - Waiting... ($i/30)"
  sleep 2
done

# Same reporting as the Grafana section: when the service never answered,
# say so and show its log, then continue with the rest of the startup.
if [ "$api_ready" -eq 0 ]; then
  echo "$(date) - ERROR: FastAPI did not answer on /health within 60 seconds. Dumping logs:"
  cat /tmp/fastapi.log || true
fi
|
|
|
|
|
#######################################
# Rewrite a Prometheus config in place so that it targets services running
# on 127.0.0.1 behind path prefixes instead of per-service host names.
# The inserted YAML keys reuse the indentation captured from the matched
# list item, so the result stays aligned whatever indent width the file uses.
# NOTE(review): assumes the jobs are written as `- job_name: "..."` and the
# Alertmanager entry as `- static_configs:` at the start of a line - confirm
# against /etc/prometheus/prometheus.yml.
# Arguments: $1 - path of the prometheus.yml copy to edit
# Returns:   exit status of sed
#######################################
localize_prometheus_config() {
  local cfg=$1
  # Expressions, in order: scrape/alert/push targets -> localhost; rule file
  # -> the copy under /tmp; metrics_path for the "prometheus" and
  # "pushgateway" jobs; path_prefix for the Alertmanager entry.
  sed -i \
    -e 's/hopcroft-api:8080/127.0.0.1:8000/g' \
    -e 's/alertmanager:9093/127.0.0.1:9093/g' \
    -e 's/pushgateway:9091/127.0.0.1:9091/g' \
    -e 's|"alert_rules.yml"|"/tmp/alert_rules.yml"|g' \
    -e 's|^\( *\)- job_name: "prometheus"|&\n\1  metrics_path: "/prometheus/metrics"|' \
    -e 's|^\( *\)- job_name: "pushgateway"|&\n\1  metrics_path: "/pushgateway/metrics"|' \
    -e 's|^\( *\)- static_configs:|\1- path_prefix: "/alertmanager/"\n\1  static_configs:|' \
    -- "$cfg"
}

echo "$(date) - Configuring and starting Prometheus..."

# Grafana datasource provisioning files: replace the prometheus:9090 address
# with the local instance under its /prometheus prefix.
find /app/monitoring/grafana/provisioning/datasources -name '*.yml' -exec sed -i 's/prometheus:9090/127.0.0.1:9090\/prometheus/g' {} +

# Edit copies under /tmp; the files under /etc are left untouched.
cp /etc/prometheus/prometheus.yml /tmp/prometheus.yml
cp /etc/prometheus/alert_rules.yml /tmp/alert_rules.yml
cp /etc/alertmanager/config.yml /tmp/alertmanager.yml

localize_prometheus_config /tmp/prometheus.yml

# The Alertmanager config only needs the API address rewritten.
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/alertmanager.yml
|
|
|
|
|
echo "$(date) - Starting Alertmanager..."

# Alertmanager reads the rewritten config from /tmp and is served under the
# /alertmanager/ route prefix; output is appended to its own log file.
alertmanager_args=(
  --config.file=/tmp/alertmanager.yml
  --storage.path=/tmp/alertmanager_data
  --web.route-prefix=/alertmanager/
)
alertmanager "${alertmanager_args[@]}" >> /tmp/alertmanager.log 2>&1 &

echo "$(date) - Starting Pushgateway..."

# Pushgateway persists its state to a file under /tmp and is served under
# the /pushgateway/ route prefix.
pushgateway_args=(
  --persistence.file=/tmp/pushgateway_data
  --web.route-prefix=/pushgateway/
)
pushgateway "${pushgateway_args[@]}" >> /tmp/pushgateway.log 2>&1 &
|
|
|
|
|
|
|
|
|
|
|
# External URL handed to Prometheus; it matches the /prometheus/ route
# prefix configured below.
PROM_EXTERNAL_URL="/prometheus/"

# Flags are collected in an array so that each one - including the expanded
# external URL - reaches Prometheus as exactly one argument, with no word
# splitting or globbing applied to the variable's value.
prometheus_args=(
  --config.file=/tmp/prometheus.yml
  --storage.tsdb.path=/tmp/prometheus_data
  --web.listen-address=0.0.0.0:9090
  "--web.external-url=${PROM_EXTERNAL_URL}"
  --web.route-prefix=/prometheus/
)

# Runs in the background; stdout and stderr are appended to the log file.
prometheus "${prometheus_args[@]}" >> /tmp/prometheus.log 2>&1 &
|
|
|
|
|
|
|
|
#######################################
# Derive a Space hostname from a Space ID.
# Only the first two "/"-separated fields are used; an ID without any "/"
# yields the same text for both fields.
# Arguments: $1 - Space ID, expected as "author/name"
# Outputs:   "<author>-<name>.hf.space" on stdout
#######################################
derive_space_host() {
  local space_id=$1
  local author=${space_id%%/*}
  local rest=${space_id#*/}
  local name=${rest%%/*}
  printf '%s-%s.hf.space\n' "$author" "$name"
}

if [ -n "${SPACE_ID:-}" ]; then
  # Keep a SPACE_HOST that is already present in the environment and only
  # derive one from SPACE_ID when it is missing or empty.
  # NOTE(review): the derived value joins the ID fields verbatim; any
  # hostname normalisation done by the platform is not replicated here.
  SPACE_HOST=${SPACE_HOST:-$(derive_space_host "$SPACE_ID")}

  echo "$(date) - Detected HF Space environment (ID: $SPACE_ID). Configured Host: $SPACE_HOST"
  GRAFANA_ROOT_URL="https://$SPACE_HOST/grafana/"
else
  echo "$(date) - No SPACE_ID found. Defaulting Grafana to localhost."
  GRAFANA_ROOT_URL="http://localhost:3000/grafana/"
fi
|
|
|
|
|
|
|
|
# Locate the Grafana server binary. `command -v` is a shell builtin, so the
# lookup does not depend on an external `which` being installed; when nothing
# is found on PATH the conventional /usr/sbin location is used instead.
GRAFANA_BIN=$(command -v grafana-server || echo "/usr/sbin/grafana-server")
echo "$(date) - Found Grafana binary at: $GRAFANA_BIN"

echo "$(date) - Starting Grafana with Root URL: $GRAFANA_ROOT_URL"

# The cfg:<section>.<key>=<value> arguments override grafana.ini: data and
# logs go under /tmp, the root URL comes from GRAFANA_ROOT_URL, and Grafana
# serves from that sub-path on port 3000.
# The log file is truncated (">"), not appended to, on every start.
"$GRAFANA_BIN" --homepath=/usr/share/grafana \
  --config=/app/monitoring/grafana/grafana.ini \
  cfg:paths.data=/tmp/grafana_data \
  cfg:paths.logs=/tmp/grafana_logs \
  cfg:paths.plugins=/usr/share/grafana/plugins \
  cfg:paths.provisioning=/app/monitoring/grafana/provisioning \
  cfg:server.root_url="$GRAFANA_ROOT_URL" \
  cfg:server.serve_from_sub_path=true \
  cfg:server.http_port=3000 \
  > /tmp/grafana.log 2>&1 &
|
|
|
|
|
|
|
|
echo "$(date) - Waiting for Grafana (20s)..."

# Up to 20 probes of the health endpoint, one second apart.
attempt=1
while (( attempt <= 20 )); do
  if curl -s http://127.0.0.1:3000/api/health > /dev/null; then
    echo "$(date) - Grafana is UP!"

    # Show the first lines of the verbose response for the root path as a
    # debugging aid.
    echo "$(date) - VERIFYING GRAFANA ROOT RESPONSE:"
    curl -v http://127.0.0.1:3000/ 2>&1 | head -n 20
    break
  fi
  sleep 1
  attempt=$((attempt + 1))
done

# One more probe after the loop decides whether the failure report is
# printed; the script continues either way.
curl -s http://127.0.0.1:3000/api/health > /dev/null || {
  echo "$(date) - ERROR: Grafana failed to start within 20 seconds. Dumping logs:"
  cat /tmp/grafana.log
}
|
|
|
|
|
|
|
|
echo "$(date) - Starting Nginx reverse proxy..."

# Nginx is mandatory: stop the whole script when the binary is not on PATH.
if ! command -v nginx &> /dev/null; then
  echo "$(date) - ERROR: nginx not found in PATH"
  exit 1
fi

# "daemon off;" keeps Nginx attached to the process started here, so the PID
# of the background job is the PID of Nginx itself.
nginx -c /app/docker/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
nginx_pid=$!

echo "$(date) - Waiting for Nginx to initialize..."
sleep 5

# `kill -0` delivers no signal; it only reports whether the process still
# exists. Checking the recorded PID avoids both a dependency on `ps` and
# false positives from unrelated command lines that merely contain "nginx".
if kill -0 "$nginx_pid" 2> /dev/null; then
  echo "$(date) - Nginx is running."
else
  # Reported but not fatal: the script carries on with the remaining steps.
  echo "$(date) - ERROR: Nginx failed to start. Logs:"
  cat /tmp/nginx_startup.log
fi
|
|
|
|
|
echo "$(date) - Final backend check before starting Streamlit..."

# Informational probe: a failure is printed and startup goes on regardless.
if ! curl -v http://127.0.0.1:8000/health; then
  echo "FastAPI health check failed!"
fi

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."

# Exported so the Streamlit process inherits the backend address.
export API_BASE_URL="http://127.0.0.1:8000"

# Streamlit listens on the loopback interface only, headless, with CORS and
# XSRF protection switched off.
streamlit_args=(
  --server.port 8501
  --server.address 127.0.0.1
  --server.enableCORS=false
  --server.enableXsrfProtection=false
  --server.headless true
)
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py "${streamlit_args[@]}" &
|
|
|
|
|
|
|
|
# Poll the Streamlit health endpoint: 30 attempts spaced 2 seconds apart,
# i.e. roughly 60 seconds in total.
echo "$(date) - Waiting for Streamlit to start (up to 60s)..."
streamlit_ready=0
for i in {1..30}; do
  if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
    echo "$(date) - Streamlit is UP!"
    streamlit_ready=1
    break
  fi
  echo "$(date) - Waiting for Streamlit... ($i/30)"
  sleep 2
done

# Same reporting as the Grafana section: make a timeout visible instead of
# falling through silently. Streamlit's output is not redirected to a file,
# so there is no log to dump here. The script continues either way.
if [ "$streamlit_ready" -eq 0 ]; then
  echo "$(date) - ERROR: Streamlit did not answer on /healthz within 60 seconds."
fi
|
|
|
|
|
echo "$(date) - Process started. Tailing logs for debug..."

# Follow the service logs in the foreground; `tail -f` keeps running, so this
# is the last statement the script reaches.
followed_logs=(
  /tmp/nginx_startup.log
  /tmp/fastapi.log
  /tmp/grafana.log
  /tmp/prometheus.log
)
tail -f "${followed_logs[@]}"
|
|
|