File size: 8,081 Bytes
d721bf1 2a2521b d721bf1 b2242d7 d721bf1 b2242d7 d721bf1 b2242d7 d721bf1 2a2521b b2242d7 2a2521b eb469ad b2242d7 2b68a06 b2242d7 2a2521b b2242d7 d721bf1 c9732a0 946fd5e 6bd4abe 946fd5e 6bd4abe 946fd5e 6bd4abe c9732a0 8fb2104 d7e0435 8fb2104 c9732a0 8fb2104 c9732a0 8fb2104 c9732a0 129c060 08b8051 d11db8f 08b8051 698f424 08b8051 698f424 a88a684 698f424 a88a684 764b731 8c80239 08b8051 d11db8f 698f424 764b731 22ba1d6 385d201 22ba1d6 764b731 129c060 b2242d7 2b68a06 b2242d7 2b68a06 7af74d7 9e1edfd b2242d7 2b68a06 b2242d7 2b68a06 b2242d7 2b68a06 eb469ad b2242d7 e4c8301 08c36f0 e4c8301 08c36f0 e4c8301 9290463 55a805d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
#!/bin/bash
# Abort on the first failing command.
set -e

# Bootstrap DVC when the .dvc directory was not shipped with the image
# (e.g. excluded from the build context) and register the DagsHub remote.
if [[ ! -d .dvc ]]; then
  echo "Initializing DVC..."
  dvc init --no-scm
  dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi
# Determine credentials for the DagsHub DVC remote.
# Prefer the DAGSHUB_* variables, fall back to the MLFLOW_* ones
# (DagsHub typically uses the same username/token for both services).
# FIX: use dedicated variable names — USER is a standard environment
# variable read by login shells and child processes; clobbering it can
# confuse any tool launched later in this script.
DVC_USER=${DAGSHUB_USERNAME:-$MLFLOW_TRACKING_USERNAME}
DVC_PASS=${DAGSHUB_TOKEN:-$MLFLOW_TRACKING_PASSWORD}
if [ -n "$DVC_USER" ] && [ -n "$DVC_PASS" ]; then
  echo "$(date) - Configuring DVC authentication for DagsHub..."
  # Write to the local DVC config, which is never committed.
  dvc remote modify origin --local auth basic
  dvc remote modify origin --local user "$DVC_USER"
  dvc remote modify origin --local password "$DVC_PASS"
else
  echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi
echo "$(date) - Pulling models from DVC..."
# Fetch only the artifacts the inference service needs; a failed pull is
# tolerated so a cached image can still boot.
dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
    models/tfidf_vectorizer.pkl.dvc \
    models/label_names.pkl.dvc || echo "DVC pull failed, but continuing..."

# Nginx runs unprivileged here, so give it writable temp directories.
mkdir -p /tmp/{client,proxy,fastcgi,uwsgi,scgi}_temp

echo "$(date) - Checking models existence..."
ls -la models/
echo "$(date) - Starting FastAPI application in background..."
# Bind to 0.0.0.0 so the app is reachable from Nginx inside the container.
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &

# Poll the health endpoint: 30 attempts x 2s sleep = up to 60 seconds.
# FIX: the previous message claimed 30s, but the loop waits up to 60s.
echo "$(date) - Waiting for API to start (up to 60s)..."
for i in {1..30}; do
  if curl -s http://127.0.0.1:8000/health > /dev/null; then
    echo "$(date) - API is UP!"
    break
  fi
  echo "$(date) - Waiting... ($i/30)"
  sleep 2
done
echo "$(date) - Configuring and starting Prometheus..."
# Patch Grafana Datasource for Localhost (HF Space) and fix URL path.
# Replace prometheus:9090 with 127.0.0.1:9090/prometheus in all datasource
# configs: in the Space every service shares one host, and Prometheus is
# served under the /prometheus route prefix (see the flags further below).
find /app/monitoring/grafana/provisioning/datasources -name '*.yml' -exec sed -i 's/prometheus:9090/127.0.0.1:9090\/prometheus/g' {} +
# Copy production configs to /tmp for modification
# (the originals under /etc may be read-only for the runtime user).
cp /etc/prometheus/prometheus.yml /tmp/prometheus.yml
cp /etc/prometheus/alert_rules.yml /tmp/alert_rules.yml
cp /etc/alertmanager/config.yml /tmp/alertmanager.yml
# Modify Prometheus config for local execution: the configs were written
# for docker-compose, so replace service names with localhost addresses.
# hopcroft-api:8080 -> 127.0.0.1:8000 (API runs on 8000 in Space)
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/prometheus.yml
# Same rewrite inside the Alertmanager config (e.g. webhook receivers).
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/alertmanager.yml
# alertmanager:9093 -> 127.0.0.1:9093
sed -i 's/alertmanager:9093/127.0.0.1:9093/g' /tmp/prometheus.yml
# pushgateway:9091 -> 127.0.0.1:9091
sed -i 's/pushgateway:9091/127.0.0.1:9091/g' /tmp/prometheus.yml
# Point rule_files at the patched copy in /tmp instead of the relative name.
# NOTE(review): this assumes prometheus.yml quotes the path exactly as
# "alert_rules.yml" — the sed is a no-op otherwise.
sed -i 's|"alert_rules.yml"|"/tmp/alert_rules.yml"|g' /tmp/prometheus.yml
# FIX: Add path prefixes to match the --web.route-prefix arguments used
# when the daemons are started below.
# Add metrics_path for self-scraping prometheus
sed -i 's/job_name: "prometheus"/job_name: "prometheus"\n    metrics_path: "\/prometheus\/metrics"/g' /tmp/prometheus.yml
# Add metrics_path for pushgateway
sed -i 's/job_name: "pushgateway"/job_name: "pushgateway"\n    metrics_path: "\/pushgateway\/metrics"/g' /tmp/prometheus.yml
# Add path_prefix for Alertmanager (inserted before its static_configs
# entry; the indentation in the replacement must match the YAML layout).
sed -i 's/  - static_configs:/  - path_prefix: "\/alertmanager\/"\n    static_configs:/g' /tmp/prometheus.yml
echo "$(date) - Starting Alertmanager..."
alertmanager \
  --config.file=/tmp/alertmanager.yml \
  --storage.path=/tmp/alertmanager_data \
  --web.route-prefix=/alertmanager/ \
  >> /tmp/alertmanager.log 2>&1 &

echo "$(date) - Starting Pushgateway..."
pushgateway \
  --persistence.file=/tmp/pushgateway_data \
  --web.route-prefix=/pushgateway/ \
  >> /tmp/pushgateway.log 2>&1 &

# Determine Prometheus External URL.
# Always use a relative path so links work on both the huggingface.co
# and the .hf.space domains.
PROM_EXTERNAL_URL="/prometheus/"

# Start Prometheus.
# --web.external-url must match the public URL for correct link generation;
# --web.route-prefix must match the path Nginx proxies to (/prometheus/).
# FIX: quote the expansion to avoid word-splitting/globbing (SC2086).
prometheus \
  --config.file=/tmp/prometheus.yml \
  --storage.tsdb.path=/tmp/prometheus_data \
  --web.listen-address=0.0.0.0:9090 \
  --web.external-url="$PROM_EXTERNAL_URL" \
  --web.route-prefix=/prometheus/ \
  >> /tmp/prometheus.log 2>&1 &
# Start Grafana.
if [ -n "$SPACE_ID" ]; then
  # Derive the public host from SPACE_ID (format: user/space); the
  # <user>-<space>.hf.space domain avoids CORS/asset-loading issues.
  # FIX: use parameter expansion instead of unquoted `echo $SPACE_ID | cut`
  # (avoids two subshells and word-splitting/globbing on the expansion).
  SPACE_AUTHOR=${SPACE_ID%%/*}
  SPACE_NAME=${SPACE_ID#*/}
  SPACE_HOST="${SPACE_AUTHOR}-${SPACE_NAME}.hf.space"
  echo "$(date) - Detected HF Space environment (ID: $SPACE_ID). Configured Host: $SPACE_HOST"
  GRAFANA_ROOT_URL="https://$SPACE_HOST/grafana/"
else
  echo "$(date) - No SPACE_ID found. Defaulting Grafana to localhost."
  GRAFANA_ROOT_URL="http://localhost:3000/grafana/"
fi

# Locate the Grafana binary; `command -v` is the portable replacement for
# the deprecated `which`. Fall back to the Debian package location.
GRAFANA_BIN=$(command -v grafana-server || echo "/usr/sbin/grafana-server")
echo "$(date) - Found Grafana binary at: $GRAFANA_BIN"
echo "$(date) - Starting Grafana with Root URL: $GRAFANA_ROOT_URL"
# Use the project's grafana.ini which we have permissions to read;
# cfg: overrides point all writable paths at /tmp.
"$GRAFANA_BIN" --homepath=/usr/share/grafana \
  --config=/app/monitoring/grafana/grafana.ini \
  cfg:paths.data=/tmp/grafana_data \
  cfg:paths.logs=/tmp/grafana_logs \
  cfg:paths.plugins=/usr/share/grafana/plugins \
  cfg:paths.provisioning=/app/monitoring/grafana/provisioning \
  cfg:server.root_url="$GRAFANA_ROOT_URL" \
  cfg:server.serve_from_sub_path=true \
  cfg:server.http_port=3000 \
  > /tmp/grafana.log 2>&1 &
# Give Grafana up to 20 seconds (20 probes, 1 second apart) to come up.
echo "$(date) - Waiting for Grafana (20s)..."
attempt=1
while [ "$attempt" -le 20 ]; do
  if curl -s http://127.0.0.1:3000/api/health > /dev/null; then
    echo "$(date) - Grafana is UP!"
    # Debug: Check what Grafana responds at root
    echo "$(date) - VERIFYING GRAFANA ROOT RESPONSE:"
    curl -v http://127.0.0.1:3000/ 2>&1 | head -n 20
    break
  fi
  sleep 1
  attempt=$((attempt + 1))
done
# When the health endpoint still does not answer, dump the log for debugging.
if ! curl -s http://127.0.0.1:3000/api/health > /dev/null; then
  echo "$(date) - ERROR: Grafana failed to start within 20 seconds. Dumping logs:"
  cat /tmp/grafana.log
fi
echo "$(date) - Starting Nginx reverse proxy..."
if ! command -v nginx &> /dev/null; then
  echo "$(date) - ERROR: nginx not found in PATH"
  exit 1
fi
nginx -c /app/docker/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
echo "$(date) - Waiting for Nginx to initialize..."
sleep 5
# FIX: use pgrep instead of the `ps aux | grep -v grep | grep` idiom;
# -x matches the process name exactly, so this cannot false-positive on
# unrelated command lines containing the substring "nginx".
if pgrep -x nginx > /dev/null; then
  echo "$(date) - Nginx is running."
else
  echo "$(date) - ERROR: Nginx failed to start. Logs:"
  cat /tmp/nginx_startup.log
fi
echo "$(date) - Final backend check before starting Streamlit..."
curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
# The Streamlit UI reaches FastAPI through this base URL.
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
  --server.port 8501 \
  --server.address 127.0.0.1 \
  --server.enableCORS=false \
  --server.enableXsrfProtection=false \
  --server.headless true &

# Poll the health endpoint: 30 attempts x 2s sleep = up to 60 seconds.
# FIX: the previous message claimed 30s, but the loop waits up to 60s.
echo "$(date) - Waiting for Streamlit to start (up to 60s)..."
for i in {1..30}; do
  if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
    echo "$(date) - Streamlit is UP!"
    break
  fi
  echo "$(date) - Waiting for Streamlit... ($i/30)"
  sleep 2
done

# Keep the container alive and stream service logs to stdout.
# FIX: touch the files first so tail -f cannot exit (killing the
# container) just because a service never wrote its log file.
echo "$(date) - Process started. Tailing logs for debug..."
touch /tmp/nginx_startup.log /tmp/fastapi.log /tmp/grafana.log /tmp/prometheus.log
tail -f /tmp/nginx_startup.log /tmp/fastapi.log /tmp/grafana.log /tmp/prometheus.log
|