File size: 8,081 Bytes
d721bf1
 
 
 
 
2a2521b
 
 
 
 
 
 
d721bf1
 
 
 
 
 
b2242d7
d721bf1
 
 
 
 
b2242d7
d721bf1
 
b2242d7
d721bf1
2a2521b
 
b2242d7
2a2521b
eb469ad
 
 
b2242d7
2b68a06
 
b2242d7
 
 
2a2521b
 
b2242d7
 
 
 
 
 
 
 
 
d721bf1
c9732a0
946fd5e
 
 
6bd4abe
 
 
 
 
 
 
 
 
946fd5e
 
6bd4abe
 
 
 
 
 
 
946fd5e
 
 
 
 
 
 
 
6bd4abe
 
 
 
 
 
 
 
 
 
 
 
c9732a0
8fb2104
d7e0435
 
8fb2104
c9732a0
8fb2104
 
c9732a0
 
 
 
8fb2104
c9732a0
 
 
129c060
08b8051
d11db8f
 
 
 
 
 
 
 
08b8051
 
 
 
 
698f424
 
 
 
08b8051
698f424
a88a684
698f424
a88a684
764b731
 
 
8c80239
08b8051
d11db8f
698f424
764b731
22ba1d6
 
 
 
 
 
385d201
 
 
22ba1d6
 
 
 
 
764b731
 
 
 
 
 
129c060
b2242d7
2b68a06
b2242d7
2b68a06
 
7af74d7
9e1edfd
b2242d7
2b68a06
 
 
 
b2242d7
2b68a06
b2242d7
2b68a06
 
eb469ad
b2242d7
 
 
e4c8301
 
08c36f0
 
e4c8301
08c36f0
e4c8301
 
 
 
 
 
 
 
 
 
 
 
 
9290463
55a805d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
#!/bin/bash

# Abort the startup sequence as soon as any command fails.
set -e

# Ensure DVC is initialized (in case .dvc folder was not copied)
if [[ ! -d ".dvc" ]]; then
    echo "Initializing DVC..."
    dvc init --no-scm
    dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi

# Determine credentials
# Prefer specific DAGSHUB vars, fallback to MLFLOW vars (often the same for DagsHub).
# NOTE: use DVC_-prefixed names instead of USER/PASS — USER is a standard
# environment variable and clobbering it can confuse child processes
# (git, ssh, dvc itself all may read $USER).
DVC_USER=${DAGSHUB_USERNAME:-$MLFLOW_TRACKING_USERNAME}
DVC_PASS=${DAGSHUB_TOKEN:-$MLFLOW_TRACKING_PASSWORD}

if [ -n "$DVC_USER" ] && [ -n "$DVC_PASS" ]; then
    echo "$(date) - Configuring DVC authentication for DagsHub..."
    # Configure local config (not committed) so the token never lands in git
    dvc remote modify origin --local auth basic
    dvc remote modify origin --local user "$DVC_USER"
    dvc remote modify origin --local password "$DVC_PASS"
else
    echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi

echo "$(date) - Pulling models from DVC..."
# Pull only the necessary files for inference
dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
         models/tfidf_vectorizer.pkl.dvc \
         models/label_names.pkl.dvc || echo "DVC pull failed, but continuing..."

# Create Nginx temp directories
for nginx_tmp in client_temp proxy_temp fastcgi_temp uwsgi_temp scgi_temp; do
    mkdir -p "/tmp/${nginx_tmp}"
done

echo "$(date) - Checking models existence..."
ls -la models/

echo "$(date) - Starting FastAPI application in background..."
# Bind to 0.0.0.0 so Nginx and HF health checks can reach the API
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &

# Wait for API to start: 30 attempts x 2s sleep = up to 60s
# (the previous message claimed 30s, which was misleading)
echo "$(date) - Waiting for API to start (up to 60s)..."
for i in {1..30}; do
    if curl -s http://127.0.0.1:8000/health > /dev/null; then
        echo "$(date) - API is UP!"
        break
    fi
    echo "$(date) - Waiting... ($i/30)"
    sleep 2
done

echo "$(date) - Configuring and starting Prometheus..."
# Patch Grafana Datasource for Localhost (HF Space) and fix URL path.
# All services run in one container here, so docker-compose service names
# (prometheus, alertmanager, pushgateway, hopcroft-api) must be rewritten
# to loopback addresses, and route prefixes added to match Nginx sub-paths.
# Replace prometheus:9090 with 127.0.0.1:9090/prometheus in all datasource configs
find /app/monitoring/grafana/provisioning/datasources -name '*.yml' -exec sed -i 's/prometheus:9090/127.0.0.1:9090\/prometheus/g' {} +

# Copy production configs to /tmp for modification
# (the originals under /etc are read-only for this user)
cp /etc/prometheus/prometheus.yml /tmp/prometheus.yml
cp /etc/prometheus/alert_rules.yml /tmp/alert_rules.yml
cp /etc/alertmanager/config.yml /tmp/alertmanager.yml

# Modify Prometheus config for local execution (replace docker-compose service names with localhost)
# hopcroft-api:8080 -> 127.0.0.1:8000 (API runs on 8000 in Space)
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/prometheus.yml
# Alertmanager: hopcroft-api:8080 -> 127.0.0.1:8000
sed -i 's/hopcroft-api:8080/127.0.0.1:8000/g' /tmp/alertmanager.yml
# alertmanager:9093 -> 127.0.0.1:9093
sed -i 's/alertmanager:9093/127.0.0.1:9093/g' /tmp/prometheus.yml
# pushgateway:9091 -> 127.0.0.1:9091
sed -i 's/pushgateway:9091/127.0.0.1:9091/g' /tmp/prometheus.yml
# Fix alert_rules path to be absolute or relative to execution
sed -i 's|"alert_rules.yml"|"/tmp/alert_rules.yml"|g' /tmp/prometheus.yml

# FIX: Add path prefixes to match --web.route-prefix arguments
# NOTE(review): these sed edits inject YAML lines and assume the exact
# indentation/quoting used in the shipped prometheus.yml — keep in sync.
# Add metrics_path for self-scraping prometheus
sed -i 's/job_name: "prometheus"/job_name: "prometheus"\n    metrics_path: "\/prometheus\/metrics"/g' /tmp/prometheus.yml
# Add metrics_path for pushgateway
sed -i 's/job_name: "pushgateway"/job_name: "pushgateway"\n    metrics_path: "\/pushgateway\/metrics"/g' /tmp/prometheus.yml
# Add path_prefix for Alertmanager
sed -i 's/    - static_configs:/    - path_prefix: "\/alertmanager\/"\n      static_configs:/g' /tmp/prometheus.yml

# Both exporters run detached; state and logs live under /tmp (writable in the Space).
echo "$(date) - Starting Alertmanager..."
alertmanager --config.file=/tmp/alertmanager.yml \
    --storage.path=/tmp/alertmanager_data \
    --web.route-prefix=/alertmanager/ >> /tmp/alertmanager.log 2>&1 &

echo "$(date) - Starting Pushgateway..."
pushgateway --persistence.file=/tmp/pushgateway_data \
    --web.route-prefix=/pushgateway/ >> /tmp/pushgateway.log 2>&1 &

# Determine Prometheus External URL
# Always use relative path so it works on both huggingface.co and .hf.space domains
PROM_EXTERNAL_URL="/prometheus/"

# Start Prometheus
# --web.external-url needs to match the public URL for correct link generation
# --web.route-prefix needs to match the path Nginx proxies to (/prometheus/)
prometheus \
    --config.file=/tmp/prometheus.yml \
    --storage.tsdb.path=/tmp/prometheus_data \
    --web.listen-address=0.0.0.0:9090 \
    --web.external-url="$PROM_EXTERNAL_URL" \
    --web.route-prefix=/prometheus/ \
    >> /tmp/prometheus.log 2>&1 &

# Start Grafana
if [ -n "$SPACE_ID" ]; then
    # SPACE_ID has the form "user/space"; derive the .hf.space host, which
    # avoids CORS/asset-loading issues. Parameter expansion replaces the
    # previous unquoted `echo $SPACE_ID | cut` (word-splitting/glob-unsafe,
    # and two extra subshells per value).
    SPACE_AUTHOR=${SPACE_ID%%/*}
    SPACE_NAME=${SPACE_ID#*/}
    SPACE_HOST="${SPACE_AUTHOR}-${SPACE_NAME}.hf.space"

    echo "$(date) - Detected HF Space environment (ID: $SPACE_ID). Configured Host: $SPACE_HOST"
    GRAFANA_ROOT_URL="https://$SPACE_HOST/grafana/"
else
    echo "$(date) - No SPACE_ID found. Defaulting Grafana to localhost."
    GRAFANA_ROOT_URL="http://localhost:3000/grafana/"
fi

# Locate Grafana binary ('command -v' is the portable builtin; 'which' is
# an external tool that may be absent in slim images).
GRAFANA_BIN=$(command -v grafana-server || echo "/usr/sbin/grafana-server")
echo "$(date) - Found Grafana binary at: $GRAFANA_BIN"

echo "$(date) - Starting Grafana with Root URL: $GRAFANA_ROOT_URL"

# Use the project's grafana.ini which we have permissions to read;
# writable paths are overridden onto /tmp via cfg: overrides.
grafana_args=(
    --homepath=/usr/share/grafana
    --config=/app/monitoring/grafana/grafana.ini
    cfg:paths.data=/tmp/grafana_data
    cfg:paths.logs=/tmp/grafana_logs
    cfg:paths.plugins=/usr/share/grafana/plugins
    cfg:paths.provisioning=/app/monitoring/grafana/provisioning
    cfg:server.root_url="$GRAFANA_ROOT_URL"
    cfg:server.serve_from_sub_path=true
    cfg:server.http_port=3000
)
$GRAFANA_BIN "${grafana_args[@]}" > /tmp/grafana.log 2>&1 &

# Wait for Grafana to start: 20 attempts x 1s sleep
echo "$(date) - Waiting for Grafana (20s)..."
GRAFANA_UP=0
for i in {1..20}; do
    if curl -s http://127.0.0.1:3000/api/health > /dev/null; then
        GRAFANA_UP=1
        echo "$(date) - Grafana is UP!"
        # Debug: Check what Grafana responds at root
        echo "$(date) - VERIFYING GRAFANA ROOT RESPONSE:"
        curl -v http://127.0.0.1:3000/ 2>&1 | head -n 20
        break
    fi
    sleep 1
done

# If Grafana never came up, print logs. Track readiness in a flag instead
# of re-probing /api/health a second time (extra request + race window).
if [ "$GRAFANA_UP" -ne 1 ]; then
    echo "$(date) - ERROR: Grafana failed to start within 20 seconds. Dumping logs:"
    cat /tmp/grafana.log
fi


echo "$(date) - Starting Nginx reverse proxy..."
if ! command -v nginx &> /dev/null; then
    echo "$(date) - ERROR: nginx not found in PATH"
    exit 1
fi
nginx -c /app/docker/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &

echo "$(date) - Waiting for Nginx to initialize..."
sleep 5

# Check if Nginx is running
if ps aux | grep -v grep | grep -q "nginx"; then
    echo "$(date) - Nginx is running."
else
    echo "$(date) - ERROR: Nginx failed to start. Logs:"
    cat /tmp/nginx_startup.log
fi

echo "$(date) - Final backend check before starting Streamlit..."
curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
# Streamlit is only reached through Nginx, so bind to loopback
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
    --server.port 8501 \
    --server.address 127.0.0.1 \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false \
    --server.headless true &

# Wait for Streamlit: 30 attempts x 2s sleep = up to 60s
# (the previous message claimed 30s, which was misleading)
echo "$(date) - Waiting for Streamlit to start (up to 60s)..."
for i in {1..30}; do
    if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
        echo "$(date) - Streamlit is UP!"
        break
    fi
    echo "$(date) - Waiting for Streamlit... ($i/30)"
    sleep 2
done

echo "$(date) - Process started. Tailing logs for debug..."
# exec so tail replaces the shell as the container's foreground process and
# receives stop signals (SIGTERM) directly instead of them being swallowed
# by a PID-1 shell that does not forward signals to children.
exec tail -f /tmp/nginx_startup.log /tmp/fastapi.log /tmp/grafana.log /tmp/prometheus.log