File size: 4,043 Bytes
dd8494c
 
 
 
 
cf9fd07
 
 
 
 
 
 
dd8494c
 
 
 
 
 
834ca3f
dd8494c
 
 
 
 
834ca3f
dd8494c
 
834ca3f
dd8494c
cf9fd07
 
834ca3f
cf9fd07
cc2ed29
 
 
834ca3f
0fbe9df
 
834ca3f
 
 
cf9fd07
 
834ca3f
 
 
 
 
 
 
 
 
dd8494c
b4e6edc
be7f335
 
 
b4e6edc
 
 
 
 
 
be7f335
 
 
b4e6edc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
834ca3f
0fbe9df
834ca3f
0fbe9df
 
 
8caf0a0
834ca3f
0fbe9df
 
 
 
834ca3f
0fbe9df
834ca3f
0fbe9df
 
cc2ed29
834ca3f
 
 
edcfb9a
 
5ea22fe
 
edcfb9a
5ea22fe
edcfb9a
 
 
 
 
 
 
 
 
 
 
 
 
859da78
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/bin/bash

# Abort the script as soon as an unguarded command fails.
set -e

# Bootstrap DVC when the .dvc folder is absent (e.g. it was not copied):
# initialise it without SCM integration and register the DagsHub remote
# as the default remote named "origin".
dvc_remote_url="https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc"
if [[ ! -d .dvc ]]; then
    echo "Initializing DVC..."
    dvc init --no-scm
    dvc remote add -d origin "$dvc_remote_url"
fi

# Determine credentials
# Prefer the DagsHub-specific variables and fall back to the MLflow ones
# (often the same for DagsHub). Both default to empty when nothing is set.
#
# The values are kept in script-private variables on purpose: USER is a
# standard environment variable, and when it is already exported, assigning
# to it here would change the value inherited by every process this script
# starts afterwards.
dvc_user=${DAGSHUB_USERNAME:-${MLFLOW_TRACKING_USERNAME:-}}
dvc_pass=${DAGSHUB_TOKEN:-${MLFLOW_TRACKING_PASSWORD:-}}

if [[ -n "$dvc_user" && -n "$dvc_pass" ]]; then
    echo "$(date) - Configuring DVC authentication for DagsHub..."
    # Configure local config (not committed)
    dvc remote modify origin --local auth basic
    dvc remote modify origin --local user "$dvc_user"
    dvc remote modify origin --local password "$dvc_pass"
else
    # Not fatal: the pull may still succeed against a public remote.
    echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi

echo "$(date) - Pulling models from DVC..."
# Pull only the necessary files for inference.
# Best effort: a failed pull is reported but does not stop the startup.
dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
         models/tfidf_vectorizer.pkl.dvc \
         models/label_names.pkl.dvc || echo "DVC pull failed, but continuing..."

# Create Nginx temp directories
mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp

echo "$(date) - Checking models existence..."
# Diagnostic listing only. It is guarded so that, under `set -e`, a missing
# models/ directory (e.g. after a failed pull) does not abort the script
# right after it announced that it would continue.
ls -la models/ || echo "$(date) - WARNING: models/ directory is missing or not readable."

echo "$(date) - Starting FastAPI application in background..."
# Using 0.0.0.0 to be safe
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &
# Remember the PID so a crash during startup can be detected below.
api_pid=$!

# Wait for API to start: 30 attempts, 2 seconds apart (about 60 seconds).
echo "$(date) - Waiting for API to start (up to 60s)..."
api_ready=false
for attempt in {1..30}; do
    if curl -s http://127.0.0.1:8000/health > /dev/null; then
        echo "$(date) - API is UP!"
        api_ready=true
        break
    fi
    # Stop polling early when the server process is already gone.
    if ! kill -0 "$api_pid" 2> /dev/null; then
        echo "$(date) - ERROR: FastAPI process exited during startup. Logs:"
        cat /tmp/fastapi.log || true
        break
    fi
    echo "$(date) - Waiting... ($attempt/30)"
    sleep 2
done

# Startup stays best effort (the script continues), but say so explicitly
# instead of falling through silently.
if [[ "$api_ready" != true ]]; then
    echo "$(date) - WARNING: API did not become ready; continuing anyway."
fi

echo "$(date) - Configuring and starting Prometheus..."
prom_rules=/tmp/alert_rules.yml
prom_config=/tmp/prometheus.yml

# Stage the alert rules at the path referenced by the generated config.
cp monitoring/prometheus/alert_rules.yml "$prom_rules"

# Generate the Prometheus configuration for the space. The heredoc delimiter
# is quoted because the body is literal text with nothing to expand.
cat > "$prom_config" <<'EOF'
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "/tmp/alert_rules.yml"

scrape_configs:
  - job_name: 'hopcroft-api'
    metrics_path: '/metrics'
    static_configs:
      - targets: ['127.0.0.1:8000']
    scrape_interval: 10s
EOF

# Launch Prometheus in the background, logging to /tmp/prometheus.log.
# --web.external-url needs to match the path in Nginx
prom_args=(
    --config.file="$prom_config"
    --storage.tsdb.path=/tmp/prometheus_data
    --web.listen-address=0.0.0.0:9090
    --web.external-url=/prometheus/
    --web.route-prefix=/prometheus/
)
prometheus "${prom_args[@]}" >> /tmp/prometheus.log 2>&1 &

echo "$(date) - Starting Nginx reverse proxy..."
# Nginx is mandatory: abort when the binary is not available.
if ! command -v nginx &> /dev/null; then
    echo "$(date) - ERROR: nginx not found in PATH"
    exit 1
fi
# "daemon off;" keeps nginx in the foreground of its own job, so $! is the
# PID of the nginx process launched here.
nginx -c /app/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
nginx_pid=$!

echo "$(date) - Waiting for Nginx to initialize..."
sleep 5

# Check if Nginx is running by probing the PID we started, rather than
# grepping the process list (which matches any command line containing
# the word "nginx").
if kill -0 "$nginx_pid" 2> /dev/null; then
    echo "$(date) - Nginx is running."
else
    # Reported but not fatal: the script carries on with the other services.
    echo "$(date) - ERROR: Nginx failed to start. Logs:"
    cat /tmp/nginx_startup.log
fi

echo "$(date) - Final backend check before starting Streamlit..."
# Verbose probe for the logs; a failure is reported but does not stop startup.
curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
# Exported so the Streamlit process inherits the backend address.
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
    --server.port 8501 \
    --server.address 127.0.0.1 \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false \
    --server.headless true &
# Remember the PID so a crash during startup can be detected below.
streamlit_pid=$!

# Wait for Streamlit to start: 30 attempts, 2 seconds apart (about 60 seconds).
echo "$(date) - Waiting for Streamlit to start (up to 60s)..."
streamlit_ready=false
for attempt in {1..30}; do
    if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
        echo "$(date) - Streamlit is UP!"
        streamlit_ready=true
        break
    fi
    # Stop polling early when the Streamlit process is already gone.
    if ! kill -0 "$streamlit_pid" 2> /dev/null; then
        echo "$(date) - ERROR: Streamlit process exited during startup."
        break
    fi
    echo "$(date) - Waiting for Streamlit... ($attempt/30)"
    sleep 2
done

# Startup stays best effort (the script continues), but say so explicitly
# instead of falling through silently.
if [[ "$streamlit_ready" != true ]]; then
    echo "$(date) - WARNING: Streamlit did not become ready; continuing anyway."
fi

echo "$(date) - Process started. Tailing Nginx logs for debug..."
# Follow the Nginx and FastAPI logs in the foreground. `tail -f` does not
# return on its own, so this is the last command the script reaches.
log_files=(/tmp/nginx_startup.log /tmp/fastapi.log)
tail -f "${log_files[@]}"