File size: 3,258 Bytes
d721bf1
 
 
 
 
2a2521b
 
 
 
 
 
 
d721bf1
 
 
 
 
 
b2242d7
d721bf1
 
 
 
 
b2242d7
d721bf1
 
b2242d7
d721bf1
2a2521b
 
b2242d7
2a2521b
eb469ad
 
 
b2242d7
2b68a06
 
b2242d7
 
 
2a2521b
 
b2242d7
 
 
 
 
 
 
 
 
d721bf1
b2242d7
2b68a06
b2242d7
2b68a06
 
 
9e1edfd
b2242d7
2b68a06
 
 
 
b2242d7
2b68a06
b2242d7
2b68a06
 
eb469ad
b2242d7
 
 
e4c8301
 
08c36f0
 
e4c8301
08c36f0
e4c8301
 
 
 
 
 
 
 
 
 
 
 
 
9290463
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/bin/bash

# Abort the script as soon as an unguarded command returns non-zero
set -o errexit

# Bootstrap DVC when the .dvc directory is absent (in case it was not copied):
# create an SCM-less DVC repo and register the DagsHub remote as the default.
if ! test -d .dvc; then
    printf '%s\n' "Initializing DVC..."
    dvc init --no-scm
    dvc remote add -d origin https://dagshub.com/se4ai2526-uniba/Hopcroft.dvc
fi

# Determine credentials.
# Prefer the DagsHub-specific variables and fall back to the MLflow tracking
# ones (often the same for DagsHub).
# The values are deliberately NOT stored in USER/PASS: USER is a standard
# environment variable, and when it is exported an assignment here would
# change the login name seen by every process this script starts afterwards.
dagshub_user=${DAGSHUB_USERNAME:-${MLFLOW_TRACKING_USERNAME:-}}
dagshub_pass=${DAGSHUB_TOKEN:-${MLFLOW_TRACKING_PASSWORD:-}}

if [[ -n "$dagshub_user" && -n "$dagshub_pass" ]]; then
    echo "$(date) - Configuring DVC authentication for DagsHub..."
    # --local writes to the local config (not committed)
    dvc remote modify origin --local auth basic
    dvc remote modify origin --local user "$dagshub_user"
    dvc remote modify origin --local password "$dagshub_pass"
else
    echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
fi

echo "$(date) - Pulling models from DVC..."
# Pull only the necessary files for inference. A failed pull is tolerated on
# purpose (the `|| echo` keeps `set -e` from aborting the script).
model_targets=(
    models/random_forest_tfidf_gridsearch.pkl.dvc
    models/tfidf_vectorizer.pkl.dvc
    models/label_names.pkl.dvc
)
dvc pull "${model_targets[@]}" || echo "DVC pull failed, but continuing..."

# Create Nginx temp directories
mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp

echo "$(date) - Checking models existence..."
# Purely diagnostic listing: it must not abort startup under `set -e` when
# models/ is missing, since the pull failure above is explicitly tolerated.
ls -la models/ || echo "$(date) - WARNING: could not list models/ (directory missing?)"

echo "$(date) - Starting FastAPI application in background..."
# Using 0.0.0.0 to be safe
uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &
# Remember the server's PID so the wait loop can tell "still booting" from "crashed"
api_pid=$!

# Wait for API to start: at most 30 probes, 2 seconds apart (about 60s overall)
echo "$(date) - Waiting for API to start (up to 60s)..."
api_ready=false
for i in {1..30}; do
    if curl -s http://127.0.0.1:8000/health > /dev/null; then
        echo "$(date) - API is UP!"
        api_ready=true
        break
    fi
    # No point in polling further once the server process has exited
    if ! kill -0 "$api_pid" 2> /dev/null; then
        echo "$(date) - ERROR: FastAPI process exited during startup. See /tmp/fastapi.log"
        break
    fi
    echo "$(date) - Waiting... ($i/30)"
    sleep 2
done

# Startup stays best-effort (the script continues either way), but a backend
# that never answered is reported instead of being passed over silently.
if [[ "$api_ready" != true ]]; then
    echo "$(date) - WARNING: API did not become reachable on 127.0.0.1:8000; continuing anyway."
fi

echo "$(date) - Starting Nginx reverse proxy..."
if ! command -v nginx &> /dev/null; then
    echo "$(date) - ERROR: nginx not found in PATH"
    exit 1
fi
nginx -c /app/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
# With "daemon off;" nginx stays in the foreground of this background job,
# so $! is the PID of the nginx process started here.
nginx_pid=$!

echo "$(date) - Waiting for Nginx to initialize..."
sleep 5

# Check if Nginx is running by probing the exact PID launched above.
# `kill -0` is a shell builtin that only tests for the process's existence;
# unlike `ps aux | grep nginx` it needs no external `ps` binary and cannot be
# fooled by an unrelated process whose command line contains "nginx".
if kill -0 "$nginx_pid" 2> /dev/null; then
    echo "$(date) - Nginx is running."
else
    # A failed start is only reported; the script keeps going afterwards
    echo "$(date) - ERROR: Nginx failed to start. Logs:"
    cat /tmp/nginx_startup.log
fi

echo "$(date) - Final backend check before starting Streamlit..."
# Verbose probe for the logs; a failure is reported but does not stop the script
curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"

echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
# Exported so the Streamlit process inherits the backend address
export API_BASE_URL="http://127.0.0.1:8000"
streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
    --server.port 8501 \
    --server.address 127.0.0.1 \
    --server.enableCORS=false \
    --server.enableXsrfProtection=false \
    --server.headless true &
# Remember the PID so the wait loop can tell "still booting" from "crashed"
streamlit_pid=$!

# Wait for Streamlit to start: at most 30 probes, 2 seconds apart (about 60s overall)
echo "$(date) - Waiting for Streamlit to start (up to 60s)..."
streamlit_ready=false
for i in {1..30}; do
    if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
        echo "$(date) - Streamlit is UP!"
        streamlit_ready=true
        break
    fi
    # No point in polling further once the Streamlit process has exited
    if ! kill -0 "$streamlit_pid" 2> /dev/null; then
        echo "$(date) - ERROR: Streamlit process exited during startup."
        break
    fi
    echo "$(date) - Waiting for Streamlit... ($i/30)"
    sleep 2
done

# Startup stays best-effort (the script continues either way), but a UI that
# never answered is reported instead of being passed over silently.
if [[ "$streamlit_ready" != true ]]; then
    echo "$(date) - WARNING: Streamlit did not become reachable on 127.0.0.1:8501; continuing anyway."
fi

# Keep the script in the foreground by following the service logs: the Nginx
# startup log and the FastAPI log written by the background jobs above.
followed_logs=(/tmp/nginx_startup.log /tmp/fastapi.log)
printf '%s\n' "$(date) - Process started. Tailing Nginx logs for debug..."
tail -f "${followed_logs[@]}"