DaCrow13 committed on
Commit
b46e1e7
·
2 Parent(s): 9e57cd4 a3e03f8

Merge pull request #29 from se4ai2526-uniba/milestone-5-sync-6

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +3 -0
  3. README.md +1 -0
  4. nginx.conf +92 -0
  5. requirements.txt +0 -1
  6. scripts/start_space.sh +65 -11
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ docs/img/*.png filter=lfs diff=lfs merge=lfs -text
Dockerfile CHANGED
@@ -11,6 +11,9 @@ ENV PYTHONDONTWRITEBYTECODE=1 \
11
  RUN apt-get update && apt-get install -y \
12
  git \
13
  dos2unix \
 
 
 
14
  && rm -rf /var/lib/apt/lists/*
15
 
16
  # Create a non-root user
 
11
  RUN apt-get update && apt-get install -y \
12
  git \
13
  dos2unix \
14
+ nginx \
15
+ procps \
16
+ curl \
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Create a non-root user
README.md CHANGED
@@ -5,6 +5,7 @@ colorFrom: blue
5
  colorTo: green
6
  sdk: docker
7
  app_port: 7860
 
8
  ---
9
 
10
  # Hopcroft_Skill-Classification-Tool-Competition
 
5
  colorTo: green
6
  sdk: docker
7
  app_port: 7860
8
+ api_docs_url: /docs
9
  ---
10
 
11
  # Hopcroft_Skill-Classification-Tool-Competition
nginx.conf ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ worker_processes 1;
2
+ pid /tmp/nginx.pid;
3
+ error_log stderr info; # Log to stderr to see errors in HF Space Logs
4
+
5
+ events {
6
+ worker_connections 1024;
7
+ }
8
+
9
+ http {
10
+ include /etc/nginx/mime.types;
11
+ default_type application/octet-stream;
12
+
13
+ # HF Space runs as non-root, use /tmp for everything
14
+ access_log /dev/stdout;
15
+ client_body_temp_path /tmp/client_temp;
16
+ proxy_temp_path /tmp/proxy_temp;
17
+ fastcgi_temp_path /tmp/fastcgi_temp;
18
+ uwsgi_temp_path /tmp/uwsgi_temp;
19
+ scgi_temp_path /tmp/scgi_temp;
20
+
21
+ sendfile on;
22
+ keepalive_timeout 65;
23
+
24
+ upstream streamlit {
25
+ server 127.0.0.1:8501;
26
+ }
27
+
28
+ upstream fastapi {
29
+ server 127.0.0.1:8000;
30
+ }
31
+
32
+ server {
33
+ listen 7860;
34
+ server_name localhost;
35
+
36
+ # Health endpoint for HF readiness check
37
+ location /health {
38
+ proxy_pass http://fastapi/health;
39
+ proxy_set_header Host $host;
40
+ }
41
+
42
+ # FastAPI Documentation
43
+ location /docs {
44
+ proxy_pass http://fastapi/docs;
45
+ proxy_set_header Host $host;
46
+ proxy_set_header X-Real-IP $remote_addr;
47
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
48
+ proxy_set_header X-Forwarded-Proto $scheme;
49
+ }
50
+
51
+ location /redoc {
52
+ proxy_pass http://fastapi/redoc;
53
+ proxy_set_header Host $host;
54
+ }
55
+
56
+ location /openapi.json {
57
+ proxy_pass http://fastapi/openapi.json;
58
+ proxy_set_header Host $host;
59
+ }
60
+
61
+ # FastAPI API Endpoints
62
+ location /predict {
63
+ proxy_pass http://fastapi/predict;
64
+ proxy_set_header Host $host;
65
+ }
66
+
67
+ location /predictions {
68
+ proxy_pass http://fastapi/predictions;
69
+ proxy_set_header Host $host;
70
+ }
71
+
72
+ # Streamlit (Catch-all)
73
+ location / {
74
+ proxy_pass http://streamlit;
75
+ proxy_set_header Host $host;
76
+ proxy_set_header X-Real-IP $remote_addr;
77
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
78
+ proxy_set_header X-Forwarded-Proto $scheme;
79
+ proxy_set_header X-Forwarded-Host $host;
80
+
81
+ # WebSocket support for Streamlit
82
+ proxy_http_version 1.1;
83
+ proxy_set_header Upgrade $http_upgrade;
84
+ proxy_set_header Connection "upgrade";
85
+ proxy_read_timeout 86400;
86
+
87
+ # Prevent 502 if Streamlit is slow
88
+ proxy_connect_timeout 60s;
89
+ proxy_send_timeout 60s;
90
+ }
91
+ }
92
+ }
requirements.txt CHANGED
@@ -49,7 +49,6 @@ pytest-xdist>=3.0.0
49
 
50
  # Load testing
51
  locust>=2.20.0
52
-
53
  # Data validation and quality
54
  great_expectations>=0.18.0
55
  deepchecks>=0.18.0
 
49
 
50
  # Load testing
51
  locust>=2.20.0
 
52
  # Data validation and quality
53
  great_expectations>=0.18.0
54
  deepchecks>=0.18.0
scripts/start_space.sh CHANGED
@@ -16,28 +16,82 @@ USER=${DAGSHUB_USERNAME:-$MLFLOW_TRACKING_USERNAME}
16
  PASS=${DAGSHUB_TOKEN:-$MLFLOW_TRACKING_PASSWORD}
17
 
18
  if [ -n "$USER" ] && [ -n "$PASS" ]; then
19
- echo "Configuring DVC authentication for DagsHub..."
20
  # Configure local config (not committed)
21
  dvc remote modify origin --local auth basic
22
  dvc remote modify origin --local user "$USER"
23
  dvc remote modify origin --local password "$PASS"
24
  else
25
- echo "WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
26
  fi
27
 
28
- echo "Pulling models from DVC..."
29
  # Pull only the necessary files for inference
30
  dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
31
  models/tfidf_vectorizer.pkl.dvc \
32
- models/label_names.pkl.dvc
33
 
34
- echo "Starting FastAPI application in background..."
35
- uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 &
 
 
 
 
 
 
 
36
 
37
  # Wait for API to start
38
- echo "Waiting for API to start..."
39
- sleep 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- echo "Starting Streamlit application..."
42
- export API_BASE_URL="http://localhost:8000"
43
- streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py --server.port 7860 --server.address 0.0.0.0
 
16
  PASS=${DAGSHUB_TOKEN:-$MLFLOW_TRACKING_PASSWORD}
17
 
18
  if [ -n "$USER" ] && [ -n "$PASS" ]; then
19
+ echo "$(date) - Configuring DVC authentication for DagsHub..."
20
  # Configure local config (not committed)
21
  dvc remote modify origin --local auth basic
22
  dvc remote modify origin --local user "$USER"
23
  dvc remote modify origin --local password "$PASS"
24
  else
25
+ echo "$(date) - WARNING: No DagsHub credentials found. DVC pull might fail if the remote is private."
26
  fi
27
 
28
+ echo "$(date) - Pulling models from DVC..."
29
  # Pull only the necessary files for inference
30
  dvc pull models/random_forest_tfidf_gridsearch.pkl.dvc \
31
  models/tfidf_vectorizer.pkl.dvc \
32
+ models/label_names.pkl.dvc || echo "DVC pull failed, but continuing..."
33
 
34
+ # Create Nginx temp directories
35
+ mkdir -p /tmp/client_temp /tmp/proxy_temp /tmp/fastcgi_temp /tmp/uwsgi_temp /tmp/scgi_temp
36
+
37
+ echo "$(date) - Checking models existence..."
38
+ ls -la models/
39
+
40
+ echo "$(date) - Starting FastAPI application in background..."
41
+ # Using 0.0.0.0 to be safe
42
+ uvicorn hopcroft_skill_classification_tool_competition.main:app --host 0.0.0.0 --port 8000 >> /tmp/fastapi.log 2>&1 &
43
 
44
  # Wait for API to start
45
+ echo "$(date) - Waiting for API to start (30s)..."
46
+ for i in {1..30}; do
47
+ if curl -s http://127.0.0.1:8000/health > /dev/null; then
48
+ echo "$(date) - API is UP!"
49
+ break
50
+ fi
51
+ echo "$(date) - Waiting... ($i/30)"
52
+ sleep 2
53
+ done
54
+
55
+ echo "$(date) - Starting Nginx reverse proxy..."
56
+ if ! command -v nginx &> /dev/null; then
57
+ echo "$(date) - ERROR: nginx not found in PATH"
58
+ exit 1
59
+ fi
60
+ nginx -c /app/nginx.conf -g "daemon off;" >> /tmp/nginx_startup.log 2>&1 &
61
+
62
+ echo "$(date) - Waiting for Nginx to initialize..."
63
+ sleep 5
64
+
65
+ # Check if Nginx is running
66
+ if ps aux | grep -v grep | grep -q "nginx"; then
67
+ echo "$(date) - Nginx is running."
68
+ else
69
+ echo "$(date) - ERROR: Nginx failed to start. Logs:"
70
+ cat /tmp/nginx_startup.log
71
+ fi
72
+
73
+ echo "$(date) - Final backend check before starting Streamlit..."
74
+ curl -v http://127.0.0.1:8000/health || echo "FastAPI health check failed!"
75
+
76
+ echo "$(date) - Starting Streamlit application on 127.0.0.1:8501..."
77
+ export API_BASE_URL="http://127.0.0.1:8000"
78
+ streamlit run hopcroft_skill_classification_tool_competition/streamlit_app.py \
79
+ --server.port 8501 \
80
+ --server.address 127.0.0.1 \
81
+ --server.enableCORS=false \
82
+ --server.enableXsrfProtection=false \
83
+ --server.headless true &
84
+
85
+ # Wait for Streamlit to start
86
+ echo "$(date) - Waiting for Streamlit to start (30s)..."
87
+ for i in {1..30}; do
88
+ if curl -s http://127.0.0.1:8501/healthz > /dev/null; then
89
+ echo "$(date) - Streamlit is UP!"
90
+ break
91
+ fi
92
+ echo "$(date) - Waiting for Streamlit... ($i/30)"
93
+ sleep 2
94
+ done
95
 
96
+ echo "$(date) - Process started. Tailing Nginx logs for debug..."
97
+ tail -f /tmp/nginx_startup.log /tmp/fastapi.log