antofra10 committed on
Commit
278cc30
·
unverified ·
2 Parent(s): 6c56755 c9732a0

Merge pull request #34 from se4ai2526-uniba/Milestone-6

Browse files

Add GitHub Actions CI pipeline for unit tests and Docker image building.

.github/workflows/ci.yml CHANGED
@@ -7,9 +7,8 @@ on:
7
  branches: [ "main" ]
8
 
9
  jobs:
10
- build-and-test:
11
  runs-on: ubuntu-latest
12
-
13
  steps:
14
  - name: Checkout code
15
  uses: actions/checkout@v3
@@ -26,7 +25,7 @@ jobs:
26
  uses: actions/setup-python@v4
27
  with:
28
  python-version: "3.10"
29
- cache: 'pip' # Enable caching for pip
30
 
31
  - name: Install dependencies
32
  run: |
@@ -35,17 +34,17 @@ jobs:
35
  pip install torch --index-url https://download.pytorch.org/whl/cpu
36
  # Install other dependencies
37
  pip install -r requirements.txt --no-cache-dir
 
38
 
39
  - name: Lint with Ruff
40
  run: |
41
- # Using make lint as defined in Makefile
42
  make lint
43
 
44
  - name: Run Unit Tests
45
  run: |
46
- # Run tests and generate HTML report
47
  pytest tests/unit/ -v -m unit --html=report.html --self-contained-html
48
 
 
49
  - name: Upload Test Report
50
  if: failure()
51
  uses: actions/upload-artifact@v4
@@ -53,6 +52,32 @@ jobs:
53
  name: test-report
54
  path: report.html
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  - name: Configure DVC
57
  run: |
58
  dvc remote modify origin --local auth basic
 
7
  branches: [ "main" ]
8
 
9
  jobs:
10
+ unit-tests:
11
  runs-on: ubuntu-latest
 
12
  steps:
13
  - name: Checkout code
14
  uses: actions/checkout@v3
 
25
  uses: actions/setup-python@v4
26
  with:
27
  python-version: "3.10"
28
+ cache: 'pip'
29
 
30
  - name: Install dependencies
31
  run: |
 
34
  pip install torch --index-url https://download.pytorch.org/whl/cpu
35
  # Install other dependencies
36
  pip install -r requirements.txt --no-cache-dir
37
+ pip install -e .
38
 
39
  - name: Lint with Ruff
40
  run: |
 
41
  make lint
42
 
43
  - name: Run Unit Tests
44
  run: |
 
45
  pytest tests/unit/ -v -m unit --html=report.html --self-contained-html
46
 
47
+ # Preserved contribution from Antonio Fratta
48
  - name: Upload Test Report
49
  if: failure()
50
  uses: actions/upload-artifact@v4
 
52
  name: test-report
53
  path: report.html
54
 
55
+ build-image:
56
+ needs: unit-tests
57
+ runs-on: ubuntu-latest
58
+ steps:
59
+ - name: Checkout code
60
+ uses: actions/checkout@v3
61
+
62
+ - name: Free Disk Space
63
+ run: |
64
+ sudo rm -rf /usr/share/dotnet
65
+ sudo rm -rf /usr/local/lib/android
66
+ sudo rm -rf /opt/ghc
67
+ sudo rm -rf /opt/hostedtoolcache/CodeQL
68
+ sudo docker image prune --all --force
69
+
70
+ - name: Set up Python 3.10
71
+ uses: actions/setup-python@v4
72
+ with:
73
+ python-version: "3.10"
74
+ cache: 'pip'
75
+
76
+ - name: Install DVC
77
+ run: |
78
+ python -m pip install --upgrade pip
79
+ pip install dvc dvc-s3
80
+
81
  - name: Configure DVC
82
  run: |
83
  dvc remote modify origin --local auth basic
Dockerfile CHANGED
@@ -14,6 +14,7 @@ RUN apt-get update && apt-get install -y \
14
  nginx \
15
  procps \
16
  curl \
 
17
  && rm -rf /var/lib/apt/lists/*
18
 
19
  # Create a non-root user
 
14
  nginx \
15
  procps \
16
  curl \
17
+ prometheus \
18
  && rm -rf /var/lib/apt/lists/*
19
 
20
  # Create a non-root user
hopcroft_skill_classification_tool_competition/main.py CHANGED
@@ -22,7 +22,7 @@ import os
22
  import time
23
  from typing import List
24
 
25
- from fastapi import FastAPI, HTTPException, status, Request, Response
26
  from fastapi.responses import JSONResponse, RedirectResponse
27
  import mlflow
28
  from prometheus_client import (
@@ -135,20 +135,14 @@ async def monitor_requests(request: Request, call_next):
135
  try:
136
  response = await call_next(request)
137
  status_code = response.status_code
138
- REQUESTS_TOTAL.labels(
139
- method=method, endpoint=endpoint, http_status=status_code
140
- ).inc()
141
  return response
142
  except Exception as e:
143
- REQUESTS_TOTAL.labels(
144
- method=method, endpoint=endpoint, http_status=500
145
- ).inc()
146
  raise e
147
  finally:
148
  duration = time.time() - start_time
149
- REQUEST_DURATION_SECONDS.labels(method=method, endpoint=endpoint).observe(
150
- duration
151
- )
152
  IN_PROGRESS_REQUESTS.labels(method=method, endpoint=endpoint).dec()
153
 
154
 
 
22
  import time
23
  from typing import List
24
 
25
+ from fastapi import FastAPI, HTTPException, Request, Response, status
26
  from fastapi.responses import JSONResponse, RedirectResponse
27
  import mlflow
28
  from prometheus_client import (
 
135
  try:
136
  response = await call_next(request)
137
  status_code = response.status_code
138
+ REQUESTS_TOTAL.labels(method=method, endpoint=endpoint, http_status=status_code).inc()
 
 
139
  return response
140
  except Exception as e:
141
+ REQUESTS_TOTAL.labels(method=method, endpoint=endpoint, http_status=500).inc()
 
 
142
  raise e
143
  finally:
144
  duration = time.time() - start_time
145
+ REQUEST_DURATION_SECONDS.labels(method=method, endpoint=endpoint).observe(duration)
 
 
146
  IN_PROGRESS_REQUESTS.labels(method=method, endpoint=endpoint).dec()
147
 
148
 
monitoring/README.md CHANGED
@@ -49,6 +49,11 @@ We used Better Stack Uptime to monitor the availability of the production deploy
49
  - https://dacrow13-hopcroft-skill-classification.hf.space/openapi.json
50
  - https://dacrow13-hopcroft-skill-classification.hf.space/docs
51
 
 
 
 
 
 
52
  **Checks and alerts**
53
  - Monitors are configured to run from multiple locations.
54
  - Email notifications are enabled for failures.
 
49
  - https://dacrow13-hopcroft-skill-classification.hf.space/openapi.json
50
  - https://dacrow13-hopcroft-skill-classification.hf.space/docs
51
 
52
+ ## Prometheus on Hugging Face Space
53
+
54
+ Prometheus is also running directly on the Hugging Face Space and is accessible at:
55
+ - https://dacrow13-hopcroft-skill-classification.hf.space/prometheus/
56
+
57
  **Checks and alerts**
58
  - Monitors are configured to run from multiple locations.
59
  - Email notifications are enabled for failures.
monitoring/locust/README.md CHANGED
@@ -28,16 +28,32 @@ Apri il browser e vai a: **http://localhost:8089**
28
 
29
  Nella Web UI, configura i seguenti parametri:
30
 
31
- | Parametro | Descrizione | Valore Consigliato |
32
  |-----------|-------------|-------------------|
33
- | **Host** | URL dell'API da testare | `http://localhost:8080` (Docker) o `http://localhost:8000` (locale) |
34
- | **Number of users** | Numero totale di utenti simulati | 10-100 |
35
- | **Spawn rate** | Utenti da creare al secondo | 1-10 |
36
 
37
  ### 4. Avvia il Test
38
 
39
  Clicca su **"Start swarming"** per avviare il test di carico.
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  ## Task Implementati
42
 
43
  Lo script simula il comportamento di utenti reali con i seguenti task:
@@ -73,21 +89,25 @@ Durante il test, Locust fornisce le seguenti metriche in tempo reale:
73
  ### Esecuzione Headless (senza UI)
74
 
75
  ```bash
76
- locust -f locustfile.py --headless -u 50 -r 5 -t 5m --host http://localhost:8000
 
 
 
 
77
  ```
78
 
79
  | Opzione | Descrizione |
80
  |---------|-------------|
81
  | `--headless` | Esegui senza Web UI |
82
- | `-u 50` | 50 utenti simulati |
83
- | `-r 5` | 5 utenti creati al secondo |
84
- | `-t 5m` | Durata del test: 5 minuti |
85
- | `--host` | URL dell'API |
86
 
87
  ### Esportazione Risultati
88
 
89
  ```bash
90
- locust -f locustfile.py --headless -u 50 -r 5 -t 5m --host http://localhost:8000 --csv=results
91
  ```
92
 
93
  Questo creerà file CSV con i risultati del test.
@@ -99,3 +119,9 @@ monitoring/locust/
99
  ├── locustfile.py # Script principale di load testing
100
  └── README.md # Questa documentazione
101
  ```
 
 
 
 
 
 
 
28
 
29
  Nella Web UI, configura i seguenti parametri:
30
 
31
+ | Parametro | Descrizione | Valore di Default |
32
  |-----------|-------------|-------------------|
33
+ | **Host** | URL dell'API da testare | `https://dacrow13-hopcroft-skill-classification.hf.space` |
34
+ | **Number of users** | Numero totale di utenti simulati | 5-10 (HF Spaces) / 10-100 (locale) |
35
+ | **Spawn rate** | Utenti da creare al secondo | 1-2 |
36
 
37
  ### 4. Avvia il Test
38
 
39
  Clicca su **"Start swarming"** per avviare il test di carico.
40
 
41
+ ## Ambienti di Test
42
+
43
+ Lo script è preconfigurato per testare l'API deployata su **HuggingFace Spaces**:
44
+
45
+ | Ambiente | URL | Note |
46
+ |----------|-----|------|
47
+ | **HuggingFace Spaces** (default) | `https://dacrow13-hopcroft-skill-classification.hf.space` | Produzione |
48
+ | Docker locale | `http://localhost:8080` | Sviluppo con Docker |
49
+ | Uvicorn locale | `http://localhost:8000` | Sviluppo senza Docker |
50
+
51
+ Per testare un ambiente diverso, usa il flag `--host`:
52
+
53
+ ```bash
54
+ locust -f locustfile.py --host http://localhost:8080
55
+ ```
56
+
57
  ## Task Implementati
58
 
59
  Lo script simula il comportamento di utenti reali con i seguenti task:
 
89
  ### Esecuzione Headless (senza UI)
90
 
91
  ```bash
92
+ # Test su HuggingFace Spaces (default)
93
+ locust -f locustfile.py --headless -u 5 -r 1 -t 2m
94
+
95
+ # Test su Docker locale
96
+ locust -f locustfile.py --headless -u 50 -r 5 -t 5m --host http://localhost:8080
97
  ```
98
 
99
  | Opzione | Descrizione |
100
  |---------|-------------|
101
  | `--headless` | Esegui senza Web UI |
102
+ | `-u 5` | 5 utenti simulati |
103
+ | `-r 1` | 1 utente creato al secondo |
104
+ | `-t 2m` | Durata del test: 2 minuti |
105
+ | `--host` | URL dell'API (override) |
106
 
107
  ### Esportazione Risultati
108
 
109
  ```bash
110
+ locust -f locustfile.py --headless -u 5 -r 1 -t 2m --csv=results
111
  ```
112
 
113
  Questo creerà file CSV con i risultati del test.
 
119
  ├── locustfile.py # Script principale di load testing
120
  └── README.md # Questa documentazione
121
  ```
122
+
123
+ ## Note Importanti
124
+
125
+ - **HuggingFace Spaces**: Usa un numero ridotto di utenti (5-10) per non sovraccaricare il servizio gratuito
126
+ - **Latenza**: I test su HF Spaces avranno latenze maggiori rispetto ai test locali
127
+ - **Cold Start**: La prima richiesta potrebbe essere lenta se lo Space è in sleep mode
monitoring/locust/locustfile.py CHANGED
@@ -19,8 +19,8 @@ class SkillClassificationUser(HttpUser):
19
  """
20
 
21
  # Default host for the API (can be overridden via --host flag or Web UI)
22
- # Use http://localhost:8080 for Docker or http://localhost:8000 for local dev
23
- host = "http://localhost:8080"
24
 
25
  # Wait between 1 and 5 seconds between tasks to simulate real user behavior
26
  wait_time = between(1, 5)
 
19
  """
20
 
21
  # Default host for the API (can be overridden via --host flag or Web UI)
22
+ # HuggingFace Spaces URL (or use http://localhost:8080 for Docker, http://localhost:8000 for local dev)
23
+ host = "https://dacrow13-hopcroft-skill-classification.hf.space"
24
 
25
  # Wait between 1 and 5 seconds between tasks to simulate real user behavior
26
  wait_time = between(1, 5)
nginx.conf CHANGED
@@ -29,6 +29,10 @@ http {
29
  server 127.0.0.1:8000;
30
  }
31
 
 
 
 
 
32
  server {
33
  listen 7860;
34
  server_name localhost;
@@ -69,6 +73,15 @@ http {
69
  proxy_set_header Host $host;
70
  }
71
 
 
 
 
 
 
 
 
 
 
72
  # Streamlit (Catch-all)
73
  location / {
74
  proxy_pass http://streamlit;
 
29
  server 127.0.0.1:8000;
30
  }
31
 
32
+ upstream prometheus {
33
+ server 127.0.0.1:9090;
34
+ }
35
+
36
  server {
37
  listen 7860;
38
  server_name localhost;
 
73
  proxy_set_header Host $host;
74
  }
75
 
76
+ # Prometheus UI
77
+ location /prometheus/ {
78
+ proxy_pass http://prometheus/prometheus/;
79
+ proxy_set_header Host $host;
80
+ proxy_set_header X-Real-IP $remote_addr;
81
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
82
+ proxy_set_header X-Forwarded-Proto $scheme;
83
+ }
84
+
85
  # Streamlit (Catch-all)
86
  location / {
87
  proxy_pass http://streamlit;
scripts/start_space.sh CHANGED
@@ -52,6 +52,35 @@ for i in {1..30}; do
52
  sleep 2
53
  done
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  echo "$(date) - Starting Nginx reverse proxy..."
56
  if ! command -v nginx &> /dev/null; then
57
  echo "$(date) - ERROR: nginx not found in PATH"
 
52
  sleep 2
53
  done
54
 
55
+ echo "$(date) - Configuring and starting Prometheus..."
56
+ # Create a config for the space
57
+ cat <<EOF > /tmp/prometheus.yml
58
+ global:
59
+ scrape_interval: 15s
60
+ evaluation_interval: 15s
61
+
62
+ scrape_configs:
63
+ - job_name: 'hopcroft-api'
64
+ metrics_path: '/metrics'
65
+ static_configs:
66
+ - targets: ['127.0.0.1:8000']
67
+ scrape_interval: 10s
68
+
69
+ - job_name: 'prometheus'
70
+ static_configs:
71
+ - targets: ['127.0.0.1:9090']
72
+ EOF
73
+
74
+ # Start Prometheus
75
+ # --web.external-url needs to match the path in Nginx
76
+ prometheus \
77
+ --config.file=/tmp/prometheus.yml \
78
+ --storage.tsdb.path=/tmp/prometheus_data \
79
+ --web.listen-address=0.0.0.0:9090 \
80
+ --web.external-url=/prometheus/ \
81
+ --web.route-prefix=/prometheus/ \
82
+ >> /tmp/prometheus.log 2>&1 &
83
+
84
  echo "$(date) - Starting Nginx reverse proxy..."
85
  if ! command -v nginx &> /dev/null; then
86
  echo "$(date) - ERROR: nginx not found in PATH"