DaCrow13 committed on
Commit
be7f335
·
1 Parent(s): f0a830d

Add test endpoints and enable alerts on Space

Browse files
hopcroft_skill_classification_tool_competition/main.py CHANGED
@@ -13,6 +13,8 @@ Endpoints:
13
  GET /health - Health check
14
  POST /predict - Single issue classification
15
  POST /predict/batch - Batch classification
 
 
16
  """
17
 
18
  from contextlib import asynccontextmanager
@@ -20,6 +22,7 @@ from datetime import datetime
20
  import json
21
  import os
22
  import time
 
23
  from typing import List
24
 
25
  from fastapi import FastAPI, HTTPException, Request, Response, status
@@ -484,6 +487,22 @@ async def list_predictions(skip: int = 0, limit: int = 10) -> List[PredictionRec
484
  )
485
 
486
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  @app.exception_handler(ValidationError)
488
  async def validation_exception_handler(request, exc: ValidationError):
489
  """Handle Pydantic validation errors."""
 
13
  GET /health - Health check
14
  POST /predict - Single issue classification
15
  POST /predict/batch - Batch classification
16
+ GET /test/500 - Trigger 500 error (Testing only)
17
+ GET /test/slow - Trigger slow response (Testing only)
18
  """
19
 
20
  from contextlib import asynccontextmanager
 
22
  import json
23
  import os
24
  import time
25
+ import asyncio
26
  from typing import List
27
 
28
  from fastapi import FastAPI, HTTPException, Request, Response, status
 
487
  )
488
 
489
 
490
@app.get("/test/500", include_in_schema=False)
async def test_500_error():
    """Always fail with HTTP 500 so error-rate alerting can be exercised.

    The route is registered with ``include_in_schema=False``.

    Raises:
        HTTPException: On every call, with status 500 and a fixed detail
            message.
    """
    failure = HTTPException(
        detail="Test 500 Error Triggered",
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
    )
    raise failure
497
+
498
+
499
@app.get("/test/slow", include_in_schema=False)
async def test_slow_request():
    """Respond only after a deliberate delay so latency alerting can be exercised.

    Returns:
        A small JSON-serialisable dict confirming that the delayed response
        completed.
    """
    # Three seconds sits above the 2-second alert threshold noted by the
    # original author of this endpoint.
    delay_seconds = 3
    await asyncio.sleep(delay_seconds)
    payload = {"message": "Slow response completed"}
    return payload
504
+
505
+
506
  @app.exception_handler(ValidationError)
507
  async def validation_exception_handler(request, exc: ValidationError):
508
  """Handle Pydantic validation errors."""
scripts/start_space.sh CHANGED
@@ -53,12 +53,18 @@ for i in {1..30}; do
53
  done
54
 
55
  echo "$(date) - Configuring and starting Prometheus..."
 
 
 
56
  # Create a config for the space
57
  cat <<EOF > /tmp/prometheus.yml
58
  global:
59
  scrape_interval: 15s
60
  evaluation_interval: 15s
61
 
 
 
 
62
  scrape_configs:
63
  - job_name: 'hopcroft-api'
64
  metrics_path: '/metrics'
 
53
  done
54
 
55
  echo "$(date) - Configuring and starting Prometheus..."
56
+ # Copy alert rules
57
+ cp monitoring/prometheus/alert_rules.yml /tmp/alert_rules.yml
58
+
59
  # Create a config for the space
60
  cat <<EOF > /tmp/prometheus.yml
61
  global:
62
  scrape_interval: 15s
63
  evaluation_interval: 15s
64
 
65
+ rule_files:
66
+ - "/tmp/alert_rules.yml"
67
+
68
  scrape_configs:
69
  - job_name: 'hopcroft-api'
70
  metrics_path: '/metrics'
scripts/trigger_alerts.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import time
3
+ import sys
4
+
5
+ SPACE_URL = "https://dacrow13-hopcroft-skill-classification.hf.space"
6
+
7
def trigger_500_errors(count=20, *, timeout=10):
    """Send ``count`` GET requests to the ``/test/500`` endpoint.

    Each request is followed by a 0.5 second pause. Network failures are
    reported and skipped so that one bad request does not abort the run.

    Args:
        count: Number of requests to send.
        timeout: Seconds to wait for each response before giving up, so an
            unresponsive server cannot hang the script indefinitely.
    """
    print(f"Triggering {count} 500 errors...")
    for attempt in range(1, count + 1):
        try:
            response = requests.get(f"{SPACE_URL}/test/500", timeout=timeout)
        except requests.RequestException as e:
            # Only transport-level failures are expected here; anything else
            # is a programming error and should surface.
            print(f"Error sending request: {e}")
        else:
            if response.status_code == 500:
                print(f"Request {attempt}/{count} sent", end="\r")
            else:
                # A non-500 answer (e.g. 404 when the endpoint is not
                # deployed) will not count towards the error-rate alert.
                print(
                    f"Request {attempt}/{count} returned unexpected status "
                    f"{response.status_code}"
                )
        time.sleep(0.5)
    print("\nDone triggering 500 errors.")
17
+
18
def trigger_slow_requests(count=10, *, timeout=30):
    """Send ``count`` GET requests to the ``/test/slow`` endpoint.

    Requests are issued back to back; no pause is added between them because
    the original author notes that each request already takes about 3 seconds.
    Network failures are reported and skipped.

    Args:
        count: Number of requests to send.
        timeout: Seconds to wait for each response before giving up. It must
            stay comfortably above the endpoint's deliberate delay, otherwise
            every request would be cut off before the server answers.
    """
    print(f"Triggering {count} slow requests...")
    for attempt in range(1, count + 1):
        try:
            requests.get(f"{SPACE_URL}/test/slow", timeout=timeout)
        except requests.RequestException as e:
            # Only transport-level failures (including timeouts) are expected.
            print(f"Error sending request: {e}")
        else:
            print(f"Request {attempt}/{count} sent", end="\r")
    print("\nDone triggering slow requests.")
28
+
29
if __name__ == "__main__":
    # Script entry point: fire the error burst first, then the slow requests.
    print("Starting alert trigger script...")
    trigger_500_errors()
    trigger_slow_requests()
    # Derive the link from SPACE_URL so it always points at the same
    # deployment the requests above were sent to.
    print(f"Check Prometheus Alerts tab: {SPACE_URL}/prometheus/alerts")