Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ Pings all Spaces in an organization on a schedule to prevent them from sleeping.
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import json
|
| 8 |
-
import
|
| 9 |
from datetime import datetime, timezone
|
| 10 |
from pathlib import Path
|
| 11 |
|
|
@@ -16,15 +16,17 @@ from apscheduler.schedulers.background import BackgroundScheduler
|
|
| 16 |
from apscheduler.triggers.interval import IntervalTrigger
|
| 17 |
|
| 18 |
# Configuration
|
| 19 |
-
ORG_NAME = os.environ.get("ORG_NAME", "
|
| 20 |
PING_INTERVAL_HOURS = int(os.environ.get("PING_INTERVAL_HOURS", "6"))
|
| 21 |
REQUEST_TIMEOUT = int(os.environ.get("REQUEST_TIMEOUT", "30"))
|
|
|
|
|
|
|
| 22 |
LOG_FILE = Path("run_logs.json")
|
| 23 |
MAX_LOG_ENTRIES = 100 # Keep last N runs
|
| 24 |
|
| 25 |
# Global state
|
| 26 |
scheduler = BackgroundScheduler()
|
| 27 |
-
api = HfApi()
|
| 28 |
|
| 29 |
|
| 30 |
def load_logs() -> list:
|
|
@@ -47,13 +49,20 @@ def save_logs(logs: list):
|
|
| 47 |
|
| 48 |
def ping_space(space_id: str) -> dict:
|
| 49 |
"""Ping a single Space and return the result."""
|
| 50 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
try:
|
| 52 |
-
response = requests.get(
|
| 53 |
return {
|
| 54 |
"space_id": space_id,
|
| 55 |
"status": "success",
|
| 56 |
"status_code": response.status_code,
|
|
|
|
| 57 |
"error": None
|
| 58 |
}
|
| 59 |
except requests.Timeout:
|
|
@@ -61,19 +70,32 @@ def ping_space(space_id: str) -> dict:
|
|
| 61 |
"space_id": space_id,
|
| 62 |
"status": "timeout",
|
| 63 |
"status_code": None,
|
|
|
|
| 64 |
"error": f"Request timed out after {REQUEST_TIMEOUT}s"
|
| 65 |
}
|
| 66 |
except Exception as e:
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
|
| 74 |
|
| 75 |
def run_ping_job(triggered_by: str = "scheduler") -> dict:
|
| 76 |
-
"""Run the ping job for all Spaces in the org."""
|
| 77 |
start_time = datetime.now(timezone.utc)
|
| 78 |
|
| 79 |
# Get all Spaces in the org
|
|
@@ -96,11 +118,18 @@ def run_ping_job(triggered_by: str = "scheduler") -> dict:
|
|
| 96 |
save_logs(logs)
|
| 97 |
return run_result
|
| 98 |
|
| 99 |
-
# Ping
|
| 100 |
results = []
|
| 101 |
-
for space in spaces
|
| 102 |
-
|
| 103 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
end_time = datetime.now(timezone.utc)
|
| 106 |
duration = (end_time - start_time).total_seconds()
|
|
@@ -211,12 +240,15 @@ def get_logs_display() -> str:
|
|
| 211 |
def get_status() -> str:
|
| 212 |
"""Get current scheduler status."""
|
| 213 |
next_run = scheduler.get_jobs()[0].next_run_time if scheduler.get_jobs() else None
|
|
|
|
| 214 |
|
| 215 |
return f"""## Space Keeper Status
|
| 216 |
|
| 217 |
**Organization:** `{ORG_NAME}`
|
| 218 |
**Ping Interval:** Every {PING_INTERVAL_HOURS} hours
|
|
|
|
| 219 |
**Request Timeout:** {REQUEST_TIMEOUT} seconds
|
|
|
|
| 220 |
**Next Scheduled Run:** {next_run.strftime('%Y-%m-%d %H:%M:%S UTC') if next_run else 'Not scheduled'}
|
| 221 |
|
| 222 |
---
|
|
|
|
| 5 |
|
| 6 |
import os
|
| 7 |
import json
|
| 8 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 9 |
from datetime import datetime, timezone
|
| 10 |
from pathlib import Path
|
| 11 |
|
|
|
|
| 16 |
from apscheduler.triggers.interval import IntervalTrigger
|
| 17 |
|
| 18 |
# Configuration
|
| 19 |
+
ORG_NAME = os.environ.get("ORG_NAME", "MCP-1st-Birthday")
|
| 20 |
PING_INTERVAL_HOURS = int(os.environ.get("PING_INTERVAL_HOURS", "6"))
|
| 21 |
REQUEST_TIMEOUT = int(os.environ.get("REQUEST_TIMEOUT", "30"))
|
| 22 |
+
PARALLEL_REQUESTS = int(os.environ.get("PARALLEL_REQUESTS", "10")) # Max Spaces pinged concurrently (default: 10)
|
| 23 |
+
HF_TOKEN = os.environ.get("HF_TOKEN", None) # Optional: needed for private Spaces
|
| 24 |
LOG_FILE = Path("run_logs.json")
|
| 25 |
MAX_LOG_ENTRIES = 100 # Keep last N runs
|
| 26 |
|
| 27 |
# Global state
|
| 28 |
scheduler = BackgroundScheduler()
|
| 29 |
+
api = HfApi(token=HF_TOKEN) if HF_TOKEN else HfApi()
|
| 30 |
|
| 31 |
|
| 32 |
def load_logs() -> list:
|
|
|
|
| 49 |
|
| 50 |
def ping_space(space_id: str) -> dict:
|
| 51 |
"""Ping a single Space and return the result."""
|
| 52 |
+
# Convert space_id (org/name) to the actual app URL
|
| 53 |
+
# e.g., "MCP-1st-Birthday/my-app" -> "mcp-1st-birthday-my-app.hf.space"
|
| 54 |
+
org, name = space_id.split("/")
|
| 55 |
+
app_url = f"https://{org.lower()}-{name.lower()}.hf.space"
|
| 56 |
+
hf_page_url = f"https://huggingface.co/spaces/{space_id}"
|
| 57 |
+
|
| 58 |
+
# Try the app URL first (this actually wakes up the Space)
|
| 59 |
try:
|
| 60 |
+
response = requests.get(app_url, timeout=REQUEST_TIMEOUT)
|
| 61 |
return {
|
| 62 |
"space_id": space_id,
|
| 63 |
"status": "success",
|
| 64 |
"status_code": response.status_code,
|
| 65 |
+
"url_pinged": app_url,
|
| 66 |
"error": None
|
| 67 |
}
|
| 68 |
except requests.Timeout:
|
|
|
|
| 70 |
"space_id": space_id,
|
| 71 |
"status": "timeout",
|
| 72 |
"status_code": None,
|
| 73 |
+
"url_pinged": app_url,
|
| 74 |
"error": f"Request timed out after {REQUEST_TIMEOUT}s"
|
| 75 |
}
|
| 76 |
except Exception as e:
|
| 77 |
+
# Fallback: try the HF page URL
|
| 78 |
+
try:
|
| 79 |
+
response = requests.get(hf_page_url, timeout=REQUEST_TIMEOUT)
|
| 80 |
+
return {
|
| 81 |
+
"space_id": space_id,
|
| 82 |
+
"status": "success",
|
| 83 |
+
"status_code": response.status_code,
|
| 84 |
+
"url_pinged": hf_page_url,
|
| 85 |
+
"error": None
|
| 86 |
+
}
|
| 87 |
+
except Exception as e2:
|
| 88 |
+
return {
|
| 89 |
+
"space_id": space_id,
|
| 90 |
+
"status": "error",
|
| 91 |
+
"status_code": None,
|
| 92 |
+
"url_pinged": app_url,
|
| 93 |
+
"error": str(e)
|
| 94 |
+
}
|
| 95 |
|
| 96 |
|
| 97 |
def run_ping_job(triggered_by: str = "scheduler") -> dict:
|
| 98 |
+
"""Run the ping job for all Spaces in the org using parallel requests."""
|
| 99 |
start_time = datetime.now(timezone.utc)
|
| 100 |
|
| 101 |
# Get all Spaces in the org
|
|
|
|
| 118 |
save_logs(logs)
|
| 119 |
return run_result
|
| 120 |
|
| 121 |
+
# Ping Spaces in parallel (10 at a time by default)
|
| 122 |
results = []
|
| 123 |
+
space_ids = [space.id for space in spaces]
|
| 124 |
+
|
| 125 |
+
with ThreadPoolExecutor(max_workers=PARALLEL_REQUESTS) as executor:
|
| 126 |
+
# Submit all ping tasks
|
| 127 |
+
future_to_space = {executor.submit(ping_space, space_id): space_id for space_id in space_ids}
|
| 128 |
+
|
| 129 |
+
# Collect results as they complete
|
| 130 |
+
for future in as_completed(future_to_space):
|
| 131 |
+
result = future.result()
|
| 132 |
+
results.append(result)
|
| 133 |
|
| 134 |
end_time = datetime.now(timezone.utc)
|
| 135 |
duration = (end_time - start_time).total_seconds()
|
|
|
|
| 240 |
def get_status() -> str:
|
| 241 |
"""Get current scheduler status."""
|
| 242 |
next_run = scheduler.get_jobs()[0].next_run_time if scheduler.get_jobs() else None
|
| 243 |
+
token_status = "✅ Configured" if HF_TOKEN else "❌ Not set (only public Spaces will be listed)"
|
| 244 |
|
| 245 |
return f"""## Space Keeper Status
|
| 246 |
|
| 247 |
**Organization:** `{ORG_NAME}`
|
| 248 |
**Ping Interval:** Every {PING_INTERVAL_HOURS} hours
|
| 249 |
+
**Parallel Requests:** {PARALLEL_REQUESTS} Spaces at once
|
| 250 |
**Request Timeout:** {REQUEST_TIMEOUT} seconds
|
| 251 |
+
**HF Token:** {token_status}
|
| 252 |
**Next Scheduled Run:** {next_run.strftime('%Y-%m-%d %H:%M:%S UTC') if next_run else 'Not scheduled'}
|
| 253 |
|
| 254 |
---
|