a9 commited on
Commit
a34cccb
·
verified ·
1 Parent(s): 6fb88f3

Upload 24 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM node:18 as frontend
2
+
3
+ WORKDIR /app
4
+ COPY frontend ./frontend
5
+ WORKDIR /app/frontend
6
+ RUN npm install && npm run build
7
+
8
+ FROM python:3.11
9
+
10
+ WORKDIR /app
11
+ COPY . .
12
+
13
+ # copy frontend build
14
+ COPY --from=frontend /app/frontend/dist ./static
15
+
16
+ RUN pip install fastapi uvicorn httpx
17
+
18
+ CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"]
api/routes/metrics.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.services.storage import storage
3
+
4
+ router = APIRouter()
5
+
6
+ @router.get("/")
7
+ def get_metrics(repoId: str, range: str = "hour"):
8
+ data = storage.get_aggregated_metrics(repoId, range)
9
+
10
+ return {
11
+ "timestamps": [m["timestamp"] for m in data],
12
+ "cpu": {
13
+ "avg": [m["cpu_avg"] for m in data],
14
+ "max": [m["cpu_max"] for m in data],
15
+ "min": [m["cpu_min"] for m in data],
16
+ },
17
+ "memory": {
18
+ "avg": [m["mem_avg"] for m in data],
19
+ "max": [m["mem_max"] for m in data],
20
+ "min": [m["mem_min"] for m in data],
21
+ },
22
+ }
api/routes/repos.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException
2
+ from app.models.schema import Repo
3
+ from app.services.storage import storage
4
+ from app.services.connection_registry import registry
5
+
6
+ router = APIRouter()
7
+
8
+ @router.post("/", response_model=Repo)
9
+ def add_repo(data: dict):
10
+ repo = Repo(namespace=data["namespace"], repo=data["repo"])
11
+ storage.add_repo(repo)
12
+ registry.register(repo)
13
+ return repo
14
+
15
+ @router.get("/")
16
+ def list_repos():
17
+ return storage.list_repos()
18
+
19
+ @router.delete("/{repo_id}")
20
+ def delete_repo(repo_id: str):
21
+ registry.unregister(repo_id)
22
+ storage.remove_repo(repo_id)
23
+ return {"status": "deleted"}
api/routes/status.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter
2
+ from app.services.storage import InMemoryStorage
3
+
4
+ router = APIRouter()
5
+ storage = InMemoryStorage()
6
+
7
+
8
+ @router.get("/")
9
+ def get_status(repoId: str):
10
+ return storage.get_status(repoId)
frontend/package.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "spaceprobe-dashboard",
3
+ "version": "1.0.0",
4
+ "private": true,
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build",
8
+ "preview": "vite preview"
9
+ },
10
+ "dependencies": {
11
+ "@tanstack/react-query": "^5.0.0",
12
+ "axios": "^1.6.0",
13
+ "lucide-react": "^0.344.0",
14
+ "react": "^18.2.0",
15
+ "react-dom": "^18.2.0",
16
+ "recharts": "^2.8.0",
17
+ "zustand": "^4.5.0"
18
+ },
19
+ "devDependencies": {
20
+ "@types/react": "^18.2.0",
21
+ "@vitejs/plugin-react": "^4.0.0",
22
+ "tailwindcss": "^3.4.0",
23
+ "typescript": "^5.0.0",
24
+ "vite": "^5.0.0"
25
+ }
26
+ }
frontend/src/App.tsx ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import Dashboard from "./pages/Dashboard";
2
+
3
+ export default function App() {
4
+ return <Dashboard />;
5
+ }
frontend/src/api/client.ts ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import axios from "axios";
2
+
3
+ const api = axios.create({
4
+ baseURL: "/api",
5
+ });
6
+
7
+ export const getRepos = () => api.get("/repos").then(r => r.data);
8
+
9
+ export const addRepo = (data: { namespace: string; repo: string }) =>
10
+ api.post("/repos", data);
11
+
12
+ export const deleteRepo = (id: string) =>
13
+ api.delete(`/repos/${id}`);
14
+
15
+ export const getMetrics = (repoId: string, range: string) =>
16
+ api.get("/metrics", { params: { repoId, range } }).then(r => r.data);
17
+
18
+ export const getStatus = (repoId: string) =>
19
+ api.get("/status", { params: { repoId } }).then(r => r.data);
frontend/src/components/AddRepoModal.tsx ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState } from "react";
2
+ import { addRepo } from "../api/client";
3
+
4
+ export default function AddRepoModal({ onClose }) {
5
+ const [namespace, setNamespace] = useState("");
6
+ const [repo, setRepo] = useState("");
7
+
8
+ const submit = async () => {
9
+ await addRepo({ namespace, repo });
10
+ onClose();
11
+ };
12
+
13
+ return (
14
+ <div className="fixed inset-0 bg-black/60 flex justify-center items-center">
15
+ <div className="bg-zinc-800 p-6 rounded-xl">
16
+ <input
17
+ placeholder="namespace"
18
+ onChange={(e) => setNamespace(e.target.value)}
19
+ />
20
+ <input
21
+ placeholder="repo"
22
+ onChange={(e) => setRepo(e.target.value)}
23
+ />
24
+ <button onClick={submit}>Add</button>
25
+ </div>
26
+ </div>
27
+ );
28
+ }
frontend/src/components/ChartPanel.tsx ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import {
2
+ LineChart,
3
+ Line,
4
+ XAxis,
5
+ Tooltip,
6
+ ResponsiveContainer,
7
+ } from "recharts";
8
+
9
+ export default function ChartPanel({ data, metric }) {
10
+ const formatted = data.timestamps.map((t, i) => ({
11
+ time: new Date(t).toLocaleTimeString(),
12
+ avg: data[metric].avg[i],
13
+ max: data[metric].max[i],
14
+ min: data[metric].min[i],
15
+ }));
16
+
17
+ return (
18
+ <div className="h-64">
19
+ <ResponsiveContainer>
20
+ <LineChart data={formatted}>
21
+ <XAxis dataKey="time" hide />
22
+ <Tooltip />
23
+ <Line dataKey="avg" stroke="#00ffcc" />
24
+ <Line dataKey="max" stroke="#ff4d4f" />
25
+ <Line dataKey="min" stroke="#8884d8" />
26
+ </LineChart>
27
+ </ResponsiveContainer>
28
+ </div>
29
+ );
30
+ }
frontend/src/components/RepoCard.tsx ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useQuery } from "@tanstack/react-query";
2
+ import { getMetrics, getStatus, deleteRepo } from "../api/client";
3
+ import ChartPanel from "./ChartPanel";
4
+ import StatusBadge from "./StatusBadge";
5
+ import { useAppStore } from "../store/useAppStore";
6
+ import { useAutoRefresh } from "../hooks/useAutoRefresh";
7
+
8
+ export default function RepoCard({ repo }) {
9
+ const { selectedRange, autoRefresh } = useAppStore();
10
+
11
+ const metricsQuery = useQuery({
12
+ queryKey: ["metrics", repo.id, selectedRange],
13
+ queryFn: () => getMetrics(repo.id, selectedRange),
14
+ });
15
+
16
+ const statusQuery = useQuery({
17
+ queryKey: ["status", repo.id],
18
+ queryFn: () => getStatus(repo.id),
19
+ });
20
+
21
+ useAutoRefresh(metricsQuery.refetch, autoRefresh);
22
+ useAutoRefresh(statusQuery.refetch, autoRefresh);
23
+
24
+ return (
25
+ <div className="bg-zinc-900 p-4 rounded-xl shadow-lg">
26
+ <div className="flex justify-between">
27
+ <h2>{repo.namespace}/{repo.repo}</h2>
28
+ <button onClick={() => deleteRepo(repo.id)}>❌</button>
29
+ </div>
30
+
31
+ {statusQuery.data && (
32
+ <StatusBadge
33
+ state={statusQuery.data.state}
34
+ stage={statusQuery.data.stage}
35
+ />
36
+ )}
37
+
38
+ {metricsQuery.data ? (
39
+ <>
40
+ <ChartPanel data={metricsQuery.data} metric="cpu" />
41
+ <ChartPanel data={metricsQuery.data} metric="memory" />
42
+ </>
43
+ ) : (
44
+ <p>Loading...</p>
45
+ )}
46
+ </div>
47
+ );
48
+ }
frontend/src/components/StatusBadge.tsx ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ export default function StatusBadge({ state, stage }) {
2
+ const color =
3
+ state === "CONNECTED"
4
+ ? "bg-green-500"
5
+ : state === "ERROR"
6
+ ? "bg-red-500"
7
+ : "bg-gray-500";
8
+
9
+ return (
10
+ <div className={`px-3 py-1 rounded ${color}`}>
11
+ {stage}
12
+ </div>
13
+ );
14
+ }
frontend/src/hooks/useAutoRefresh.ts ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useEffect } from "react";
2
+
3
+ export const useAutoRefresh = (refetch: () => void, enabled: boolean) => {
4
+ useEffect(() => {
5
+ if (!enabled) return;
6
+
7
+ const id = setInterval(refetch, 5000);
8
+ return () => clearInterval(id);
9
+ }, [enabled]);
10
+ };
frontend/src/main.tsx ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import React from "react";
2
+ import ReactDOM from "react-dom/client";
3
+ import App from "./App";
4
+ import "./index.css";
5
+
6
+ ReactDOM.createRoot(document.getElementById("root")!).render(<App />);
frontend/src/pages/Dashboard.tsx ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useQuery } from "@tanstack/react-query";
2
+ import { getRepos } from "../api/client";
3
+ import RepoCard from "../components/RepoCard";
4
+ import AddRepoModal from "../components/AddRepoModal";
5
+ import { useState } from "react";
6
+ import { useAppStore } from "../store/useAppStore";
7
+
8
+ export default function Dashboard() {
9
+ const { data } = useQuery({ queryKey: ["repos"], queryFn: getRepos });
10
+ const [open, setOpen] = useState(false);
11
+ const { setRange, toggleRefresh } = useAppStore();
12
+
13
+ return (
14
+ <div className="p-6 bg-black min-h-screen text-white">
15
+ <div className="flex justify-between mb-6">
16
+ <button onClick={() => setOpen(true)}>+ Add Repo</button>
17
+
18
+ <select onChange={(e) => setRange(e.target.value as any)}>
19
+ <option value="hour">Hour</option>
20
+ <option value="day">Day</option>
21
+ <option value="week">Week</option>
22
+ <option value="month">Month</option>
23
+ </select>
24
+
25
+ <button onClick={toggleRefresh}>Toggle Refresh</button>
26
+ </div>
27
+
28
+ <div className="grid grid-cols-3 gap-4">
29
+ {data?.map((repo) => (
30
+ <RepoCard key={repo.id} repo={repo} />
31
+ ))}
32
+ </div>
33
+
34
+ {open && <AddRepoModal onClose={() => setOpen(false)} />}
35
+ </div>
36
+ );
37
+ }
frontend/src/store/useAppStore.ts ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { create } from "zustand";
2
+
3
+ type Range = "hour" | "day" | "week" | "month";
4
+
5
+ interface State {
6
+ selectedRange: Range;
7
+ autoRefresh: boolean;
8
+ setRange: (r: Range) => void;
9
+ toggleRefresh: () => void;
10
+ }
11
+
12
+ export const useAppStore = create<State>((set) => ({
13
+ selectedRange: "hour",
14
+ autoRefresh: true,
15
+ setRange: (r) => set({ selectedRange: r }),
16
+ toggleRefresh: () =>
17
+ set((s) => ({ autoRefresh: !s.autoRefresh })),
18
+ }));
frontend/tailwind.config.js ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ export default {
2
+ content: ["./index.html", "./src/**/*.{ts,tsx}"],
3
+ theme: {
4
+ extend: {},
5
+ },
6
+ plugins: [],
7
+ };
frontend/vite.config.ts ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ export default {
2
+ server: {
3
+ proxy: {
4
+ "/api": "http://localhost:8000",
5
+ },
6
+ },
7
+ };
main.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from app.api.routes import repos, metrics, status
3
+ from app.services.storage import storage
4
+ import asyncio
5
+
6
+ app = FastAPI(title="SpaceProbe API")
7
+
8
+ app.include_router(repos.router, prefix="/api/repos")
9
+ app.include_router(metrics.router, prefix="/api/metrics")
10
+ app.include_router(status.router, prefix="/api/status")
11
+
12
+
13
+ from fastapi.staticfiles import StaticFiles
14
+
15
+ app.mount("/", StaticFiles(directory="static", html=True), name="static")
16
+
17
+ @app.on_event("startup")
18
+ async def start_aggregator_service():
19
+ async def loop():
20
+ while True:
21
+ for repo_id in storage.repos.keys():
22
+ storage.aggregate(repo_id)
23
+ await asyncio.sleep(5)
24
+
25
+ asyncio.create_task(loop())
models/schema.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, Field
2
+ from datetime import datetime
3
+ from typing import Literal, List, Dict, Optional
4
+ import uuid
5
+
6
+ class Repo(BaseModel):
7
+ id: str = Field(default_factory=lambda: str(uuid.uuid4()))
8
+ namespace: str
9
+ repo: str
10
+
11
+ class MetricPoint(BaseModel):
12
+ timestamp: datetime
13
+ cpu_usage_pct: float
14
+ memory_used_bytes: int
15
+ memory_total_bytes: int
16
+
17
+ class RepoStatus(BaseModel):
18
+ state: Literal["CONNECTED", "DISCONNECTED", "ERROR"]
19
+ stage: str
20
+ last_updated: datetime
services/connection_manager.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from datetime import datetime
4
+ from app.services.stream_client import SSEClient
5
+ from app.services.storage import storage
6
+ from app.models.schema import MetricPoint, RepoStatus
7
+ from app.utils.time import exponential_backoff
8
+
9
+ class RepoConnection:
10
+ def __init__(self, repo_id: str, namespace: str, repo: str):
11
+ self.repo_id = repo_id
12
+ self.namespace = namespace
13
+ self.repo_name = repo
14
+ self.tasks = []
15
+ self.is_running = False
16
+
17
+ async def _listen_events(self):
18
+ attempt = 0
19
+ url = f"https://huggingface.co/api/spaces/{self.namespace}/{self.repo_name}/events"
20
+ while True:
21
+ try:
22
+ async for data in SSEClient(url).connect():
23
+ attempt = 0
24
+ payload = json.loads(data)
25
+ stage = payload.get("compute", {}).get("status", {}).get("stage", "Unknown")
26
+
27
+ storage.set_status(self.repo_id, RepoStatus(
28
+ state="CONNECTED", stage=stage, last_updated=datetime.utcnow()
29
+ ))
30
+
31
+ if stage == "Running" and not self.is_running:
32
+ self.is_running = True
33
+ self.tasks.append(asyncio.create_task(self._listen_metrics()))
34
+ elif stage != "Running":
35
+ self.is_running = False
36
+
37
+ except Exception:
38
+ attempt += 1
39
+ await exponential_backoff(attempt)
40
+
41
+ async def _listen_metrics(self):
42
+ url = f"https://huggingface.co/api/spaces/{self.namespace}/{self.repo_name}/metrics"
43
+ while self.is_running:
44
+ try:
45
+ async for data in SSEClient(url).connect():
46
+ payload = json.loads(data)
47
+ storage.add_metric(self.repo_id, MetricPoint(
48
+ timestamp=datetime.utcnow(),
49
+ cpu_usage_pct=payload.get("cpu_usage_pct", 0),
50
+ memory_used_bytes=payload.get("memory_used_bytes", 0),
51
+ memory_total_bytes=payload.get("memory_total_bytes", 1)
52
+ ))
53
+ except Exception:
54
+ await asyncio.sleep(1)
55
+
56
+ def start(self):
57
+ self.tasks.append(asyncio.create_task(self._listen_events()))
58
+
59
+ def stop(self):
60
+ for t in self.tasks: t.cancel()
services/connection_registry.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict
2
+ from app.services.connection_manager import RepoConnection
3
+
4
+ class ConnectionRegistry:
5
+ def __init__(self):
6
+ self.connections: Dict[str, RepoConnection] = {}
7
+
8
+ def register(self, repo):
9
+ if repo.id in self.connections: return
10
+ conn = RepoConnection(repo.id, repo.namespace, repo.repo)
11
+ self.connections[repo.id] = conn
12
+ conn.start()
13
+
14
+ def unregister(self, repo_id: str):
15
+ if conn := self.connections.pop(repo_id, None):
16
+ conn.stop()
17
+
18
+ registry = ConnectionRegistry()
services/storage.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+ from datetime import datetime, timedelta
3
+ from app.models.schema import Repo, MetricPoint, RepoStatus
4
+
5
+ class InMemoryStorage:
6
+ def __init__(self):
7
+ self.repos = {}
8
+ self.status = {}
9
+ self.metrics = {}
10
+
11
+ def add_repo(self, repo: Repo):
12
+ self.repos[repo.id] = repo
13
+ self.metrics[repo.id] = {
14
+ "raw": [],
15
+ "hour": [],
16
+ "day": [],
17
+ "week": [],
18
+ "month": []
19
+ }
20
+
21
+ def add_metric(self, repo_id: str, metric: MetricPoint):
22
+ if repo_id not in self.metrics:
23
+ return
24
+
25
+ raw = self.metrics[repo_id]["raw"]
26
+ raw.append(metric)
27
+
28
+ # ✅ Keep raw for 1 hour
29
+ cutoff = datetime.utcnow() - timedelta(hours=1)
30
+ self.metrics[repo_id]["raw"] = [
31
+ m for m in raw if m.timestamp > cutoff
32
+ ]
33
+
34
+ def aggregate(self, repo_id: str):
35
+ buckets = self.metrics.get(repo_id)
36
+ if not buckets:
37
+ return
38
+
39
+ raw = buckets["raw"]
40
+
41
+ def compute(points):
42
+ if not points:
43
+ return None
44
+
45
+ cpu_vals = [p.cpu_usage_pct for p in points]
46
+ mem_vals = [
47
+ (p.memory_used_bytes / p.memory_total_bytes) * 100
48
+ for p in points
49
+ ]
50
+
51
+ return {
52
+ "timestamp": datetime.utcnow(),
53
+ "cpu_avg": sum(cpu_vals) / len(cpu_vals),
54
+ "cpu_max": max(cpu_vals),
55
+ "cpu_min": min(cpu_vals),
56
+ "mem_avg": sum(mem_vals) / len(mem_vals),
57
+ "mem_max": max(mem_vals),
58
+ "mem_min": min(mem_vals),
59
+ }
60
+
61
+ now = datetime.utcnow()
62
+
63
+ intervals = {
64
+ "hour": timedelta(seconds=30),
65
+ "day": timedelta(minutes=30),
66
+ "week": timedelta(hours=12),
67
+ "month": timedelta(hours=12),
68
+ }
69
+
70
+ for key, delta in intervals.items():
71
+ series = buckets[key]
72
+
73
+ if not series or now - series[-1]["timestamp"] >= delta:
74
+ agg = compute(raw)
75
+ if agg:
76
+ series.append(agg)
77
+
78
+ # prune (keep reasonable size)
79
+ if len(series) > 1000:
80
+ series.pop(0)
81
+
82
+ # Global instance
83
+ storage = InMemoryStorage()
services/stream_client.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import httpx
2
+ from typing import AsyncGenerator
3
+
4
+ class SSEClient:
5
+ def __init__(self, url: str):
6
+ self.url = url
7
+
8
+ async def connect(self) -> AsyncGenerator[str, None]:
9
+ async with httpx.AsyncClient(timeout=None) as client:
10
+ async with client.stream("GET", self.url) as response:
11
+ async for line in response.aiter_lines():
12
+ if line.startswith("data:"):
13
+ yield line[5:].strip()
utils/time.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import asyncio
2
+
3
+ async def exponential_backoff(attempt: int, base: float = 1.0, max_delay: float = 30.0):
4
+ delay = min(base * (2 ** attempt), max_delay)
5
+ await asyncio.sleep(delay)