bobber commited on
Commit
3dcef90
ยท
verified ยท
1 Parent(s): 50ba7af

Telemetry collector API

Browse files
Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+ from threading import Lock
6
+
7
+ import gradio as gr
8
+ from huggingface_hub import HfApi
9
+
10
+ DATASET_REPO = "bobber/routangseng-telemetry"
11
+ HF_TOKEN = os.environ.get("HF_TOKEN")
12
+ BUFFER = []
13
+ BUFFER_LOCK = Lock()
14
+ FLUSH_SIZE = 5 # flush every 5 entries
15
+
16
+ api = HfApi(token=HF_TOKEN)
17
+
18
+
19
+ def flush_buffer():
20
+ """Append buffered entries to the dataset."""
21
+ global BUFFER
22
+ with BUFFER_LOCK:
23
+ if not BUFFER:
24
+ return 0
25
+ entries = BUFFER.copy()
26
+ BUFFER = []
27
+
28
+ # Download existing data, append, re-upload
29
+ try:
30
+ path = api.hf_hub_download(DATASET_REPO, "data/telemetry.jsonl", repo_type="dataset")
31
+ existing = open(path).read().strip()
32
+ except Exception:
33
+ existing = ""
34
+
35
+ new_lines = "\n".join(json.dumps(e, ensure_ascii=False) for e in entries)
36
+ content = (existing + "\n" + new_lines).strip() + "\n"
37
+
38
+ api.upload_file(
39
+ path_or_fileobj=content.encode("utf-8"),
40
+ path_in_repo="data/telemetry.jsonl",
41
+ repo_id=DATASET_REPO,
42
+ repo_type="dataset",
43
+ commit_message=f"Add {len(entries)} telemetry entries",
44
+ )
45
+ return len(entries)
46
+
47
+
48
+ def collect(data: str):
49
+ """Receive telemetry JSON from the WebGPU Space."""
50
+ try:
51
+ entry = json.loads(data)
52
+ # Validate required fields
53
+ if "question" not in entry or "answer" not in entry:
54
+ return {"status": "error", "message": "missing question or answer"}
55
+
56
+ entry["received_at"] = datetime.utcnow().isoformat() + "Z"
57
+
58
+ with BUFFER_LOCK:
59
+ BUFFER.append(entry)
60
+ buf_size = len(BUFFER)
61
+
62
+ if buf_size >= FLUSH_SIZE:
63
+ n = flush_buffer()
64
+ return {"status": "ok", "flushed": n}
65
+
66
+ return {"status": "ok", "buffered": buf_size}
67
+ except json.JSONDecodeError:
68
+ return {"status": "error", "message": "invalid JSON"}
69
+
70
+
71
+ def manual_flush():
72
+ """Manually flush buffer to dataset."""
73
+ n = flush_buffer()
74
+ return f"Flushed {n} entries"
75
+
76
+
77
+ def status():
78
+ """Check buffer status."""
79
+ return f"Buffer: {len(BUFFER)} entries"
80
+
81
+
82
+ # Gradio interface (minimal UI + API)
83
+ with gr.Blocks(title="่‚‰็ณ–็”Ÿ Telemetry") as demo:
84
+ gr.Markdown("# ่‚‰็ณ–็”Ÿ Telemetry Collector\nReceives Q&A data from the WebGPU Space.")
85
+
86
+ with gr.Row():
87
+ status_btn = gr.Button("Check Status")
88
+ flush_btn = gr.Button("Flush to Dataset")
89
+ output = gr.Textbox(label="Status")
90
+
91
+ status_btn.click(fn=status, outputs=output)
92
+ flush_btn.click(fn=manual_flush, outputs=output)
93
+
94
+ # API endpoint: POST /api/collect
95
+ demo.queue()
96
+
97
+ if __name__ == "__main__":
98
+ demo.launch()