Spaces:

build-small-hackathon
/

tiny-army

Running

polats committed 6 days ago

Commit

bd4a81a

1 Parent(s): 2f7e532

Add /persona/selftest to measure pure generation tok/s inside the Space

Files changed (1) hide show

app.py CHANGED Viewed

@@ -228,6 +228,23 @@ def persona_status():
     return llm.status()
 # Persona generation, woid-protocol-compatible so web/personaStream.js consumes it
 # unchanged: emits `model` → `delta`* → `persona-done` → `done` (or `error`). The
 # blocking llama.cpp generator runs in a worker thread bridged to this async SSE

     return llm.status()
+@fastapi_app.get("/persona/selftest")
+def persona_selftest():
+    """Measure pure generation speed inside the Space (no proxy, no lock race)."""
+    import time
+    t0 = time.time()
+    n = 0
+    try:
+        for _ in llm.stream_chat("You are terse.", "Count from one to twenty.",
+                                 max_tokens=24, temperature=0.1):
+            n += 1
+    except Exception as e:
+        return {"error": str(e), "tokens": n, "seconds": round(time.time() - t0, 2)}
+    s = time.time() - t0
+    return {"tokens": n, "seconds": round(s, 2),
+            "tok_per_sec": round(n / s, 2) if s else None, **llm.status()}
 # Persona generation, woid-protocol-compatible so web/personaStream.js consumes it
 # unchanged: emits `model` → `delta`* → `persona-done` → `done` (or `error`). The
 # blocking llama.cpp generator runs in a worker thread bridged to this async SSE