Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- Dockerfile +7 -0
- README.md +89 -7
- main.py +1138 -0
- requirements.txt +3 -0
Dockerfile
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Minimal runtime image for the agent-learn service (serves on port 7860,
# the Hugging Face Spaces default).
FROM python:3.11-slim
WORKDIR /app
# Install dependencies first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY main.py .
EXPOSE 7860
CMD ["python", "main.py"]
|
README.md
CHANGED
|
@@ -1,11 +1,93 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
-
pinned:
|
| 8 |
-
|
|
|
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: agent-learn — FORGE Learning Layer
|
| 3 |
+
emoji: 🧠
|
| 4 |
+
colorFrom: red
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
+
pinned: true
|
| 8 |
+
license: mit
|
| 9 |
+
short_description: Persistent Q-table, reward scoring, and RLHF store for FORGE
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🧠 agent-learn
|
| 13 |
+
### FORGE Persistent Learning Layer
|
| 14 |
+
|
| 15 |
+
Owns: Q-table (persistent), reward scoring pipeline, RLHF data store, skill candidate review.
|
| 16 |
+
Replaces the critical NEXUS /tmp Q-table that resets on every restart.
|
| 17 |
+
|
| 18 |
+
## What it does
|
| 19 |
+
|
| 20 |
+
1. **Q-table** — agents ask "what's the best action for my current state?" → epsilon-greedy response
|
| 21 |
+
2. **Reward pipeline** — pulls unscored traces from agent-trace, scores them, writes rewards back
|
| 22 |
+
3. **RLHF store** — labeled approve/reject completions for future fine-tuning
|
| 23 |
+
4. **Skill candidates** — patterns detected by agents that recur enough to become FORGE skills
|
| 24 |
+
|
| 25 |
+
## REST API
|
| 26 |
+
|
| 27 |
+
```
|
| 28 |
+
GET /api/q?agent=&state={} Get all Q-values for agent+state
|
| 29 |
+
POST /api/q/best Best action (epsilon-greedy): {agent, state, actions[]}
|
| 30 |
+
POST /api/q/update Q-value update: {agent, state, action, reward, next_state?}
|
| 31 |
+
POST /api/q/hint Manual nudge: {agent, state, action, nudge}
|
| 32 |
+
GET /api/q/stats Q-table stats
|
| 33 |
+
|
| 34 |
+
POST /api/score Score a single trace event → reward
|
| 35 |
+
POST /api/sync Trigger trace pull + reward scoring now
|
| 36 |
+
|
| 37 |
+
GET /api/rlhf List RLHF entries
|
| 38 |
+
POST /api/rlhf Add labeled completion
|
| 39 |
+
PATCH /api/rlhf/{id} Update label/reward
|
| 40 |
+
|
| 41 |
+
GET /api/candidates List skill candidates (status=pending)
|
| 42 |
+
PATCH /api/candidates/{id} Update candidate (status: promoted|rejected)
|
| 43 |
+
|
| 44 |
+
GET /api/stats Full learning stats
|
| 45 |
+
GET /api/reward-trend Hourly avg reward trend
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
## MCP
|
| 49 |
+
|
| 50 |
+
```
|
| 51 |
+
GET /mcp/sse SSE transport
|
| 52 |
+
POST /mcp JSON-RPC 2.0
|
| 53 |
+
|
| 54 |
+
Tools: learn_q_get, learn_q_best, learn_q_update, learn_q_hint,
|
| 55 |
+
learn_stats, learn_rlhf_add, learn_score_trace,
|
| 56 |
+
learn_candidate_add, learn_sync
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
## Secrets
|
| 60 |
+
|
| 61 |
+
| Key | Description |
|
| 62 |
+
|-----|-------------|
|
| 63 |
+
| `LEARN_KEY` | Optional write auth key |
|
| 64 |
+
| `TRACE_URL` | agent-trace URL (default: https://chris4k-agent-trace.hf.space) |
|
| 65 |
+
| `TRACE_KEY` | agent-trace auth key (if set) |
|
| 66 |
+
| `LEARN_RATE` | Q-learning α (default: 0.1) |
|
| 67 |
+
| `DISCOUNT` | Q-learning γ (default: 0.9) |
|
| 68 |
+
| `EPSILON` | Exploration rate (default: 0.15) |
|
| 69 |
+
| `SYNC_INTERVAL` | Trace pull interval seconds (default: 120) |
|
| 70 |
+
|
| 71 |
+
## NEXUS integration (replacing /tmp Q-table)
|
| 72 |
+
|
| 73 |
+
```python
|
| 74 |
+
LEARN_URL = "https://chris4k-agent-learn.hf.space"
|
| 75 |
+
|
| 76 |
+
# Before routing: ask LEARN for best model
|
| 77 |
+
resp = requests.post(f"{LEARN_URL}/api/q/best", json={
|
| 78 |
+
"agent": "nexus",
|
| 79 |
+
"state": {"agent": "nexus", "event": "model_selection"},
|
| 80 |
+
"actions": ["qwen/qwen3.5-35b-a3b", "claude-haiku-4-5", "hf_api", "local_cpu"]
|
| 81 |
+
}, timeout=3)
|
| 82 |
+
best_model = resp.json()["action"]
|
| 83 |
+
|
| 84 |
+
# After inference: update Q-value
|
| 85 |
+
requests.post(f"{LEARN_URL}/api/q/update", json={
|
| 86 |
+
"agent": "nexus",
|
| 87 |
+
"state": {"agent": "nexus", "event": "model_selection"},
|
| 88 |
+
"action": best_model,
|
| 89 |
+
"reward": 0.8
|
| 90 |
+
}, timeout=3)
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
Built by [Chris4K](https://huggingface.co/Chris4K) — ki-fusion-labs.de
|
main.py
ADDED
|
@@ -0,0 +1,1138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
agent-learn — FORGE Persistent Learning Layer
|
| 3 |
+
Owns: Q-table (persistent), reward scoring pipeline, RLHF data store.
|
| 4 |
+
Reads traces from agent-trace, writes rewards back, updates Q-values.
|
| 5 |
+
Agents query here for best actions; NEXUS replaces its /tmp Q-table with this.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import asyncio, hashlib, json, math, os, sqlite3, time, uuid
|
| 9 |
+
from contextlib import asynccontextmanager
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import uvicorn
|
| 13 |
+
from fastapi import FastAPI, HTTPException, Query, Request
|
| 14 |
+
from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
|
| 15 |
+
|
| 16 |
+
# ---------------------------------------------------------------------------
|
| 17 |
+
# Config
|
| 18 |
+
# ---------------------------------------------------------------------------
|
| 19 |
+
DB_PATH = Path(os.getenv("LEARN_DB", "/tmp/learn.db"))
|
| 20 |
+
PORT = int(os.getenv("PORT", "7860"))
|
| 21 |
+
LEARN_KEY = os.getenv("LEARN_KEY", "")
|
| 22 |
+
TRACE_URL = os.getenv("TRACE_URL", "https://chris4k-agent-trace.hf.space")
|
| 23 |
+
TRACE_KEY = os.getenv("TRACE_KEY", "")
|
| 24 |
+
LEARN_RATE = float(os.getenv("LEARN_RATE", "0.1")) # α
|
| 25 |
+
DISCOUNT = float(os.getenv("DISCOUNT", "0.9")) # γ
|
| 26 |
+
EPSILON = float(os.getenv("EPSILON", "0.15")) # exploration rate
|
| 27 |
+
SYNC_INTERVAL= int(os.getenv("SYNC_INTERVAL", "120")) # seconds between trace pulls
|
| 28 |
+
|
| 29 |
+
# ---------------------------------------------------------------------------
|
| 30 |
+
# Database
|
| 31 |
+
# ---------------------------------------------------------------------------
|
| 32 |
+
def get_db():
    """Open a fresh SQLite connection to the learn DB.

    Every caller gets its own connection (WAL mode allows concurrent
    readers/writers) with dict-like row access via sqlite3.Row.
    """
    db = sqlite3.connect(str(DB_PATH), check_same_thread=False)
    db.row_factory = sqlite3.Row
    # WAL + NORMAL sync: good write concurrency for a small service DB.
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA synchronous=NORMAL"):
        db.execute(pragma)
    return db
|
| 38 |
+
|
| 39 |
+
def init_db():
    """Create all tables and indexes if missing. Idempotent; safe on every startup."""
    conn = get_db()
    conn.executescript("""
        -- Q-table: one row per (agent, state_hash, action)
        CREATE TABLE IF NOT EXISTS qtable (
            id TEXT PRIMARY KEY,
            agent TEXT NOT NULL,
            state_hash TEXT NOT NULL,
            state_json TEXT NOT NULL DEFAULT '{}',
            action TEXT NOT NULL,
            q_value REAL NOT NULL DEFAULT 0.0,
            visits INTEGER NOT NULL DEFAULT 0,
            last_reward REAL,
            updated_at REAL NOT NULL
        );
        CREATE UNIQUE INDEX IF NOT EXISTS idx_qt_key ON qtable(agent, state_hash, action);
        CREATE INDEX IF NOT EXISTS idx_qt_agent ON qtable(agent);
        CREATE INDEX IF NOT EXISTS idx_qt_action ON qtable(action);

        -- Reward log: every scored trace event
        CREATE TABLE IF NOT EXISTS rewards (
            id TEXT PRIMARY KEY,
            trace_id TEXT NOT NULL,
            agent TEXT NOT NULL,
            event_type TEXT NOT NULL,
            raw_score REAL NOT NULL,
            components TEXT NOT NULL DEFAULT '{}',
            ts REAL NOT NULL
        );
        CREATE INDEX IF NOT EXISTS idx_rw_agent ON rewards(agent);
        CREATE INDEX IF NOT EXISTS idx_rw_ts ON rewards(ts DESC);

        -- RLHF store: labeled completions for future fine-tuning
        CREATE TABLE IF NOT EXISTS rlhf (
            id TEXT PRIMARY KEY,
            agent TEXT NOT NULL DEFAULT 'unknown',
            prompt TEXT NOT NULL,
            completion TEXT NOT NULL,
            label TEXT NOT NULL DEFAULT 'unlabeled', -- approved|rejected|unlabeled
            reward REAL,
            source TEXT NOT NULL DEFAULT 'human', -- human|auto|model
            meta TEXT NOT NULL DEFAULT '{}',
            created_at REAL NOT NULL
        );
        CREATE INDEX IF NOT EXISTS idx_rlhf_agent ON rlhf(agent);
        CREATE INDEX IF NOT EXISTS idx_rlhf_label ON rlhf(label);

        -- Cursor: last ts pulled from agent-trace per agent
        CREATE TABLE IF NOT EXISTS sync_cursor (
            agent TEXT PRIMARY KEY,
            last_ts REAL NOT NULL DEFAULT 0.0
        );

        -- Skill candidates surfaced from traces
        CREATE TABLE IF NOT EXISTS skill_candidates (
            id TEXT PRIMARY KEY,
            description TEXT NOT NULL,
            agent TEXT NOT NULL,
            frequency INTEGER NOT NULL DEFAULT 1,
            status TEXT NOT NULL DEFAULT 'pending', -- pending|promoted|rejected
            created_at REAL NOT NULL,
            updated_at REAL NOT NULL
        );
    """)
    conn.commit(); conn.close()
|
| 104 |
+
|
| 105 |
+
# ---------------------------------------------------------------------------
|
| 106 |
+
# Q-table operations
|
| 107 |
+
# ---------------------------------------------------------------------------
|
| 108 |
+
def _state_hash(state: dict) -> str:
|
| 109 |
+
canonical = json.dumps(state, sort_keys=True, separators=(',',':'))
|
| 110 |
+
return hashlib.sha256(canonical.encode()).hexdigest()[:16]
|
| 111 |
+
|
| 112 |
+
def q_get(agent: str, state: dict) -> list:
    """All known (action, q_value, visits, last_reward) rows for this
    agent+state, best Q-value first. Empty list if the state is unseen."""
    db = get_db()
    entries = db.execute(
        "SELECT action, q_value, visits, last_reward FROM qtable WHERE agent=? AND state_hash=? ORDER BY q_value DESC",
        (agent, _state_hash(state))).fetchall()
    db.close()
    return [dict(entry) for entry in entries]
|
| 121 |
+
|
| 122 |
+
def q_best_action(agent: str, state: dict, actions: list) -> dict:
    """
    Epsilon-greedy action selection over the candidate `actions`.

    Returns {"action": str|None, "q_value": float,
             "strategy": "exploit"|"explore"|"init"|"no_actions"}.
    Unseen actions default to Q=0, so a never-tried action can still win
    the exploit branch (reported as strategy "init") when every known
    Q-value is negative.
    """
    import random  # local import: only this function needs it

    sh = _state_hash(state)
    conn = get_db()
    rows = conn.execute(
        "SELECT action, q_value, visits FROM qtable WHERE agent=? AND state_hash=? ORDER BY q_value DESC",
        (agent, sh)).fetchall()
    conn.close()

    known = {r["action"]: (r["q_value"], r["visits"]) for r in rows}
    valid = [a for a in actions if a]  # drop empty/None candidates

    if not valid:
        return {"action": None, "q_value": 0.0, "strategy": "no_actions"}

    # Explore: with probability EPSILON pick uniformly at random.
    if random.random() < EPSILON:
        a = random.choice(valid)
        return {"action": a, "q_value": known.get(a, (0.0, 0))[0], "strategy": "explore"}

    # Exploit: argmax over valid actions, treating unseen actions as Q=0.
    # `valid` is non-empty here, so the argmax always exists — the old
    # `best_a or valid[0]` and float('-inf') fallbacks were dead code.
    best_a = max(valid, key=lambda a: known.get(a, (0.0, 0))[0])
    best_q = known.get(best_a, (0.0, 0))[0]
    strategy = "exploit" if best_a in known else "init"
    return {"action": best_a, "q_value": best_q, "strategy": strategy}
|
| 157 |
+
|
| 158 |
+
def q_update(agent: str, state: dict, action: str, reward: float,
             next_state: dict = None) -> dict:
    """
    Q-learning update: Q(s,a) ← Q(s,a) + α[r + γ·max_Q(s') - Q(s,a)]

    Upserts the (agent, state, action) row and increments its visit count.
    When next_state is omitted the bootstrap term max_Q(s') is 0, reducing
    this to a contextual-bandit style update. Returns a summary dict with
    old/new Q-values.
    """
    sh = _state_hash(state)
    now = time.time()
    conn = get_db()

    # Current Q(s,a) and visit count (defaults 0 for a never-seen pair).
    row = conn.execute(
        "SELECT q_value, visits FROM qtable WHERE agent=? AND state_hash=? AND action=?",
        (agent, sh, action)).fetchone()
    q_old = row["q_value"] if row else 0.0
    visits = (row["visits"] if row else 0) + 1

    # max Q(s') if next_state provided; MAX() is NULL on an empty set,
    # hence the `or 0.0` fallback.
    max_q_next = 0.0
    if next_state:
        nsh = _state_hash(next_state)
        best_next = conn.execute(
            "SELECT MAX(q_value) FROM qtable WHERE agent=? AND state_hash=?",
            (agent, nsh)).fetchone()[0]
        max_q_next = best_next or 0.0

    q_new = q_old + LEARN_RATE * (reward + DISCOUNT * max_q_next - q_old)

    # Upsert keyed by the UNIQUE index on (agent, state_hash, action).
    row_id = str(uuid.uuid4())
    conn.execute("""
        INSERT INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
        VALUES (?,?,?,?,?,?,?,?,?)
        ON CONFLICT(agent,state_hash,action) DO UPDATE SET
            q_value=excluded.q_value, visits=excluded.visits,
            last_reward=excluded.last_reward, updated_at=excluded.updated_at
    """, (row_id, agent, sh, json.dumps(state), action, q_new, visits, reward, now))
    conn.commit(); conn.close()

    return {"agent": agent, "action": action, "q_old": round(q_old, 5),
            "q_new": round(q_new, 5), "reward": reward, "visits": visits}
|
| 197 |
+
|
| 198 |
+
def q_hint(agent: str, state: dict, action: str, nudge: float) -> dict:
    """Manual Q-value nudge (operator bias). Additive.

    Unlike q_update this does not increment visits, and on conflict only
    q_value/updated_at change, so last_reward is left untouched.
    """
    sh = _state_hash(state)
    now = time.time()
    conn = get_db()
    row = conn.execute(
        "SELECT q_value, visits FROM qtable WHERE agent=? AND state_hash=? AND action=?",
        (agent, sh, action)).fetchone()
    q_old = row["q_value"] if row else 0.0
    visits = (row["visits"] if row else 0)
    q_new = q_old + nudge
    conn.execute("""
        INSERT INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
        VALUES (?,?,?,?,?,?,?,?,?)
        ON CONFLICT(agent,state_hash,action) DO UPDATE SET
            q_value=excluded.q_value, updated_at=excluded.updated_at
    """, (str(uuid.uuid4()), agent, sh, json.dumps(state), action, q_new, visits, None, now))
    conn.commit(); conn.close()
    return {"agent": agent, "action": action, "q_old": round(q_old,5),
            "q_new": round(q_new,5), "nudge": nudge}
|
| 218 |
+
|
| 219 |
+
def q_stats() -> dict:
    """Aggregate Q-table statistics: total rows, per-agent summary, and the
    10 best / 10 worst (agent, action) entries by Q-value."""
    conn = get_db()
    total = conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0]
    agents = conn.execute("SELECT agent, COUNT(*) as n, AVG(q_value) as avg_q, MAX(q_value) as max_q "
                          "FROM qtable GROUP BY agent ORDER BY n DESC").fetchall()
    top = conn.execute("SELECT agent, action, q_value, visits FROM qtable "
                       "ORDER BY q_value DESC LIMIT 10").fetchall()
    worst = conn.execute("SELECT agent, action, q_value, visits FROM qtable "
                         "ORDER BY q_value ASC LIMIT 10").fetchall()
    conn.close()
    return {
        "total_entries": total,
        "by_agent": [dict(r) for r in agents],
        "top_actions": [dict(r) for r in top],
        "worst_actions": [dict(r) for r in worst],
    }
|
| 235 |
+
|
| 236 |
+
# ---------------------------------------------------------------------------
|
| 237 |
+
# Reward scoring
|
| 238 |
+
# ---------------------------------------------------------------------------
|
| 239 |
+
def score_trace_event(ev: dict) -> tuple[float, dict]:
    """
    Convert a raw trace event into a reward clamped to [-1.0, 1.0].

    Returns (score, components) where components maps each contributing
    signal name to the exact value it added to the score.
    """
    parts = {}
    total = 0.0
    etype = ev.get("event_type")

    # Errors are always penalized, regardless of event type.
    if ev.get("status") == "error":
        parts["error_penalty"] = -0.4
        total -= 0.4

    # Latency banding — LLM calls only; faster responses score higher.
    lat = ev.get("latency_ms")
    if lat is not None and etype == "llm_call":
        for limit, label, delta in ((500, "latency_fast", 0.3),
                                    (1500, "latency_ok", 0.1),
                                    (4000, "latency_slow", -0.1)):
            if lat < limit:
                break
        else:
            label, delta = "latency_very_slow", -0.3
        parts[label] = delta
        total += delta

    # Token in/out ratio — LLM calls only.
    tin = ev.get("tokens_in") or 0
    tout = ev.get("tokens_out") or 0
    if tin > 0 and tout > 0 and etype == "llm_call":
        ratio = tout / max(tin, 1)
        if ratio > 0.5:
            parts["token_efficiency"] = 0.1
            total += 0.1
        elif ratio < 0.05:
            parts["token_low_output"] = -0.05
            total += -0.05

    # Per-event-type bonuses (mutually exclusive by construction).
    bonus = {"react_step": ("react_progress", 0.1),
             "skill_load": ("skill_reuse", 0.15),
             "self_reflect": ("reflection_bonus", 0.2)}.get(etype)
    if bonus:
        name, delta = bonus
        parts[name] = delta
        total += delta

    # Clamp to [-1, 1] before rounding.
    total = max(-1.0, min(1.0, total))
    return round(total, 4), parts
|
| 293 |
+
|
| 294 |
+
# ---------------------------------------------------------------------------
|
| 295 |
+
# Trace sync pipeline
|
| 296 |
+
# ---------------------------------------------------------------------------
|
| 297 |
+
_http_client = None
|
| 298 |
+
|
| 299 |
+
def _get_http():
|
| 300 |
+
global _http_client
|
| 301 |
+
if _http_client is None:
|
| 302 |
+
try:
|
| 303 |
+
import httpx
|
| 304 |
+
_http_client = httpx.Client(timeout=10.0)
|
| 305 |
+
except ImportError:
|
| 306 |
+
import urllib.request as _ur
|
| 307 |
+
_http_client = "urllib"
|
| 308 |
+
return _http_client
|
| 309 |
+
|
| 310 |
+
def _http_get(url, params=None) -> dict:
    """GET `url` (optional query params) and return the parsed JSON body.

    Uses the shared httpx client when available, else urllib with a 10s
    timeout. NOTE(review): no auth header is attached, so TRACE_KEY is not
    used for reads — confirm agent-trace allows unauthenticated GETs.
    """
    client = _get_http()
    if hasattr(client, "get"):
        r = client.get(url, params=params)
        return r.json()
    else:
        # urllib fallback: encode params into the query string by hand.
        import urllib.request, urllib.parse
        if params:
            url = url + "?" + urllib.parse.urlencode(params)
        with urllib.request.urlopen(url, timeout=10) as resp:
            return json.loads(resp.read())
|
| 321 |
+
|
| 322 |
+
def _http_patch(url, data: dict) -> bool:
    """PATCH a JSON body to `url`.

    Returns True on a 2xx response (httpx path) or when urllib raised no
    exception; False otherwise. Best-effort — never raises on the urllib
    path.
    """
    client = _get_http()
    if hasattr(client, "patch"):
        r = client.patch(url, json=data)
        return r.status_code < 300
    else:
        import urllib.request
        req = urllib.request.Request(url, data=json.dumps(data).encode(),
                                     headers={"Content-Type":"application/json"}, method="PATCH")
        try:
            urllib.request.urlopen(req, timeout=5)
            return True
        except Exception:
            return False
|
| 336 |
+
|
| 337 |
+
def pull_and_score_traces() -> dict:
    """
    Pull recent unscored traces from agent-trace, score each one, write the
    reward back to agent-trace (best-effort), log it locally, and fold it
    into the Q-table.

    Returns {"ok": True, "scored": int, "skipped": int, "avg_reward": float},
    or {"ok": False, "error": str} when agent-trace is unreachable.
    """
    # (The original also fetched sync_cursor rows here and never used
    # them — dead read removed; cursors are still advanced below.)
    try:
        data = _http_get(f"{TRACE_URL}/api/traces",
                         {"has_reward": "false", "since_hours": 48, "limit": 200})
        events = data.get("events", [])
    except Exception as e:
        return {"ok": False, "error": str(e)}

    scored = 0
    skipped = 0
    reward_sum = 0.0
    new_cursors = {}

    # One connection for the whole batch (was: open/close per event).
    # Commit after each insert so q_update — which opens its own
    # connection — never contends with an open write transaction here.
    conn = get_db()
    try:
        for ev in events:
            agent = ev.get("agent", "unknown")
            ts = ev.get("ts", 0)

            # Defensive: the query asks for unscored events, but skip any
            # that already carry a reward.
            if ev.get("reward") is not None:
                skipped += 1
                continue

            reward, components = score_trace_event(ev)

            # Write reward back to agent-trace (best-effort).
            try:
                _http_patch(f"{TRACE_URL}/api/trace/{ev['id']}/reward",
                            {"reward": reward, "source": "learn"})
            except Exception:
                pass

            # Log reward locally.
            conn.execute("""
                INSERT OR IGNORE INTO rewards (id,trace_id,agent,event_type,raw_score,components,ts)
                VALUES (?,?,?,?,?,?,?)
            """, (str(uuid.uuid4()), ev["id"], agent,
                  ev.get("event_type", "custom"), reward,
                  json.dumps(components), time.time()))
            conn.commit()

            # Q-table update: map event → (state, action).
            _update_qtable_from_trace(ev, reward)

            scored += 1
            reward_sum += reward
            new_cursors[agent] = max(new_cursors.get(agent, 0), ts)

        # Advance per-agent cursors monotonically.
        for agent, ts in new_cursors.items():
            conn.execute("INSERT INTO sync_cursor (agent,last_ts) VALUES (?,?) "
                         "ON CONFLICT(agent) DO UPDATE SET last_ts=MAX(last_ts,excluded.last_ts)",
                         (agent, ts))
        conn.commit()
    finally:
        conn.close()

    return {
        "ok": True,
        "scored": scored,
        "skipped": skipped,
        "avg_reward": round(reward_sum / max(scored, 1), 4),
    }
|
| 409 |
+
|
| 410 |
+
def _update_qtable_from_trace(ev: dict, reward: float):
    """Fold one scored trace event into the Q-table.

    Each recognized event type maps to the decision it records:
    llm_call → which model was chosen, tool_use → which tool,
    skill_load → which skill. Events without a recognizable decision
    (or with an empty action) are ignored.

    (Fixes: the original bound `lat`, `model` and `tool` locals it never
    used on most paths, and repeated the same q_update call in three
    branches.)
    """
    agent = ev.get("agent", "unknown")
    event_type = ev.get("event_type", "custom")

    if event_type == "llm_call":
        decision, action = "model_selection", ev.get("model", "")
    elif event_type == "tool_use":
        decision, action = "tool_selection", ev.get("tool_name", "")
    elif event_type == "skill_load":
        decision, action = "skill_selection", ev.get("skill_id")
    else:
        return

    if action:
        q_update(agent, {"agent": agent, "event": decision}, action, reward)
|
| 435 |
+
|
| 436 |
+
# ---------------------------------------------------------------------------
|
| 437 |
+
# RLHF store
|
| 438 |
+
# ---------------------------------------------------------------------------
|
| 439 |
+
def rlhf_add(agent: str, prompt: str, completion: str,
             label: str = "unlabeled", reward: float = None,
             source: str = "human", meta: dict = None) -> str:
    """Insert a labeled prompt/completion pair; returns the new row id.

    Unknown label values are coerced to 'unlabeled'. `meta` is stored as
    JSON text ('{}' when omitted).
    """
    now = time.time()
    rid = str(uuid.uuid4())
    # Coerce anything outside the allowed label set.
    label = label if label in ("approved","rejected","unlabeled") else "unlabeled"
    conn = get_db()
    conn.execute("""
        INSERT INTO rlhf (id,agent,prompt,completion,label,reward,source,meta,created_at)
        VALUES (?,?,?,?,?,?,?,?,?)
    """, (rid, agent, prompt, completion, label, reward,
          source, json.dumps(meta or {}), now))
    conn.commit(); conn.close()
    return rid
|
| 453 |
+
|
| 454 |
+
def rlhf_label(entry_id: str, label: str, reward: float = None) -> bool:
    """Relabel an RLHF entry; returns True iff a row was updated.

    NOTE(review): reward is written unconditionally, so calling with
    reward=None clears any previously stored reward — confirm callers
    intend label-only updates to behave this way.
    """
    label = label if label in ("approved","rejected","unlabeled") else "unlabeled"
    conn = get_db()
    n = conn.execute(
        "UPDATE rlhf SET label=?, reward=? WHERE id=?", (label, reward, entry_id)
    ).rowcount
    conn.commit(); conn.close()
    return n > 0
|
| 462 |
+
|
| 463 |
+
def rlhf_list(agent: str = "", label: str = "", limit: int = 50) -> list:
    """Return the most recent RLHF entries, optionally filtered.

    Empty `agent`/`label` means "no filter".  The stored `meta` JSON is
    decoded per row; rows with undecodable meta keep the raw string.
    """
    clauses = []
    params = []
    if agent:
        clauses.append("agent=?")
        params.append(agent)
    if label:
        clauses.append("label=?")
        params.append(label)
    sql = "SELECT * FROM rlhf"
    if clauses:
        sql += f" WHERE {' AND '.join(clauses)}"
    sql += " ORDER BY created_at DESC LIMIT ?"
    db = get_db()
    rows = db.execute(sql, params + [limit]).fetchall()
    db.close()
    entries = []
    for row in rows:
        entry = dict(row)
        try:
            entry["meta"] = json.loads(entry["meta"])
        except Exception:
            pass  # keep raw string when meta is not valid JSON
        entries.append(entry)
    return entries
| 480 |
+
|
| 481 |
+
def rlhf_stats() -> dict:
    """Aggregate RLHF entry counts, broken down by label."""
    db = get_db()
    rows = db.execute("SELECT label, COUNT(*) as n FROM rlhf GROUP BY label").fetchall()
    db.close()
    by_label = {row["label"]: row["n"] for row in rows}
    return {"total": sum(by_label.values()), "by_label": by_label}
| 487 |
+
|
| 488 |
+
# ---------------------------------------------------------------------------
|
| 489 |
+
# Skill candidates
|
| 490 |
+
# ---------------------------------------------------------------------------
|
| 491 |
+
def candidate_add(description: str, agent: str) -> str:
    """Register a skill candidate and return its id.

    Deduplicates: if an identical description is already pending, its
    frequency counter is bumped instead of inserting a new row.
    """
    db = get_db()
    match = db.execute(
        "SELECT id, frequency FROM skill_candidates WHERE description=? AND status='pending'",
        (description,)).fetchone()
    if match:
        db.execute("UPDATE skill_candidates SET frequency=frequency+1, updated_at=? WHERE id=?",
                   (time.time(), match["id"]))
        db.commit()
        db.close()
        return match["id"]
    new_id = str(uuid.uuid4())
    now = time.time()
    db.execute("""
        INSERT INTO skill_candidates (id,description,agent,frequency,status,created_at,updated_at)
        VALUES (?,?,?,1,'pending',?,?)
    """, (new_id, description, agent, now, now))
    db.commit()
    db.close()
    return new_id
| 510 |
+
|
| 511 |
+
def candidate_update(cid: str, status: str) -> bool:
    """Set a candidate's review status; True when the row existed."""
    db = get_db()
    affected = db.execute("UPDATE skill_candidates SET status=?, updated_at=? WHERE id=?",
                          (status, time.time(), cid)).rowcount
    db.commit()
    db.close()
    return affected > 0
| 517 |
+
|
| 518 |
+
def candidates_list(status: str = "pending") -> list:
    """List skill candidates in a given status, most frequent first."""
    db = get_db()
    fetched = db.execute(
        "SELECT * FROM skill_candidates WHERE status=? ORDER BY frequency DESC, created_at DESC",
        (status,)).fetchall()
    db.close()
    return [dict(row) for row in fetched]
| 525 |
+
|
| 526 |
+
# ---------------------------------------------------------------------------
|
| 527 |
+
# Learn stats
|
| 528 |
+
# ---------------------------------------------------------------------------
|
| 529 |
+
def learn_stats() -> dict:
    """Snapshot of the learning system: Q-table, rewards, RLHF, candidates."""
    db = get_db()
    total_rewards = db.execute("SELECT COUNT(*) FROM rewards").fetchone()[0]
    avg_reward = db.execute("SELECT AVG(raw_score) FROM rewards").fetchone()[0]
    day_row = db.execute("SELECT COUNT(*), AVG(raw_score) FROM rewards WHERE ts>=?",
                         (time.time()-86400,)).fetchone()
    rlhf_summary = rlhf_stats()  # opens its own connection
    pending = db.execute("SELECT COUNT(*) FROM skill_candidates WHERE status='pending'").fetchone()[0]
    db.close()
    return {
        "qtable": q_stats(),
        "rewards": {
            "total": total_rewards,
            "avg_all_time": round(avg_reward or 0, 4),
            "last_24h": {"count": day_row[0], "avg": round(day_row[1] or 0, 4)},
        },
        "rlhf": rlhf_summary,
        "skill_candidates_pending": pending,
    }
| 549 |
+
|
| 550 |
+
def reward_trend(hours: int = 24, bucket_minutes: int = 60) -> list:
    """Return time-bucketed average rewards for the last `hours` hours.

    Fix: `bucket_minutes` was previously accepted but ignored — buckets were
    always hard-coded to 3600 s.  It now controls the bucket width; the
    default of 60 minutes preserves the old behavior exactly.

    Returns a list of dicts sorted by bucket start time:
        {"ts": epoch_seconds, "count": n, "avg_reward": rounded_mean}
    Empty list when no rewards fall inside the window.
    """
    conn = get_db()
    since = time.time() - hours * 3600
    rows = conn.execute(
        "SELECT ts, raw_score, agent, event_type FROM rewards WHERE ts>=? ORDER BY ts",
        (since,)).fetchall()
    conn.close()
    if not rows:
        return []
    # Guard against zero/negative widths; floor at one-minute buckets.
    bucket_secs = max(60, int(bucket_minutes) * 60)
    buckets = {}
    for r in rows:
        b = int(r["ts"] // bucket_secs) * bucket_secs
        if b not in buckets:
            buckets[b] = {"ts": b, "count": 0, "total": 0.0}
        buckets[b]["count"] += 1
        buckets[b]["total"] += r["raw_score"]
    return [{"ts": v["ts"], "count": v["count"],
             "avg_reward": round(v["total"]/v["count"], 4)}
            for v in sorted(buckets.values(), key=lambda x: x["ts"])]
| 570 |
+
|
| 571 |
+
# ---------------------------------------------------------------------------
|
| 572 |
+
# Background sync loop
|
| 573 |
+
# ---------------------------------------------------------------------------
|
| 574 |
+
# Background task: periodically pull fresh trace events from agent-trace and
# score them into rewards.  Started once from the FastAPI lifespan hook.
async def _sync_loop():
    while True:
        # Sleep first so startup isn't delayed by an immediate sync pass.
        await asyncio.sleep(SYNC_INTERVAL)
        try:
            pull_and_score_traces()
        except Exception:
            # Best-effort: a failed sync (e.g. trace service unreachable)
            # must not kill the loop; the next tick simply retries.
            pass
| 581 |
+
|
| 582 |
+
# ---------------------------------------------------------------------------
|
| 583 |
+
# Seed
|
| 584 |
+
# ---------------------------------------------------------------------------
|
| 585 |
+
def seed_demo():
    """Seed demo data on first boot; no-op when the Q-table is non-empty.

    Seeds three things: prior-knowledge Q-values for NEXUS/PULSE, a few
    labeled RLHF examples, and one pending skill candidate.

    Fix: the original opened and closed a fresh DB connection for every
    seed row inside the loop; all Q-table rows now share one connection
    and one commit.
    """
    conn = get_db()
    n = conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0]
    if n > 0:
        conn.close()
        return
    # Seed NEXUS model selection Q-table from prior knowledge
    now = time.time()
    entries = [
        # ki-fusion RTX5090 is best when available
        ("nexus", {"agent":"nexus","event":"model_selection"}, "qwen/qwen3.5-35b-a3b", 0.72),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "claude-haiku-4-5", 0.55),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "hf_api", 0.30),
        ("nexus", {"agent":"nexus","event":"model_selection"}, "local_cpu", 0.10),
        # Tool selection
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "kanban_create", 0.65),
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "slot_reserve", 0.60),
        ("pulse", {"agent":"pulse","event":"tool_selection"}, "trigger_agent", 0.50),
        # Skill reuse
        ("pulse", {"agent":"pulse","event":"skill_selection"}, "calculator", 0.40),
        ("pulse", {"agent":"pulse","event":"skill_selection"}, "forge_client", 0.55),
    ]
    for agent, state, action, q in entries:
        sh = _state_hash(state)
        conn.execute("""
            INSERT OR IGNORE INTO qtable (id,agent,state_hash,state_json,action,q_value,visits,last_reward,updated_at)
            VALUES (?,?,?,?,?,?,0,NULL,?)
        """, (str(uuid.uuid4()), agent, sh, json.dumps(state), action, q, now))
    conn.commit()
    conn.close()
    # Seed RLHF examples (rlhf_add manages its own connection)
    examples = [
        ("nexus", "Route this query to the best available LLM.",
         "I will use ki-fusion RTX5090 (qwen3.5-35b) as it has the best quality/speed ratio.",
         "approved", 0.9),
        ("nexus", "Route this query to the best available LLM.",
         "I will use local_cpu for this complex multi-step reasoning task.",
         "rejected", -0.3),
        ("pulse", "Schedule this long-running background task.",
         "I will reserve an LLM slot before starting and release it on completion.",
         "approved", 0.8),
    ]
    for agent, prompt, completion, label, reward in examples:
        rlhf_add(agent, prompt, completion, label, reward, "seed")
    # Seed a skill candidate
    candidate_add("Pattern: agents repeatedly fetch the same URL multiple times per session → caching skill needed", "learn")
| 630 |
+
|
| 631 |
+
# ---------------------------------------------------------------------------
|
| 632 |
+
# MCP
|
| 633 |
+
# ---------------------------------------------------------------------------
|
| 634 |
+
# Tool manifest advertised via MCP tools/list.  Tool names map 1:1 onto the
# branches of handle_mcp's tools/call dispatch; inputSchema is JSON Schema.
MCP_TOOLS = [
    {"name":"learn_q_get","description":"Get all Q-values for an agent+state.",
     "inputSchema":{"type":"object","required":["agent","state"],
        "properties":{"agent":{"type":"string"},"state":{"type":"object"}}}},
    {"name":"learn_q_best","description":"Get best action (epsilon-greedy) for an agent+state.",
     "inputSchema":{"type":"object","required":["agent","state","actions"],
        "properties":{"agent":{"type":"string"},"state":{"type":"object"},
            "actions":{"type":"array","items":{"type":"string"}}}}},
    {"name":"learn_q_update","description":"Update Q-value after taking an action and observing reward.",
     "inputSchema":{"type":"object","required":["agent","state","action","reward"],
        "properties":{"agent":{"type":"string"},"state":{"type":"object"},
            "action":{"type":"string"},"reward":{"type":"number"},
            "next_state":{"type":"object"}}}},
    {"name":"learn_q_hint","description":"Manually nudge a Q-value (operator override).",
     "inputSchema":{"type":"object","required":["agent","state","action","nudge"],
        "properties":{"agent":{"type":"string"},"state":{"type":"object"},
            "action":{"type":"string"},"nudge":{"type":"number"}}}},
    {"name":"learn_stats","description":"Get learning system statistics.",
     "inputSchema":{"type":"object","properties":{}}},
    {"name":"learn_rlhf_add","description":"Add a labeled completion to the RLHF store.",
     "inputSchema":{"type":"object","required":["agent","prompt","completion"],
        "properties":{"agent":{"type":"string"},"prompt":{"type":"string"},
            "completion":{"type":"string"},"label":{"type":"string"},
            "reward":{"type":"number"},"source":{"type":"string"}}}},
    {"name":"learn_score_trace","description":"Score a single trace event and return reward.",
     "inputSchema":{"type":"object","required":["event"],
        "properties":{"event":{"type":"object","description":"Trace event dict"}}}},
    {"name":"learn_candidate_add","description":"Add a skill candidate for review.",
     "inputSchema":{"type":"object","required":["description","agent"],
        "properties":{"description":{"type":"string"},"agent":{"type":"string"}}}},
    {"name":"learn_sync","description":"Trigger immediate trace pull and reward scoring.",
     "inputSchema":{"type":"object","properties":{}}},
]
|
| 667 |
+
|
| 668 |
+
def handle_mcp(method, params, req_id):
    """Dispatch one MCP JSON-RPC message.

    Returns a JSON-RPC 2.0 response dict, or None for notification methods
    (which must not receive a response).
    """
    def ok(r): return {"jsonrpc":"2.0","id":req_id,"result":r}
    # MCP tool results are wrapped as a single text content item.
    def txt(d): return ok({"content":[{"type":"text","text":json.dumps(d)}]})
    if method=="initialize":
        return ok({"protocolVersion":"2024-11-05",
                   "serverInfo":{"name":"agent-learn","version":"1.0.0"},
                   "capabilities":{"tools":{}}})
    if method=="tools/list": return ok({"tools":MCP_TOOLS})
    if method=="tools/call":
        n, a = params.get("name",""), params.get("arguments",{})
        if n=="learn_q_get": return txt({"entries":q_get(a["agent"],a["state"])})
        if n=="learn_q_best": return txt(q_best_action(a["agent"],a["state"],a.get("actions",[])))
        if n=="learn_q_update": return txt(q_update(a["agent"],a["state"],a["action"],float(a["reward"]),a.get("next_state")))
        if n=="learn_q_hint": return txt(q_hint(a["agent"],a["state"],a["action"],float(a["nudge"])))
        if n=="learn_stats": return txt(learn_stats())
        if n=="learn_rlhf_add":
            rid = rlhf_add(a["agent"],a["prompt"],a["completion"],
                           a.get("label","unlabeled"),a.get("reward"),a.get("source","mcp"))
            return txt({"ok":True,"id":rid})
        if n=="learn_score_trace":
            # Stateless scoring: computes the reward without persisting it.
            score, comp = score_trace_event(a.get("event",{}))
            return txt({"reward":score,"components":comp})
        if n=="learn_candidate_add":
            cid = candidate_add(a["description"],a["agent"])
            return txt({"ok":True,"id":cid})
        if n=="learn_sync": return txt(pull_and_score_traces())
        # Unknown tool name → JSON-RPC "method not found" code.
        return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Unknown tool: {n}"}}
    if method in ("notifications/initialized","notifications/cancelled"): return None
    return {"jsonrpc":"2.0","id":req_id,"error":{"code":-32601,"message":f"Method not found: {method}"}}
|
| 697 |
+
|
| 698 |
+
# ---------------------------------------------------------------------------
|
| 699 |
+
# FastAPI app
|
| 700 |
+
# ---------------------------------------------------------------------------
|
| 701 |
+
@asynccontextmanager
async def lifespan(app):
    """FastAPI lifespan: init the schema, seed demo data, start trace sync.

    Fix: the original discarded the result of `asyncio.create_task`; the
    event loop keeps only weak references to tasks, so the background sync
    loop could be garbage-collected mid-run.  Keeping the handle on
    `app.state` pins it for the app's lifetime.
    """
    init_db(); seed_demo()
    app.state.sync_task = asyncio.create_task(_sync_loop())
    yield
| 706 |
+
|
| 707 |
+
app = FastAPI(title="agent-learn", version="1.0.0", lifespan=lifespan)

# Shared-secret auth: allowed when LEARN_KEY is unset (open mode) or the
# request carries a matching X-Learn-Key header.
def _auth(r): return not LEARN_KEY or r.headers.get("x-learn-key","") == LEARN_KEY
|
| 710 |
+
|
| 711 |
+
# --- Q-table REST ---
|
| 712 |
+
@app.get("/api/q")
|
| 713 |
+
async def api_q_get(agent:str=Query(...), state:str=Query("{}") ):
|
| 714 |
+
try: s = json.loads(state)
|
| 715 |
+
except Exception: raise HTTPException(400,"state must be JSON")
|
| 716 |
+
return JSONResponse({"entries": q_get(agent, s)})
|
| 717 |
+
|
| 718 |
+
@app.post("/api/q/best")
|
| 719 |
+
async def api_q_best(request:Request):
|
| 720 |
+
b = await request.json()
|
| 721 |
+
return JSONResponse(q_best_action(b["agent"], b.get("state",{}), b.get("actions",[])))
|
| 722 |
+
|
| 723 |
+
@app.post("/api/q/update")
|
| 724 |
+
async def api_q_update(request:Request):
|
| 725 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 726 |
+
b = await request.json()
|
| 727 |
+
return JSONResponse(q_update(b["agent"],b.get("state",{}),b["action"],float(b["reward"]),b.get("next_state")))
|
| 728 |
+
|
| 729 |
+
@app.post("/api/q/hint")
|
| 730 |
+
async def api_q_hint(request:Request):
|
| 731 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 732 |
+
b = await request.json()
|
| 733 |
+
return JSONResponse(q_hint(b["agent"],b.get("state",{}),b["action"],float(b["nudge"])))
|
| 734 |
+
|
| 735 |
+
@app.get("/api/q/stats")
|
| 736 |
+
async def api_q_stats(): return JSONResponse(q_stats())
|
| 737 |
+
|
| 738 |
+
# --- Scoring ---
|
| 739 |
+
@app.post("/api/score")
|
| 740 |
+
async def api_score(request:Request):
|
| 741 |
+
b = await request.json()
|
| 742 |
+
score, comp = score_trace_event(b)
|
| 743 |
+
return JSONResponse({"reward": score, "components": comp})
|
| 744 |
+
|
| 745 |
+
@app.post("/api/sync")
|
| 746 |
+
async def api_sync(request:Request):
|
| 747 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 748 |
+
result = pull_and_score_traces()
|
| 749 |
+
return JSONResponse(result)
|
| 750 |
+
|
| 751 |
+
# --- RLHF ---
|
| 752 |
+
@app.get("/api/rlhf")
|
| 753 |
+
async def api_rlhf_list(agent:str=Query(""), label:str=Query(""), limit:int=Query(50)):
|
| 754 |
+
return JSONResponse({"entries": rlhf_list(agent,label,limit)})
|
| 755 |
+
|
| 756 |
+
@app.post("/api/rlhf", status_code=201)
|
| 757 |
+
async def api_rlhf_add(request:Request):
|
| 758 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 759 |
+
b = await request.json()
|
| 760 |
+
rid = rlhf_add(b.get("agent","unknown"),b["prompt"],b["completion"],
|
| 761 |
+
b.get("label","unlabeled"),b.get("reward"),b.get("source","api"),b.get("meta"))
|
| 762 |
+
return JSONResponse({"ok":True,"id":rid})
|
| 763 |
+
|
| 764 |
+
@app.patch("/api/rlhf/{entry_id}")
|
| 765 |
+
async def api_rlhf_label(entry_id:str, request:Request):
|
| 766 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 767 |
+
b = await request.json()
|
| 768 |
+
ok = rlhf_label(entry_id, b.get("label","unlabeled"), b.get("reward"))
|
| 769 |
+
return JSONResponse({"ok":ok})
|
| 770 |
+
|
| 771 |
+
# --- Skill candidates ---
|
| 772 |
+
@app.get("/api/candidates")
|
| 773 |
+
async def api_candidates(status:str=Query("pending")):
|
| 774 |
+
return JSONResponse({"candidates": candidates_list(status)})
|
| 775 |
+
|
| 776 |
+
@app.patch("/api/candidates/{cid}")
|
| 777 |
+
async def api_candidate_update(cid:str, request:Request):
|
| 778 |
+
if not _auth(request): raise HTTPException(403,"Invalid X-Learn-Key")
|
| 779 |
+
b = await request.json()
|
| 780 |
+
ok = candidate_update(cid, b.get("status","pending"))
|
| 781 |
+
return JSONResponse({"ok":ok})
|
| 782 |
+
|
| 783 |
+
# --- Stats ---
|
| 784 |
+
@app.get("/api/stats")
|
| 785 |
+
async def api_stats(): return JSONResponse(learn_stats())
|
| 786 |
+
|
| 787 |
+
@app.get("/api/reward-trend")
|
| 788 |
+
async def api_trend(hours:int=Query(24)): return JSONResponse({"trend":reward_trend(hours)})
|
| 789 |
+
|
| 790 |
+
@app.get("/api/health")
|
| 791 |
+
async def api_health():
|
| 792 |
+
conn=get_db(); n=conn.execute("SELECT COUNT(*) FROM qtable").fetchone()[0]; conn.close()
|
| 793 |
+
return JSONResponse({"ok":True,"qtable_entries":n,"version":"1.0.0"})
|
| 794 |
+
|
| 795 |
+
# --- MCP ---
|
| 796 |
+
@app.get("/mcp/sse")
|
| 797 |
+
async def mcp_sse(request:Request):
|
| 798 |
+
async def gen():
|
| 799 |
+
yield f"data: {json.dumps({'jsonrpc':'2.0','method':'connected','params':{}})}\n\n"
|
| 800 |
+
yield f"data: {json.dumps({'jsonrpc':'2.0','method':'notifications/tools','params':{'tools':MCP_TOOLS}})}\n\n"
|
| 801 |
+
while True:
|
| 802 |
+
if await request.is_disconnected(): break
|
| 803 |
+
yield ": ping\n\n"; await asyncio.sleep(15)
|
| 804 |
+
return StreamingResponse(gen(), media_type="text/event-stream",
|
| 805 |
+
headers={"Cache-Control":"no-cache","Connection":"keep-alive","X-Accel-Buffering":"no"})
|
| 806 |
+
|
| 807 |
+
@app.post("/mcp")
|
| 808 |
+
async def mcp_rpc(request:Request):
|
| 809 |
+
try: body = await request.json()
|
| 810 |
+
except Exception: return JSONResponse({"jsonrpc":"2.0","id":None,"error":{"code":-32700,"message":"Parse error"}})
|
| 811 |
+
if isinstance(body,list):
|
| 812 |
+
return JSONResponse([r for r in [handle_mcp(x.get("method",""),x.get("params",{}),x.get("id")) for x in body] if r])
|
| 813 |
+
r = handle_mcp(body.get("method",""),body.get("params",{}),body.get("id"))
|
| 814 |
+
return JSONResponse(r or {"jsonrpc":"2.0","id":body.get("id"),"result":{}})
|
| 815 |
+
|
| 816 |
+
# ---------------------------------------------------------------------------
|
| 817 |
+
# SPA Dashboard
|
| 818 |
+
# ---------------------------------------------------------------------------
|
| 819 |
+
SPA = r"""<!DOCTYPE html>
|
| 820 |
+
<html lang="en">
|
| 821 |
+
<head>
|
| 822 |
+
<meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
|
| 823 |
+
<title>🧠 LEARN — FORGE Learning Layer</title>
|
| 824 |
+
<style>
|
| 825 |
+
@import url('https://fonts.googleapis.com/css2?family=Space+Mono:wght@400;700&family=Syne:wght@400;600;800&family=DM+Mono:wght@300;400;500&display=swap');
|
| 826 |
+
*{box-sizing:border-box;margin:0;padding:0}
|
| 827 |
+
:root{--bg:#06060d;--sf:#0d0d18;--sf2:#121222;--br:#1a1a2e;--ac:#ff6b00;--tx:#dde0f0;--mu:#50507a;--gr:#00ff88;--rd:#ff4455;--cy:#06b6d4;--pu:#8b5cf6;--ye:#f59e0b;--pk:#ec4899}
|
| 828 |
+
html,body{height:100%;background:var(--bg);color:var(--tx);font-family:'Syne',sans-serif}
|
| 829 |
+
::-webkit-scrollbar{width:5px;height:5px}::-webkit-scrollbar-track{background:var(--sf)}::-webkit-scrollbar-thumb{background:var(--br);border-radius:3px}
|
| 830 |
+
.app{display:grid;grid-template-rows:52px 1fr;height:100vh;overflow:hidden}
|
| 831 |
+
.hdr{display:flex;align-items:center;gap:1rem;padding:0 1.5rem;border-bottom:1px solid var(--br);background:var(--sf)}
|
| 832 |
+
.logo{font-family:'Space Mono',monospace;font-size:1.1rem;font-weight:700;color:var(--ac)}
|
| 833 |
+
.sub{font-family:'DM Mono',monospace;font-size:.6rem;color:var(--mu);letter-spacing:.2em;text-transform:uppercase}
|
| 834 |
+
.hstats{display:flex;gap:1.5rem;margin-left:auto}
|
| 835 |
+
.hs{text-align:center}.hs-n{font-family:'Space Mono',monospace;font-size:1rem;font-weight:700;color:var(--ac)}
|
| 836 |
+
.hs-l{font-family:'DM Mono',monospace;font-size:.58rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em}
|
| 837 |
+
.tabs{display:flex;border-bottom:1px solid var(--br);background:var(--sf)}
|
| 838 |
+
.tab{padding:.55rem 1.3rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--mu);border-bottom:2px solid transparent;cursor:pointer;letter-spacing:.05em;transition:all .15s}
|
| 839 |
+
.tab.active{color:var(--ac);border-bottom-color:var(--ac)}
|
| 840 |
+
.tab:hover{color:var(--tx)}
|
| 841 |
+
.body{flex:1;overflow-y:auto;padding:1.25rem}
|
| 842 |
+
|
| 843 |
+
/* Cards */
|
| 844 |
+
.kpis{display:grid;grid-template-columns:repeat(4,1fr);gap:.75rem;margin-bottom:1.25rem}
|
| 845 |
+
.kpi{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:.9rem 1rem}
|
| 846 |
+
.kpi-n{font-family:'Space Mono',monospace;font-size:1.6rem;font-weight:700;color:var(--ac);line-height:1}
|
| 847 |
+
.kpi-l{font-family:'DM Mono',monospace;font-size:.6rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;margin-top:4px}
|
| 848 |
+
.kpi-sub{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--mu);margin-top:2px}
|
| 849 |
+
|
| 850 |
+
/* Q-table */
|
| 851 |
+
.qtable-grid{display:grid;grid-template-columns:repeat(auto-fill,minmax(280px,1fr));gap:.75rem}
|
| 852 |
+
.qt-agent{background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden}
|
| 853 |
+
.qt-agent-hdr{padding:.6rem 1rem;border-bottom:1px solid var(--br);font-family:'Space Mono',monospace;font-size:.8rem;font-weight:700;color:var(--ac);display:flex;align-items:center;gap:.5rem}
|
| 854 |
+
.qt-row{display:flex;align-items:center;padding:.35rem 1rem;gap:.6rem;border-bottom:1px solid #0d0d18;font-family:'DM Mono',monospace;font-size:.72rem}
|
| 855 |
+
.qt-row:last-child{border-bottom:none}
|
| 856 |
+
.qt-action{flex:1;color:var(--tx);overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
|
| 857 |
+
.qt-bar{width:80px;height:6px;background:var(--br);border-radius:3px;overflow:hidden;flex-shrink:0}
|
| 858 |
+
.qt-bar-fill{height:100%;border-radius:3px;transition:width .3s}
|
| 859 |
+
.qt-val{font-weight:700;width:48px;text-align:right;flex-shrink:0}
|
| 860 |
+
.qt-vis{font-size:.6rem;color:var(--mu);width:30px;text-align:right;flex-shrink:0}
|
| 861 |
+
|
| 862 |
+
/* Reward trend */
|
| 863 |
+
.trend-container{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:1rem;margin-bottom:1rem}
|
| 864 |
+
.trend-title{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--mu);text-transform:uppercase;letter-spacing:.15em;margin-bottom:.75rem}
|
| 865 |
+
.trend-chart{height:80px;display:flex;align-items:flex-end;gap:3px}
|
| 866 |
+
.t-bar-wrap{flex:1;display:flex;flex-direction:column;align-items:center;height:100%}
|
| 867 |
+
.t-bar{width:100%;border-radius:2px 2px 0 0;min-height:2px;transition:height .3s}
|
| 868 |
+
.t-lbl{font-family:'DM Mono',monospace;font-size:.5rem;color:var(--mu);margin-top:2px;text-align:center}
|
| 869 |
+
|
| 870 |
+
/* RLHF table */
|
| 871 |
+
.rlhf-table{width:100%;border-collapse:collapse;font-family:'DM Mono',monospace;font-size:.75rem}
|
| 872 |
+
.rlhf-table th{padding:.4rem .75rem;text-align:left;font-size:.62rem;color:var(--mu);text-transform:uppercase;letter-spacing:.1em;border-bottom:1px solid var(--br)}
|
| 873 |
+
.rlhf-table td{padding:.45rem .75rem;border-bottom:1px solid #0d0d18;max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
|
| 874 |
+
.rlhf-table tr:hover td{background:var(--sf)}
|
| 875 |
+
.badge{display:inline-block;padding:1px 7px;border-radius:4px;font-size:.62rem}
|
| 876 |
+
.badge-approved{background:#001a08;color:var(--gr);border:1px solid #004422}
|
| 877 |
+
.badge-rejected{background:#1a0000;color:var(--rd);border:1px solid #440011}
|
| 878 |
+
.badge-unlabeled{background:var(--sf2);color:var(--mu);border:1px solid var(--br)}
|
| 879 |
+
|
| 880 |
+
/* Skill candidates */
|
| 881 |
+
.cand-card{background:var(--sf);border:1px solid var(--br);border-radius:8px;padding:.8rem 1rem;margin-bottom:.6rem;display:flex;align-items:flex-start;gap:1rem}
|
| 882 |
+
.cand-desc{flex:1;font-size:.82rem;line-height:1.6}
|
| 883 |
+
.cand-meta{font-family:'DM Mono',monospace;font-size:.62rem;color:var(--mu)}
|
| 884 |
+
.cand-freq{font-family:'Space Mono',monospace;font-size:1.2rem;font-weight:700;color:var(--ye);min-width:30px;text-align:center}
|
| 885 |
+
.btn{padding:.4rem .9rem;border:none;border-radius:5px;cursor:pointer;font-family:'DM Mono',monospace;font-size:.7rem;transition:all .15s}
|
| 886 |
+
.btn-approve{background:#001a08;color:var(--gr);border:1px solid #004422}
|
| 887 |
+
.btn-approve:hover{background:#003010}
|
| 888 |
+
.btn-reject{background:#1a0000;color:var(--rd);border:1px solid #440011}
|
| 889 |
+
.btn-reject:hover{background:#300010}
|
| 890 |
+
.btn-sync{background:var(--sf2);color:var(--ac);border:1px solid var(--br);margin-left:auto}
|
| 891 |
+
.btn-sync:hover{border-color:var(--ac)}
|
| 892 |
+
|
| 893 |
+
/* Config panel */
|
| 894 |
+
.config-row{display:flex;align-items:center;padding:.6rem 1rem;border-bottom:1px solid var(--br);font-family:'DM Mono',monospace;font-size:.78rem}
|
| 895 |
+
.config-key{color:var(--mu);width:160px;text-transform:uppercase;font-size:.65rem;letter-spacing:.1em}
|
| 896 |
+
.config-val{color:var(--cy);font-weight:700}
|
| 897 |
+
.config-desc{color:var(--mu);font-size:.65rem;margin-left:.75rem}
|
| 898 |
+
|
| 899 |
+
.section{font-family:'DM Mono',monospace;font-size:.65rem;color:var(--pu);text-transform:uppercase;letter-spacing:.15em;margin:.75rem 0 .4rem}
|
| 900 |
+
.empty{text-align:center;padding:2rem;color:var(--mu);font-family:'DM Mono',monospace;font-size:.8rem}
|
| 901 |
+
</style>
|
| 902 |
+
</head>
|
| 903 |
+
<body>
|
| 904 |
+
<div class="app">
|
| 905 |
+
<header class="hdr">
|
| 906 |
+
<div><div class="logo">🧠 LEARN</div><div class="sub">FORGE Learning Layer</div></div>
|
| 907 |
+
<div class="hstats">
|
| 908 |
+
<div class="hs"><div class="hs-n" id="hQ">—</div><div class="hs-l">Q-entries</div></div>
|
| 909 |
+
<div class="hs"><div class="hs-n" id="hR" style="color:var(--gr)">—</div><div class="hs-l">Rewards</div></div>
|
| 910 |
+
<div class="hs"><div class="hs-n" id="hA">—</div><div class="hs-l">Avg reward</div></div>
|
| 911 |
+
<div class="hs"><div class="hs-n" id="hC" style="color:var(--ye)">—</div><div class="hs-l">Candidates</div></div>
|
| 912 |
+
</div>
|
| 913 |
+
</header>
|
| 914 |
+
<div style="display:flex;flex-direction:column;overflow:hidden;flex:1">
|
| 915 |
+
<div class="tabs">
|
| 916 |
+
<div class="tab active" onclick="showTab('qtable')">⚙ Q-Table</div>
|
| 917 |
+
<div class="tab" onclick="showTab('rewards')">🏆 Rewards</div>
|
| 918 |
+
<div class="tab" onclick="showTab('rlhf')">👥 RLHF</div>
|
| 919 |
+
<div class="tab" onclick="showTab('candidates')">💡 Skill Candidates</div>
|
| 920 |
+
<div class="tab" onclick="showTab('config')">⚙︎ Config</div>
|
| 921 |
+
<button class="btn btn-sync" onclick="triggerSync()" style="margin:auto 1rem auto auto;padding:.3rem .75rem">↻ Sync Traces</button>
|
| 922 |
+
</div>
|
| 923 |
+
<div class="body" id="tabBody"></div>
|
| 924 |
+
</div>
|
| 925 |
+
</div>
|
| 926 |
+
<script>
|
| 927 |
+
let stats=null, trend=[], rlhf=[], candidates=[], currentTab='qtable';
|
| 928 |
+
|
| 929 |
+
async function loadAll(){
|
| 930 |
+
[stats,trend] = await Promise.all([
|
| 931 |
+
fetch('/api/stats').then(r=>r.json()),
|
| 932 |
+
fetch('/api/reward-trend?hours=24').then(r=>r.json()).then(d=>d.trend||[])
|
| 933 |
+
]);
|
| 934 |
+
document.getElementById('hQ').textContent=stats.qtable?.total_entries||0;
|
| 935 |
+
document.getElementById('hR').textContent=stats.rewards?.total||0;
|
| 936 |
+
document.getElementById('hA').textContent=stats.rewards?.avg_all_time?.toFixed(3)||'—';
|
| 937 |
+
document.getElementById('hC').textContent=stats.skill_candidates_pending||0;
|
| 938 |
+
renderTab();
|
| 939 |
+
}
|
| 940 |
+
|
| 941 |
+
async function loadRLHF(){ rlhf = (await fetch('/api/rlhf?limit=50').then(r=>r.json())).entries||[]; }
|
| 942 |
+
async function loadCandidates(){ candidates = (await fetch('/api/candidates').then(r=>r.json())).candidates||[]; }
|
| 943 |
+
|
| 944 |
+
function showTab(t){
|
| 945 |
+
currentTab=t;
|
| 946 |
+
document.querySelectorAll('.tab').forEach((el,i)=>el.classList.toggle('active',['qtable','rewards','rlhf','candidates','config'][i]===t));
|
| 947 |
+
renderTab();
|
| 948 |
+
}
|
| 949 |
+
|
| 950 |
+
async function renderTab(){
|
| 951 |
+
if(currentTab==='qtable') renderQTable();
|
| 952 |
+
else if(currentTab==='rewards') renderRewards();
|
| 953 |
+
else if(currentTab==='rlhf') { await loadRLHF(); renderRLHF(); }
|
| 954 |
+
else if(currentTab==='candidates'){ await loadCandidates(); renderCandidates(); }
|
| 955 |
+
else if(currentTab==='config') renderConfig();
|
| 956 |
+
}
|
| 957 |
+
|
| 958 |
+
function renderQTable(){
|
| 959 |
+
const qt = stats?.qtable || {};
|
| 960 |
+
const byAgent = qt.by_agent || [];
|
| 961 |
+
const top = qt.top_actions || [];
|
| 962 |
+
// Group top by agent
|
| 963 |
+
const grouped = {};
|
| 964 |
+
top.forEach(r=>{ if(!grouped[r.agent]) grouped[r.agent]=[];grouped[r.agent].push(r) });
|
| 965 |
+
byAgent.forEach(a=>{ if(!grouped[a.agent]) grouped[a.agent]=[] });
|
| 966 |
+
|
| 967 |
+
const html = `
|
| 968 |
+
<div class="kpis">
|
| 969 |
+
<div class="kpi"><div class="kpi-n">${qt.total_entries||0}</div><div class="kpi-l">Total entries</div></div>
|
| 970 |
+
${byAgent.slice(0,3).map(a=>`<div class="kpi"><div class="kpi-n" style="font-size:1.2rem">${a.n}</div><div class="kpi-l">${a.agent}</div><div class="kpi-sub">avg Q: ${(a.avg_q||0).toFixed(3)}</div></div>`).join('')}
|
| 971 |
+
</div>
|
| 972 |
+
<div class="section">Best Q-values per agent</div>
|
| 973 |
+
<div class="qtable-grid">
|
| 974 |
+
${Object.entries(grouped).map(([agent, rows])=>{
|
| 975 |
+
const maxQ = Math.max(...rows.map(r=>r.q_value||0), 0.001);
|
| 976 |
+
return `<div class="qt-agent">
|
| 977 |
+
<div class="qt-agent-hdr">⚙ ${agent}</div>
|
| 978 |
+
${rows.length ? rows.map(r=>{
|
| 979 |
+
const pct = Math.max(0,Math.min(100,(r.q_value/maxQ)*100));
|
| 980 |
+
const col = r.q_value>0.5?'var(--gr)':r.q_value>0?'var(--ye)':'var(--rd)';
|
| 981 |
+
return `<div class="qt-row">
|
| 982 |
+
<span class="qt-action">${r.action}</span>
|
| 983 |
+
<div class="qt-bar"><div class="qt-bar-fill" style="width:${pct}%;background:${col}"></div></div>
|
| 984 |
+
<span class="qt-val" style="color:${col}">${r.q_value.toFixed(3)}</span>
|
| 985 |
+
<span class="qt-vis">${r.visits}x</span>
|
| 986 |
+
</div>`;
|
| 987 |
+
}).join('') : '<div class="qt-row" style="color:var(--mu)">No entries yet</div>'}
|
| 988 |
+
</div>`;
|
| 989 |
+
}).join('')}
|
| 990 |
+
</div>
|
| 991 |
+
<div class="section" style="margin-top:1rem">Worst-performing actions</div>
|
| 992 |
+
<div class="qtable-grid">
|
| 993 |
+
${Object.values((qt.worst_actions||[]).reduce((g,r)=>{ if(!g[r.agent])g[r.agent]=[];g[r.agent].push(r);return g },{})).map(rows=>{
|
| 994 |
+
const agent=rows[0].agent;
|
| 995 |
+
return `<div class="qt-agent">
|
| 996 |
+
<div class="qt-agent-hdr" style="color:var(--rd)">⚠ ${agent} — avoid</div>
|
| 997 |
+
${rows.map(r=>`<div class="qt-row"><span class="qt-action">${r.action}</span><span class="qt-val" style="color:var(--rd)">${r.q_value.toFixed(3)}</span></div>`).join('')}
|
| 998 |
+
</div>`;
|
| 999 |
+
}).join('')}
|
| 1000 |
+
</div>`;
|
| 1001 |
+
document.getElementById('tabBody').innerHTML=html;
|
| 1002 |
+
}
|
| 1003 |
+
|
| 1004 |
+
// Rewards tab: KPI summary, a 24h avg-reward bar chart, and the static scoring-model table.
function renderRewards(){
  const rw = stats?.rewards || {};
  // Scale bars against the largest |avg_reward| seen; the 0.001 floor avoids divide-by-zero.
  const peak = Math.max(...trend.map(t => Math.abs(t.avg_reward || 0)), 0.001);
  let bars;
  if (!trend.length) {
    bars = '<div style="color:var(--mu);font-family:DM Mono,monospace;font-size:.75rem;margin:auto">No reward data yet</div>';
  } else {
    const chunks = [];
    for (const t of trend) {
      const h = Math.max(3, Math.abs(t.avg_reward || 0) / peak * 100);
      const col = t.avg_reward >= 0 ? 'var(--gr)' : 'var(--rd)';
      const hStr = new Date(t.ts * 1000).getHours() + 'h';
      chunks.push(`<div class="t-bar-wrap"><div style="flex:1;display:flex;align-items:flex-end;width:100%"><div class="t-bar" style="height:${h}%;background:${col}" title="avg=${t.avg_reward} n=${t.count}"></div></div><div class="t-lbl">${hStr}</div></div>`);
    }
    bars = chunks.join('');
  }

  // Static description of the reward heuristics (presumably mirrors the server-side scorer — confirm).
  const scoringModel = [
    ['error_penalty','-0.40','Any event with status=error'],
    ['latency_fast (LLM <500ms)','+0.30','LLM call completed quickly'],
    ['latency_ok (500-1500ms)','+0.10','LLM call acceptable latency'],
    ['latency_slow (1500-4000ms)','-0.10','LLM call slow'],
    ['latency_very_slow (>4000ms)','-0.30','LLM call very slow'],
    ['token_efficiency','+0.10','Output/input ratio > 0.5'],
    ['react_progress','+0.10','Each ReAct step completed'],
    ['skill_reuse','+0.15','Skill loaded from FORGE'],
    ['reflection_bonus','+0.20','Agent performed self-reflection']
  ];

  document.getElementById('tabBody').innerHTML = `
    <div class="kpis">
      <div class="kpi"><div class="kpi-n">${rw.total||0}</div><div class="kpi-l">Total scored</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--gr)">${rw.avg_all_time?.toFixed(3)||'—'}</div><div class="kpi-l">All-time avg</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--cy)">${rw.last_24h?.count||0}</div><div class="kpi-l">Last 24h</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--cy)">${rw.last_24h?.avg?.toFixed(3)||'—'}</div><div class="kpi-l">24h avg</div></div>
    </div>
    <div class="trend-container">
      <div class="trend-title">Avg reward per hour (24h)</div>
      <div class="trend-chart">${bars}</div>
    </div>
    <div class="section">Scoring model</div>
    <div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
      ${scoringModel.map(([k,v,d])=>`<div class="config-row"><span class="config-key">${k}</span><span class="config-val">${v}</span><span class="config-desc">${d}</span></div>`).join('')}
    </div>`;
}
|
| 1030 |
+
|
| 1031 |
+
// RLHF tab: label-count KPIs plus a table of stored prompt/completion pairs.
function renderRLHF(){
  const s = stats?.rlhf || {};
  // One <tr> per stored entry; long prompt/completion text is truncated in the cell,
  // with the full (escaped) text kept in the title-attribute tooltip.
  const rowHtml = r => `<tr>
          <td>${r.agent}</td>
          <td title="${esc(r.prompt)}">${esc(r.prompt.slice(0,40))}...</td>
          <td title="${esc(r.completion)}">${esc(r.completion.slice(0,50))}...</td>
          <td><span class="badge badge-${r.label}">${r.label}</span></td>
          <td style="color:${(r.reward||0)>=0?'var(--gr)':'var(--rd)'}">${r.reward!=null?r.reward:'—'}</td>
          <td style="color:var(--mu)">${r.source}</td>
        </tr>`;
  const tbody = rlhf.length
    ? rlhf.map(rowHtml).join('')
    : '<tr><td colspan="6" class="empty">No RLHF entries yet</td></tr>';
  document.getElementById('tabBody').innerHTML = `
    <div class="kpis">
      <div class="kpi"><div class="kpi-n">${s.total||0}</div><div class="kpi-l">Total entries</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--gr)">${s.by_label?.approved||0}</div><div class="kpi-l">Approved</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--rd)">${s.by_label?.rejected||0}</div><div class="kpi-l">Rejected</div></div>
      <div class="kpi"><div class="kpi-n" style="color:var(--mu)">${s.by_label?.unlabeled||0}</div><div class="kpi-l">Unlabeled</div></div>
    </div>
    <table class="rlhf-table" style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
      <thead><tr><th>Agent</th><th>Prompt</th><th>Completion</th><th>Label</th><th>Reward</th><th>Source</th></tr></thead>
      <tbody>
        ${tbody}
      </tbody>
    </table>`;
}
|
| 1054 |
+
|
| 1055 |
+
// Skill-candidates tab: one card per pending candidate with promote/reject buttons
// that call updateCand() with the chosen status.
function renderCandidates(){
  const card = c => `
    <div class="cand-card">
      <div class="cand-freq">${c.frequency}x</div>
      <div style="flex:1">
        <div class="cand-desc">${esc(c.description)}</div>
        <div class="cand-meta">from ${c.agent} · ${new Date(c.created_at*1000).toLocaleDateString()}</div>
      </div>
      <div style="display:flex;flex-direction:column;gap:.35rem">
        <button class="btn btn-approve" onclick="updateCand('${c.id}','promoted')">⇧ Promote</button>
        <button class="btn btn-reject" onclick="updateCand('${c.id}','rejected')">✕ Reject</button>
      </div>
    </div>`;
  const cards = candidates.length
    ? candidates.map(card).join('')
    : '<div class="empty">No pending skill candidates</div>';
  document.getElementById('tabBody').innerHTML = `
    <p style="font-family:'DM Mono',monospace;font-size:.75rem;color:var(--mu);margin-bottom:1rem">
      Patterns detected by agents that recur ${3}+ times. Promote to FORGE or reject.
    </p>
    ${cards}`;
}
|
| 1073 |
+
|
| 1074 |
+
// Config tab: hyperparameter defaults, MCP connection snippet, and a copy-paste
// integration example for agents.
function renderConfig(){
  document.getElementById('tabBody').innerHTML=`
    <div class="section">Hyperparameters</div>
    <div style="background:var(--sf);border:1px solid var(--br);border-radius:8px;overflow:hidden">
      <div class="config-row"><span class="config-key">Learning rate α</span><span class="config-val" id="cfgLR">loading...</span><span class="config-desc">Q-value update step size</span></div>
      <div class="config-row"><span class="config-key">Discount γ</span><span class="config-val" id="cfgDisc">loading...</span><span class="config-desc">Future reward weight</span></div>
      <div class="config-row"><span class="config-key">Epsilon ε</span><span class="config-val" id="cfgEps">loading...</span><span class="config-desc">Exploration rate (random action probability)</span></div>
      <div class="config-row"><span class="config-key">Sync interval</span><span class="config-val" id="cfgSync">loading...</span><span class="config-desc">Trace pull frequency (seconds)</span></div>
      <div class="config-row"><span class="config-key">Trace URL</span><span class="config-val" id="cfgTrace">loading...</span><span class="config-desc">agent-trace endpoint</span></div>
    </div>
    <div class="section" style="margin-top:1rem">MCP connection</div>
    <pre style="background:var(--sf);border:1px solid var(--br);border-radius:6px;padding:.75rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--cy)">{"mcpServers":{"learn":{"command":"npx","args":["-y","mcp-remote","${window.location.origin}/mcp/sse"]}}}</pre>
    <div class="section" style="margin-top:1rem">Quick integration (NEXUS / any agent)</div>
    <pre style="background:var(--sf);border:1px solid var(--br);border-radius:6px;padding:.75rem;font-family:'DM Mono',monospace;font-size:.72rem;color:var(--gr)">LEARN_URL = "${window.location.origin}"

# Ask LEARN for best LLM to route to
import requests
resp = requests.post(f"{LEARN_URL}/api/q/best", json={
    "agent": "nexus",
    "state": {"agent": "nexus", "event": "model_selection"},
    "actions": ["qwen/qwen3.5-35b-a3b", "claude-haiku-4-5", "hf_api", "local_cpu"]
})
best = resp.json() # {"action": "qwen/qwen3.5-35b-a3b", "q_value": 0.72, "strategy": "exploit"}

# After inference, update Q-value
requests.post(f"{LEARN_URL}/api/q/update", json={
    "agent": "nexus",
    "state": {"agent": "nexus", "event": "model_selection"},
    "action": best["action"],
    "reward": 0.8 # from trace scoring
})</pre>`;
  // The displayed values are the documented env-default strings, not live server
  // state (the fetched health payload was never used). Fill them on success OR
  // failure so the fields can't stay stuck on "loading..." when /api/health is
  // unreachable or returns non-JSON — previously there was no .catch at all.
  const fill = () => {
    document.getElementById('cfgLR').textContent='0.1 (env: LEARN_RATE)';
    document.getElementById('cfgDisc').textContent='0.9 (env: DISCOUNT)';
    document.getElementById('cfgEps').textContent='0.15 (env: EPSILON)';
    document.getElementById('cfgSync').textContent='120s (env: SYNC_INTERVAL)';
    document.getElementById('cfgTrace').textContent='env: TRACE_URL';
  };
  fetch('/api/health').then(r=>r.json()).then(fill).catch(fill);
}
|
| 1113 |
+
|
| 1114 |
+
// Manually trigger a trace-sync/scoring pass and show progress on the button.
// Fix: the original had no error handling — if the POST or JSON parse threw, the
// button stayed disabled and reading "↻ Syncing..." forever. try/catch/finally
// guarantees the button is always re-enabled after 3s.
async function triggerSync(){
  const btn = document.querySelector('.btn-sync');
  btn.textContent = '↻ Syncing...';
  btn.disabled = true;
  try {
    const r = await fetch('/api/sync', {method:'POST'}).then(x=>x.json());
    btn.textContent = `↻ Scored ${r.scored||0}`;
    await loadAll();               // refresh dashboards with freshly scored data
  } catch (e) {
    btn.textContent = '↻ Sync failed';
  } finally {
    setTimeout(()=>{btn.textContent='↻ Sync Traces';btn.disabled=false;}, 3000);
  }
}
|
| 1122 |
+
|
| 1123 |
+
// Persist a promote/reject decision for a skill candidate, then re-render the tab.
async function updateCand(id, status){
  const opts = {
    method: 'PATCH',
    headers: {'Content-Type': 'application/json'},
    body: JSON.stringify({status})
  };
  await fetch(`/api/candidates/${id}`, opts);
  await loadCandidates();
  renderCandidates();
}
|
| 1127 |
+
|
| 1128 |
+
// Escape text for safe interpolation into HTML — both element content and
// double-quoted attribute values (e.g. the title="..." tooltips in renderRLHF).
// Fix: the replacements must map to HTML entities (& -> &amp;, etc.); as written
// they mapped each character to itself, escaping nothing. Also escape `"` so a
// quote in user text cannot break out of an attribute (HTML-injection vector).
// `&` is replaced first so later entities are not double-escaped.
function esc(s){
  return String(s || '')
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}
|
| 1129 |
+
|
| 1130 |
+
// Initial data load, then poll for fresh stats every 15 seconds.
loadAll();
setInterval(loadAll, 15000);
|
| 1131 |
+
</script>
|
| 1132 |
+
</body></html>"""
|
| 1133 |
+
|
| 1134 |
+
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the embedded single-page dashboard UI."""
    # Explicit charset so the non-ASCII glyphs in the SPA render correctly.
    return HTMLResponse(content=SPA, media_type="text/html; charset=utf-8")
|
| 1136 |
+
|
| 1137 |
+
if __name__ == "__main__":
    # Bind to all interfaces so the container's exposed port is reachable.
    uvicorn.run(app, host="0.0.0.0", port=PORT, log_level="info")
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi>=0.111.0
|
| 2 |
+
uvicorn>=0.30.0
|
| 3 |
+
httpx>=0.27.0
|