Spaces:

Uddiii
/

Multi-Agentic

Running

App Files Community

Uddiii committed on Apr 25

Commit

d8c3b18

1 Parent(s): 9c68ba6

kaggle: route Patient + Nurse to 8B-instant pool

Browse files

Files changed (5) hide show

.env.example +5 -2
ER_MAP/dashboard.py +8 -7
ER_MAP/evaluate_baseline.py +2 -2
kaggle/KAGGLE.md +19 -5
kaggle/train_ermap_grpo_kaggle.ipynb +5 -2

.env.example CHANGED Viewed

@@ -23,9 +23,12 @@ GROQ_MEDICAL_JUDGE_API_KEY=
 GROQ_API_KEY=
 # --- Per-role models (override the in-code defaults if you want) ---
 ERMAP_DOCTOR_MODEL=llama-3.1-8b-instant
-ERMAP_NURSE_MODEL=llama-3.3-70b-versatile
-ERMAP_PATIENT_MODEL=llama-3.3-70b-versatile
 ERMAP_EMPATHY_JUDGE_MODEL=llama-3.3-70b-versatile
 ERMAP_MEDICAL_JUDGE_MODEL=llama-3.3-70b-versatile

 GROQ_API_KEY=
 # --- Per-role models (override the in-code defaults if you want) ---
+# Traffic-shaping: high-volume roleplay agents (Doctor/Nurse/Patient) on
+# the 8B pool (500K TPD); only the two judges hit the smaller 70B pool
+# (100K TPD) because their grading quality directly shapes the reward.
 ERMAP_DOCTOR_MODEL=llama-3.1-8b-instant
+ERMAP_NURSE_MODEL=llama-3.1-8b-instant
+ERMAP_PATIENT_MODEL=llama-3.1-8b-instant
 ERMAP_EMPATHY_JUDGE_MODEL=llama-3.3-70b-versatile
 ERMAP_MEDICAL_JUDGE_MODEL=llama-3.3-70b-versatile

ER_MAP/dashboard.py CHANGED Viewed

@@ -46,14 +46,15 @@ _DEMO_KEYS = {
     "GROQ_EMPATHY_JUDGE_API_KEY":   "",
     "GROQ_MEDICAL_JUDGE_API_KEY":   "",
-    # --- Per-role models ---
-    # Doctor runs the small/fast tier (Llama-3.1-8B-Instant) — Groq does
-    # not host a Llama-7B; 8B is the closest Llama small-tier on Groq and
-    # gives ~14 400 req/day per key vs ~6 000/day for 70B.
     "ERMAP_DOCTOR_MODEL":           "llama-3.1-8b-instant",
-    # Everyone else runs Llama-3.3-70B for nuanced personas / grading.
-    "ERMAP_NURSE_MODEL":            "llama-3.3-70b-versatile",
-    "ERMAP_PATIENT_MODEL":          "llama-3.3-70b-versatile",
     "ERMAP_EMPATHY_JUDGE_MODEL":    "llama-3.3-70b-versatile",
     "ERMAP_MEDICAL_JUDGE_MODEL":    "llama-3.3-70b-versatile",

     "GROQ_EMPATHY_JUDGE_API_KEY":   "",
     "GROQ_MEDICAL_JUDGE_API_KEY":   "",
+    # --- Per-role models (traffic-shaping for free-tier budget) ---
+    # High-volume agents (Doctor / Nurse / Patient — fire on every env
+    # step) run the 8B-instant pool: 14 400 RPD / 500K TPD per account.
+    # The two judges fire mostly on terminal events but their grading
+    # quality directly shapes the reward, so they stay on 70B-versatile
+    # (1 000 RPD / 100K TPD pool — separate budget).
     "ERMAP_DOCTOR_MODEL":           "llama-3.1-8b-instant",
+    "ERMAP_NURSE_MODEL":            "llama-3.1-8b-instant",
+    "ERMAP_PATIENT_MODEL":          "llama-3.1-8b-instant",
     "ERMAP_EMPATHY_JUDGE_MODEL":    "llama-3.3-70b-versatile",
     "ERMAP_MEDICAL_JUDGE_MODEL":    "llama-3.3-70b-versatile",

ER_MAP/evaluate_baseline.py CHANGED Viewed

@@ -64,8 +64,8 @@ _DEMO_KEYS = {
     "GROQ_EMPATHY_JUDGE_API_KEY": "",
     "GROQ_MEDICAL_JUDGE_API_KEY": "",
     "ERMAP_DOCTOR_MODEL":         "llama-3.1-8b-instant",
-    "ERMAP_NURSE_MODEL":          "llama-3.3-70b-versatile",
-    "ERMAP_PATIENT_MODEL":        "llama-3.3-70b-versatile",
 }

     "GROQ_EMPATHY_JUDGE_API_KEY": "",
     "GROQ_MEDICAL_JUDGE_API_KEY": "",
     "ERMAP_DOCTOR_MODEL":         "llama-3.1-8b-instant",
+    "ERMAP_NURSE_MODEL":          "llama-3.1-8b-instant",
+    "ERMAP_PATIENT_MODEL":        "llama-3.1-8b-instant",
 }

kaggle/KAGGLE.md CHANGED Viewed

@@ -106,13 +106,27 @@ In practice: a single 12-hour session is usually enough to clear Phase 1 and pro
 ## Per-role Groq keys vs. one shared key
-The dashboard ships with 4 distinct Groq clients (Nurse, Patient, Empathy Judge, Medical Judge) and a fallback chain that walks across all four if any fails auth. Inside training:
-- Each env step does **1 Nurse + 1 Patient + occasionally 1 Empathy Judge + 1 Medical Judge call** (judges fire mostly on terminal actions, so call ratio is roughly 4 : 4 : 1 : 1).
-- 1 free Groq key = 14 400 req/day on 8B-instant or 6 000 req/day on 70B-versatile.
-- 120-episode training × 8 avg steps × 2 conversational LLM calls = ~2 000 calls. **Even one key is enough for a single training run**, but if you split across 4 keys you have 4× the daily headroom for re-runs.
-If you only have **one** Groq key, set just `GROQ_API_KEY` as a Kaggle Secret. Everything still works — the AgentRouter falls back to the same client for all roles.
 ---

 ## Per-role Groq keys vs. one shared key
+The dashboard ships with 4 distinct Groq clients (Nurse, Patient, Empathy Judge, Medical Judge) and a fallback chain that walks across all four if any fails auth. On Groq's free tier, rate limits are enforced per account, not per key, so keys from the same account *share* one budget — but the model split below still buys you real headroom because **each model has its own daily pool**.
+### Default model assignment (traffic-shaping)
+| Role | Model | Free-tier pool | Why |
+|---|---|---|---|
+| Nurse | `llama-3.1-8b-instant` | 14 400 RPD / 500K TPD | high-volume (every env step) |
+| Patient | `llama-3.1-8b-instant` | shared 8B pool | high-volume (every env step) |
+| Empathy Judge | `llama-3.3-70b-versatile` | 1 000 RPD / 100K TPD | grading quality directly shapes reward |
+| Medical Judge | `llama-3.3-70b-versatile` | shared 70B pool | grading quality directly shapes reward |
+Quick budget check for **one full 120-episode training run**:
+| Pool | Estimated calls/run | Daily ceiling | Headroom |
+|---|---|---|---|
+| 8B-instant (Nurse + Patient) | ~2 880 | 14 400 RPD | ~5x |
+| 70B-versatile (judges) | ~720 | 1 000 RPD | ~1.4x |
+You can do **one training run per day per account** comfortably. If you need to retry inside the same day, drop one of the two judges to 8B-instant temporarily — the reward signal degrades a little, but training keeps moving.
+If you only have **one** Groq key total, set just `GROQ_API_KEY` as a Kaggle Secret. Everything still works — the AgentRouter falls back to the same client for all roles, and traffic is still split across the separate per-model pools.
 ---

kaggle/train_ermap_grpo_kaggle.ipynb CHANGED Viewed

@@ -226,8 +226,11 @@
     "# Doctor-on-Kaggle is the LOCAL trained model, NOT a Groq call. The\n",
     "# Doctor's Groq key is therefore unused here, but Nurse / Patient /\n",
     "# Empathy Judge / Medical Judge all hit Groq once per env step.\n",
-    "os.environ[\"ERMAP_NURSE_MODEL\"]            = \"llama-3.3-70b-versatile\"\n",
-    "os.environ[\"ERMAP_PATIENT_MODEL\"]          = \"llama-3.3-70b-versatile\"\n",
     "os.environ[\"ERMAP_EMPATHY_JUDGE_MODEL\"]    = \"llama-3.3-70b-versatile\"\n",
     "os.environ[\"ERMAP_MEDICAL_JUDGE_MODEL\"]    = \"llama-3.3-70b-versatile\"\n",
     "\n",

     "# Doctor-on-Kaggle is the LOCAL trained model, NOT a Groq call. The\n",
     "# Doctor's Groq key is therefore unused here. Nurse and Patient hit\n",
     "# Groq on every env step; the two judges fire mostly on terminal actions.\n",
+    "# Traffic-shaping: high-volume roleplay agents (Nurse + Patient) on the\n",
+    "# 8B-instant pool (14 400 RPD / 500K TPD); the two judges stay on\n",
+    "# 70B-versatile because their grading quality directly shapes the reward.\n",
+    "os.environ[\"ERMAP_NURSE_MODEL\"]            = \"llama-3.1-8b-instant\"\n",
+    "os.environ[\"ERMAP_PATIENT_MODEL\"]          = \"llama-3.1-8b-instant\"\n",
     "os.environ[\"ERMAP_EMPATHY_JUDGE_MODEL\"]    = \"llama-3.3-70b-versatile\"\n",
     "os.environ[\"ERMAP_MEDICAL_JUDGE_MODEL\"]    = \"llama-3.3-70b-versatile\"\n",
     "\n",