Spaces:
Sleeping
Sleeping
Create rag_eval_metrics.py
Browse files- rag_eval_metrics.py +59 -0
rag_eval_metrics.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ====== Eval tab (optional, tiny and safe) ======
|
| 2 |
+
import json
import subprocess
import sys
import textwrap
|
| 3 |
+
|
| 4 |
+
def _run_eval_inproc(gold_path: str, k: int = 8):
|
| 5 |
+
# Run the evaluator script and return file paths + JSON summary
|
| 6 |
+
out_dir = str(ARTIFACT_DIR)
|
| 7 |
+
logs = str(LOG_PATH)
|
| 8 |
+
cmd = [
|
| 9 |
+
sys.executable, "rag_eval_metrics.py",
|
| 10 |
+
"--gold_csv", gold_path,
|
| 11 |
+
"--logs_jsonl", logs,
|
| 12 |
+
"--k", str(k),
|
| 13 |
+
"--out_dir", out_dir
|
| 14 |
+
]
|
| 15 |
+
try:
|
| 16 |
+
p = subprocess.run(cmd, capture_output=True, text=True, check=False)
|
| 17 |
+
stdout = p.stdout or ""
|
| 18 |
+
stderr = p.stderr or ""
|
| 19 |
+
# Read outputs
|
| 20 |
+
perq = ARTIFACT_DIR / "metrics_per_question.csv"
|
| 21 |
+
agg = ARTIFACT_DIR / "metrics_aggregate.json"
|
| 22 |
+
agg_json = {}
|
| 23 |
+
if agg.exists():
|
| 24 |
+
import json
|
| 25 |
+
agg_json = json.loads(agg.read_text(encoding="utf-8"))
|
| 26 |
+
# Minimal report for the UI
|
| 27 |
+
report = "```\n" + (stdout.strip() or "(no stdout)") + "\n" + (stderr.strip() or "") + "\n```"
|
| 28 |
+
return (str(perq) if perq.exists() else None,
|
| 29 |
+
str(agg) if agg.exists() else None,
|
| 30 |
+
agg_json,
|
| 31 |
+
report)
|
| 32 |
+
except Exception as e:
|
| 33 |
+
return (None, None, {}, f"**Eval error:** {e}")
|
| 34 |
+
|
| 35 |
+
# Eval tab: upload a gold CSV, score it against the app logs, download metrics.
with gr.Tab("📏 Evaluate (Gold vs Logs)"):
    gr.Markdown("Upload your **gold.csv** (question, reference_answer, support_docs_pages, type) and compute metrics against the app logs.")

    # --- input row: gold file + retrieval cutoff ---
    with gr.Row():
        gold_file = gr.File(label="gold.csv", file_types=[".csv"], interactive=True)
        k_slider = gr.Slider(3, 12, value=8, step=1, label="k for Hit/Recall/nDCG")

    # --- action row ---
    with gr.Row():
        btn_eval = gr.Button("Compute Metrics", variant="primary")

    # --- output row: downloadable artifacts ---
    with gr.Row():
        out_perq = gr.File(label="Per-question metrics (CSV)")
        out_agg = gr.File(label="Aggregate metrics (JSON)")
    out_json = gr.JSON(label="Aggregate summary")
    out_log = gr.Markdown(label="Run log")

    def _eval_wrapper(gf, k):
        # Resolve the gold file — the upload wins; otherwise fall back to a
        # repo-root gold.csv — then delegate to the subprocess runner.
        if gf is not None:
            return _run_eval_inproc(gf.name, int(k))
        fallback = Path("gold.csv")
        if fallback.exists():
            return _run_eval_inproc(str(fallback), int(k))
        return None, None, {}, "**No gold.csv provided or found in repo root.**"

    btn_eval.click(_eval_wrapper, inputs=[gold_file, k_slider], outputs=[out_perq, out_agg, out_json, out_log])
|