Spaces:

Inframat-x
/

For_evaluation

Sleeping

App Files Files Community

Inframat-x committed on Nov 24, 2025

Commit

a269863

verified ·

1 Parent(s): ed83d97

Update app.py

Browse files

Files changed (1) hide show

app.py +186 -8

app.py CHANGED Viewed

@@ -6,6 +6,9 @@
 # - Stable categoricals ("NA"); no over-strict completeness gate
 # - Fixed [[PAGE=...]] regex
 # - NEW: Lightweight instrumentation (JSONL logs per RAG turn)
 # ================================================================
 # ---------------------- Runtime flags (HF-safe) ----------------------
@@ -17,7 +20,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 # ------------------------------- Imports ------------------------------
 import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
 from pathlib import Path
-from typing import List, Dict, Any
 import numpy as np
 import pandas as pd
@@ -740,7 +743,7 @@ def rag_reply(
         "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
         "output": {
             "final_answer": final,
-            "used_sentences": [{"sent": s["sent"], "doc": s["doc"], "page": s["page"]} for s in selected]
         },
         "latency_ms_total": total_ms,
         "latency_ms_llm": llm_latency_ms,
@@ -818,7 +821,7 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
     color: #eef6ff !important;
 }
-/* NEW — Evaluate tab dark/high-contrast styling */
 #eval-tab .block, #eval-tab .group, #eval-tab .accordion {
     background: linear-gradient(165deg, #0a0f1f 0%, #0d1a31 60%, #0a1c2e 100%) !important;
     border-radius: 12px;
@@ -849,6 +852,142 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
 /* Predictor output emphasis */
 #pred-out .wrap { font-size: 20px; font-weight: 700; color: #ecfdf5; }
 """
 theme = gr.themes.Soft(
@@ -866,6 +1005,31 @@ theme = gr.themes.Soft(
 )
 with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
     gr.Markdown(
         "<h1 style='margin:0'>Self-Sensing Concrete Assistant</h1>"
         "<p style='opacity:.9'>"
@@ -987,14 +1151,14 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
             gr.Markdown("Upload your **gold.csv** and compute metrics against the app logs.")
             with gr.Row():
                 gold_file = gr.File(label="gold.csv", file_types=[".csv"], interactive=True)
-                k_slider  = gr.Slider(3, 12, value=8, step=1, label="k for Hit/Recall/nDCG")
             with gr.Row():
                 btn_eval = gr.Button("Compute Metrics", variant="primary")
             with gr.Row():
-                out_perq = gr.File(label="Per-question metrics (CSV)")
-                out_agg  = gr.File(label="Aggregate metrics (JSON)")
-            out_json = gr.JSON(label="Aggregate summary")
-            out_log  = gr.Markdown(label="Run log")
             def _run_eval_inproc(gold_path: str, k: int = 8):
                 import json as _json
@@ -1041,3 +1205,17 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
 # ------------- Launch -------------
 if __name__ == "__main__":
     demo.queue().launch()

 # - Stable categoricals ("NA"); no over-strict completeness gate
 # - Fixed [[PAGE=...]] regex
 # - NEW: Lightweight instrumentation (JSONL logs per RAG turn)
+# - UPDATED THEME: Dark-blue tabs + Evaluate tab + k-slider styling
+# - PATCH: Per-question/aggregate File + JSON outputs now dark-themed via elem_id hooks
+# - OPTIONAL JS: Adds .eval-active class when Evaluate tab is selected
 # ================================================================
 # ---------------------- Runtime flags (HF-safe) ----------------------
 # ------------------------------- Imports ------------------------------
 import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
 from pathlib import Path
+from typing import List, Dict, Any, Optional
 import numpy as np
 import pandas as pd
         "retrieval": {"hits": retr_list, "latency_ms_retriever": latency_ms_retriever},
         "output": {
             "final_answer": final,
+            "used_sentences": [{"sent": s['sent'], "doc": s['doc'], "page": s['page']} for s in selected]
         },
         "latency_ms_total": total_ms,
         "latency_ms_llm": llm_latency_ms,
     color: #eef6ff !important;
 }
+/* Evaluate tab dark/high-contrast styling */
 #eval-tab .block, #eval-tab .group, #eval-tab .accordion {
     background: linear-gradient(165deg, #0a0f1f 0%, #0d1a31 60%, #0a1c2e 100%) !important;
     border-radius: 12px;
 /* Predictor output emphasis */
 #pred-out .wrap { font-size: 20px; font-weight: 700; color: #ecfdf5; }
+/* Tab header: darker blue theme for all tabs */
+.gradio-container .tab-nav button[role="tab"] {
+  background: #0b1b34 !important;
+  color: #cfe6ff !important;
+  border: 1px solid #1e3a8a !important;
+}
+.gradio-container .tab-nav button[role="tab"][aria-selected="true"] {
+  background: #0e2a57 !important;
+  color: #e0f2fe !important;
+  border-color: #3b82f6 !important;
+}
+/* Evaluate tab: enforce light-blue text (readable on the dark-blue panels) for labels/marks */
+#eval-tab .label,
+#eval-tab label,
+#eval-tab .gr-slider .label,
+#eval-tab .wrap .label,
+#eval-tab .prose,
+#eval-tab .markdown,
+#eval-tab p,
+#eval-tab span {
+  color: #cfe6ff !important; /* softer than pure white */
+}
+/* Target the specific k-slider label strongly */
+#k-slider .label,
+#k-slider label,
+#k-slider .wrap .label {
+  color: #cfe6ff !important;
+  text-shadow: 0 1px 0 rgba(0,0,0,0.35);
+}
+/* Slider track/thumb (dark blue gradient + blue thumb) */
+#eval-tab input[type="range"] {
+  accent-color: #3b82f6 !important; /* fallback */
+}
+/* WebKit */
+#eval-tab input[type="range"]::-webkit-slider-runnable-track {
+  height: 6px;
+  background: linear-gradient(90deg, #0b3b68, #1e3a8a);
+  border-radius: 4px;
+}
+#eval-tab input[type="range"]::-webkit-slider-thumb {
+  -webkit-appearance: none;
+  appearance: none;
+  margin-top: -6px;            /* centers thumb on 6px track */
+  width: 18px; height: 18px;
+  background: #1d4ed8;
+  border: 1px solid #60a5fa;
+  border-radius: 50%;
+}
+/* Firefox */
+#eval-tab input[type="range"]::-moz-range-track {
+  height: 6px;
+  background: linear-gradient(90deg, #0b3b68, #1e3a8a);
+  border-radius: 4px;
+}
+#eval-tab input[type="range"]::-moz-range-thumb {
+  width: 18px; height: 18px;
+  background: #1d4ed8;
+  border: 1px solid #60a5fa;
+  border-radius: 50%;
+}
+/* ======== PATCH: Style the File + JSON outputs by ID ======== */
+#perq-file, #agg-file {
+  background: rgba(8, 13, 26, 0.9) !important;
+  border: 1px solid #3b82f6 !important;
+  border-radius: 12px !important;
+  padding: 8px !important;
+}
+#perq-file * , #agg-file * { color: #dbeafe !important; }
+#perq-file a, #agg-file a {
+  background: #0e2a57 !important;
+  color: #e0f2fe !important;
+  border: 1px solid #60a5fa !important;
+  border-radius: 8px !important;
+  padding: 6px 10px !important;
+  text-decoration: none !important;
+}
+#perq-file a:hover, #agg-file a:hover {
+  background: #10356f !important;
+  border-color: #93c5fd !important;
+}
+/* File preview wrappers (covers multiple Gradio render modes) */
+#perq-file .file-preview, #agg-file .file-preview,
+#perq-file .wrap, #agg-file .wrap {
+  background: rgba(2, 6, 23, 0.85) !important;
+  border-radius: 10px !important;
+  border: 1px solid rgba(148,163,184,.3) !important;
+}
+/* JSON output: dark panel + readable text */
+#agg-json {
+  background: rgba(2, 6, 23, 0.85) !important;
+  border: 1px solid rgba(148,163,184,.35) !important;
+  border-radius: 12px !important;
+  padding: 8px !important;
+}
+#agg-json *, #agg-json .json, #agg-json .wrap { color: #e6f2ff !important; }
+#agg-json pre, #agg-json code {
+  background: rgba(4, 10, 24, 0.9) !important;
+  color: #e2e8f0 !important;
+  border: 1px solid rgba(148,163,184,.35) !important;
+  border-radius: 10px !important;
+}
+/* Tree/overflow modes */
+#agg-json [data-testid="json-tree"],
+#agg-json [role="tree"],
+#agg-json .overflow-auto {
+  background: rgba(4, 10, 24, 0.9) !important;
+  color: #e6f2ff !important;
+  border-radius: 10px !important;
+  border: 1px solid rgba(148,163,184,.35) !important;
+}
+/* Eval log markdown */
+#eval-log, #eval-log * { color: #cfe6ff !important; }
+#eval-log pre, #eval-log code {
+  background: rgba(2, 6, 23, 0.85) !important;
+  color: #e2e8f0 !important;
+  border: 1px solid rgba(148,163,184,.3) !important;
+  border-radius: 10px !important;
+}
+/* When Evaluate tab is active and JS has added .eval-active, bump contrast subtly */
+#eval-tab.eval-active .block,
+#eval-tab.eval-active .group {
+  border-color: #60a5fa !important;
+}
+#eval-tab.eval-active .label {
+  color: #e6f2ff !important;
+}
 """
 theme = gr.themes.Soft(
 )
 with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
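+    # Optional: JS to toggle .eval-active when the Evaluate tab is selected.
+    # NOTE(review): <script> tags placed inside gr.HTML are usually inserted as
+    # inert markup and may never execute; if .eval-active is not being applied,
+    # move this snippet to gr.Blocks(js=...) or gr.Blocks(head=...) — confirm
+    # against the Gradio version pinned for this Space.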
+    gr.HTML("""
+    <script>
+    (function(){
+      const applyEvalActive = () => {
+        const selected = document.querySelector('.tab-nav button[role="tab"][aria-selected="true"]');
+        const evalPanel = document.querySelector('#eval-tab');
+        if (!evalPanel) return;
+        if (selected && /Evaluate/.test(selected.textContent)) {
+          evalPanel.classList.add('eval-active');
+        } else {
+          evalPanel.classList.remove('eval-active');
+        }
+      };
+      document.addEventListener('click', function(e) {
+        if (e.target && e.target.getAttribute('role') === 'tab') {
+          setTimeout(applyEvalActive, 50);
+        }
+      }, true);
+      document.addEventListener('DOMContentLoaded', applyEvalActive);
+      setTimeout(applyEvalActive, 300);
+    })();
+    </script>
+    """)
     gr.Markdown(
         "<h1 style='margin:0'>Self-Sensing Concrete Assistant</h1>"
         "<p style='opacity:.9'>"
             gr.Markdown("Upload your **gold.csv** and compute metrics against the app logs.")
             with gr.Row():
                 gold_file = gr.File(label="gold.csv", file_types=[".csv"], interactive=True)
+                k_slider  = gr.Slider(3, 12, value=8, step=1, label="k for Hit/Recall/nDCG", elem_id="k-slider")
             with gr.Row():
                 btn_eval = gr.Button("Compute Metrics", variant="primary")
             with gr.Row():
+                out_perq = gr.File(label="Per-question metrics (CSV)", elem_id="perq-file")
+                out_agg  = gr.File(label="Aggregate metrics (JSON)", elem_id="agg-file")
+            out_json = gr.JSON(label="Aggregate summary", elem_id="agg-json")
+            out_log  = gr.Markdown(label="Run log", elem_id="eval-log")
             def _run_eval_inproc(gold_path: str, k: int = 8):
                 import json as _json
 # ------------- Launch -------------
 if __name__ == "__main__":
     demo.queue().launch()
+    import os
+import pandas as pd
+# Folder where your RAG files are stored
+folder = "papers"  # change if needed
+# List all files in the folder
+files = sorted(os.listdir(folder))
+# Save them to a CSV file
+pd.DataFrame({"doc": files}).to_csv("paper_list.csv", index=False)
+print("✅ Saved paper_list.csv with", len(files), "papers")