Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,6 +7,7 @@
|
|
| 7 |
# - Fixed [[PAGE=...]] regex
|
| 8 |
# - NEW: Lightweight instrumentation (JSONL logs per RAG turn)
|
| 9 |
# - UPDATED THEME: Dark-blue tabs + Evaluate tab + k-slider styling
|
|
|
|
| 10 |
# - OPTIONAL JS: Adds .eval-active class when Evaluate tab is selected
|
| 11 |
# ================================================================
|
| 12 |
|
|
@@ -19,7 +20,7 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
| 19 |
# ------------------------------- Imports ------------------------------
|
| 20 |
import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
|
| 21 |
from pathlib import Path
|
| 22 |
-
from typing import List, Dict, Any
|
| 23 |
|
| 24 |
import numpy as np
|
| 25 |
import pandas as pd
|
|
@@ -918,6 +919,67 @@ input[type="checkbox"], .gr-checkbox, .gr-checkbox > * { pointer-events: auto !i
|
|
| 918 |
border-radius: 50%;
|
| 919 |
}
|
| 920 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
/* When Evaluate tab is active and JS has added .eval-active, bump contrast subtly */
|
| 922 |
#eval-tab.eval-active .block,
|
| 923 |
#eval-tab.eval-active .group {
|
|
@@ -957,15 +1019,12 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 957 |
evalPanel.classList.remove('eval-active');
|
| 958 |
}
|
| 959 |
};
|
| 960 |
-
// After tab clicks
|
| 961 |
document.addEventListener('click', function(e) {
|
| 962 |
if (e.target && e.target.getAttribute('role') === 'tab') {
|
| 963 |
setTimeout(applyEvalActive, 50);
|
| 964 |
}
|
| 965 |
}, true);
|
| 966 |
-
// On initial load
|
| 967 |
document.addEventListener('DOMContentLoaded', applyEvalActive);
|
| 968 |
-
// Fallback timer in case of hydration delays
|
| 969 |
setTimeout(applyEvalActive, 300);
|
| 970 |
})();
|
| 971 |
</script>
|
|
@@ -1092,15 +1151,14 @@ with gr.Blocks(css=CSS, theme=theme, fill_height=True) as demo:
|
|
| 1092 |
gr.Markdown("Upload your **gold.csv** and compute metrics against the app logs.")
|
| 1093 |
with gr.Row():
|
| 1094 |
gold_file = gr.File(label="gold.csv", file_types=[".csv"], interactive=True)
|
| 1095 |
-
# Add elem_id so CSS can target this slider
|
| 1096 |
k_slider = gr.Slider(3, 12, value=8, step=1, label="k for Hit/Recall/nDCG", elem_id="k-slider")
|
| 1097 |
with gr.Row():
|
| 1098 |
btn_eval = gr.Button("Compute Metrics", variant="primary")
|
| 1099 |
with gr.Row():
|
| 1100 |
-
out_perq = gr.File(label="Per-question metrics (CSV)")
|
| 1101 |
-
out_agg = gr.File(label="Aggregate metrics (JSON)")
|
| 1102 |
-
out_json = gr.JSON(label="Aggregate summary")
|
| 1103 |
-
out_log = gr.Markdown(label="Run log")
|
| 1104 |
|
| 1105 |
def _run_eval_inproc(gold_path: str, k: int = 8):
|
| 1106 |
import json as _json
|
|
|
|
| 7 |
# - Fixed [[PAGE=...]] regex
|
| 8 |
# - NEW: Lightweight instrumentation (JSONL logs per RAG turn)
|
| 9 |
# - UPDATED THEME: Dark-blue tabs + Evaluate tab + k-slider styling
|
| 10 |
+
# - PATCH: Per-question/aggregate File + JSON outputs now dark-themed via elem_id hooks
|
| 11 |
# - OPTIONAL JS: Adds .eval-active class when Evaluate tab is selected
|
| 12 |
# ================================================================
|
| 13 |
|
|
|
|
| 20 |
# ------------------------------- Imports ------------------------------
|
| 21 |
import re, joblib, warnings, json, traceback, time, uuid, subprocess, sys
|
| 22 |
from pathlib import Path
|
| 23 |
+
from typing import List, Dict, Any, Optional
|
| 24 |
|
| 25 |
import numpy as np
|
| 26 |
import pandas as pd
|
|
|
|
| 919 |
border-radius: 50%;
|
| 920 |
}
|
| 921 |
|
| 922 |
+
/* ======== PATCH: Style the File + JSON outputs by ID ======== */
|
| 923 |
+
#perq-file, #agg-file {
|
| 924 |
+
background: rgba(8, 13, 26, 0.9) !important;
|
| 925 |
+
border: 1px solid #3b82f6 !important;
|
| 926 |
+
border-radius: 12px !important;
|
| 927 |
+
padding: 8px !important;
|
| 928 |
+
}
|
| 929 |
+
#perq-file * , #agg-file * { color: #dbeafe !important; }
|
| 930 |
+
#perq-file a, #agg-file a {
|
| 931 |
+
background: #0e2a57 !important;
|
| 932 |
+
color: #e0f2fe !important;
|
| 933 |
+
border: 1px solid #60a5fa !important;
|
| 934 |
+
border-radius: 8px !important;
|
| 935 |
+
padding: 6px 10px !important;
|
| 936 |
+
text-decoration: none !important;
|
| 937 |
+
}
|
| 938 |
+
#perq-file a:hover, #agg-file a:hover {
|
| 939 |
+
background: #10356f !important;
|
| 940 |
+
border-color: #93c5fd !important;
|
| 941 |
+
}
|
| 942 |
+
/* File preview wrappers (covers multiple Gradio render modes) */
|
| 943 |
+
#perq-file .file-preview, #agg-file .file-preview,
|
| 944 |
+
#perq-file .wrap, #agg-file .wrap {
|
| 945 |
+
background: rgba(2, 6, 23, 0.85) !important;
|
| 946 |
+
border-radius: 10px !important;
|
| 947 |
+
border: 1px solid rgba(148,163,184,.3) !important;
|
| 948 |
+
}
|
| 949 |
+
|
| 950 |
+
/* JSON output: dark panel + readable text */
|
| 951 |
+
#agg-json {
|
| 952 |
+
background: rgba(2, 6, 23, 0.85) !important;
|
| 953 |
+
border: 1px solid rgba(148,163,184,.35) !important;
|
| 954 |
+
border-radius: 12px !important;
|
| 955 |
+
padding: 8px !important;
|
| 956 |
+
}
|
| 957 |
+
#agg-json *, #agg-json .json, #agg-json .wrap { color: #e6f2ff !important; }
|
| 958 |
+
#agg-json pre, #agg-json code {
|
| 959 |
+
background: rgba(4, 10, 24, 0.9) !important;
|
| 960 |
+
color: #e2e8f0 !important;
|
| 961 |
+
border: 1px solid rgba(148,163,184,.35) !important;
|
| 962 |
+
border-radius: 10px !important;
|
| 963 |
+
}
|
| 964 |
+
/* Tree/overflow modes */
|
| 965 |
+
#agg-json [data-testid="json-tree"],
|
| 966 |
+
#agg-json [role="tree"],
|
| 967 |
+
#agg-json .overflow-auto {
|
| 968 |
+
background: rgba(4, 10, 24, 0.9) !important;
|
| 969 |
+
color: #e6f2ff !important;
|
| 970 |
+
border-radius: 10px !important;
|
| 971 |
+
border: 1px solid rgba(148,163,184,.35) !important;
|
| 972 |
+
}
|
| 973 |
+
|
| 974 |
+
/* Eval log markdown */
|
| 975 |
+
#eval-log, #eval-log * { color: #cfe6ff !important; }
|
| 976 |
+
#eval-log pre, #eval-log code {
|
| 977 |
+
background: rgba(2, 6, 23, 0.85) !important;
|
| 978 |
+
color: #e2e8f0 !important;
|
| 979 |
+
border: 1px solid rgba(148,163,184,.3) !important;
|
| 980 |
+
border-radius: 10px !important;
|
| 981 |
+
}
|
| 982 |
+
|
| 983 |
/* When Evaluate tab is active and JS has added .eval-active, bump contrast subtly */
|
| 984 |
#eval-tab.eval-active .block,
|
| 985 |
#eval-tab.eval-active .group {
|
|
|
|
| 1019 |
evalPanel.classList.remove('eval-active');
|
| 1020 |
}
|
| 1021 |
};
|
|
|
|
| 1022 |
document.addEventListener('click', function(e) {
|
| 1023 |
if (e.target && e.target.getAttribute('role') === 'tab') {
|
| 1024 |
setTimeout(applyEvalActive, 50);
|
| 1025 |
}
|
| 1026 |
}, true);
|
|
|
|
| 1027 |
document.addEventListener('DOMContentLoaded', applyEvalActive);
|
|
|
|
| 1028 |
setTimeout(applyEvalActive, 300);
|
| 1029 |
})();
|
| 1030 |
</script>
|
|
|
|
| 1151 |
gr.Markdown("Upload your **gold.csv** and compute metrics against the app logs.")
|
| 1152 |
with gr.Row():
|
| 1153 |
gold_file = gr.File(label="gold.csv", file_types=[".csv"], interactive=True)
|
|
|
|
| 1154 |
k_slider = gr.Slider(3, 12, value=8, step=1, label="k for Hit/Recall/nDCG", elem_id="k-slider")
|
| 1155 |
with gr.Row():
|
| 1156 |
btn_eval = gr.Button("Compute Metrics", variant="primary")
|
| 1157 |
with gr.Row():
|
| 1158 |
+
out_perq = gr.File(label="Per-question metrics (CSV)", elem_id="perq-file")
|
| 1159 |
+
out_agg = gr.File(label="Aggregate metrics (JSON)", elem_id="agg-file")
|
| 1160 |
+
out_json = gr.JSON(label="Aggregate summary", elem_id="agg-json")
|
| 1161 |
+
out_log = gr.Markdown(label="Run log", elem_id="eval-log")
|
| 1162 |
|
| 1163 |
def _run_eval_inproc(gold_path: str, k: int = 8):
|
| 1164 |
import json as _json
|