Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
# app.py (final:
|
| 2 |
|
| 3 |
-
import os, io, json, zipfile, traceback
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
from datetime import datetime, timezone
|
|
@@ -46,6 +46,13 @@ def run_eval(conversation: str,
|
|
| 46 |
if not conversation or conversation.strip() == "":
|
| 47 |
return None, None, None, None, "❌ Please paste a conversation to evaluate."
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
# normalize weights
|
| 50 |
user_weights = {
|
| 51 |
"trust": w_trust, "accuracy": w_accuracy, "explain": w_explain,
|
|
@@ -107,10 +114,10 @@ def run_eval(conversation: str,
|
|
| 107 |
avg_row[c] = round(compare_df[c].mean(), 2)
|
| 108 |
avg_df = pd.DataFrame([avg_row])
|
| 109 |
|
| 110 |
-
# ----
|
| 111 |
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
| 112 |
-
|
| 113 |
-
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
|
| 114 |
for label, df, total in all_tables:
|
| 115 |
df2 = df.copy()
|
| 116 |
df2.loc[len(df2)] = {
|
|
@@ -123,9 +130,8 @@ def run_eval(conversation: str,
|
|
| 123 |
zf.writestr(f"results_{label}_{ts}.csv", df2.to_csv(index=False).encode("utf-8"))
|
| 124 |
zf.writestr(f"comparison_{ts}.csv", compare_df.to_csv(index=False).encode("utf-8"))
|
| 125 |
zf.writestr(f"judgments_{ts}.json", json.dumps(json_blobs, indent=2).encode("utf-8"))
|
| 126 |
-
zip_buffer.seek(0)
|
| 127 |
-
zip_bytes = zip_buffer.getvalue() # raw bytes
|
| 128 |
|
|
|
|
| 129 |
merged_tables = []
|
| 130 |
for label, df, total in all_tables:
|
| 131 |
merged_tables.append(pd.DataFrame({
|
|
@@ -139,7 +145,7 @@ def run_eval(conversation: str,
|
|
| 139 |
merged_df = pd.concat(merged_tables, ignore_index=True)
|
| 140 |
usage_text_all = "\n".join(token_usage_blocks)
|
| 141 |
|
| 142 |
-
return merged_df, compare_df, (avg_df if avg_df is not None else pd.DataFrame()), zip_bytes, usage_text_all
|
| 143 |
|
| 144 |
except Exception as e:
|
| 145 |
tb = traceback.format_exc()
|
|
@@ -175,7 +181,7 @@ with gr.Blocks(title="FinanceEval – Hybrid Judge (Gradio)") as demo:
|
|
| 175 |
compare_out = gr.Dataframe()
|
| 176 |
avg_out = gr.Dataframe()
|
| 177 |
with gr.Tab("Downloads & Usage"):
|
| 178 |
-
zip_file = gr.File(label="Download ZIP (CSVs + JSON)", type="
|
| 179 |
usage_text = gr.Textbox(label="Token Usage / Errors", lines=8)
|
| 180 |
|
| 181 |
run_btn.click(
|
|
|
|
| 1 |
+
# app.py (final: temp-file ZIP + auto-clean + error handling)
|
| 2 |
|
| 3 |
+
import os, glob, json, zipfile, traceback
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
| 6 |
from datetime import datetime, timezone
|
|
|
|
| 46 |
if not conversation or conversation.strip() == "":
|
| 47 |
return None, None, None, None, "❌ Please paste a conversation to evaluate."
|
| 48 |
|
| 49 |
+
# cleanup old ZIPs
|
| 50 |
+
for f in glob.glob("/tmp/financeeval_*.zip"):
|
| 51 |
+
try:
|
| 52 |
+
os.remove(f)
|
| 53 |
+
except Exception:
|
| 54 |
+
pass
|
| 55 |
+
|
| 56 |
# normalize weights
|
| 57 |
user_weights = {
|
| 58 |
"trust": w_trust, "accuracy": w_accuracy, "explain": w_explain,
|
|
|
|
| 114 |
avg_row[c] = round(compare_df[c].mean(), 2)
|
| 115 |
avg_df = pd.DataFrame([avg_row])
|
| 116 |
|
| 117 |
+
# ---- Write ZIP into /tmp (ephemeral, privacy-safe) ----
|
| 118 |
ts = datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%SZ")
|
| 119 |
+
zip_path = f"/tmp/financeeval_{ts}.zip"
|
| 120 |
+
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
|
| 121 |
for label, df, total in all_tables:
|
| 122 |
df2 = df.copy()
|
| 123 |
df2.loc[len(df2)] = {
|
|
|
|
| 130 |
zf.writestr(f"results_{label}_{ts}.csv", df2.to_csv(index=False).encode("utf-8"))
|
| 131 |
zf.writestr(f"comparison_{ts}.csv", compare_df.to_csv(index=False).encode("utf-8"))
|
| 132 |
zf.writestr(f"judgments_{ts}.json", json.dumps(json_blobs, indent=2).encode("utf-8"))
|
|
|
|
|
|
|
| 133 |
|
| 134 |
+
# merge tables for UI
|
| 135 |
merged_tables = []
|
| 136 |
for label, df, total in all_tables:
|
| 137 |
merged_tables.append(pd.DataFrame({
|
|
|
|
| 145 |
merged_df = pd.concat(merged_tables, ignore_index=True)
|
| 146 |
usage_text_all = "\n".join(token_usage_blocks)
|
| 147 |
|
| 148 |
+
return merged_df, compare_df, (avg_df if avg_df is not None else pd.DataFrame()), zip_path, usage_text_all
|
| 149 |
|
| 150 |
except Exception as e:
|
| 151 |
tb = traceback.format_exc()
|
|
|
|
| 181 |
compare_out = gr.Dataframe()
|
| 182 |
avg_out = gr.Dataframe()
|
| 183 |
with gr.Tab("Downloads & Usage"):
|
| 184 |
+
zip_file = gr.File(label="Download ZIP (CSVs + JSON)", type="file")
|
| 185 |
usage_text = gr.Textbox(label="Token Usage / Errors", lines=8)
|
| 186 |
|
| 187 |
run_btn.click(
|