navaneethkrishnan commited on
Commit
43aa499
·
verified ·
1 Parent(s): f843798

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -27
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py (final: temp-file ZIP + auto-clean + error handling)
2
 
3
  import os, glob, json, zipfile, traceback
4
  import gradio as gr
@@ -22,15 +22,6 @@ DEFAULT_METRIC_WEIGHTS = {
22
  "clarity": 0.10,
23
  }
24
 
25
- JUDGE_ALPHA = {
26
- "trust": 0.70,
27
- "accuracy": 0.65,
28
- "explain": 0.50,
29
- "client_first": 0.70,
30
- "risk_safety": 0.60,
31
- "clarity": 0.70,
32
- }
33
-
34
  # -----------------------------
35
  # Core runner
36
  # -----------------------------
@@ -78,25 +69,22 @@ def run_eval(conversation: str,
78
 
79
  for p in providers:
80
  metrics_out, usage, raw_json = evaluate_all_metrics(
81
- provider=p, conversation_text=model_only, alpha_map=JUDGE_ALPHA
82
  )
83
  rows = []
84
  for m, payload in metrics_out.items():
85
  rows.append({
86
  "Metric": m,
87
  "LLM Score (1-5)": payload.get("judge_score", None),
88
- "NLP Subscore (0-1)": round(payload.get("nlp_subscore", 0.0), 3),
89
- "Fused (0-10)": round(payload.get("fused_0_10", 0.0), 2),
90
- "Comment": payload.get("comment", "")
91
  })
92
  df = pd.DataFrame(rows)
93
- total = weighted_total(
94
- {k: v.get("fused_0_10", 0.0) for k, v in metrics_out.items()},
95
- user_weights
96
- )
97
  compare_rows.append({
98
  "Model": p.label,
99
- **{r["Metric"]: r["Fused (0-10)"] for _, r in df.iterrows()},
100
  "Total (0-10)": round(total, 2)
101
  })
102
  token_usage_blocks.append(
@@ -123,9 +111,9 @@ def run_eval(conversation: str,
123
  df2.loc[len(df2)] = {
124
  "Metric": "TOTAL",
125
  "LLM Score (1-5)": "-",
126
- "NLP Subscore (0-1)": "-",
127
- "Fused (0-10)": total,
128
- "Comment": ""
129
  }
130
  zf.writestr(f"results_{label}_{ts}.csv", df2.to_csv(index=False).encode("utf-8"))
131
  zf.writestr(f"comparison_{ts}.csv", compare_df.to_csv(index=False).encode("utf-8"))
@@ -137,9 +125,9 @@ def run_eval(conversation: str,
137
  merged_tables.append(pd.DataFrame({
138
  "Metric": [f"— {label} —"],
139
  "LLM Score (1-5)": [""],
140
- "NLP Subscore (0-1)": [""],
141
- "Fused (0-10)": [""],
142
- "Comment": [""]
143
  }))
144
  merged_tables.append(df)
145
  merged_df = pd.concat(merged_tables, ignore_index=True)
@@ -181,7 +169,6 @@ with gr.Blocks(title="FinanceEval – Hybrid Judge (Gradio)") as demo:
181
  compare_out = gr.Dataframe()
182
  avg_out = gr.Dataframe()
183
  with gr.Tab("Downloads & Usage"):
184
- # ✅ Fixed: type must be 'filepath' not 'file'
185
  zip_file = gr.File(label="Download ZIP (CSVs + JSON)", type="filepath")
186
  usage_text = gr.Textbox(label="Token Usage / Errors", lines=8)
187
 
@@ -194,4 +181,4 @@ with gr.Blocks(title="FinanceEval – Hybrid Judge (Gradio)") as demo:
194
  )
195
 
196
  if __name__ == "__main__":
197
- demo.launch(show_error=True)
 
1
+ # app.py (LLM-only scoring, NLP as flags only, privacy-safe temp files)
2
 
3
  import os, glob, json, zipfile, traceback
4
  import gradio as gr
 
22
  "clarity": 0.10,
23
  }
24
 
 
 
 
 
 
 
 
 
 
25
  # -----------------------------
26
  # Core runner
27
  # -----------------------------
 
69
 
70
  for p in providers:
71
  metrics_out, usage, raw_json = evaluate_all_metrics(
72
+ provider=p, conversation_text=model_only, alpha_map={} # alpha_map ignored now
73
  )
74
  rows = []
75
  for m, payload in metrics_out.items():
76
  rows.append({
77
  "Metric": m,
78
  "LLM Score (1-5)": payload.get("judge_score", None),
79
+ "Final Score (0-10)": round(payload.get("score_0_10", 0.0), 2),
80
+ "Comment": payload.get("comment", ""),
81
+ "NLP Flags": json.dumps(payload.get("nlp_details", {}))[:200] # truncated
82
  })
83
  df = pd.DataFrame(rows)
84
+ total = weighted_total({k: v.get("score_0_10", 0.0) for k, v in metrics_out.items()}, user_weights)
 
 
 
85
  compare_rows.append({
86
  "Model": p.label,
87
+ **{r["Metric"]: r["Final Score (0-10)"] for _, r in df.iterrows()},
88
  "Total (0-10)": round(total, 2)
89
  })
90
  token_usage_blocks.append(
 
111
  df2.loc[len(df2)] = {
112
  "Metric": "TOTAL",
113
  "LLM Score (1-5)": "-",
114
+ "Final Score (0-10)": total,
115
+ "Comment": "",
116
+ "NLP Flags": ""
117
  }
118
  zf.writestr(f"results_{label}_{ts}.csv", df2.to_csv(index=False).encode("utf-8"))
119
  zf.writestr(f"comparison_{ts}.csv", compare_df.to_csv(index=False).encode("utf-8"))
 
125
  merged_tables.append(pd.DataFrame({
126
  "Metric": [f"— {label} —"],
127
  "LLM Score (1-5)": [""],
128
+ "Final Score (0-10)": [""],
129
+ "Comment": [""],
130
+ "NLP Flags": [""]
131
  }))
132
  merged_tables.append(df)
133
  merged_df = pd.concat(merged_tables, ignore_index=True)
 
169
  compare_out = gr.Dataframe()
170
  avg_out = gr.Dataframe()
171
  with gr.Tab("Downloads & Usage"):
 
172
  zip_file = gr.File(label="Download ZIP (CSVs + JSON)", type="filepath")
173
  usage_text = gr.Textbox(label="Token Usage / Errors", lines=8)
174
 
 
181
  )
182
 
183
  if __name__ == "__main__":
184
+ demo.launch()