navaneethkrishnan committed on
Commit
0cee974
·
verified ·
1 Parent(s): 9ceb38c

Upload 2 files

Browse files
Files changed (2) hide show
  1. src/comparison.py +38 -0
  2. src/config.py +21 -0
src/comparison.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from src.api_clients import openai_client
3
+
4
# Shared state: module-level dict holding the most recent evaluation result.
# Nothing in this view writes to it; presumably another module fills it in
# place -- TODO confirm against the writer.
last_eval_result = {}


def get_last_eval_data():
    """Return the shared evaluation dict, or ``None`` while it is empty.

    The returned object is the module-level ``last_eval_result`` itself (not a
    copy), so later in-place updates are visible through it.
    """
    if not last_eval_result:
        return None
    return last_eval_result
8
+
9
def run_comparison(human_scores, human_comments, model_scores, model_comments,
                   *, model="gpt-4o-mini", max_tokens=800):
    """Ask a chat model to compare human and model summary evaluations.

    The four evaluation inputs are interpolated verbatim into a single user
    prompt and sent through ``openai_client.chat.completions.create``.

    Args:
        human_scores: Scores given by the human reviewer (formatted via f-string).
        human_comments: Free-form comments from the human reviewer.
        model_scores: Scores produced by the model evaluation.
        model_comments: Comments produced by the model evaluation.
        model: Chat model name; defaults to the previously hard-coded
            ``"gpt-4o-mini"``.
        max_tokens: Completion token cap; defaults to the previously
            hard-coded ``800``.

    Returns:
        The text of the first completion choice, or ``""`` if the response
        carries no text content.
    """
    prompt = f"""Compare human and model summary evaluations.

Human Scores: {human_scores}
Model Scores: {model_scores}

Human Comments: {human_comments}
Model Comments: {model_comments}

Output key differences, strengths, and any mismatches."""
    res = openai_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        max_tokens=max_tokens,
    )
    # NOTE(review): the SDK types ``message.content`` as optional; normalise a
    # missing value to an empty string so callers always receive ``str``.
    return res.choices[0].message.content or ""
25
+
26
def import_model_metrics():
    """Return the last model evaluation as six display strings.

    Reads the result exposed by ``get_last_eval_data()`` and flattens it to
    ``(coverage, alignment, hallucination, relevance, bias_toxicity, comments)``.

    Returns:
        A 6-tuple of strings. Every entry is ``""`` when no evaluation data is
        available or it has no ``"scores"`` key. Otherwise each score is
        ``str()`` of the stored value (``""`` if that metric is missing) and
        the last entry is the ``"comments"`` value rendered as indented JSON,
        or ``""`` when comments are missing or empty.
    """
    metric_keys = (
        "coverage",
        "alignment",
        "hallucination",
        "relevance",
        "bias_toxicity",
    )
    data = get_last_eval_data()
    if not data or "scores" not in data:
        # Same tuple type and arity as the populated case below.
        return ("",) * (len(metric_keys) + 1)

    scores = data["scores"]
    comments = data.get("comments")
    if comments is None or comments == "":
        # json.dumps would render these as the literal text `null` / `""`.
        comments_text = ""
    else:
        comments_text = json.dumps(comments, indent=2)

    return (*(str(scores.get(key, "")) for key in metric_keys), comments_text)
src/config.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Token limits keyed by backend display name.
# NOTE(review): how these limits are consumed is not visible here -- confirm.
MAX_TOKENS = {"OpenAI": 8000, "DeepSeek": 8000, "Claude": 4000}

# Named weight presets over the five evaluation metrics.
# NOTE(review): the "Twin-Lock" weights sum to 0.80 while "Judge-Lock" sums to
# 1.00 -- confirm whether that difference is intentional.
PRESET = {
    "Twin-Lock": dict(
        coverage=0.25,
        alignment=0.20,
        hallucination=0.15,
        relevance=0.15,
        bias_toxicity=0.05,
    ),
    "Judge-Lock": dict(
        coverage=0.35,
        alignment=0.15,
        hallucination=0.30,
        relevance=0.15,
        bias_toxicity=0.05,
    ),
}

# Stylesheet string for the UI.
# The slider thumb/track rules are written once per vendor prefix on purpose:
# a selector list containing a pseudo-element the browser does not recognise
# is dropped as a whole, so `::-webkit-*` and `::-moz-*` must not share a rule.
CSS = """
body,.gradio-container{background:#f7f7f7!important;color:#1a1a1a!important}
textarea,textarea.gr-input{background:#f7f7f7!important;color:#1a1a1a!important}
textarea::placeholder,input::placeholder{color:#666!important}
input[type=radio]{accent-color:#000000}
input[type=checkbox]{accent-color:#000000}
#variant-group input[type=radio]{accent-color:#ffa500}
#backend-group input[type=checkbox]{accent-color:#0074d9}
.metric-slider input[type=range]::-webkit-slider-thumb{background:#21a366!important}
.metric-slider input[type=range]::-moz-range-thumb{background:#21a366!important}
.metric-slider input[type=range]::-webkit-slider-runnable-track{background:#cfe8db!important}
.metric-slider input[type=range]::-moz-range-track{background:#cfe8db!important}
#btn-twin,#btn-judge,#run-btn{background:#000000!important;color:#ffffff!important;border-radius:6px!important}
"""