ping98k committed on
Commit
4ccee12
·
1 Parent(s): c0bf2b8

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +27 -20
main.py CHANGED
@@ -1,6 +1,6 @@
1
  from dotenv import load_dotenv
2
  load_dotenv()
3
- import os, json, gradio as gr
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from tqdm import tqdm
6
  from litellm import completion
@@ -18,6 +18,13 @@ def generate_players(instruction, n):
18
  )
19
  return [c.message.content.strip() for c in response.choices]
20
 
 
 
 
 
 
 
 
21
  def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool_size, max_workers):
22
  instruction = instruction_input.strip()
23
  criteria_list = [c.strip() for c in criteria_input.split(",") if c.strip()] or ["Factuality", "Instruction Following", "Precision"]
@@ -28,6 +35,7 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
28
  process_log = []
29
  def log(msg):
30
  process_log.append(msg)
 
31
  yield "\n".join(process_log), ""
32
  yield from log("Generating players …")
33
  all_players = generate_players(instruction, n_gen)
@@ -35,23 +43,22 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
35
  def criteria_block():
36
  return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
37
  def prompt_score(player):
38
- prompt = f"""Evaluate the output below on the following criteria:
 
39
  {criteria_block()}
40
 
41
- Return JSON exactly like: {{\"score\": [{', '.join(['1-10'] * len(criteria_list))}]}}.
42
 
43
  Instruction:
44
  {instruction}
45
 
46
  Output:
47
- {player}"""
 
48
  response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
49
  return response.choices[0].message.content.strip()
50
  def score(player):
51
- try:
52
- data = json.loads(prompt_score(player))
53
- except json.JSONDecodeError:
54
- data = eval(prompt_score(player))
55
  lst = data.get("score", data.get("scores", []))
56
  return sum(lst) / len(lst) if lst else 0.0
57
  yield from log("Scoring players …")
@@ -60,24 +67,23 @@ Output:
60
  top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
61
  yield from log(f"Filtered to {len(top_players)} players with best scores")
62
  def prompt_play(a, b):
63
- prompt = f"""Compare the two players below using:
 
64
  {criteria_block()}
65
 
66
- Return ONLY JSON {{\"winner\": \"A\"}} or {{\"winner\": \"B\"}}.
67
 
68
  Instruction:
69
  {instruction}
70
 
71
  Players:
72
  <A>{a}</A>
73
- <B>{b}</B>"""
 
74
  response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
75
  return response.choices[0].message.content.strip()
76
  def play(a, b):
77
- try:
78
- winner_label = json.loads(prompt_play(a, b))["winner"]
79
- except json.JSONDecodeError:
80
- winner_label = eval(prompt_play(a, b)).get("winner", "A")
81
  return a if winner_label == "A" else b
82
  def tournament_round(pairs, executor):
83
  futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
@@ -93,10 +99,11 @@ Players:
93
  current = players[:]
94
  while len(current) > 1:
95
  pairs = [(current[i], current[i + 1]) for i in range(0, len(current) - 1, 2)]
96
- for w, l in tournament_round(pairs, executor):
 
97
  lost_to[l] = w
98
- current = [w for w, _ in tournament_round(pairs, executor)]
99
- if len(players) % 2 == 1:
100
  current.append(players[-1])
101
  return current[0], lost_to
102
  def get_candidates(champion, lost_to):
@@ -113,8 +120,8 @@ Players:
113
  runner_up = lost_to.get(champion)
114
  finalists = [champion] + ([runner_up] if runner_up else [])
115
  semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
116
- candidates = set(finalists + semifinalists + get_candidates(champion, lost_to))
117
- return playoff(list(candidates), executor)[:num_top_picks]
118
  yield from log("Running tournament …")
119
  with ThreadPoolExecutor(max_workers=max_workers) as ex:
120
  top_k = get_top(top_players, ex)
 
1
  from dotenv import load_dotenv
2
  load_dotenv()
3
+ import os, json, re, ast, gradio as gr
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from tqdm import tqdm
6
  from litellm import completion
 
18
  )
19
  return [c.message.content.strip() for c in response.choices]
20
 
21
+ def _clean_json(txt):
22
+ txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
23
+ try:
24
+ return json.loads(txt)
25
+ except json.JSONDecodeError:
26
+ return ast.literal_eval(txt)
27
+
28
  def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool_size, max_workers):
29
  instruction = instruction_input.strip()
30
  criteria_list = [c.strip() for c in criteria_input.split(",") if c.strip()] or ["Factuality", "Instruction Following", "Precision"]
 
35
  process_log = []
36
  def log(msg):
37
  process_log.append(msg)
38
+ tqdm.write(msg)
39
  yield "\n".join(process_log), ""
40
  yield from log("Generating players …")
41
  all_players = generate_players(instruction, n_gen)
 
43
  def criteria_block():
44
  return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
45
  def prompt_score(player):
46
+ prompt = f"""
47
+ Evaluate the output below on the following criteria:
48
  {criteria_block()}
49
 
50
+ Return JSON exactly like: {{"score": [{', '.join(['1-10'] * len(criteria_list))}]}}.
51
 
52
  Instruction:
53
  {instruction}
54
 
55
  Output:
56
+ {player}
57
+ """
58
  response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
59
  return response.choices[0].message.content.strip()
60
  def score(player):
61
+ data = _clean_json(prompt_score(player))
 
 
 
62
  lst = data.get("score", data.get("scores", []))
63
  return sum(lst) / len(lst) if lst else 0.0
64
  yield from log("Scoring players …")
 
67
  top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
68
  yield from log(f"Filtered to {len(top_players)} players with best scores")
69
  def prompt_play(a, b):
70
+ prompt = f"""
71
+ Compare the two players below using:
72
  {criteria_block()}
73
 
74
+ Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
75
 
76
  Instruction:
77
  {instruction}
78
 
79
  Players:
80
  <A>{a}</A>
81
+ <B>{b}</B>
82
+ """
83
  response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
84
  return response.choices[0].message.content.strip()
85
  def play(a, b):
86
+ winner_label = _clean_json(prompt_play(a, b)).get("winner", "A")
 
 
 
87
  return a if winner_label == "A" else b
88
  def tournament_round(pairs, executor):
89
  futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
 
99
  current = players[:]
100
  while len(current) > 1:
101
  pairs = [(current[i], current[i + 1]) for i in range(0, len(current) - 1, 2)]
102
+ round_results = tournament_round(pairs, executor)
103
+ for w, l in round_results:
104
  lost_to[l] = w
105
+ current = [w for w, _ in round_results]
106
+ if len(players) % 2 == 1 and players[-1] not in current:
107
  current.append(players[-1])
108
  return current[0], lost_to
109
  def get_candidates(champion, lost_to):
 
120
  runner_up = lost_to.get(champion)
121
  finalists = [champion] + ([runner_up] if runner_up else [])
122
  semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
123
+ candidates = list(set(finalists + semifinalists + get_candidates(champion, lost_to)))
124
+ return playoff(candidates, executor)[:num_top_picks]
125
  yield from log("Running tournament …")
126
  with ThreadPoolExecutor(max_workers=max_workers) as ex:
127
  top_k = get_top(top_players, ex)