Spaces:
Sleeping
Sleeping
ping98k
committed on
Commit
·
4ccee12
1
Parent(s):
c0bf2b8
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
load_dotenv()
|
| 3 |
-
import os, json, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
| 6 |
from litellm import completion
|
|
@@ -18,6 +18,13 @@ def generate_players(instruction, n):
|
|
| 18 |
)
|
| 19 |
return [c.message.content.strip() for c in response.choices]
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool_size, max_workers):
|
| 22 |
instruction = instruction_input.strip()
|
| 23 |
criteria_list = [c.strip() for c in criteria_input.split(",") if c.strip()] or ["Factuality", "Instruction Following", "Precision"]
|
|
@@ -28,6 +35,7 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
|
|
| 28 |
process_log = []
|
| 29 |
def log(msg):
|
| 30 |
process_log.append(msg)
|
|
|
|
| 31 |
yield "\n".join(process_log), ""
|
| 32 |
yield from log("Generating players …")
|
| 33 |
all_players = generate_players(instruction, n_gen)
|
|
@@ -35,23 +43,22 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
|
|
| 35 |
def criteria_block():
|
| 36 |
return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
|
| 37 |
def prompt_score(player):
|
| 38 |
-
prompt = f"""
|
|
|
|
| 39 |
{criteria_block()}
|
| 40 |
|
| 41 |
-
Return JSON exactly like: {{
|
| 42 |
|
| 43 |
Instruction:
|
| 44 |
{instruction}
|
| 45 |
|
| 46 |
Output:
|
| 47 |
-
{player}
|
|
|
|
| 48 |
response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
|
| 49 |
return response.choices[0].message.content.strip()
|
| 50 |
def score(player):
|
| 51 |
-
|
| 52 |
-
data = json.loads(prompt_score(player))
|
| 53 |
-
except json.JSONDecodeError:
|
| 54 |
-
data = eval(prompt_score(player))
|
| 55 |
lst = data.get("score", data.get("scores", []))
|
| 56 |
return sum(lst) / len(lst) if lst else 0.0
|
| 57 |
yield from log("Scoring players …")
|
|
@@ -60,24 +67,23 @@ Output:
|
|
| 60 |
top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
|
| 61 |
yield from log(f"Filtered to {len(top_players)} players with best scores")
|
| 62 |
def prompt_play(a, b):
|
| 63 |
-
prompt = f"""
|
|
|
|
| 64 |
{criteria_block()}
|
| 65 |
|
| 66 |
-
Return ONLY JSON {{
|
| 67 |
|
| 68 |
Instruction:
|
| 69 |
{instruction}
|
| 70 |
|
| 71 |
Players:
|
| 72 |
<A>{a}</A>
|
| 73 |
-
<B>{b}</B>
|
|
|
|
| 74 |
response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
|
| 75 |
return response.choices[0].message.content.strip()
|
| 76 |
def play(a, b):
|
| 77 |
-
|
| 78 |
-
winner_label = json.loads(prompt_play(a, b))["winner"]
|
| 79 |
-
except json.JSONDecodeError:
|
| 80 |
-
winner_label = eval(prompt_play(a, b)).get("winner", "A")
|
| 81 |
return a if winner_label == "A" else b
|
| 82 |
def tournament_round(pairs, executor):
|
| 83 |
futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
|
|
@@ -93,10 +99,11 @@ Players:
|
|
| 93 |
current = players[:]
|
| 94 |
while len(current) > 1:
|
| 95 |
pairs = [(current[i], current[i + 1]) for i in range(0, len(current) - 1, 2)]
|
| 96 |
-
|
|
|
|
| 97 |
lost_to[l] = w
|
| 98 |
-
current = [w for w, _ in
|
| 99 |
-
if len(players) % 2 == 1:
|
| 100 |
current.append(players[-1])
|
| 101 |
return current[0], lost_to
|
| 102 |
def get_candidates(champion, lost_to):
|
|
@@ -113,8 +120,8 @@ Players:
|
|
| 113 |
runner_up = lost_to.get(champion)
|
| 114 |
finalists = [champion] + ([runner_up] if runner_up else [])
|
| 115 |
semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
|
| 116 |
-
candidates = set(finalists + semifinalists + get_candidates(champion, lost_to))
|
| 117 |
-
return playoff(
|
| 118 |
yield from log("Running tournament …")
|
| 119 |
with ThreadPoolExecutor(max_workers=max_workers) as ex:
|
| 120 |
top_k = get_top(top_players, ex)
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
load_dotenv()
|
| 3 |
+
import os, json, re, ast, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
| 6 |
from litellm import completion
|
|
|
|
| 18 |
)
|
| 19 |
return [c.message.content.strip() for c in response.choices]
|
| 20 |
|
| 21 |
+
def _clean_json(txt):
|
| 22 |
+
txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
|
| 23 |
+
try:
|
| 24 |
+
return json.loads(txt)
|
| 25 |
+
except json.JSONDecodeError:
|
| 26 |
+
return ast.literal_eval(txt)
|
| 27 |
+
|
| 28 |
def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool_size, max_workers):
|
| 29 |
instruction = instruction_input.strip()
|
| 30 |
criteria_list = [c.strip() for c in criteria_input.split(",") if c.strip()] or ["Factuality", "Instruction Following", "Precision"]
|
|
|
|
| 35 |
process_log = []
|
| 36 |
def log(msg):
|
| 37 |
process_log.append(msg)
|
| 38 |
+
tqdm.write(msg)
|
| 39 |
yield "\n".join(process_log), ""
|
| 40 |
yield from log("Generating players …")
|
| 41 |
all_players = generate_players(instruction, n_gen)
|
|
|
|
| 43 |
def criteria_block():
|
| 44 |
return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
|
| 45 |
def prompt_score(player):
|
| 46 |
+
prompt = f"""
|
| 47 |
+
Evaluate the output below on the following criteria:
|
| 48 |
{criteria_block()}
|
| 49 |
|
| 50 |
+
Return JSON exactly like: {{"score": [{', '.join(['1-10'] * len(criteria_list))}]}}.
|
| 51 |
|
| 52 |
Instruction:
|
| 53 |
{instruction}
|
| 54 |
|
| 55 |
Output:
|
| 56 |
+
{player}
|
| 57 |
+
"""
|
| 58 |
response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
|
| 59 |
return response.choices[0].message.content.strip()
|
| 60 |
def score(player):
|
| 61 |
+
data = _clean_json(prompt_score(player))
|
|
|
|
|
|
|
|
|
|
| 62 |
lst = data.get("score", data.get("scores", []))
|
| 63 |
return sum(lst) / len(lst) if lst else 0.0
|
| 64 |
yield from log("Scoring players …")
|
|
|
|
| 67 |
top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
|
| 68 |
yield from log(f"Filtered to {len(top_players)} players with best scores")
|
| 69 |
def prompt_play(a, b):
|
| 70 |
+
prompt = f"""
|
| 71 |
+
Compare the two players below using:
|
| 72 |
{criteria_block()}
|
| 73 |
|
| 74 |
+
Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
|
| 75 |
|
| 76 |
Instruction:
|
| 77 |
{instruction}
|
| 78 |
|
| 79 |
Players:
|
| 80 |
<A>{a}</A>
|
| 81 |
+
<B>{b}</B>
|
| 82 |
+
"""
|
| 83 |
response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
|
| 84 |
return response.choices[0].message.content.strip()
|
| 85 |
def play(a, b):
|
| 86 |
+
winner_label = _clean_json(prompt_play(a, b)).get("winner", "A")
|
|
|
|
|
|
|
|
|
|
| 87 |
return a if winner_label == "A" else b
|
| 88 |
def tournament_round(pairs, executor):
|
| 89 |
futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
|
|
|
|
| 99 |
current = players[:]
|
| 100 |
while len(current) > 1:
|
| 101 |
pairs = [(current[i], current[i + 1]) for i in range(0, len(current) - 1, 2)]
|
| 102 |
+
round_results = tournament_round(pairs, executor)
|
| 103 |
+
for w, l in round_results:
|
| 104 |
lost_to[l] = w
|
| 105 |
+
current = [w for w, _ in round_results]
|
| 106 |
+
if len(players) % 2 == 1 and players[-1] not in current:
|
| 107 |
current.append(players[-1])
|
| 108 |
return current[0], lost_to
|
| 109 |
def get_candidates(champion, lost_to):
|
|
|
|
| 120 |
runner_up = lost_to.get(champion)
|
| 121 |
finalists = [champion] + ([runner_up] if runner_up else [])
|
| 122 |
semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
|
| 123 |
+
candidates = list(set(finalists + semifinalists + get_candidates(champion, lost_to)))
|
| 124 |
+
return playoff(candidates, executor)[:num_top_picks]
|
| 125 |
yield from log("Running tournament …")
|
| 126 |
with ThreadPoolExecutor(max_workers=max_workers) as ex:
|
| 127 |
top_k = get_top(top_players, ex)
|