ping98k committed on
Commit
2584782
·
1 Parent(s): c94e158

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +129 -145
main.py CHANGED
@@ -1,158 +1,142 @@
1
- import json
2
- import os
3
- import random
4
  from concurrent.futures import ThreadPoolExecutor, as_completed
5
  from tqdm import tqdm
6
- import time
7
- # Number of top picks to return (default 5)
 
8
  NUM_TOP_PICKS = int(os.getenv("NUM_TOP_PICKS", 5))
9
- # Initial pool size after scoring (default 20)
10
- POOL_SIZE = int(os.getenv("POOL_SIZE", 20))
11
- # Maximum number of worker threads for parallel execution
12
- MAX_WORKERS = int(os.getenv("MAX_WORKERS", 10))
13
 
14
- # -----------------------------------------------------------------------------
15
- from litellm import completion
16
 
17
- instruction = "tell me story"
 
 
 
 
 
 
18
 
19
- # prompt_score: send player to 4o-mini and return raw response content
20
- # expects model returns something like '{"score": [4,5,6]}'
21
- def prompt_score(player):
22
- response = completion(
23
- model="gpt-4o-mini",
24
- messages=[{"role": "system", "content":
25
- f"""Evaluate the output below based on the following criteria:
26
- 1) Factuality
27
- 2) Instruction Following
28
- 3) Precision
29
 
30
- Return a JSON object in this format: {{"score": [1–10, 1–10, 1–10]}} — one score for each criterion.
 
 
31
 
32
- Here is the instruction:
 
 
33
  {instruction}
34
 
35
  Output:
36
- {player}
37
- """.split()
38
- }]
39
- )
40
- # extract message text content
41
- return response.choices[0].message.content
42
-
43
- # score: Call Litellm (OpenAI 4o-mini) to get a list of scores and return their average.
44
- # Parses JSON with key "score" or "scores" and computes average.
45
- def score(player):
46
- response = prompt_score(player)
47
- try:
48
- data = json.loads(response)
49
- scores_list = data.get("score", data.get("scores", []))
50
- except (json.JSONDecodeError, NameError):
51
- # Fallback: eval in safe context
52
- data = eval(response)
53
- scores_list = data.get("score", data.get("scores", []))
54
- if not scores_list:
55
- return 0.0
56
- return sum(scores_list) / len(scores_list)
57
-
58
- def play(a, b, scores):
59
- # Return 'a' if its score >= b's score, else 'b'
60
- return a if scores[a] >= scores[b] else b
61
-
62
- # precompute_scores: Batch parallel scoring of all players.
63
- # Returns a dict mapping player -> their computed score.
64
- def precompute_scores(players, executor):
65
- """Compute all scores in parallel once and return a dict."""
66
- # Submit all score() calls to the executor
67
- futures = {executor.submit(score, p): p for p in players}
68
- scores = {}
69
- # Collect results as they complete
70
- for fut in tqdm(as_completed(futures), total=len(futures), desc="Scoring"):
71
- p = futures[fut]
72
- scores[p] = fut.result()
73
- return scores
74
-
75
- # tournament_round: Play one elimination round in parallel.
76
- # Takes pairs of players, returns a list of (winner, loser) tuples.
77
- def tournament_round(pairs, executor, scores):
78
- """Play a batch of matches in parallel; returns list of (winner, loser)."""
79
- futures = {executor.submit(play, a, b, scores): (a, b) for a, b in pairs}
80
- results = []
81
- # As matches complete, record winners and losers
82
- for fut in tqdm(as_completed(futures), total=len(futures),
83
- desc="Tournament round", leave=False):
84
- a, b = futures[fut]
85
- w = fut.result()
86
- loser = b if w == a else a
87
- results.append((w, loser))
88
- return results
89
-
90
- # tournament: Run full single-elimination bracket on a player list.
91
- # Returns the champion and a map of who each loser lost to.
92
- def tournament(players, executor, scores):
93
- lost_to = {}
94
- current = players[:]
95
- # Continue until only one player remains
96
- while len(current) > 1:
97
- # Pair off adjacent players
98
- pairs = [(current[i], current[i+1]) for i in range(0, len(current)-1, 2)]
99
- round_results = tournament_round(pairs, executor, scores)
100
- next_round = [w for w, _ in round_results]
101
- # Record loss relationships
102
- for w, loser in round_results:
103
- lost_to[loser] = w
104
- # Handle odd player out (bye)
105
- if len(current) % 2 == 1:
106
- next_round.append(current[-1])
107
- current = next_round
108
- # Final remaining player is champion
109
- return current[0], lost_to
110
-
111
- # get_candidates: Identify players who lost directly to the champion.
112
- def get_candidates(champion, lost_to):
113
- # Include all who lost to champion + champion itself
114
- return [p for p, o in lost_to.items() if o == champion] + [champion]
115
-
116
- # playoff: Conduct a round-robin among candidates to refine ranking.
117
- # Returns candidates sorted by number of wins descending.
118
- def playoff(candidates, executor, scores):
119
- wins = {p: 0 for p in candidates}
120
- # Generate all unique matchups
121
- pairs = [(candidates[i], candidates[j])
122
- for i in range(len(candidates)) for j in range(i+1, len(candidates))]
123
- futures = {executor.submit(play, a, b, scores): (a, b) for a, b in pairs}
124
- # Tally wins as matches complete
125
- for fut in tqdm(as_completed(futures), total=len(futures),
126
- desc="Playoff", leave=False):
127
- winner = fut.result()
128
- wins[winner] += 1
129
- # Sort by wins (highest first)
130
- return sorted(candidates, key=lambda p: wins[p], reverse=True)
131
-
132
- # get_top: Main orchestration to get top K players.
133
- # 1) Run tournament to identify top bracket players
134
- # 2) Gather candidates (champion, runner-up, semifinalists)
135
- # 3) Conduct playoff to finalize top K ordering
136
- # Returns a list of top 'k' players.
137
- def get_top(players, executor, scores, k=NUM_TOP_PICKS):
138
- champion, lost_to = tournament(players, executor, scores)
139
- runnerup = lost_to.get(champion)
140
- finalists = [champion] + ([runnerup] if runnerup else [])
141
- semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
142
- candidates = set(finalists + semifinalists + get_candidates(champion, lost_to))
143
- ranking = playoff(list(candidates), executor, scores)
144
- return ranking[:k]
145
 
146
  if __name__ == "__main__":
147
- # Create list of 100 players labeled S1..S100
148
- all_players = [f"S{i}" for i in range(1, 101)]
149
- with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
150
- # 1) Compute scores once
151
- scores = precompute_scores(all_players, executor)
152
-
153
- # 2) Select top N players by score
154
- top_n_players = sorted(all_players, key=lambda p: scores[p], reverse=True)[:POOL_SIZE]
155
-
156
- # 3) Run optimized tournament + playoff using cached scores
157
- top5 = get_top(top_n_players, executor, scores)
158
- print("🏆 Top picks:", top5)
 
1
+ import os, json
 
 
2
  from concurrent.futures import ThreadPoolExecutor, as_completed
3
  from tqdm import tqdm
4
+ from litellm import completion
5
+ import gradio as gr
6
+
7
  NUM_TOP_PICKS = int(os.getenv("NUM_TOP_PICKS", 5))
8
+ POOL_SIZE = int(os.getenv("POOL_SIZE", 20))
9
+ MAX_WORKERS = int(os.getenv("MAX_WORKERS", 10))
 
 
10
 
 
 
11
 
12
+ def run_tournament(instruction_input, criteria_input):
13
+ instruction = instruction_input.strip()
14
+ criteria_list = [c.strip() for c in criteria_input.split(",") if c.strip()] or [
15
+ "Factuality",
16
+ "Instruction Following",
17
+ "Precision",
18
+ ]
19
 
20
+ def criteria_block():
21
+ return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
 
 
 
 
 
 
 
 
22
 
23
+ def prompt_score(player):
24
+ prompt = f"""Evaluate the output below on the following criteria:
25
+ {criteria_block()}
26
 
27
+ Return JSON exactly like: {{\"score\": [{', '.join(['1-10'] * len(criteria_list))}]}}.
28
+
29
+ Instruction:
30
  {instruction}
31
 
32
  Output:
33
+ {player}"""
34
+ response = completion(
35
+ model="gpt-4o-mini",
36
+ messages=[{"role": "system", "content": prompt}],
37
+ )
38
+ return response.choices[0].message.content.strip()
39
+
40
+ def score(player):
41
+ try:
42
+ data = json.loads(prompt_score(player))
43
+ except json.JSONDecodeError:
44
+ data = eval(prompt_score(player))
45
+ lst = data.get("score", data.get("scores", []))
46
+ return sum(lst) / len(lst) if lst else 0.0
47
+
48
+ def prompt_play(a, b):
49
+ prompt = f"""Compare the two players below using:
50
+ {criteria_block()}
51
+
52
+ Return ONLY JSON {{\"winner\": \"A\"}} or {{\"winner\": \"B\"}}.
53
+
54
+ Instruction:
55
+ {instruction}
56
+
57
+ Players:
58
+ <A>{a}</A>
59
+ <B>{b}</B>"""
60
+ response = completion(
61
+ model="gpt-4o-mini",
62
+ messages=[{"role": "system", "content": prompt}],
63
+ )
64
+ return response.choices[0].message.content.strip()
65
+
66
+ def play(a, b):
67
+ try:
68
+ winner_label = json.loads(prompt_play(a, b))["winner"]
69
+ except json.JSONDecodeError:
70
+ winner_label = eval(prompt_play(a, b)).get("winner", "A")
71
+ return a if winner_label == "A" else b
72
+
73
+ def precompute_scores(players, executor):
74
+ futures = {executor.submit(score, p): p for p in players}
75
+ scores = {}
76
+ for fut in tqdm(as_completed(futures), total=len(futures)):
77
+ scores[futures[fut]] = fut.result()
78
+ return scores
79
+
80
+ def tournament_round(pairs, executor):
81
+ futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
82
+ results = []
83
+ for fut in tqdm(as_completed(futures), total=len(futures)):
84
+ a, b = futures[fut]
85
+ winner = fut.result()
86
+ loser = b if winner == a else a
87
+ results.append((winner, loser))
88
+ return results
89
+
90
+ def tournament(players, executor):
91
+ lost_to = {}
92
+ current = players[:]
93
+ while len(current) > 1:
94
+ pairs = [(current[i], current[i + 1]) for i in range(0, len(current) - 1, 2)]
95
+ for w, l in tournament_round(pairs, executor):
96
+ lost_to[l] = w
97
+ current = [w for w, _ in tournament_round(pairs, executor)]
98
+ if len(players) % 2 == 1:
99
+ current.append(players[-1])
100
+ return current[0], lost_to
101
+
102
+ def get_candidates(champion, lost_to):
103
+ return [p for p, o in lost_to.items() if o == champion] + [champion]
104
+
105
+ def playoff(candidates, executor):
106
+ wins = {p: 0 for p in candidates}
107
+ pairs = [
108
+ (candidates[i], candidates[j])
109
+ for i in range(len(candidates))
110
+ for j in range(i + 1, len(candidates))
111
+ ]
112
+ futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
113
+ for fut in tqdm(as_completed(futures), total=len(futures)):
114
+ wins[fut.result()] += 1
115
+ return sorted(candidates, key=lambda p: wins[p], reverse=True)
116
+
117
+ def get_top(players, executor, k=NUM_TOP_PICKS):
118
+ champion, lost_to = tournament(players, executor)
119
+ runner_up = lost_to.get(champion)
120
+ finalists = [champion] + ([runner_up] if runner_up else [])
121
+ semifinalists = [p for p, o in lost_to.items() if o in finalists and p not in finalists]
122
+ candidates = set(finalists + semifinalists + get_candidates(champion, lost_to))
123
+ return playoff(list(candidates), executor)[:k]
124
+
125
+ all_players = [f"S{i}" for i in range(1, 10)]
126
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
127
+ scores = precompute_scores(all_players, ex)
128
+ top_players = sorted(all_players, key=scores.get, reverse=True)[:POOL_SIZE]
129
+ top_k = get_top(top_players, ex)
130
+ return ", ".join(top_k)
131
+
132
+ demo = gr.Interface(
133
+ fn=run_tournament,
134
+ inputs=[
135
+ gr.Textbox(lines=2, label="Instruction"),
136
+ gr.Textbox(lines=1, label="Criteria (comma separated)"),
137
+ ],
138
+ outputs=gr.Textbox(label="Top picks"),
139
+ )
 
 
140
 
141
  if __name__ == "__main__":
142
+ demo.launch()