ping98k committed
Commit f94af77 · unverified · 2 Parent(s): 66f49ec f736041

Merge pull request #1 from ping98k/codex/explain-codebase-structure-and-key-concepts

Files changed (3):
  1. README.md +21 -0
  2. main.py +5 -41
  3. tournament_utils.py +44 -0
README.md CHANGED
@@ -1,2 +1,23 @@
 # llm-brainstorm
 
+This project provides a small interface for running "tournaments" between language model answers. It is built with Gradio and LiteLLM.
+
+## Usage
+
+1. Create a `.env` file in the repository root and define any API keys required by your model. You can also set defaults for:
+   - `NUM_TOP_PICKS`
+   - `POOL_SIZE`
+   - `MAX_WORKERS`
+   - `NUM_GENERATIONS`
+2. Install dependencies (example with `pip`):
+   ```bash
+   pip install gradio litellm python-dotenv tqdm matplotlib
+   ```
+3. Run the app:
+   ```bash
+   python main.py
+   ```
+4. Open the displayed local URL to provide an instruction and evaluation criteria.
+
+The interface will generate multiple answers, score them, and run a head-to-head tournament to find the best outputs.
+
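Step 1 in the README above mentions a `.env` file. A minimal illustrative example follows; the `OPENAI_API_KEY` name is an assumption (it is the key LiteLLM reads for OpenAI-hosted models such as the hard-coded `gpt-4o-mini`), and the numeric values simply restate the fallback defaults from `main.py`:

```bash
# Illustrative .env; the key name is an assumption for OpenAI models via LiteLLM
OPENAI_API_KEY=sk-...
# Optional tournament defaults (these match the fallbacks in main.py)
NUM_TOP_PICKS=5
POOL_SIZE=10
MAX_WORKERS=10
NUM_GENERATIONS=20
```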
main.py CHANGED
@@ -3,22 +3,14 @@ load_dotenv()
 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
-from litellm import completion
 import matplotlib.pyplot as plt
+from tournament_utils import generate_players, prompt_score, prompt_play
 
 NUM_TOP_PICKS_DEFAULT = int(os.getenv("NUM_TOP_PICKS", 5))
 POOL_SIZE_DEFAULT = int(os.getenv("POOL_SIZE", 10))
 MAX_WORKERS_DEFAULT = int(os.getenv("MAX_WORKERS", 10))
 NUM_GENERATIONS_DEFAULT = int(os.getenv("NUM_GENERATIONS", 20))
 
-def generate_players(instruction, n):
-    response = completion(
-        model="gpt-4o-mini",
-        messages=[{"role": "user", "content": instruction}],
-        n=n
-    )
-    return [c.message.content.strip() for c in response.choices]
-
 def _clean_json(txt):
     txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
     try:
@@ -45,23 +37,9 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
     yield from log(f"{len(all_players)} players generated")
     def criteria_block():
         return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
-    def prompt_score(player):
-        prompt = f"""
-Evaluate the output below on the following criteria:
-{criteria_block()}
-
-Return JSON exactly like: {{"score": [{', '.join(['1-10'] * len(criteria_list))}]}}.
-
-Instruction:
-{instruction}
 
-Output:
-{player}
-"""
-        response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
-        return response.choices[0].message.content.strip()
     def score(player):
-        data = _clean_json(prompt_score(player))
+        data = _clean_json(prompt_score(instruction, criteria_block(), player))
         lst = data.get("score", data.get("scores", []))
         return sum(lst) / len(lst) if lst else 0.0
     yield from log("Scoring players …")
@@ -72,24 +50,10 @@ Output:
     yield from log("Histogram generated")
     top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
     yield from log(f"Filtered to {len(top_players)} players with best scores")
-    def prompt_play(a, b):
-        prompt = f"""
-Compare the two players below using:
-{criteria_block()}
-
-Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
-
-Instruction:
-{instruction}
-
-Players:
-<A>{a}</A>
-<B>{b}</B>
-"""
-        response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
-        return response.choices[0].message.content.strip()
     def play(a, b):
-        winner_label = _clean_json(prompt_play(a, b)).get("winner", "A")
+        winner_label = _clean_json(
+            prompt_play(instruction, criteria_block(), a, b)
+        ).get("winner", "A")
         return a if winner_label == "A" else b
     def tournament_round(pairs, executor):
         futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}
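Only the first lines of `_clean_json` appear in the hunks above. For context, here is a plausible completion, consistent with the visible regex and the `json`/`ast` imports but not necessarily the file's exact body: it strips Markdown code fences, then falls back from strict JSON parsing to `ast.literal_eval`:

```python
import ast, json, re

def _clean_json(txt):
    # Drop a leading ```...-fence line and a trailing ``` fence (regex as shown above).
    txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
    try:
        return json.loads(txt)            # strict JSON first
    except ValueError:                    # json.JSONDecodeError subclasses ValueError
        try:
            return ast.literal_eval(txt)  # tolerate Python-literal replies, e.g. {'winner': 'A'}
        except (ValueError, SyntaxError):
            return {}                     # callers read the dict with .get(..., default)

# Example: a fenced model reply still parses.
print(_clean_json('```json\n{"winner": "A"}\n```'))  # {'winner': 'A'}
```

Both `score` and `play` read the parsed dict with defaults, so a malformed model reply degrades to a score of 0.0 or a win for "A" rather than raising.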
tournament_utils.py ADDED
@@ -0,0 +1,44 @@
+from litellm import completion
+
+
+def generate_players(instruction: str, n: int, model: str = "gpt-4o-mini"):
+    """Request `n` completions for the instruction using the given model."""
+    response = completion(
+        model=model,
+        messages=[{"role": "user", "content": instruction}],
+        n=n,
+    )
+    return [c.message.content.strip() for c in response.choices]
+
+
+def prompt_score(instruction: str, criteria_block: str, player: str, model: str = "gpt-4o-mini") -> str:
+    """Return a JSON score string evaluating `player` on the criteria."""
+    prompt = f"""Evaluate the output below on the following criteria:
+{criteria_block}
+
+Return JSON exactly like: {{"score": [1-10]}}.
+
+Instruction:
+{instruction}
+
+Output:
+{player}"""
+    response = completion(model=model, messages=[{"role": "system", "content": prompt}])
+    return response.choices[0].message.content.strip()
+
+
+def prompt_play(instruction: str, criteria_block: str, a: str, b: str, model: str = "gpt-4o-mini") -> str:
+    """Return which player wins in JSON using the given criteria."""
+    prompt = f"""Compare the two players below using:
+{criteria_block}
+
+Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
+
+Instruction:
+{instruction}
+
+Players:
+<A>{a}</A>
+<B>{b}</B>"""
+    response = completion(model=model, messages=[{"role": "system", "content": prompt}])
+    return response.choices[0].message.content.strip()
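With the helpers extracted into `tournament_utils.py`, the generate/score/compare loop can be exercised outside the Gradio app. A minimal sketch follows; the instruction, criteria, and crude winner parsing are illustrative only (in `main.py` the reply goes through `_clean_json` instead), and a valid API key must be configured for LiteLLM:

```python
from tournament_utils import generate_players, prompt_score, prompt_play

# Hypothetical inputs, for illustration only.
instruction = "Suggest a name for a note-taking app."
criteria = "1) originality\n2) clarity"  # same numbered format criteria_block() produces

players = generate_players(instruction, n=4)

# Score one candidate; the model is asked to reply like {"score": [7, 9]}.
print(prompt_score(instruction, criteria, players[0]))

# One single-elimination round over consecutive pairs.
winners = []
for a, b in zip(players[::2], players[1::2]):
    verdict = prompt_play(instruction, criteria, a, b)  # e.g. {"winner": "B"}
    winners.append(a if '"A"' in verdict else b)        # crude parse; main.py uses _clean_json
print(winners)
```

In `main.py` the same matches are submitted to a `ThreadPoolExecutor`, so each round's comparisons run concurrently rather than one at a time.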