Spaces:

ping98k
/

LLM-Brainstorming

Sleeping

App Files Files Community

ping98k committed on Jun 26, 2025

Commit

f94af77

unverified ·

2 Parent(s): 66f49ec f736041

Merge pull request #1 from ping98k/codex/explain-codebase-structure-and-key-concepts

Browse files

Files changed (3) hide show

README.md +21 -0
main.py +5 -41
tournament_utils.py +44 -0

README.md CHANGED Viewed

	@@ -1,2 +1,23 @@
1	# llm-brainstorm
2

 # llm-brainstorm
+This project provides a small interface for running "tournaments" between language model answers. It is built with Gradio and LiteLLM.
+## Usage
+1. Create a `.env` file in the repository root and define any API keys required by your model. You can also set defaults for:
+   - `NUM_TOP_PICKS`
+   - `POOL_SIZE`
+   - `MAX_WORKERS`
+   - `NUM_GENERATIONS`
+2. Install dependencies (example with `pip`):
+   ```bash
+   pip install gradio litellm python-dotenv tqdm matplotlib
+   ```
+3. Run the app:
+   ```bash
+   python main.py
+   ```
+4. Open the displayed local URL to provide an instruction and evaluation criteria.
+The interface will generate multiple answers, score them, and run a head-to-head tournament to find the best outputs.

main.py CHANGED Viewed

@@ -3,22 +3,14 @@ load_dotenv()
 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
-from litellm import completion
 import matplotlib.pyplot as plt
 NUM_TOP_PICKS_DEFAULT = int(os.getenv("NUM_TOP_PICKS", 5))
 POOL_SIZE_DEFAULT = int(os.getenv("POOL_SIZE", 10))
 MAX_WORKERS_DEFAULT = int(os.getenv("MAX_WORKERS", 10))
 NUM_GENERATIONS_DEFAULT = int(os.getenv("NUM_GENERATIONS", 20))
-def generate_players(instruction, n):
-    response = completion(
-        model="gpt-4o-mini",
-        messages=[{"role": "user", "content": instruction}],
-        n=n
-    )
-    return [c.message.content.strip() for c in response.choices]
 def _clean_json(txt):
     txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
     try:
@@ -45,23 +37,9 @@ def run_tournament(instruction_input, criteria_input, n_gen, num_top_picks, pool
     yield from log(f"{len(all_players)} players generated")
     def criteria_block():
         return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
-    def prompt_score(player):
-        prompt = f"""
-Evaluate the output below on the following criteria:
-{criteria_block()}
-Return JSON exactly like: {{"score": [{', '.join(['1-10'] * len(criteria_list))}]}}.
-Instruction:
-{instruction}
-Output:
-{player}
-"""
-        response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
-        return response.choices[0].message.content.strip()
     def score(player):
-        data = _clean_json(prompt_score(player))
         lst = data.get("score", data.get("scores", []))
         return sum(lst) / len(lst) if lst else 0.0
     yield from log("Scoring players …")
@@ -72,24 +50,10 @@ Output:
     yield from log("Histogram generated")
     top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
     yield from log(f"Filtered to {len(top_players)} players with best scores")
-    def prompt_play(a, b):
-        prompt = f"""
-Compare the two players below using:
-{criteria_block()}
-Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
-Instruction:
-{instruction}
-Players:
-<A>{a}</A>
-<B>{b}</B>
-"""
-        response = completion(model="gpt-4o-mini", messages=[{"role": "system", "content": prompt}])
-        return response.choices[0].message.content.strip()
     def play(a, b):
-        winner_label = _clean_json(prompt_play(a, b)).get("winner", "A")
         return a if winner_label == "A" else b
     def tournament_round(pairs, executor):
         futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}

 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
 import matplotlib.pyplot as plt
+from tournament_utils import generate_players, prompt_score, prompt_play
 NUM_TOP_PICKS_DEFAULT = int(os.getenv("NUM_TOP_PICKS", 5))
 POOL_SIZE_DEFAULT = int(os.getenv("POOL_SIZE", 10))
 MAX_WORKERS_DEFAULT = int(os.getenv("MAX_WORKERS", 10))
 NUM_GENERATIONS_DEFAULT = int(os.getenv("NUM_GENERATIONS", 20))
 def _clean_json(txt):
     txt = re.sub(r"^```.*?\n|```$", "", txt, flags=re.DOTALL).strip()
     try:
     yield from log(f"{len(all_players)} players generated")
     def criteria_block():
         return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
     def score(player):
+        data = _clean_json(prompt_score(instruction, criteria_block(), player))
         lst = data.get("score", data.get("scores", []))
         return sum(lst) / len(lst) if lst else 0.0
     yield from log("Scoring players …")
     yield from log("Histogram generated")
     top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
     yield from log(f"Filtered to {len(top_players)} players with best scores")
     def play(a, b):
+        winner_label = _clean_json(
+            prompt_play(instruction, criteria_block(), a, b)
+        ).get("winner", "A")
         return a if winner_label == "A" else b
     def tournament_round(pairs, executor):
         futures = {executor.submit(play, a, b): (a, b) for a, b in pairs}

tournament_utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+from litellm import completion
+def generate_players(instruction: str, n: int, model: str = "gpt-4o-mini"):
+    """Request `n` completions for the instruction using the given model."""
+    response = completion(
+        model=model,
+        messages=[{"role": "user", "content": instruction}],
+        n=n,
+    )
+    return [c.message.content.strip() for c in response.choices]
+def prompt_score(instruction: str, criteria_block: str, player: str, model: str = "gpt-4o-mini") -> str:
+    """Return a JSON score string evaluating `player` on the criteria."""
+    prompt = f"""Evaluate the output below on the following criteria:
+{criteria_block}
+Return JSON exactly like: {{"score": [1-10]}}.
+Instruction:
+{instruction}
+Output:
+{player}"""
+    response = completion(model=model, messages=[{"role": "system", "content": prompt}])
+    return response.choices[0].message.content.strip()
+def prompt_play(instruction: str, criteria_block: str, a: str, b: str, model: str = "gpt-4o-mini") -> str:
+    """Return which player wins in JSON using the given criteria."""
+    prompt = f"""Compare the two players below using:
+{criteria_block}
+Return ONLY JSON {{"winner": "A"}} or {{"winner": "B"}}.
+Instruction:
+{instruction}
+Players:
+<A>{a}</A>
+<B>{b}</B>"""
+    response = completion(model=model, messages=[{"role": "system", "content": prompt}])
+    return response.choices[0].message.content.strip()