Spaces:

ping98k
/

LLM-Brainstorming

Sleeping

App Files Files Community

ping98k commited on Jun 27, 2025

Commit

348dd93

unverified ·

2 Parent(s): f801763 41ec98d

Merge pull request #10 from ping98k/codex/add-id-to-each-player

Browse files

Files changed (1) hide show

main.py +17 -12

main.py CHANGED Viewed

@@ -1,5 +1,5 @@
 from dotenv import load_dotenv
-load_dotenv("./local.env")
 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
@@ -30,7 +30,7 @@ class SimpleProgress:
         return f"{self.prefix} {self.count}/{self.total} - ETA {eta}"
 NUM_TOP_PICKS_DEFAULT = int(os.getenv("NUM_TOP_PICKS", 3))
-POOL_SIZE_DEFAULT = int(os.getenv("POOL_SIZE", 5))
 MAX_WORKERS_DEFAULT = int(os.getenv("MAX_WORKERS", 100))
 NUM_GENERATIONS_DEFAULT = int(os.getenv("NUM_GENERATIONS", 10))
 API_BASE_DEFAULT = os.getenv("OPENAI_API_BASE", "")
@@ -40,9 +40,9 @@ PAIRWISE_FILTER_DEFAULT = os.getenv("ENABLE_PAIRWISE_FILTER", "true").lower() ==
 GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
 SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
 PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
-GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "1.0"))
-SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.1"))
-PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.1"))
 SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
 PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
 CRITERIA_DEFAULT = "Factuality,Instruction Following,Precision"
@@ -131,10 +131,12 @@ def run_tournament(
             f"Total tokens: {prompt_tokens + completion_tokens}"
         )
-    def log_completion(prefix: str, text: str):
         disp = text.replace("\n", " ")
         if len(disp) > 100:
             disp = disp[:100] + "…"
         return log(f"{prefix}{disp}")
     def log(msg):
         process_log.append(msg)
@@ -153,12 +155,15 @@ def run_tournament(
     add_usage(usage)
     yield from log(f"{len(all_players)} players generated")
     for i, p in enumerate(all_players, 1):
-        yield from log_completion(f"Completion {i}: ", p)
     def criteria_block():
         return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
     if enable_score_filter:
-        def score(player):
             text, usage = prompt_score(
                 instruction,
                 criteria_list,
@@ -172,7 +177,7 @@ def run_tournament(
                 return_usage=True,
             )
             add_usage(usage)
-            score_outputs.append(text)
             data = _clean_json(text)
             if "scores" in data and isinstance(data["scores"], list):
                 vals = data["scores"]
@@ -183,7 +188,7 @@ def run_tournament(
         with ThreadPoolExecutor(max_workers=max_workers) as ex:
             prog = SimpleProgress(len(all_players), "Scoring")
             scores = {}
-            for p, s in zip(all_players, ex.map(score, all_players)):
                 scores[p] = s
                 yield from log(prog.step())
         hist_fig = plt.figure()
@@ -191,8 +196,8 @@ def run_tournament(
         yield from log("Histogram generated")
         top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
         yield from log(f"Filtered to {len(top_players)} players with best scores")
-        for i, txt in enumerate(score_outputs, 1):
-            yield from log_completion(f"Score completion {i}: ", txt)
     else:
         top_players = all_players
     if enable_pairwise_filter:

 from dotenv import load_dotenv
+load_dotenv("./local.env",override=True)
 import os, json, re, ast, gradio as gr
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from tqdm import tqdm
         return f"{self.prefix} {self.count}/{self.total} - ETA {eta}"
 NUM_TOP_PICKS_DEFAULT = int(os.getenv("NUM_TOP_PICKS", 3))
+POOL_SIZE_DEFAULT = int(os.getenv("POOL_SIZE", 6))
 MAX_WORKERS_DEFAULT = int(os.getenv("MAX_WORKERS", 100))
 NUM_GENERATIONS_DEFAULT = int(os.getenv("NUM_GENERATIONS", 10))
 API_BASE_DEFAULT = os.getenv("OPENAI_API_BASE", "")
 GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
 SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
 PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
+GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "0.9"))
+SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.6"))
+PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.6"))
 SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
 PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
 CRITERIA_DEFAULT = "Factuality,Instruction Following,Precision"
             f"Total tokens: {prompt_tokens + completion_tokens}"
         )
+    def log_completion(prefix: str, text: str, player_id: int | None = None):
         disp = text.replace("\n", " ")
         if len(disp) > 100:
             disp = disp[:100] + "…"
+        if player_id is not None:
+            prefix = f"{prefix}(ID {player_id}) "
         return log(f"{prefix}{disp}")
     def log(msg):
         process_log.append(msg)
     add_usage(usage)
     yield from log(f"{len(all_players)} players generated")
     for i, p in enumerate(all_players, 1):
+        yield from log_completion(f"Completion {i}: ", p, i)
     def criteria_block():
         return "\n".join(f"{i + 1}) {c}" for i, c in enumerate(criteria_list))
     if enable_score_filter:
+        players_with_ids = list(enumerate(all_players, 1))
+        def score(item):
+            idx, player = item
             text, usage = prompt_score(
                 instruction,
                 criteria_list,
                 return_usage=True,
             )
             add_usage(usage)
+            score_outputs.append((idx, text))
             data = _clean_json(text)
             if "scores" in data and isinstance(data["scores"], list):
                 vals = data["scores"]
         with ThreadPoolExecutor(max_workers=max_workers) as ex:
             prog = SimpleProgress(len(all_players), "Scoring")
             scores = {}
+            for (idx, p), s in zip(players_with_ids, ex.map(score, players_with_ids)):
                 scores[p] = s
                 yield from log(prog.step())
         hist_fig = plt.figure()
         yield from log("Histogram generated")
         top_players = sorted(all_players, key=scores.get, reverse=True)[:pool_size]
         yield from log(f"Filtered to {len(top_players)} players with best scores")
+        for i, (idx, txt) in enumerate(score_outputs, 1):
+            yield from log_completion(f"Score completion {i}: ", txt, idx)
     else:
         top_players = all_players
     if enable_pairwise_filter: