Spaces:
Sleeping
Sleeping
ping98k
committed on
Commit
·
e4a181a
1
Parent(s):
9036d14
update to run on huggingface spaces
Browse files
- main.py +12 -12
- tournament_utils.py +4 -4
main.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
-
load_dotenv("./local.env",override=True)
|
| 3 |
import os, json, re, ast, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
|
@@ -40,9 +40,9 @@ PAIRWISE_FILTER_DEFAULT = os.getenv("ENABLE_PAIRWISE_FILTER", "true").lower() ==
|
|
| 40 |
GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
|
| 41 |
SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
|
| 42 |
PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
|
| 43 |
-
GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "
|
| 44 |
-
SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.
|
| 45 |
-
PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.
|
| 46 |
SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
|
| 47 |
PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
|
| 48 |
GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
|
|
@@ -107,9 +107,9 @@ def run_tournament(
|
|
| 107 |
if pairwise_temperature is None:
|
| 108 |
pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
|
| 109 |
if not api_base:
|
| 110 |
-
api_base =
|
| 111 |
if not api_token:
|
| 112 |
-
api_token =
|
| 113 |
if not generate_model:
|
| 114 |
generate_model = GENERATE_MODEL_DEFAULT
|
| 115 |
if not score_model:
|
|
@@ -322,7 +322,7 @@ def run_tournament(
|
|
| 322 |
demo = gr.Interface(
|
| 323 |
fn=run_tournament,
|
| 324 |
inputs=[
|
| 325 |
-
gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path"),
|
| 326 |
gr.Textbox(value="", label="API Token", type="password"),
|
| 327 |
gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
|
| 328 |
gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
|
|
@@ -340,11 +340,11 @@ demo = gr.Interface(
|
|
| 340 |
gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
|
| 341 |
gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
|
| 342 |
gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
|
| 343 |
-
gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)"),
|
| 344 |
-
gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)"),
|
| 345 |
-
gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)"),
|
| 346 |
-
gr.Checkbox(value=
|
| 347 |
-
gr.Checkbox(value=
|
| 348 |
],
|
| 349 |
outputs=[
|
| 350 |
gr.Textbox(lines=10, label="Process"),
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
+
# load_dotenv("./local.env",override=True)
|
| 3 |
import os, json, re, ast, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
|
|
|
| 40 |
GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
|
| 41 |
SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
|
| 42 |
PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
|
| 43 |
+
GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "1.2"))
|
| 44 |
+
SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.1"))
|
| 45 |
+
PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.1"))
|
| 46 |
SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
|
| 47 |
PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
|
| 48 |
GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
|
|
|
|
| 107 |
if pairwise_temperature is None:
|
| 108 |
pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
|
| 109 |
if not api_base:
|
| 110 |
+
api_base = ""
|
| 111 |
if not api_token:
|
| 112 |
+
api_token = ""
|
| 113 |
if not generate_model:
|
| 114 |
generate_model = GENERATE_MODEL_DEFAULT
|
| 115 |
if not score_model:
|
|
|
|
| 322 |
demo = gr.Interface(
|
| 323 |
fn=run_tournament,
|
| 324 |
inputs=[
|
| 325 |
+
gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path", info="Clone repos and run locally To change the API base path"),
|
| 326 |
gr.Textbox(value="", label="API Token", type="password"),
|
| 327 |
gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
|
| 328 |
gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
|
|
|
|
| 340 |
gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
|
| 341 |
gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
|
| 342 |
gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
|
| 343 |
+
gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)", info="Enable Qwen3 think mode"),
|
| 344 |
+
gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)" , info="Enable Qwen3 think mode"),
|
| 345 |
+
gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)" , info="Enable Qwen3 think mode"),
|
| 346 |
+
gr.Checkbox(value=True, label="Enable Explain (Score)", info="Prompt LLM to think step by step"),
|
| 347 |
+
gr.Checkbox(value=True, label="Enable Explain (Pairwise)", info="Prompt LLM to think step by step"),
|
| 348 |
],
|
| 349 |
outputs=[
|
| 350 |
gr.Textbox(lines=10, label="Process"),
|
tournament_utils.py
CHANGED
|
@@ -35,7 +35,7 @@ def generate_players(
|
|
| 35 |
"""
|
| 36 |
messages = [{"role": "user", "content": instruction}]
|
| 37 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 38 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 39 |
response = completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
|
@@ -64,7 +64,7 @@ def prompt_score(
|
|
| 64 |
return_usage: bool = False,
|
| 65 |
) -> str | tuple[str, object]:
|
| 66 |
"""Return a plaintext score evaluation for `player`."""
|
| 67 |
-
example_scores = ", ".join(["
|
| 68 |
prompt = f"""Evaluate the output below on the following criteria:
|
| 69 |
{criteria_block}
|
| 70 |
|
|
@@ -84,7 +84,7 @@ def prompt_score(
|
|
| 84 |
|
| 85 |
prompt += f"\n\nOutput:\n{player}"
|
| 86 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 87 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 88 |
response = completion(
|
| 89 |
model=model,
|
| 90 |
messages=[{"role": "system", "content": prompt}],
|
|
@@ -135,7 +135,7 @@ def prompt_pairwise(
|
|
| 135 |
prompt += f"\n\nInstruction:\n{instruction}"
|
| 136 |
prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
|
| 137 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 138 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 139 |
response = completion(
|
| 140 |
model=model,
|
| 141 |
messages=[{"role": "system", "content": prompt}],
|
|
|
|
| 35 |
"""
|
| 36 |
messages = [{"role": "user", "content": instruction}]
|
| 37 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 38 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 39 |
response = completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
|
|
|
| 64 |
return_usage: bool = False,
|
| 65 |
) -> str | tuple[str, object]:
|
| 66 |
"""Return a plaintext score evaluation for `player`."""
|
| 67 |
+
example_scores = ", ".join(["5"] * len(criteria_list)) or "5"
|
| 68 |
prompt = f"""Evaluate the output below on the following criteria:
|
| 69 |
{criteria_block}
|
| 70 |
|
|
|
|
| 84 |
|
| 85 |
prompt += f"\n\nOutput:\n{player}"
|
| 86 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 87 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 88 |
response = completion(
|
| 89 |
model=model,
|
| 90 |
messages=[{"role": "system", "content": prompt}],
|
|
|
|
| 135 |
prompt += f"\n\nInstruction:\n{instruction}"
|
| 136 |
prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
|
| 137 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 138 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 139 |
response = completion(
|
| 140 |
model=model,
|
| 141 |
messages=[{"role": "system", "content": prompt}],
|