Spaces:
Sleeping
Sleeping
ping98k
committed on
Commit
·
e4a181a
1
Parent(s):
9036d14
update to run on huggingface spaces
Browse files
- main.py +12 -12
- tournament_utils.py +4 -4
main.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
-
load_dotenv("./local.env",override=True)
|
| 3 |
import os, json, re, ast, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
|
@@ -40,9 +40,9 @@ PAIRWISE_FILTER_DEFAULT = os.getenv("ENABLE_PAIRWISE_FILTER", "true").lower() ==
|
|
| 40 |
GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
|
| 41 |
SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
|
| 42 |
PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
|
| 43 |
-
GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "
|
| 44 |
-
SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.
|
| 45 |
-
PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.
|
| 46 |
SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
|
| 47 |
PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
|
| 48 |
GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
|
|
@@ -107,9 +107,9 @@ def run_tournament(
|
|
| 107 |
if pairwise_temperature is None:
|
| 108 |
pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
|
| 109 |
if not api_base:
|
| 110 |
-
api_base =
|
| 111 |
if not api_token:
|
| 112 |
-
api_token =
|
| 113 |
if not generate_model:
|
| 114 |
generate_model = GENERATE_MODEL_DEFAULT
|
| 115 |
if not score_model:
|
|
@@ -322,7 +322,7 @@ def run_tournament(
|
|
| 322 |
demo = gr.Interface(
|
| 323 |
fn=run_tournament,
|
| 324 |
inputs=[
|
| 325 |
-
gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path"),
|
| 326 |
gr.Textbox(value="", label="API Token", type="password"),
|
| 327 |
gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
|
| 328 |
gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
|
|
@@ -340,11 +340,11 @@ demo = gr.Interface(
|
|
| 340 |
gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
|
| 341 |
gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
|
| 342 |
gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
|
| 343 |
-
gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)"),
|
| 344 |
-
gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)"),
|
| 345 |
-
gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)"),
|
| 346 |
-
gr.Checkbox(value=
|
| 347 |
-
gr.Checkbox(value=
|
| 348 |
],
|
| 349 |
outputs=[
|
| 350 |
gr.Textbox(lines=10, label="Process"),
|
|
|
|
| 1 |
from dotenv import load_dotenv
|
| 2 |
+
# load_dotenv("./local.env",override=True)
|
| 3 |
import os, json, re, ast, gradio as gr
|
| 4 |
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 5 |
from tqdm import tqdm
|
|
|
|
| 40 |
GENERATE_MODEL_DEFAULT = os.getenv("GENERATE_MODEL", "gpt-4o-mini")
|
| 41 |
SCORE_MODEL_DEFAULT = os.getenv("SCORE_MODEL", "gpt-4o-mini")
|
| 42 |
PAIRWISE_MODEL_DEFAULT = os.getenv("PAIRWISE_MODEL", "gpt-4o-mini")
|
| 43 |
+
GENERATE_TEMPERATURE_DEFAULT = float(os.getenv("GENERATE_TEMPERATURE", "1.2"))
|
| 44 |
+
SCORE_TEMPERATURE_DEFAULT = float(os.getenv("SCORE_TEMPERATURE", "0.1"))
|
| 45 |
+
PAIRWISE_TEMPERATURE_DEFAULT = float(os.getenv("PAIRWISE_TEMPERATURE", "0.1"))
|
| 46 |
SCORE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_SCORE", "true").lower() == "true"
|
| 47 |
PAIRWISE_WITH_INSTRUCTION_DEFAULT = os.getenv("PASS_INSTRUCTION_TO_PAIRWISE", "true").lower() == "true"
|
| 48 |
GENERATE_THINKING_DEFAULT = os.getenv("ENABLE_GENERATE_THINKING", "false").lower() == "true"
|
|
|
|
| 107 |
if pairwise_temperature is None:
|
| 108 |
pairwise_temperature = PAIRWISE_TEMPERATURE_DEFAULT
|
| 109 |
if not api_base:
|
| 110 |
+
api_base = ""
|
| 111 |
if not api_token:
|
| 112 |
+
api_token = ""
|
| 113 |
if not generate_model:
|
| 114 |
generate_model = GENERATE_MODEL_DEFAULT
|
| 115 |
if not score_model:
|
|
|
|
| 322 |
demo = gr.Interface(
|
| 323 |
fn=run_tournament,
|
| 324 |
inputs=[
|
| 325 |
+
gr.Textbox(value=API_BASE_DEFAULT, label="API Base Path", info="Clone repos and run locally To change the API base path"),
|
| 326 |
gr.Textbox(value="", label="API Token", type="password"),
|
| 327 |
gr.Textbox(value=GENERATE_MODEL_DEFAULT, label="Generation Model"),
|
| 328 |
gr.Textbox(value=SCORE_MODEL_DEFAULT, label="Score Model"),
|
|
|
|
| 340 |
gr.Checkbox(value=PAIRWISE_FILTER_DEFAULT, label="Enable Pairwise Filter"),
|
| 341 |
gr.Checkbox(value=SCORE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Score Model"),
|
| 342 |
gr.Checkbox(value=PAIRWISE_WITH_INSTRUCTION_DEFAULT, label="Pass Instruction to Pairwise Model"),
|
| 343 |
+
gr.Checkbox(value=GENERATE_THINKING_DEFAULT, label="Enable Thinking (Generate)", info="Enable Qwen3 think mode"),
|
| 344 |
+
gr.Checkbox(value=SCORE_THINKING_DEFAULT, label="Enable Thinking (Score)" , info="Enable Qwen3 think mode"),
|
| 345 |
+
gr.Checkbox(value=PAIRWISE_THINKING_DEFAULT, label="Enable Thinking (Pairwise)" , info="Enable Qwen3 think mode"),
|
| 346 |
+
gr.Checkbox(value=True, label="Enable Explain (Score)", info="Prompt LLM to think step by step"),
|
| 347 |
+
gr.Checkbox(value=True, label="Enable Explain (Pairwise)", info="Prompt LLM to think step by step"),
|
| 348 |
],
|
| 349 |
outputs=[
|
| 350 |
gr.Textbox(lines=10, label="Process"),
|
tournament_utils.py
CHANGED
|
@@ -35,7 +35,7 @@ def generate_players(
|
|
| 35 |
"""
|
| 36 |
messages = [{"role": "user", "content": instruction}]
|
| 37 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 38 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 39 |
response = completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
|
@@ -64,7 +64,7 @@ def prompt_score(
|
|
| 64 |
return_usage: bool = False,
|
| 65 |
) -> str | tuple[str, object]:
|
| 66 |
"""Return a plaintext score evaluation for `player`."""
|
| 67 |
-
example_scores = ", ".join(["
|
| 68 |
prompt = f"""Evaluate the output below on the following criteria:
|
| 69 |
{criteria_block}
|
| 70 |
|
|
@@ -84,7 +84,7 @@ def prompt_score(
|
|
| 84 |
|
| 85 |
prompt += f"\n\nOutput:\n{player}"
|
| 86 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 87 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 88 |
response = completion(
|
| 89 |
model=model,
|
| 90 |
messages=[{"role": "system", "content": prompt}],
|
|
@@ -135,7 +135,7 @@ def prompt_pairwise(
|
|
| 135 |
prompt += f"\n\nInstruction:\n{instruction}"
|
| 136 |
prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
|
| 137 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 138 |
-
kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 139 |
response = completion(
|
| 140 |
model=model,
|
| 141 |
messages=[{"role": "system", "content": prompt}],
|
|
|
|
| 35 |
"""
|
| 36 |
messages = [{"role": "user", "content": instruction}]
|
| 37 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 38 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 39 |
response = completion(
|
| 40 |
model=model,
|
| 41 |
messages=messages,
|
|
|
|
| 64 |
return_usage: bool = False,
|
| 65 |
) -> str | tuple[str, object]:
|
| 66 |
"""Return a plaintext score evaluation for `player`."""
|
| 67 |
+
example_scores = ", ".join(["5"] * len(criteria_list)) or "5"
|
| 68 |
prompt = f"""Evaluate the output below on the following criteria:
|
| 69 |
{criteria_block}
|
| 70 |
|
|
|
|
| 84 |
|
| 85 |
prompt += f"\n\nOutput:\n{player}"
|
| 86 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 87 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 88 |
response = completion(
|
| 89 |
model=model,
|
| 90 |
messages=[{"role": "system", "content": prompt}],
|
|
|
|
| 135 |
prompt += f"\n\nInstruction:\n{instruction}"
|
| 136 |
prompt += f"\n\nPlayers:\n<A>{a}</A>\n<B>{b}</B>"
|
| 137 |
kwargs = _completion_kwargs(api_base, api_key, temperature)
|
| 138 |
+
# kwargs["chat_template_kwargs"] = {"enable_thinking": thinking}
|
| 139 |
response = completion(
|
| 140 |
model=model,
|
| 141 |
messages=[{"role": "system", "content": prompt}],
|