Spaces:

smolagents
/

ml-agent

Running

akseljoonas HF Staff committed on Nov 28, 2025

Commit

9fe493b

1 Parent(s): 9de209d

gpt 5 nano judge

Files changed (3) hide show

agent/main.py CHANGED Viewed

@@ -16,6 +16,17 @@ from agent.core.agent_loop import submission_loop
 from agent.core.session import OpType
 from agent.core.tools import ToolRouter
 @dataclass
 class Operation:
@@ -98,15 +109,6 @@ async def main():
     print("=" * 60)
     print("Type your messages below. Type 'exit', 'quit', or '/quit' to end.\n")
-    lmnr_api_key = os.environ.get("LMNR_API_KEY")
-    if lmnr_api_key:
-        try:
-            Laminar.initialize(project_api_key=lmnr_api_key)
-            litellm.callbacks = [LaminarLiteLLMCallback()]
-            print("✅ Laminar initialized")
-        except Exception as e:
-            print(f"⚠️ Failed to initialize Laminar: {e}")
     # Create queues for communication
     submission_queue = asyncio.Queue()
     event_queue = asyncio.Queue()

 from agent.core.session import OpType
 from agent.core.tools import ToolRouter
+litellm.drop_params = True
+lmnr_api_key = os.environ.get("LMNR_API_KEY")
+if lmnr_api_key:
+    try:
+        Laminar.initialize(project_api_key=lmnr_api_key)
+        litellm.callbacks = [LaminarLiteLLMCallback()]
+        print("✅ Laminar initialized")
+    except Exception as e:
+        print(f"⚠️ Failed to initialize Laminar: {e}")
 @dataclass
 class Operation:
     print("=" * 60)
     print("Type your messages below. Type 'exit', 'quit', or '/quit' to end.\n")
     # Create queues for communication
     submission_queue = asyncio.Queue()
     event_queue = asyncio.Queue()

eval/rubric_eval.py CHANGED Viewed

@@ -97,7 +97,7 @@ def evaluate_with_rubrics(
     question: str,
     response: str,
     rubrics: List[RubricData],
-    model: str = "gpt-4o-mini",
 ) -> RubricEvaluation:
     """
     Evaluate response using RaR-Explicit method (weighted sum).

     question: str,
     response: str,
     rubrics: List[RubricData],
+    model: str = "gpt-5-nano",
 ) -> RubricEvaluation:
     """
     Evaluate response using RaR-Explicit method (weighted sum).

eval/task.py CHANGED Viewed

@@ -14,6 +14,7 @@ from inspect_ai import Task, task
 from inspect_ai.dataset import Sample, hf_dataset
 from inspect_ai.scorer import Score, Target, mean, scorer
 from inspect_ai.solver._task_state import TaskState
 PROJECT_ROOT = Path(__file__).resolve().parents[1]
 if str(PROJECT_ROOT) not in sys.path:
@@ -56,7 +57,7 @@ def _metadata_to_rubrics(metadata: dict[str, Any]) -> list[RubricData]:
 @scorer(metrics=[mean()], name="rubric_scorer")
-def rubric_scorer(judge_model: str = "gpt-4o-mini"):
     async def score(state: TaskState, target: Target) -> Score:
         response_text = state.output.completion or state.output.message.text
         question = state.metadata.get("question", state.input_text)
@@ -99,8 +100,9 @@ def hf_benchmark_with_rubrics(
     },
     dataset_name: str = "akseljoonas/hf-agent-rubrics@train",
     limit: int | None = None,
-    judge_model: str = "gpt-4o-mini",
 ) -> Task:
     if "@" not in dataset_name:
         raise ValueError("Dataset name must be in the format 'author/dataset@split'")
     dataset_name, dataset_split = dataset_name.split("@")

 from inspect_ai.dataset import Sample, hf_dataset
 from inspect_ai.scorer import Score, Target, mean, scorer
 from inspect_ai.solver._task_state import TaskState
+import litellm
 PROJECT_ROOT = Path(__file__).resolve().parents[1]
 if str(PROJECT_ROOT) not in sys.path:
 @scorer(metrics=[mean()], name="rubric_scorer")
+def rubric_scorer(judge_model: str = "gpt-5-mini"):
     async def score(state: TaskState, target: Target) -> Score:
         response_text = state.output.completion or state.output.message.text
         question = state.metadata.get("question", state.input_text)
     },
     dataset_name: str = "akseljoonas/hf-agent-rubrics@train",
     limit: int | None = None,
+    judge_model: str = "gpt-5-mini",
 ) -> Task:
+    litellm.drop_params = True
     if "@" not in dataset_name:
         raise ValueError("Dataset name must be in the format 'author/dataset@split'")
     dataset_name, dataset_split = dataset_name.split("@")