fa_agents

Runtime error

App Files Community

j14i committed on Dec 24, 2025

Commit

86b8466

1 Parent(s): 90a12a7

Shoutout Warns

Browse files

Files changed (2) hide show

agent.py +3 -0
test_bench.py +21 -1

agent.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 from typing import Annotated, TypedDict
 from dotenv import load_dotenv
@@ -11,6 +12,8 @@ from langgraph.graph.state import END, START, CompiledStateGraph, StateGraph
 from langgraph.prebuilt import ToolNode
 from pydantic import SecretStr
 load_dotenv()

 import os
+import warnings
 from typing import Annotated, TypedDict
 from dotenv import load_dotenv
 from langgraph.prebuilt import ToolNode
 from pydantic import SecretStr
+warnings.filterwarnings("ignore", category=UserWarning, module="langchain_tavily")
 load_dotenv()

test_bench.py CHANGED Viewed

@@ -7,6 +7,7 @@ Usage:
     uv run python test_bench.py --level 1          # Run on level 1 only
     uv run python test_bench.py --level 1 --n 3    # Run on 3 level 1 questions
     uv run python test_bench.py --all              # Run on all validation questions
     uv run python test_bench.py --type youtube,file,web,excel,pdf,image,audio,text-only
 """
@@ -146,6 +147,21 @@ def filter_by_type(
     return filtered
 def load_gaia_data(level: int | None = None) -> list[GaiaQuestion]:
     """Load GAIA validation dataset with all metadata."""
     print("Downloading GAIA dataset...")
@@ -179,6 +195,7 @@ def run_test_bench(
     task_type: str | None = None,
     run_all: bool = False,
     save_results: bool = True,
 ) -> list[TestResult]:
     """
     Run the test bench on the agent.
@@ -198,6 +215,7 @@ def run_test_bench(
     questions = load_gaia_data(level=level)
     questions = filter_by_type(questions, task_type)
     if not questions:
         print(f"No questions found for type '{task_type}'")
@@ -219,7 +237,7 @@ def run_test_bench(
         if q.file_name:
             print(f"  File: {q.file_name}")
         if q.annotator.tools:
-            print(f"  Tools needed: {q.annotator.tools}")
         try:
             actual = agent(q.question)
@@ -349,6 +367,7 @@ def main():
     )
     parser.add_argument("--all", action="store_true", help="Run all questions")
     parser.add_argument("--no-save", action="store_true", help="Don't save results")
     args = parser.parse_args()
     from agent import BasicAgent
@@ -365,6 +384,7 @@ def main():
         task_type=args.type,
         run_all=args.all,
         save_results=not args.no_save,
     )

     uv run python test_bench.py --level 1          # Run on level 1 only
     uv run python test_bench.py --level 1 --n 3    # Run on 3 level 1 questions
     uv run python test_bench.py --all              # Run on all validation questions
+    uv run python test_bench.py --task-id 1234     # Run on specific task ID
     uv run python test_bench.py --type youtube,file,web,excel,pdf,image,audio,text-only
 """
     return filtered
+def filter_by_task_id(
+    questions: list[GaiaQuestion], task_id: str | None
+) -> list[GaiaQuestion]:
+    """Filter questions by task id."""
+    if not task_id:
+        return questions
+    filtered = []
+    for q in questions:
+        if q.task_id == task_id:
+            filtered.append(q)
+    return filtered
 def load_gaia_data(level: int | None = None) -> list[GaiaQuestion]:
     """Load GAIA validation dataset with all metadata."""
     print("Downloading GAIA dataset...")
     task_type: str | None = None,
     run_all: bool = False,
     save_results: bool = True,
+    task_id: str | None = None,
 ) -> list[TestResult]:
     """
     Run the test bench on the agent.
     questions = load_gaia_data(level=level)
     questions = filter_by_type(questions, task_type)
+    questions = filter_by_task_id(questions, task_id)
     if not questions:
         print(f"No questions found for type '{task_type}'")
         if q.file_name:
             print(f"  File: {q.file_name}")
         if q.annotator.tools:
+            print(f"  Tools needed:\n{q.annotator.tools}")
         try:
             actual = agent(q.question)
     )
     parser.add_argument("--all", action="store_true", help="Run all questions")
     parser.add_argument("--no-save", action="store_true", help="Don't save results")
+    parser.add_argument("--task-id", type=str, help="Run specific task ID")
     args = parser.parse_args()
     from agent import BasicAgent
         task_type=args.type,
         run_all=args.all,
         save_results=not args.no_save,
+        task_id=args.task_id,
     )