Spaces:

InternScience
/

ResearchHarness

Running

App Files Files Community

black-yt committed 30 days ago

Commit

b4eb71d

1 Parent(s): a47195c

Sync tool execution semantics

Browse files

Files changed (4) hide show

VERSION +1 -1
agent_base/prompts/system_base.md +1 -0
agent_base/react_agent.py +81 -22
agent_base/tools/tool_web.py +14 -29

VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- v0.0.35


1	+ v0.0.36

agent_base/prompts/system_base.md CHANGED Viewed

@@ -92,6 +92,7 @@ You are a capable all-purpose AI assistant. You do far more than simple question
   - ask the human user for essential missing information -> `AskUser`
   - persistent interactive shell state -> `Terminal*`
 - Search results and scholar results are discovery aids. They are not page-verification evidence by themselves.
 - Prefer `Bash` over `Terminal*` unless persistent interactive shell state is genuinely required.
 ## Human Clarification Workflow

   - ask the human user for essential missing information -> `AskUser`
   - persistent interactive shell state -> `Terminal*`
 - Search results and scholar results are discovery aids. They are not page-verification evidence by themselves.
+- Each tool call should express one clear request. For independent read-only work, such as multiple searches, multiple page fetches, or multiple file reads, issue multiple tool calls in the same assistant turn rather than packing several requests into one tool argument.
 - Prefer `Bash` over `Terminal*` unless persistent interactive shell state is genuinely required.
 ## Human Clarification Workflow

agent_base/react_agent.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import argparse
 from contextlib import contextmanager
 import json
 import os
@@ -79,6 +80,18 @@ DEFAULT_TEMPERATURE = 0.6
 DEFAULT_TOP_P = 0.95
 DEFAULT_PRESENCE_PENALTY = 1.1
 DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
 def default_model_name() -> str:
@@ -613,6 +626,26 @@ def execute_tool_by_name(tool_map: dict[str, Any], tool_name: str, tool_args: An
     return tool.call(tool_args, **kwargs)
 class MultiTurnReactAgent(BaseAgent):
     def __init__(
         self,
@@ -1216,38 +1249,64 @@ class MultiTurnReactAgent(BaseAgent):
                 tool_turn_message_start = len(messages)
                 messages.append(assistant_message)
                 deferred_image_contexts: list[tuple[str, str, Any, Any, dict[str, Any]]] = []
                 for tool_call, tool_arguments in zip(assistant_tool_calls, assistant_tool_arguments):
                     if remaining_runtime_seconds(runtime_deadline) is not None and remaining_runtime_seconds(runtime_deadline) <= 0:
                         result_text = "No result found before the maximum agent runtime limit."
                         termination = f"agent runtime limit reached: {agent_runtime_limit}s"
                         return finalize(result_text, termination, error=termination)
-                    tool_call_id = str(tool_call.get("id", ""))
-                    function_block = tool_call.get("function", {}) if isinstance(tool_call, dict) else {}
-                    tool_name = str(function_block.get("name", ""))
                     try:
-                        result = self.custom_call_tool(
-                            tool_name,
-                            tool_arguments,
-                            workspace_root=resolved_workspace_root,
-                            runtime_deadline=runtime_deadline,
-                            model_name=self.model,
                         )
                     except KeyboardInterrupt:
                         messages = messages[:tool_turn_message_start]
                         return finalize_interrupted()
-                    tool_result_text = tool_result_message_content(result)
-                    messages.append(api_tool_message(tool_call_id, result))
-                    trace_writer.append(
-                        role="tool",
-                        text=tool_result_text,
-                        turn_index=round_index,
-                        tool_call_ids=[tool_call_id],
-                        tool_names=[tool_name],
-                        tool_arguments=[tool_arguments],
-                    )
-                    extra_image_context = image_context_message(result, self.model)
-                    if extra_image_context is not None:
-                        deferred_image_contexts.append((tool_call_id, tool_name, tool_arguments, result, extra_image_context))
                 for tool_call_id, tool_name, tool_arguments, result, extra_image_context in deferred_image_contexts:
                     messages.append(extra_image_context)
                     trace_writer.append(

 import argparse
+from concurrent.futures import ThreadPoolExecutor
 from contextlib import contextmanager
 import json
 import os
 DEFAULT_TOP_P = 0.95
 DEFAULT_PRESENCE_PENALTY = 1.1
 DEFAULT_LLM_TIMEOUT_SECONDS = 600.0
+MAX_PARALLEL_READ_TOOL_CALLS = 3
+PARALLEL_READ_TOOL_NAMES = frozenset(
+    {
+        "Glob",
+        "Grep",
+        "Read",
+        "ReadImage",
+        "WebSearch",
+        "ScholarSearch",
+        "WebFetch",
+    }
+)
 def default_model_name() -> str:
     return tool.call(tool_args, **kwargs)
+def can_parallelize_tool_name(tool_name: str) -> bool:
+    return tool_name in PARALLEL_READ_TOOL_NAMES
+def tool_execution_batches(tool_names: Sequence[str]) -> list[list[int]]:
+    batches: list[list[int]] = []
+    read_batch: list[int] = []
+    for index, tool_name in enumerate(tool_names):
+        if can_parallelize_tool_name(tool_name):
+            read_batch.append(index)
+            continue
+        if read_batch:
+            batches.append(read_batch)
+            read_batch = []
+        batches.append([index])
+    if read_batch:
+        batches.append(read_batch)
+    return batches
 class MultiTurnReactAgent(BaseAgent):
     def __init__(
         self,
                 tool_turn_message_start = len(messages)
                 messages.append(assistant_message)
                 deferred_image_contexts: list[tuple[str, str, Any, Any, dict[str, Any]]] = []
+                tool_call_items: list[dict[str, Any]] = []
                 for tool_call, tool_arguments in zip(assistant_tool_calls, assistant_tool_arguments):
+                    function_block = tool_call.get("function", {}) if isinstance(tool_call, dict) else {}
+                    tool_call_items.append(
+                        {
+                            "tool_call_id": str(tool_call.get("id", "")),
+                            "tool_name": str(function_block.get("name", "")),
+                            "tool_arguments": tool_arguments,
+                        }
+                    )
+                def execute_tool_item(item: dict[str, Any]) -> tuple[dict[str, Any], Any]:
+                    result = self.custom_call_tool(
+                        str(item["tool_name"]),
+                        item["tool_arguments"],
+                        workspace_root=resolved_workspace_root,
+                        runtime_deadline=runtime_deadline,
+                        model_name=self.model,
+                    )
+                    return item, result
+                for batch_indexes in tool_execution_batches([str(item["tool_name"]) for item in tool_call_items]):
                     if remaining_runtime_seconds(runtime_deadline) is not None and remaining_runtime_seconds(runtime_deadline) <= 0:
                         result_text = "No result found before the maximum agent runtime limit."
                         termination = f"agent runtime limit reached: {agent_runtime_limit}s"
                         return finalize(result_text, termination, error=termination)
+                    batch_items = [tool_call_items[index] for index in batch_indexes]
                     try:
+                        should_run_parallel = len(batch_items) > 1 and all(
+                            can_parallelize_tool_name(str(item["tool_name"])) for item in batch_items
                         )
+                        if should_run_parallel:
+                            max_workers = min(MAX_PARALLEL_READ_TOOL_CALLS, len(batch_items))
+                            with ThreadPoolExecutor(max_workers=max_workers) as executor:
+                                batch_results = list(executor.map(execute_tool_item, batch_items))
+                        else:
+                            batch_results = [execute_tool_item(item) for item in batch_items]
                     except KeyboardInterrupt:
                         messages = messages[:tool_turn_message_start]
                         return finalize_interrupted()
+                    for item, result in batch_results:
+                        tool_call_id = str(item["tool_call_id"])
+                        tool_name = str(item["tool_name"])
+                        tool_arguments = item["tool_arguments"]
+                        tool_result_text = tool_result_message_content(result)
+                        messages.append(api_tool_message(tool_call_id, result))
+                        trace_writer.append(
+                            role="tool",
+                            text=tool_result_text,
+                            turn_index=round_index,
+                            tool_call_ids=[tool_call_id],
+                            tool_names=[tool_name],
+                            tool_arguments=[tool_arguments],
+                        )
+                        extra_image_context = image_context_message(result, self.model)
+                        if extra_image_context is not None:
+                            deferred_image_contexts.append((tool_call_id, tool_name, tool_arguments, result, extra_image_context))
                 for tool_call_id, tool_name, tool_arguments, result, extra_image_context in deferred_image_contexts:
                     messages.append(extra_image_context)
                     trace_writer.append(

agent_base/tools/tool_web.py CHANGED Viewed

@@ -4,7 +4,6 @@ import os
 import re
 import sys
 import time
-from concurrent.futures import ThreadPoolExecutor
 from typing import Optional, Union
 import requests
@@ -61,17 +60,13 @@ def _clean_webpage_text(text: str) -> str:
 class WebSearch(ToolBase):
     name = "WebSearch"
-    description = "Perform Google web searches and return the top results. Accepts multiple complementary queries."
     parameters = {
         "type": "object",
         "properties": {
             "query": {
-                "type": "array",
-                "items": {
-                    "type": "string",
-                },
-                "minItems": 1,
-                "description": "Array of query strings. Include multiple complementary search queries in a single call.",
             },
         },
         "required": ["query"],
@@ -166,27 +161,21 @@ class WebSearch(ToolBase):
         except ValueError as exc:
             return f"[WebSearch] {exc}"
-        if isinstance(query, list):
-            with ThreadPoolExecutor(max_workers=3) as executor:
-                responses = list(executor.map(self.search_with_serp, query))
-            response = "\n=======\n".join(responses)
-        else:
-            return "[WebSearch] 'query' must be a list of strings."
-        return response
 class ScholarSearch(ToolBase):
     name = "ScholarSearch"
-    description = "Search academic sources through Google Scholar and return relevant publication results."
     parameters = {
         "type": "object",
         "properties": {
             "query": {
-                "type": "array",
-                "items": {"type": "string", "description": "The search query."},
-                "minItems": 1,
-                "description": "The list of search queries for Google Scholar.",
             },
         },
         "required": ["query"],
@@ -264,13 +253,9 @@ class ScholarSearch(ToolBase):
         except ValueError as exc:
             return f"[ScholarSearch] {exc}"
-        if isinstance(query, list):
-            with ThreadPoolExecutor(max_workers=3) as executor:
-                response = list(executor.map(self.google_scholar_with_serp, query))
-            response = "\n=======\n".join(response)
-        else:
-            return "[ScholarSearch] 'query' must be a list of strings."
-        return response
 class WebFetch(ToolBase):
@@ -475,9 +460,9 @@ def main(argv: Optional[list[str]] = None) -> int:
     load_dotenv(PROJECT_ROOT / ".env")
     if args.tool == "search":
-        result = WebSearch().call({"query": [" ".join(args.query)]})
     elif args.tool == "scholar":
-        result = ScholarSearch().call({"query": [" ".join(args.query)]})
     else:
         result = WebFetch().call(
             {

 import re
 import sys
 import time
 from typing import Optional, Union
 import requests
 class WebSearch(ToolBase):
     name = "WebSearch"
+    description = "Perform one Google web search and return the top results. Call WebSearch multiple times for multiple queries."
     parameters = {
         "type": "object",
         "properties": {
             "query": {
+                "type": "string",
+                "description": "The search query.",
             },
         },
         "required": ["query"],
         except ValueError as exc:
             return f"[WebSearch] {exc}"
+        if not isinstance(query, str) or not query.strip():
+            return "[WebSearch] 'query' must be a non-empty string."
+        return self.search_with_serp(query.strip())
 class ScholarSearch(ToolBase):
     name = "ScholarSearch"
+    description = "Run one academic search through Google Scholar and return relevant publication results. Call ScholarSearch multiple times for multiple queries."
     parameters = {
         "type": "object",
         "properties": {
             "query": {
+                "type": "string",
+                "description": "The search query for Google Scholar.",
             },
         },
         "required": ["query"],
         except ValueError as exc:
             return f"[ScholarSearch] {exc}"
+        if not isinstance(query, str) or not query.strip():
+            return "[ScholarSearch] 'query' must be a non-empty string."
+        return self.google_scholar_with_serp(query.strip())
 class WebFetch(ToolBase):
     load_dotenv(PROJECT_ROOT / ".env")
     if args.tool == "search":
+        result = WebSearch().call({"query": " ".join(args.query)})
     elif args.tool == "scholar":
+        result = ScholarSearch().call({"query": " ".join(args.query)})
     else:
         result = WebFetch().call(
             {