Pulastya B committed on
Commit
0310ce5
·
1 Parent(s): eeeb8b7

feat: Implement text-based tool calling for Gemini (ReAct pattern)

Browse files

- Remove dependency on unstable Gemini function calling SDK
- Parse tool calls from text using JSON blocks or function syntax
- Support multiple formats: JSON blocks, function calls
- Send tool results as formatted text instead of function responses
- More reliable than SDK-based function calling
- Compatible with all Gemini SDK versions
- Fixes all tool calling issues on Render

Files changed (1) hide show
  1. src/orchestrator.py +101 -18
src/orchestrator.py CHANGED
@@ -361,7 +361,19 @@ class DataScienceCopilot:
361
  """Build comprehensive system prompt for the copilot."""
362
  return """You are an autonomous Data Science Agent. You EXECUTE tasks, not advise.
363
 
364
- **CRITICAL: Use the provided function calling tools. Do NOT generate XML-style function calls.**
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  **CRITICAL: Detect the user's intent and use the appropriate workflow.**
367
 
@@ -1048,6 +1060,72 @@ You are a DOER. Complete workflows based on user intent."""
1048
 
1049
  return compressed
1050
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1051
  def _convert_to_gemini_tools(self, groq_tools: List[Dict]) -> List[Dict]:
1052
  """
1053
  Convert Groq/OpenAI format tools to Gemini format.
@@ -1375,20 +1453,12 @@ You are a DOER. Complete workflows based on user intent."""
1375
  combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
1376
  response = gemini_chat.send_message(combined_message)
1377
  else:
1378
- # Subsequent iterations: send function responses
1379
  last_tool_msg = messages[-1]
1380
  if last_tool_msg.get("role") == "tool":
1381
- # Send function response back to Gemini
1382
- from google.ai.generativelanguage_v1beta.types import content as glm_content
1383
-
1384
- function_response_part = glm_content.Part(
1385
- function_response=glm_content.FunctionResponse(
1386
- name=last_tool_msg["name"],
1387
- response={"result": last_tool_msg["content"]}
1388
- )
1389
- )
1390
-
1391
- response = gemini_chat.send_message(function_response_part)
1392
  else:
1393
  # Fallback
1394
  response = gemini_chat.send_message("Continue with the next step.")
@@ -1406,16 +1476,29 @@ You are a DOER. Complete workflows based on user intent."""
1406
  self.api_calls_made += 1
1407
  self.last_api_call_time = time.time()
1408
 
1409
- # Extract function calls from Gemini response
1410
  tool_calls = []
1411
  final_content = None
1412
 
1413
  if response.candidates and response.candidates[0].content.parts:
1414
  for part in response.candidates[0].content.parts:
1415
- if hasattr(part, 'function_call') and part.function_call:
1416
- tool_calls.append(part.function_call)
1417
- elif hasattr(part, 'text') and part.text:
1418
- final_content = part.text
 
 
 
 
 
 
 
 
 
 
 
 
 
1419
 
1420
  # Check if done (no tool calls)
1421
  if not tool_calls:
 
361
  """Build comprehensive system prompt for the copilot."""
362
  return """You are an autonomous Data Science Agent. You EXECUTE tasks, not advise.
363
 
364
+ **CRITICAL: Tool Calling Format**
365
+ When you need to use a tool, respond with a JSON block like this:
366
+ ```json
367
+ {
368
+ "tool": "tool_name",
369
+ "arguments": {
370
+ "param1": "value1",
371
+ "param2": 123
372
+ }
373
+ }
374
+ ```
375
+
376
+ **ONE TOOL PER RESPONSE**. After tool execution, I will send you the result and you can call the next tool.
377
 
378
  **CRITICAL: Detect the user's intent and use the appropriate workflow.**
379
 
 
1060
 
1061
  return compressed
1062
 
1063
+ def _parse_text_tool_calls(self, text_response: str) -> List[Dict[str, Any]]:
1064
+ """
1065
+ Parse tool calls from text-based LLM response (ReAct pattern).
1066
+ Supports multiple formats:
1067
+ - JSON: {"tool": "tool_name", "arguments": {...}}
1068
+ - Function: tool_name(arg1="value", arg2="value")
1069
+ - Markdown: ```json {...} ```
1070
+ """
1071
+ import re
1072
+ tool_calls = []
1073
+
1074
+ # Pattern 1: JSON blocks (most reliable)
1075
+ json_pattern = r'```(?:json)?\s*(\{[^\`]+\})\s*```'
1076
+ json_matches = re.findall(json_pattern, text_response, re.DOTALL)
1077
+
1078
+ for match in json_matches:
1079
+ try:
1080
+ tool_data = json.loads(match)
1081
+ if "tool" in tool_data or "function" in tool_data:
1082
+ tool_name = tool_data.get("tool") or tool_data.get("function")
1083
+ arguments = tool_data.get("arguments") or tool_data.get("args") or {}
1084
+ tool_calls.append({
1085
+ "id": f"call_{len(tool_calls)}",
1086
+ "function": {
1087
+ "name": tool_name,
1088
+ "arguments": json.dumps(arguments)
1089
+ }
1090
+ })
1091
+ except json.JSONDecodeError:
1092
+ continue
1093
+
1094
+ # Pattern 2: Function call format - tool_name(arg1="value", arg2=123)
1095
+ if not tool_calls:
1096
+ func_pattern = r'(\w+)\s*\((.*?)\)'
1097
+ for match in re.finditer(func_pattern, text_response):
1098
+ tool_name = match.group(1)
1099
+ args_str = match.group(2)
1100
+
1101
+ # Check if this looks like a known tool
1102
+ if any(tool_name in tool["function"]["name"] for tool in self._compress_tools_registry()):
1103
+ # Parse arguments
1104
+ arguments = {}
1105
+ arg_pattern = r'(\w+)\s*=\s*(["\']?)([^,\)]+)\2'
1106
+ for arg_match in re.finditer(arg_pattern, args_str):
1107
+ key = arg_match.group(1)
1108
+ value = arg_match.group(3)
1109
+ # Try to parse as number/bool
1110
+ if value.lower() == "true":
1111
+ arguments[key] = True
1112
+ elif value.lower() == "false":
1113
+ arguments[key] = False
1114
+ elif value.isdigit():
1115
+ arguments[key] = int(value)
1116
+ else:
1117
+ arguments[key] = value
1118
+
1119
+ tool_calls.append({
1120
+ "id": f"call_{len(tool_calls)}",
1121
+ "function": {
1122
+ "name": tool_name,
1123
+ "arguments": json.dumps(arguments)
1124
+ }
1125
+ })
1126
+
1127
+ return tool_calls
1128
+
1129
  def _convert_to_gemini_tools(self, groq_tools: List[Dict]) -> List[Dict]:
1130
  """
1131
  Convert Groq/OpenAI format tools to Gemini format.
 
1453
  combined_message = f"{messages[0]['content']}\n\n{messages[1]['content']}"
1454
  response = gemini_chat.send_message(combined_message)
1455
  else:
1456
+ # Subsequent iterations: send tool results as plain text
1457
  last_tool_msg = messages[-1]
1458
  if last_tool_msg.get("role") == "tool":
1459
+ # Format tool result as text for Gemini
1460
+ result_message = f"Tool '{last_tool_msg['name']}' executed successfully.\n\nResult:\n{last_tool_msg['content']}\n\nWhat's the next step?"
1461
+ response = gemini_chat.send_message(result_message)
 
 
 
 
 
 
 
 
1462
  else:
1463
  # Fallback
1464
  response = gemini_chat.send_message("Continue with the next step.")
 
1476
  self.api_calls_made += 1
1477
  self.last_api_call_time = time.time()
1478
 
1479
+ # Extract tool calls from Gemini TEXT response (text-based tool calling)
1480
  tool_calls = []
1481
  final_content = None
1482
 
1483
  if response.candidates and response.candidates[0].content.parts:
1484
  for part in response.candidates[0].content.parts:
1485
+ if hasattr(part, 'text') and part.text:
1486
+ text_response = part.text
1487
+ final_content = text_response
1488
+
1489
+ # Parse tool calls from text using JSON blocks or function syntax
1490
+ parsed_calls = self._parse_text_tool_calls(text_response)
1491
+ if parsed_calls:
1492
+ # Convert to tool_call objects matching Groq format
1493
+ for call in parsed_calls:
1494
+ tool_call_obj = type('ToolCall', (), {
1495
+ 'id': call['id'],
1496
+ 'function': type('Function', (), {
1497
+ 'name': call['function']['name'],
1498
+ 'arguments': call['function']['arguments']
1499
+ })()
1500
+ })()
1501
+ tool_calls.append(tool_call_obj)
1502
 
1503
  # Check if done (no tool calls)
1504
  if not tool_calls: