Deep_Research_Agent

Sleeping

App Files Files Community

Lasdw committed on May 15, 2025

Commit

71621f7

1 Parent(s): b699411

fix python code tool error

Browse files

Files changed (2) hide show

agent.py +73 -12
tools.py +196 -35

agent.py CHANGED Viewed

@@ -29,7 +29,8 @@ from tools import (
     excel_to_text,
     save_attachment_to_tempfile,
     process_youtube_video,
-    transcribe_audio
 )
 load_dotenv()
@@ -69,6 +70,7 @@ excel_to_text: Convert Excel to Markdown table with attachment, args: {"excel_pa
 process_youtube_video: Extract and analyze YouTube video content by providing the video URL. Returns video metadata and transcript, args: {"url": {"type": "string"}, "summarize": {"type": "boolean", "optional": true}}
 transcribe_audio: Transcribe audio files using OpenAI Whisper, args: {"audio_path": {"type": "string"}, "file_content": {"type": "string", "optional": true}, "language": {"type": "string", "optional": true}}
 IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
 Example use for tools:
@@ -474,15 +476,64 @@ def python_code_node(state: AgentState) -> Dict[str, Any]:
     print(f"Python code action_input: {action_input}")
     print(f"Action input type: {type(action_input)}")
-    # Try different ways to extract the code
-    code = ""
-    if isinstance(action_input, dict):
-        code = action_input.get("code", "")
-        print(f"Extracted code from dict: {repr(code[:100])}")
-    elif isinstance(action_input, str):
-        # If action_input is a string, it might be the code directly
-        code = action_input
-        print(f"Using string as code: {repr(code[:100])}")
     # Additional validation: check for unmatched braces
     open_braces = code.count('{')
@@ -490,7 +541,7 @@ def python_code_node(state: AgentState) -> Dict[str, Any]:
     if open_braces != close_braces:
         result = f"Error: Code contains unmatched braces. Found {open_braces} '{{' and {close_braces} '}}'. Please check your code syntax."
     else:
-        # Call the code execution function
         result = run_python_code(code)
     print(f"Code execution result: {result[:100]}...")  # Print first 100 chars
@@ -1109,7 +1160,17 @@ class TurboNerd:
 # Example usage:
 if __name__ == "__main__":
     agent = TurboNerd(max_iterations=25)
-    response = agent("""Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.""")
     print("\nFinal Response:")
     print(response)

     excel_to_text,
     save_attachment_to_tempfile,
     process_youtube_video,
+    transcribe_audio,
+    extract_python_code_from_complex_input
 )
 load_dotenv()
 process_youtube_video: Extract and analyze YouTube video content by providing the video URL. Returns video metadata and transcript, args: {"url": {"type": "string"}, "summarize": {"type": "boolean", "optional": true}}
 transcribe_audio: Transcribe audio files using OpenAI Whisper, args: {"audio_path": {"type": "string"}, "file_content": {"type": "string", "optional": true}, "language": {"type": "string", "optional": true}}
+If you get stuck, try using another tool. For example if you are unable to find relevant information from the tavily_search tool, try using the wikipedia_search tool and vice versa.
 IMPORTANT: Make sure your JSON is properly formatted with double quotes around keys and string values.
 Example use for tools:
     print(f"Python code action_input: {action_input}")
     print(f"Action input type: {type(action_input)}")
+    # First try our specialized extraction function that handles nested structures
+    code = extract_python_code_from_complex_input(action_input)
+    # If extraction failed or returned the same complex structure, fallback to regex
+    if code == action_input or (isinstance(code, str) and code.strip().startswith('{') and '"code"' in code):
+        # Convert the action_input to string for regex processing if it's a dictionary
+        if isinstance(action_input, dict):
+            action_input_str = json.dumps(action_input)
+        else:
+            action_input_str = str(action_input)
+        # First, attempt direct regex extraction which is most robust for nested structures
+        import re
+        # Try to extract code using regex patterns for different nesting levels
+        # Pattern for deeply nested code
+        deep_pattern = re.search(r'"code"\s*:\s*"(.*?)(?<!\\)"\s*}\s*}\s*}', action_input_str, re.DOTALL)
+        if deep_pattern:
+            extracted_code = deep_pattern.group(1)
+            # Unescape the extracted code
+            extracted_code = extracted_code.replace('\\n', '\n').replace('\\"', '"').replace("\\'", "'")
+            code = extracted_code
+            print(f"Extracted deeply nested code using regex: {repr(code[:100])}")
+        # Pattern for single level nesting
+        elif '"code"' in action_input_str:
+            pattern = re.search(r'"code"\s*:\s*"(.*?)(?<!\\)"', action_input_str, re.DOTALL)
+            if pattern:
+                extracted_code = pattern.group(1)
+                # Unescape the extracted code
+                extracted_code = extracted_code.replace('\\n', '\n').replace('\\"', '"').replace("\\'", "'")
+                code = extracted_code
+                print(f"Extracted code using regex: {repr(code[:100])}")
+        # If regex extraction failed, try dictionary approaches
+        if code == action_input and isinstance(action_input, dict):
+            # Direct code access
+            if "code" in action_input:
+                code = action_input["code"]
+                print(f"Extracted code directly from dict: {repr(code[:100])}")
+            # Nested JSON structure handling
+            elif isinstance(action_input.get("code", ""), str) and action_input.get("code", "").strip().startswith('{'):
+                try:
+                    nested_json = json.loads(action_input["code"])
+                    if "action_input" in nested_json and isinstance(nested_json["action_input"], dict) and "code" in nested_json["action_input"]:
+                        code = nested_json["action_input"]["code"]
+                        print(f"Extracted code from nested JSON: {repr(code[:100])}")
+                except:
+                    # If parsing fails, use the code field as-is
+                    pass
+        # If still no code, use the action_input directly (string case)
+        if code == action_input and isinstance(action_input, str):
+            code = action_input
+            print(f"Using action_input as code: {repr(code[:100])}")
+    print(f"Final code to execute: {repr(code[:100])}...")
     # Additional validation: check for unmatched braces
     open_braces = code.count('{')
     if open_braces != close_braces:
         result = f"Error: Code contains unmatched braces. Found {open_braces} '{{' and {close_braces} '}}'. Please check your code syntax."
     else:
+        # Call the code execution function, which now also has improved extraction logic
         result = run_python_code(code)
     print(f"Code execution result: {result[:100]}...")  # Print first 100 chars
 # Example usage:
 if __name__ == "__main__":
     agent = TurboNerd(max_iterations=25)
+    response = agent("""Given this table defining * on the set S = {a, b, c, d, e}
+|*|a|b|c|d|e|
+|---|---|---|---|---|---|
+|a|a|b|c|b|d|
+|b|b|c|a|e|c|
+|c|c|a|b|b|a|
+|d|b|e|b|e|d|
+|e|d|b|a|d|c|
+provide the subset of S involved in any possible counter-examples that prove * is not commutative. Provide your answer as a comma separated list of the elements in the set in alphabetical order.""")
     print("\nFinal Response:")
     print(response)

tools.py CHANGED Viewed

@@ -24,46 +24,207 @@ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, No
 load_dotenv()
-def run_python_code(code: str):
-    """Execute Python code safely using exec() instead of subprocess."""
-    # Check for potentially dangerous operations
-    dangerous_operations = [
-        "os.system", "os.popen", "os.unlink", "os.remove",
-        "subprocess.run", "subprocess.call", "subprocess.Popen",
-        "shutil.rmtree", "shutil.move", "shutil.copy",
-        "open(", "file(", "eval(", "exec(",
-        "__import__", "input(", "raw_input(",
-        "__builtins__", "globals(", "locals(",
-        "compile(", "execfile(", "reload("
-    ]
-    # Safe imports that should be allowed
-    safe_imports = {
-        "import datetime", "import math", "import random",
-        "import statistics", "import collections", "import itertools",
-        "import re", "import json", "import csv", "import numpy",
-        "import pandas", "from math import", "from datetime import",
-        "from statistics import", "from collections import",
-        "from itertools import"
-    }
-    # Check for dangerous operations
-    for dangerous_op in dangerous_operations:
-        if dangerous_op in code:
-            return f"Error: Code contains potentially unsafe operations: {dangerous_op}"
-    # Check each line for imports
-    for line in code.splitlines():
-        line = line.strip()
-        if line.startswith("import ") or line.startswith("from "):
-            # Check if it's in our safe list
-            is_safe = any(line.startswith(safe_import) for safe_import in safe_imports)
-            # Also allow basic numpy/pandas imports
-            is_safe = is_safe or line.startswith("import numpy") or line.startswith("import pandas")
-            if not is_safe:
-                return f"Error: Code contains potentially unsafe import: {line}"
     try:
         # Capture stdout to get print output
         import io
         import sys

 load_dotenv()
# Escape sequences recognised when a "code" value must be unescaped by hand
# (i.e. when the surrounding text is not valid JSON).
_CODE_ESCAPES = {'n': '\n', 't': '\t', 'r': '\r', '"': '"', "'": "'", '\\': '\\'}

# Upper bound on how many JSON wrappers are peeled off a single input.
_MAX_CODE_LAYERS = 5


def _looks_like_code_wrapper(text):
    """Return True if *text* looks like a JSON object that mentions a "code" key."""
    return text.strip().startswith('{') and '"code"' in text


def _unescape_code_fragment(raw):
    """Decode the escape sequences of a raw (still escaped) JSON string body."""
    import json
    import re

    # Preferred: let the JSON decoder handle every escape exactly, including
    # \uXXXX. strict=False tolerates raw newlines inside the fragment.
    try:
        decoded = json.loads('"' + raw + '"', strict=False)
    except ValueError:
        decoded = None
    if isinstance(decoded, str):
        return decoded

    # Fallback for fragments that are not a valid JSON string (unescaped
    # quotes, \' ...). A single left-to-right pass is used so that an escaped
    # backslash followed by "n" is never mistaken for a newline escape.
    return re.sub(
        r'\\(.)',
        lambda match: _CODE_ESCAPES.get(match.group(1), match.group(0)),
        raw,
        flags=re.DOTALL,
    )


def _find_code_in_json(node, depth=0):
    """Return the first string stored under a "code" key inside *node*, else None."""
    import json

    if depth > 10:  # guard against pathological nesting
        return None
    if isinstance(node, dict):
        direct = node.get('code')
        if isinstance(direct, str):
            return direct
        children = list(node.values())
    elif isinstance(node, list):
        children = node
    elif isinstance(node, str) and node.lstrip().startswith('{'):
        # A string value may itself be a serialized JSON object.
        try:
            children = [json.loads(node)]
        except ValueError:
            return None
    else:
        return None

    for child in children:
        found = _find_code_in_json(child, depth + 1)
        if found is not None:
            return found
    return None


def _peel_code_layer(text):
    """Strip one JSON wrapper from *text*; return *text* unchanged if none is found."""
    import json
    import re

    try:
        parsed = json.loads(text)
    except ValueError:
        parsed = None
    else:
        found = _find_code_in_json(parsed)
        if found is not None:
            return found
        # {"action": ..., "action_input": "<code>"}: a plain string that is not
        # JSON itself is taken to be the code.
        if isinstance(parsed, dict) and 'action' in parsed:
            action_input = parsed.get('action_input')
            if isinstance(action_input, str):
                try:
                    json.loads(action_input)
                except ValueError:
                    return action_input
        return text

    # Malformed JSON. The lenient pattern requires a closing brace after the
    # value, which lets it survive unescaped quotes inside the code.
    lenient = re.findall(r'"code"\s*:\s*"(.*?)(?<!\\)"\s*}', text, re.DOTALL)
    if lenient:
        # The longest match is the most likely to be the complete code.
        candidate = max(lenient, key=len)
    else:
        # Otherwise accept a well-formed string literal after the last "code" key.
        strict = re.findall(r'"code"\s*:\s*"((?:[^"\\]|\\.)*)"', text, re.DOTALL)
        if not strict:
            return text
        candidate = strict[-1]

    extracted_code = _unescape_code_fragment(candidate)
    print(f"Extracted code using direct regex approach: {extracted_code[:50]}...")
    return extracted_code


def extract_python_code_from_complex_input(input_text):
    """
    Extract Python code from (possibly nested) JSON tool-call structures.

    Args:
        input_text: A string, or any object (typically a dict) that is
            serialized with json.dumps (falling back to str()) before
            processing.

    Returns:
        The extracted code as a string. If the input does not look like a JSON
        object mentioning a "code" key, or no code can be located, the
        (stringified) input is returned unchanged.

    Well-formed JSON is decoded with the json module so that escape sequences
    inside the code (for example a literal backslash-n in a string literal)
    are preserved exactly. Regex heuristics are only used for malformed JSON.
    Up to _MAX_CODE_LAYERS nested wrappers are peeled off.
    """
    import json

    # Convert to string if it's not already
    if not isinstance(input_text, str):
        try:
            input_text = json.dumps(input_text)
        except (TypeError, ValueError):
            input_text = str(input_text)

    current = input_text
    for _ in range(_MAX_CODE_LAYERS):
        if not _looks_like_code_wrapper(current):
            break
        peeled = _peel_code_layer(current)
        if peeled == current:
            # Nothing could be extracted; hand the text back as is.
            break
        current = peeled
    return current
+def run_python_code(code: str):
+    """Execute Python code safely using exec() instead of subprocess."""
     try:
+        # Pre-process code to handle complex nested structures
+        # This is our most aggressive approach to extract the actual code
+        code = extract_python_code_from_complex_input(code)
+        # First, check if the input is a nested JSON structure
+        if code.strip().startswith('{') and ('"action"' in code or "'action'" in code):
+            try:
+                # Common issue: escaped quotes causing JSON parse errors
+                # Pre-process to handle common escaping problems
+                preprocessed_code = code
+                # Handle the specific case we're seeing with nested escaped quotes
+                import re
+                # Search for nested code pattern - this is a more direct approach
+                code_pattern = re.search(r'"code"\s*:\s*"(.*?)"\s*\}\s*\}\s*\}', code, re.DOTALL)
+                if code_pattern:
+                    extracted_code = code_pattern.group(1)
+                    # Unescape the extracted code
+                    extracted_code = extracted_code.replace('\\n', '\n').replace('\\"', '"').replace("\\'", "'")
+                    code = extracted_code
+                    print(f"Extracted code using regex pattern: {code[:100]}")
+                else:
+                    # Try JSON parsing approach if regex fails
+                    import json
+                    try:
+                        # First try direct parsing
+                        parsed_json = json.loads(code)
+                        # Check if this is an action structure with embedded code
+                        if 'action' in parsed_json and 'action_input' in parsed_json:
+                            if isinstance(parsed_json['action_input'], dict) and 'code' in parsed_json['action_input']:
+                                # Extract the actual code from the nested structure
+                                code = parsed_json['action_input']['code']
+                                print(f"Extracted code using JSON parsing: {code[:100]}")
+                            elif isinstance(parsed_json['action_input'], str):
+                                # Try to parse the action_input as JSON if it's a string
+                                try:
+                                    inner_input = json.loads(parsed_json['action_input'])
+                                    if isinstance(inner_input, dict) and 'code' in inner_input:
+                                        code = inner_input['code']
+                                        print(f"Extracted nested code: {code[:100]}")
+                                except:
+                                    # If parsing fails, assume the action_input itself is the code
+                                    code = parsed_json['action_input']
+                                    print(f"Using action_input as code: {code[:100]}")
+                    except json.JSONDecodeError:
+                        # Direct parsing failed, try alternative approaches
+                        print("JSON parsing failed, trying alternative approaches")
+            except Exception as e:
+                print(f"Error during code extraction: {str(e)}")
+                # If JSON parsing fails, proceed with the original code
+                pass
+        print(f"Final code to execute: {code[:100]}...")
+        # Check for potentially dangerous operations
+        dangerous_operations = [
+            "os.system", "os.popen", "os.unlink", "os.remove",
+            "subprocess.run", "subprocess.call", "subprocess.Popen",
+            "shutil.rmtree", "shutil.move", "shutil.copy",
+            "open(", "file(", "eval(", "exec(",
+            "__import__", "input(", "raw_input(",
+            "__builtins__", "globals(", "locals(",
+            "compile(", "execfile(", "reload("
+        ]
+        # Safe imports that should be allowed
+        safe_imports = {
+            "import datetime", "import math", "import random",
+            "import statistics", "import collections", "import itertools",
+            "import re", "import json", "import csv", "import numpy",
+            "import pandas", "from math import", "from datetime import",
+            "from statistics import", "from collections import",
+            "from itertools import"
+        }
+        # Check for dangerous operations
+        for dangerous_op in dangerous_operations:
+            if dangerous_op in code:
+                return f"Error: Code contains potentially unsafe operations: {dangerous_op}"
+        # Check each line for imports
+        for line in code.splitlines():
+            line = line.strip()
+            if line.startswith("import ") or line.startswith("from "):
+                # Check if it's in our safe list
+                is_safe = any(line.startswith(safe_import) for safe_import in safe_imports)
+                # Also allow basic numpy/pandas imports
+                is_safe = is_safe or line.startswith("import numpy") or line.startswith("import pandas")
+                if not is_safe:
+                    return f"Error: Code contains potentially unsafe import: {line}"
         # Capture stdout to get print output
         import io
         import sys