Deep_Research_Agent

Sleeping

App Files Files Community

Lasdw committed on May 16, 2025

Commit

36dcbf4

1 Parent(s): d3fb3e0

added pdf viewing tool

Browse files

Files changed (4) hide show

agent.py +63 -2
app.py +20 -3
requirements.txt +2 -0
tools.py +113 -0

agent.py CHANGED Viewed

@@ -32,7 +32,8 @@ from tools import (
     transcribe_audio,
     extract_python_code_from_complex_input,
     process_image,
-    read_file
 )
 load_dotenv()
@@ -544,7 +545,10 @@ def extract_json_from_text(text: str) -> dict:
                         "excel": "excel_to_text",
                         "youtube": "process_youtube_video",
                         "webpage": "webpage_scrape",
-                        "scrape": "webpage_scrape"
                     }
                     if result["action"].lower() in tool_mapping:
@@ -1390,6 +1394,58 @@ def read_file_node(state: AgentState) -> Dict[str, Any]:
         "action_input": None   # Clear the action input
     }
 # Router function to direct to the correct tool
 def router(state: AgentState) -> str:
     """Route to the appropriate tool based on the current_tool field."""
@@ -1420,6 +1476,8 @@ def router(state: AgentState) -> str:
         return "process_image"
     elif tool == "read_file":
         return "read_file"
     else:
         return "end"
@@ -1441,6 +1499,7 @@ def create_agent_graph() -> StateGraph:
     builder.add_node("transcribe_audio", transcribe_audio_node)
     builder.add_node("process_image", process_image_node)
     builder.add_node("read_file", read_file_node)
     # Define edges: these determine how the control flow moves
     builder.add_edge(START, "assistant")
@@ -1476,6 +1535,7 @@ def create_agent_graph() -> StateGraph:
             "transcribe_audio": "transcribe_audio",
             "process_image": "process_image",
             "read_file": "read_file",
             "end": END
         }
     )
@@ -1492,6 +1552,7 @@ def create_agent_graph() -> StateGraph:
     builder.add_edge("transcribe_audio", "assistant")
     builder.add_edge("process_image", "assistant")
     builder.add_edge("read_file", "assistant")
     # Compile the graph
     return builder.compile()

     transcribe_audio,
     extract_python_code_from_complex_input,
     process_image,
+    read_file,
+    process_online_document
 )
 load_dotenv()
                         "excel": "excel_to_text",
                         "youtube": "process_youtube_video",
                         "webpage": "webpage_scrape",
+                        "scrape": "webpage_scrape",
+                        "pdf": "process_online_document",
+                        "document": "process_online_document",
+                        "online": "process_online_document"
                     }
                     if result["action"].lower() in tool_mapping:
         "action_input": None   # Clear the action input
     }
+def process_online_document_node(state: AgentState) -> Dict[str, Any]:
+    """Node that processes online PDFs and images."""
+    print("Online Document Processing Tool Called...\n\n")
+    # Extract tool arguments
+    action_input = state.get("action_input", {})
+    print(f"Online document processing action_input: {action_input}")
+    # Extract URL and document type
+    url = ""
+    doc_type = "auto"  # Default to auto-detection
+    if isinstance(action_input, dict):
+        url = action_input.get("url", "")
+        doc_type = action_input.get("doc_type", "auto")
+    elif isinstance(action_input, str):
+        url = action_input
+    print(f"Processing online document: '{url}' (type: {doc_type})")
+    # Safety check - don't run with empty URL
+    if not url:
+        result = "Error: No URL provided. Please provide a valid URL to process."
+    elif not url.startswith(("http://", "https://")):
+        result = f"Error: Invalid URL format: {url}. Please provide a valid URL starting with http:// or https://."
+    else:
+        # Call the online document processing function
+        try:
+            result = process_online_document(url, doc_type)
+        except Exception as e:
+            result = f"Error processing online document: {str(e)}\n\nThis could be due to:\n- The document is not accessible\n- Network connectivity issues\n- Unsupported document type\n- Rate limiting"
+    print(f"Online document processing result length: {len(result)}")
+    # Format the observation to continue the ReAct cycle
+    tool_message = AIMessage(
+        content=f"Observation: {result.strip()}"
+    )
+    # Print the observation that will be sent back to the assistant
+    print("\n=== TOOL OBSERVATION ===")
+    content_preview = tool_message.content[:500] + "..." if len(tool_message.content) > 500 else tool_message.content
+    print(content_preview)
+    print("=== END OBSERVATION ===\n")
+    # Return the updated state
+    return {
+        "messages": state["messages"] + [tool_message],
+        "current_tool": None,  # Reset the current tool
+        "action_input": None   # Clear the action input
+    }
 # Router function to direct to the correct tool
 def router(state: AgentState) -> str:
     """Route to the appropriate tool based on the current_tool field."""
         return "process_image"
     elif tool == "read_file":
         return "read_file"
+    elif tool == "process_online_document":
+        return "process_online_document"
     else:
         return "end"
     builder.add_node("transcribe_audio", transcribe_audio_node)
     builder.add_node("process_image", process_image_node)
     builder.add_node("read_file", read_file_node)
+    builder.add_node("process_online_document", process_online_document_node)
     # Define edges: these determine how the control flow moves
     builder.add_edge(START, "assistant")
             "transcribe_audio": "transcribe_audio",
             "process_image": "process_image",
             "read_file": "read_file",
+            "process_online_document": "process_online_document",
             "end": END
         }
     )
     builder.add_edge("transcribe_audio", "assistant")
     builder.add_edge("process_image", "assistant")
     builder.add_edge("read_file", "assistant")
+    builder.add_edge("process_online_document", "assistant")
     # Compile the graph
     return builder.compile()

app.py CHANGED Viewed

@@ -257,7 +257,7 @@ with gr.Blocks(title="TurboNerd Agent🤓") as demo:
             ## Chat with TurboNerd 🤓
             Ask any question and get an answer from TurboNerd. The agent can search the web, Wikipedia, analyze images, process audio, and more!
-            ### Complex Example Questions:
             **Research & Analysis:**
             - "Find the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists. Cross-reference this information with their Wikipedia page and any recent news articles."
@@ -278,8 +278,25 @@ with gr.Blocks(title="TurboNerd Agent🤓") as demo:
                 with gr.Column(scale=4):
                     chatbot = gr.Chatbot(
                         label="Conversation",
-                        height=300,
-                        type="messages"
                     )
                     with gr.Row():
                         question_input = gr.Textbox(

             ## Chat with TurboNerd 🤓
             Ask any question and get an answer from TurboNerd. The agent can search the web, Wikipedia, analyze images, process audio, and more!
+            ### Example Questions:
             **Research & Analysis:**
             - "Find the first name of the only Malko Competition recipient from the 20th Century (after 1977) whose nationality on record is a country that no longer exists. Cross-reference this information with their Wikipedia page and any recent news articles."
                 with gr.Column(scale=4):
                     chatbot = gr.Chatbot(
                         label="Conversation",
+                        height=600,
+                        type="messages",
+                        elem_id="chatbot",
+                        show_copy_button=True,
+                        show_label=True,
+                        container=True,
+                        bubble_full_width=False,
+                        rtl=False,
+                        show_share_button=False,
+                        show_retry_button=True,
+                        show_clear_button=True,
+                        avatar_images=(None, None),
+                        likeable=False,
+                        layout="panel",
+                        min_width=600,
+                        max_width=1200,
+                        scale=1,
+                        autoscroll=True,
+                        elem_classes=["chatbot-container"]
                     )
                     with gr.Row():
                         question_input = gr.Textbox(

requirements.txt CHANGED Viewed

@@ -22,3 +22,5 @@ openai
 openpyxl
 Pillow
 numpy

 openpyxl
 Pillow
 numpy
+PyPDF2
+pymupdf

tools.py CHANGED Viewed

@@ -1339,6 +1339,114 @@ def read_file(file_path: str, file_content: Optional[bytes] = None, line_start:
                 print(f"Warning: Could not delete temporary file {temp_path}: {e}")
                 # Non-fatal error, don't propagate exception
 # Define the tools configuration
 tools_config = [
     {
@@ -1390,5 +1498,10 @@ tools_config = [
         "name": "read_file",
         "description": "Read and display the contents of a text file (.py, .txt, etc.). You can provide a file path or use a file attachment. Optionally specify line range to read a specific portion of the file.",
         "func": read_file
     }
 ]

                 print(f"Warning: Could not delete temporary file {temp_path}: {e}")
                 # Non-fatal error, don't propagate exception
+def process_online_document(url: str, doc_type: str = "auto") -> str:
+    """
+    Process and analyze online PDFs and images.
+    Args:
+        url: URL of the document or image
+        doc_type: Type of document ("pdf", "image", or "auto" for automatic detection)
+    Returns:
+        Analysis of the document content
+    """
+    try:
+        # Validate URL
+        parsed_url = urlparse(url)
+        if not parsed_url.scheme or not parsed_url.netloc:
+            return f"Error: Invalid URL format: {url}. Please provide a valid URL with http:// or https:// prefix."
+        # Block potentially dangerous URLs
+        blocked_domains = [
+            "localhost", "127.0.0.1", "0.0.0.0",
+            "192.168.", "10.0.", "172.16.", "172.17.", "172.18.", "172.19.", "172.20.",
+            "172.21.", "172.22.", "172.23.", "172.24.", "172.25.", "172.26.", "172.27.",
+            "172.28.", "172.29.", "172.30.", "172.31."
+        ]
+        if any(domain in parsed_url.netloc for domain in blocked_domains):
+            return f"Error: Access to internal/local URLs is blocked for security: {url}"
+        print(f"Processing online document: {url}")
+        # Set headers to mimic a browser
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/pdf,image/*,*/*;q=0.8',
+            'Accept-Language': 'en-US,en;q=0.5',
+            'Connection': 'keep-alive',
+        }
+        # Download the file
+        response = requests.get(url, headers=headers, stream=True, timeout=15)
+        response.raise_for_status()
+        # Determine content type
+        content_type = response.headers.get('content-type', '').lower()
+        # Create a temporary file to save the content
+        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+            temp_file.write(response.content)
+            temp_path = temp_file.name
+        try:
+            # Process based on content type or specified doc_type
+            if doc_type == "auto":
+                if "pdf" in content_type or url.lower().endswith('.pdf'):
+                    doc_type = "pdf"
+                elif any(img_type in content_type for img_type in ['jpeg', 'png', 'gif', 'bmp', 'webp']):
+                    doc_type = "image"
+                else:
+                    return f"Error: Unsupported content type: {content_type}"
+            if doc_type == "pdf":
+                try:
+                    import PyPDF2
+                    with open(temp_path, 'rb') as file:
+                        pdf_reader = PyPDF2.PdfReader(file)
+                        text_content = ""
+                        for page in pdf_reader.pages:
+                            text_content += page.extract_text() + "\n"
+                        # Get metadata
+                        metadata = pdf_reader.metadata
+                        result = "PDF Analysis:\n\n"
+                        if metadata:
+                            result += "Metadata:\n"
+                            for key, value in metadata.items():
+                                if value:
+                                    result += f"- {key}: {value}\n"
+                            result += "\n"
+                        result += f"Number of pages: {len(pdf_reader.pages)}\n\n"
+                        result += "Content:\n"
+                        result += text_content[:8000]  # Limit content length
+                        if len(text_content) > 8000:
+                            result += "\n\n[Content truncated due to length...]"
+                        return result
+                except ImportError:
+                    return "Error: PyPDF2 library is required for PDF processing. Please install it using 'pip install PyPDF2'"
+            elif doc_type == "image":
+                # Use the existing process_image function
+                return process_image(temp_path, url=url)
+            else:
+                return f"Error: Unsupported document type: {doc_type}"
+        finally:
+            # Clean up the temporary file
+            try:
+                os.unlink(temp_path)
+            except Exception as e:
+                print(f"Warning: Could not delete temporary file {temp_path}: {e}")
+    except requests.exceptions.RequestException as e:
+        return f"Error accessing URL {url}: {str(e)}"
+    except Exception as e:
+        return f"Error processing online document: {str(e)}"
 # Define the tools configuration
 tools_config = [
     {
         "name": "read_file",
         "description": "Read and display the contents of a text file (.py, .txt, etc.). You can provide a file path or use a file attachment. Optionally specify line range to read a specific portion of the file.",
         "func": read_file
+    },
+    {
+        "name": "process_online_document",
+        "description": "Process and analyze online PDFs and images. Provide a URL and optionally specify the document type ('pdf', 'image', or 'auto').",
+        "func": process_online_document
     }
 ]