Final_Agent

Sleeping

App Files Files Community

RuaZhou committed on Jul 16, 2025

Commit

ba320d3

verified ·

1 Parent(s): a129000

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -3

app.py CHANGED Viewed

@@ -10,11 +10,13 @@ from langchain_openai import ChatOpenAI
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_google_community import GoogleSearchAPIWrapper
 from langchain_community.document_loaders import YoutubeLoader
 import wikipedia
 import speech_recognition as sr
 import tempfile
 import ast
 # Or using AudioTranscriptTool.
@@ -42,7 +44,44 @@ def add(a: int|float, b: int|float) -> float:
 def subtract(a: int|float, b: int|float) -> float:
     """Subtract a with b."""
     return a - b
 def youtube_transcript_tool(url: str) -> str:
     """Load transcript from a YouTube video URL."""
     loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
@@ -104,6 +143,8 @@ tools = [
     multiply,
     add,
     subtract,
     youtube_transcript_tool,
     transcribe_audio,
     analyze_python_code,
@@ -133,8 +174,22 @@ from langchain_core.messages import HumanMessage, SystemMessage
 def assistant(state: AgentState, llm_with_tools):
     # System message
-    textual_description_of_tool = """
 transcribe_audio(file_url: str) -> str:
     Downloads an audio file from a URL into a temporary file and transcribes it using SpeechRecognition.
     Args:
@@ -196,7 +251,7 @@ Substract(a: int|float, b: int|float) -> float:
     previous_message = state["messages"]
     sys_msg = SystemMessage(content=f"""You are an agent that must use tools for computations or unknown info. Think step-by-step: 1. Analyze question. 2. Call tools if needed. 3. Summarize.
     Please call different search tools or wikipedia tool multiple times if needed for verification or depth, aiming for at least third times on complex queries.
-    And here are the tools you can use :\n{textual_description_of_tool} \n
     I will ask you a question. Please return your answer with the following template: [YOUR FINAL ANSWER] without brackets.
     YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
     If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.

 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_google_community import GoogleSearchAPIWrapper
 from langchain_community.document_loaders import YoutubeLoader
+from langchain_community.document_loaders import PyPDFLoader
 import wikipedia
 import speech_recognition as sr
 import tempfile
 import ast
+import pytesseract
+from PIL import Image
 # Or using AudioTranscriptTool.
 def subtract(a: int|float, b: int|float) -> float:
     """Subtract a with b."""
     return a - b
+def pdf_loader_tool(file_url: str) -> str:
+    """Load and extract text from a PDF file downloaded from given file_url."""
+    # Every outcome, including failures, is returned as a string: a non-200
+    # download yields "Failed to download file: <status>", and any exception
+    # yields "Reading failed: <error>".
+    try:
+        # Download file into temporary file
+        # (delete=True: the temporary file is removed when the with block exits,
+        # so all reading must happen inside it.)
+        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as temp_file:
+            response = requests.get(file_url)
+            if response.status_code != 200:
+                return f"Failed to download file: {response.status_code}"
+            temp_file.write(response.content)
+            temp_file.flush()  # Make sure data is written
+            # Read from temp file
+            # The loader is given the file's path (temp_file.name), not the open
+            # handle, which is why the flush above is needed first.
+            loader = PyPDFLoader(temp_file.name)
+            docs = loader.load()
+            # Concatenate the text of every loaded document, newline-separated.
+            return "\n".join([doc.page_content for doc in docs])
+    except Exception as e:
+        # Report the failure as text instead of raising.
+        return f"Reading failed: {str(e)}"
+def read_image_text(file_URL: str) -> str:
+    """Extract text from image downloaded from given file_URL using OCR."""
+    try:
+        # Download file into temporary file
+        with tempfile.NamedTemporaryFile(suffix=".jpg", delete=True) as temp_file:
+            response = requests.get(file_url)
+            if response.status_code != 200:
+                return f"Failed to download file: {response.status_code}"
+            temp_file.write(response.content)
+            temp_file.flush()  # Make sure data is written
+            # Read from temp file
+            image = Image.open(temp_file.name)
+            return pytesseract.image_to_string(image)
+    except Exception as e:
+        return f"Reading failed: {str(e)}"
 def youtube_transcript_tool(url: str) -> str:
     """Load transcript from a YouTube video URL."""
     loader = YoutubeLoader.from_youtube_url(url, add_video_info=True)
     multiply,
     add,
     subtract,
+    read_image_text,
+    pdf_loader_tool,
     youtube_transcript_tool,
     transcribe_audio,
     analyze_python_code,
 def assistant(state: AgentState, llm_with_tools):
     # System message
+    textual_description_of_tools = """
+pdf_loader_tool(file_url: str) -> str:
+    Load and extract text from a PDF file downloaded from given file_url.
+    Args:
+        file_url, a string indicating the url of the given file.
+    Returns:
+        The text extracted from the given pdf.
+read_image_text(file_URL: str) -> str:
+    Extract text from image downloaded from given file_URL using OCR.
+    Args:
+        file_URL, a string indicating the url of the given file.
+    Returns:
+        The text extracted from the given image.
 transcribe_audio(file_url: str) -> str:
     Downloads an audio file from a URL into a temporary file and transcribes it using SpeechRecognition.
     Args:
     previous_message = state["messages"]
     sys_msg = SystemMessage(content=f"""You are an agent that must use tools for computations or unknown info. Think step-by-step: 1. Analyze question. 2. Call tools if needed. 3. Summarize.
     Please call different search tools or wikipedia tool multiple times if needed for verification or depth, aiming for at least third times on complex queries.
+    And here are the tools you can use :\n{textual_description_of_tools} \n
     I will ask you a question. Please return your answer with the following template: [YOUR FINAL ANSWER] without brackets.
     YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
     If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.