Final_Assignment_Template

Sleeping

App Files Files Community

yplam committed on Apr 30, 2025

Commit

7c21d30

1 Parent(s): ca3ab6d

add more tools

Browse files

Files changed (7) hide show

.env.template +1 -0
agent.py +140 -32
app.py +7 -4
requirements.txt +2 -1
tool/files.py +0 -5
tool/math.py +53 -3
tool/youtube.py +10 -3

.env.template CHANGED Viewed

@@ -3,4 +3,5 @@ OPENAI_API_KEY=your_openai_api_key_here
 OPENAI_API_BASE=https://api.openai.com/v1
 OPENAI_PROXY=http://127.0.0.1:7899
 PROXY_URL=http://127.0.0.1:7899
 # Add other configuration variables below

 OPENAI_API_BASE=https://api.openai.com/v1
 OPENAI_PROXY=http://127.0.0.1:7899
 PROXY_URL=http://127.0.0.1:7899
+SERPER_API_KEY=
 # Add other configuration variables below

agent.py CHANGED Viewed

@@ -1,24 +1,22 @@
 import os
-from typing import Annotated, Optional, TypedDict
 from dotenv import load_dotenv
 from langgraph.graph.message import add_messages
 from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
-from tool.files import read_file
-from tool.math import divide
 from langchain.chat_models import init_chat_model
 from langgraph.graph import StateGraph, MessagesState, START, END
 from langgraph.prebuilt import ToolNode
-from tool.youtube import get_video_id, youtube_transcript
 load_dotenv()
-tools = [
-            get_video_id,
-            youtube_transcript,
-            read_file
-        ]
 llm = init_chat_model(
     model="gpt-4o",
     model_provider="openai",
@@ -28,11 +26,122 @@ llm = init_chat_model(
     openai_proxy=os.getenv("OPENAI_PROXY"),
 )
 llm_with_tools = llm.bind_tools(tools)
 class State(TypedDict):
-    input_file: Optional[str]
     messages: Annotated[list[AnyMessage], add_messages]
     answer: str
@@ -41,22 +150,19 @@ def should_continue(state: State):
     last_message = messages[-1]
     if last_message.tool_calls:
         return "tools"
-    return END
-def format_answer(last_message: str):
-    system_message_content = "You are a general AI assistant. \
-        Check the user's answer and validate and format it with the following rules: \
-        The output should be in the following format: \
         FINAL ANSWER: [YOUR FINAL ANSWER]. \
-        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. \
-        If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. \
-        If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. \
-        If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \
-        Your answer should only start with 'FINAL ANSWER: ', then follows with the answer. "
     system_message = SystemMessage(content=system_message_content)
-    messages = [system_message] + [last_message]
-    answer = llm_with_tools.invoke(messages)
-    return answer.content
 def agent(state: State):
     system_message_content = "You are a general AI assistant. I will ask you a question. \
@@ -67,12 +173,11 @@ def agent(state: State):
         If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. \
         If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \
         Your answer should only start with 'FINAL ANSWER: ', then follows with the answer. "
-    if state["input_file"]:
-        system_message_content += f"\nYou are given a file: {state['input_file']}"
     system_message = SystemMessage(content=system_message_content)
     messages = [system_message] + state["messages"]
-    answer = llm_with_tools.invoke(messages)
-    return {"messages": [answer], "answer": format_answer(answer.content)}
 class Agent:
@@ -81,12 +186,15 @@ class Agent:
         tool_node = ToolNode(tools)
         graph_builder = StateGraph(State)
         graph_builder.add_node("agent", agent)
         graph_builder.add_node("tools", tool_node)
         graph_builder.add_edge(START, "agent")
-        graph_builder.add_conditional_edges("agent", should_continue, ["tools", END])
         graph_builder.add_edge("tools", "agent")
         self.graph = graph_builder.compile()
         try:
             # Save graph visualization as PNG file
@@ -99,6 +207,6 @@ class Agent:
             print(f"Could not save graph visualization: {str(e)}")
             pass
-    def __call__(self, question: str, file_name: str|None) -> str:
-        result = self.graph.invoke({"input_file": file_name, "messages": [HumanMessage(content=question)]})
         return result["answer"]

+import json
 import os
+from typing import Annotated, Optional, TypedDict, List
 from dotenv import load_dotenv
 from langgraph.graph.message import add_messages
 from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
 from langchain.chat_models import init_chat_model
 from langgraph.graph import StateGraph, MessagesState, START, END
 from langgraph.prebuilt import ToolNode
+import requests
+from langchain_community.document_loaders import WikipediaLoader
+from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.tools import tool
+from tool.math import add, divide, multiply, subtract, modulus
+from tool.youtube import  youtube_transcript
 load_dotenv()
 llm = init_chat_model(
     model="gpt-4o",
     model_provider="openai",
     openai_proxy=os.getenv("OPENAI_PROXY"),
 )
+@tool
+def analyze_image_by_url(image_url: str, prompt: str) -> str:
+    """Using VL model to analyze the image in image_url using the prompt, and return the answer.
+    Args:
+        image_url: The url of the image to analyze
+        prompt: The prompt to use to analyze the image
+    Returns:
+        The answer to the prompt
+    """
+    # NOTE: the docstring above is kept verbatim; LangChain's @tool uses it as
+    # the tool description shown to the model, so it is runtime text.
+    # Without an image URL there is nothing to analyze.
+    if image_url is None:
+        return ""
+    # Build a single user turn: the text prompt first, then the image reference.
+    text_part = {"type": "text", "text": prompt}
+    image_part = {"type": "image_url", "image_url": {"url": image_url}}
+    user_message = {"role": "user", "content": [text_part, image_part]}
+    # The module-level `llm` (no tools bound) answers the multimodal question.
+    reply = llm.invoke([user_message])
+    print(f"Response: {reply.content}")
+    return reply.content
+def read_file_by_path(file_path: str) -> str:
+    """Read the file in file_path and return the content."""
+    print(f"Reading file: {file_path}")
+    # Treat both None and "" as "no file": open("") would only raise.
+    if not file_path:
+        return ""
+    # Decode explicitly as UTF-8 so the result does not depend on the host
+    # locale, and replace undecodable bytes instead of raising
+    # UnicodeDecodeError when the file is not clean text.
+    with open(file_path, "r", encoding="utf-8", errors="replace") as f:
+        return f.read()
+@tool
+def read_file_by_url(file_url: str) -> str:
+    """Read the file in file_url and return the content.
+    Args:
+        file_url: The url of the file to read
+    Returns:
+        The raw content of the file
+    """
+    print(f"Reading file: {file_url}")
+    # Treat both None and "" as "no file to fetch".
+    if not file_url:
+        return ""
+    # A timeout keeps a stalled server from blocking the agent run forever.
+    response = requests.get(file_url, timeout=30)
+    # Surface HTTP errors (requests.HTTPError) instead of handing an error
+    # page back to the model as if it were the file's content.
+    response.raise_for_status()
+    # Return decoded text: the signature promises `str`, whereas
+    # `response.content` is `bytes`.
+    return response.text
+@tool
+def load_webpage_from_url(url: str) -> str:
+    """Load the webpage from the given url and return the content.
+    Args:
+        url: The url of the webpage to load
+    Returns:
+        The content of the webpage
+    """
+    print(f"Loading webpage from: {url}")
+    # WebBaseLoader.load() yields Document objects, not a string; join their
+    # text so the tool really returns the `str` its signature declares.
+    documents = WebBaseLoader(url).load()
+    return "\n\n".join(document.page_content for document in documents)
+@tool
+def load_wikipedia(query: str) -> str:
+    """Load Wikipedia for the given query and return the content.
+    Args:
+        query: The query to search Wikipedia for
+    Returns:
+        The content of the Wikipedia page
+    """
+    print(f"Loading Wikipedia for: {query}")
+    # WikipediaLoader.load() yields Document objects (at most one here because
+    # of load_max_docs=1); return their text so the result is a plain `str`.
+    # An empty result list becomes "".
+    documents = WikipediaLoader(query=query, load_max_docs=1).load()
+    return "\n\n".join(document.page_content for document in documents)
+@tool
+def search_google(query: str) -> str:
+    """Search Google for the given query and return the result.
+    Args:
+        query: The query to search Google for
+    Returns:
+        The result of the Google search
+    """
+    print(f"Searching Google for: {query}")
+    # Fail early with a clear message when the key is unset or left empty
+    # (the shipped .env.template has `SERPER_API_KEY=` with no value); a None
+    # header value would otherwise fail inside requests with an opaque error.
+    api_key = os.getenv("SERPER_API_KEY")
+    if not api_key:
+        raise ValueError("SERPER_API_KEY is not set.")
+    url = "https://google.serper.dev/search"
+    payload = json.dumps({
+        "q": query
+    })
+    headers = {
+        'X-API-KEY': api_key,
+        'Content-Type': 'application/json'
+    }
+    # A timeout keeps a stalled search request from blocking the agent run.
+    response = requests.post(url, headers=headers, data=payload, timeout=30)
+    print(f"Google search result for: {query}")
+    print(response.text)
+    # The raw response body is returned as-is, including on HTTP errors, so
+    # the model can read whatever the API reported.
+    return response.text
+tools = [
+    # Retrieval / content tools.
+    youtube_transcript,
+    analyze_image_by_url,
+    # Plain function: read_file_by_path is the only entry without @tool.
+    read_file_by_path,
+    read_file_by_url,
+    load_webpage_from_url,
+    load_wikipedia,
+    search_google,
+    # Arithmetic helpers from tool.math.
+    multiply,
+    add,
+    subtract,
+    divide,
+    modulus,
+]
 llm_with_tools = llm.bind_tools(tools)
 class State(TypedDict):
+    # Graph state passed between the nodes of the agent graph.
+    # Local path of the file attached to the question, or None; the agent
+    # node mentions it in the system prompt when it is set.
+    local_file_path: Optional[str]
+    # Online URL for the same attached file, or None.
+    file_url: Optional[str]
+    # Conversation so far; `add_messages` is the reducer applied when a node
+    # returns new messages.
     messages: Annotated[list[AnyMessage], add_messages]
+    # Final extracted answer, written by the `format_answer` node.
     answer: str
     last_message = messages[-1]
     if last_message.tool_calls:
         return "tools"
+    return "format_answer"
+def format_answer(state: State):
+    """Extract the bare final answer from the agent's last message.
+
+    Sends only the last message in ``state["messages"]`` to the plain ``llm``
+    (not ``llm_with_tools``) together with an extraction prompt, and returns
+    a partial state update ``{"answer": <model reply text>}``.
+    """
+    # NOTE: the prompt relies on backslash line continuations, so the leading
+    # indentation of each continued line is part of the string sent to the model.
+    system_message_content = "You are a AI assistant to extract the answer from the user's answer. \
+        The user's answer should be in the following format: \
         FINAL ANSWER: [YOUR FINAL ANSWER]. \
+        Your need to extract and only return the answer. If you don't find the answer, output 'N/A' \
+        Remove '.' from the end of the answer."
     system_message = SystemMessage(content=system_message_content)
+    # Only the most recent message is forwarded, not the whole conversation.
+    messages = [system_message] + [state["messages"][-1]]
+    answer = llm.invoke(messages)
+    # Only the "answer" key is updated; "messages" is left untouched.
+    return {"answer": answer.content}
 def agent(state: State):
     system_message_content = "You are a general AI assistant. I will ask you a question. \
         If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. \
         If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string. \
         Your answer should only start with 'FINAL ANSWER: ', then follows with the answer. "
+    if state["local_file_path"]:
+        system_message_content += f"\nYou can only read files I provide you. You are given a file path related to the question: {state['local_file_path']}, and the online url related to the same file: {state['file_url']}"
     system_message = SystemMessage(content=system_message_content)
     messages = [system_message] + state["messages"]
+    return {"messages": [llm_with_tools.invoke(messages)]}
 class Agent:
         tool_node = ToolNode(tools)
         graph_builder = StateGraph(State)
         graph_builder.add_node("agent", agent)
         graph_builder.add_node("tools", tool_node)
+        graph_builder.add_node("format_answer", format_answer)
         graph_builder.add_edge(START, "agent")
+        graph_builder.add_conditional_edges("agent", should_continue, ["tools", "format_answer"])
         graph_builder.add_edge("tools", "agent")
+        graph_builder.add_edge("format_answer", END)
         self.graph = graph_builder.compile()
         try:
             # Save graph visualization as PNG file
             print(f"Could not save graph visualization: {str(e)}")
             pass
+    def __call__(self, question: str, local_file_path: str|None, file_url: str|None) -> str:
+        """Run the compiled graph on one question and return its answer.
+
+        Args:
+            question: The question text, sent as the first human message.
+            local_file_path: Local path of an attached file, or None.
+            file_url: Online URL of the same attached file, or None.
+
+        Returns:
+            The ``"answer"`` entry of the final graph state.
+        """
+        # Seed the graph state with the file hints and the user's question.
+        initial_state = {
+            "local_file_path": local_file_path,
+            "file_url": file_url,
+            "messages": [HumanMessage(content=question)],
+        }
+        final_state = self.graph.invoke(initial_state)
+        return final_state["answer"]

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import os
 from dotenv import load_dotenv
 import gradio as gr
 import requests
@@ -15,7 +16,6 @@ def download_file(filename: str) -> str:
     """
     Downloads a file from the API and returns the path to the local file.
     """
-    return None
     if filename is None or filename == "":
         return None
     print(f"Downloading file: {filename}")
@@ -90,15 +90,18 @@ def run_all( username: str|None, submit: bool = True):
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
-        file_name = item.get("file_name") or ""
-        file_path = download_file(file_name)
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             print("-"*100)
             print(f"Running agent on task {task_id}: {question_text}")
-            submitted_answer = agent(question_text, "")
             print("-"*30)
             print(f"Submitted answer: {submitted_answer}")
             print("-"*100)

 import os
+import tempfile
 from dotenv import load_dotenv
 import gradio as gr
 import requests
     """
     Downloads a file from the API and returns the path to the local file.
     """
     if filename is None or filename == "":
         return None
     print(f"Downloading file: {filename}")
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
+        local_file_path = None
+        file_url = None
+        if item.get("file_name"):
+            local_file_path = download_file(task_id)
+            file_url = f"{DEFAULT_API_URL}/files/{task_id}"
         if not task_id or question_text is None:
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
             print("-"*100)
             print(f"Running agent on task {task_id}: {question_text}")
+            submitted_answer = agent(question_text, local_file_path, file_url)
             print("-"*30)
             print(f"Submitted answer: {submitted_answer}")
             print("-"*100)

requirements.txt CHANGED Viewed

@@ -5,4 +5,5 @@ langchain_openai
 langchain
 python-dotenv
 youtube_transcript_api
-pandas

 langchain
 python-dotenv
 youtube_transcript_api
+pandas
+langchain_community

tool/files.py DELETED Viewed

@@ -1,5 +0,0 @@
-def read_file(file_path: str) -> str:
-    """Reads the content of a file and returns it as a string."""
-    print(f"Reading file: {file_path}")
-    with open(file_path, 'r') as file:
-        return file.read()

tool/math.py CHANGED Viewed

@@ -1,3 +1,53 @@
-def divide(a: int, b: int) -> float:
-    """Divide a and b - for Master Wayne's occasional calculations."""
-    return a / b

+from langchain_core.tools import tool
+@tool
+def multiply(a: int, b: int) -> int:
+    """Multiply two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: @tool uses it as the tool description.
+    product = a * b
+    return product
+@tool
+def add(a: int, b: int) -> int:
+    """Add two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: @tool uses it as the tool description.
+    total = a + b
+    return total
+@tool
+def subtract(a: int, b: int) -> int:
+    """Subtract two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: @tool uses it as the tool description.
+    difference = a - b
+    return difference
+@tool
+def divide(a: int, b: int) -> float:
+    """Divide two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Reject a zero divisor with an explicit message rather than letting a
+    # bare ZeroDivisionError escape.
+    if b == 0:
+        raise ValueError("Cannot divide by zero.")
+    # `/` is true division and always yields a float, hence `-> float`
+    # (the previous `-> int` annotation was wrong).
+    return a / b
+@tool
+def modulus(a: int, b: int) -> int:
+    """Get the modulus of two numbers.
+    Args:
+        a: first int
+        b: second int
+    """
+    # Docstring kept verbatim: @tool uses it as the tool description.
+    # No zero check here: `b == 0` raises ZeroDivisionError from `%`.
+    remainder = a % b
+    return remainder

tool/youtube.py CHANGED Viewed

@@ -1,12 +1,18 @@
 import os
 from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.proxies import GenericProxyConfig
-def youtube_transcript(video_id: str) -> str:
     """
-    Extracts the transcript from a YouTube video id
     """
-    print(f"Extracting transcript from: {video_id}")
     try:
         ytt_api = YouTubeTranscriptApi()
         if os.getenv("PROXY_URL"):
@@ -16,6 +22,7 @@ def youtube_transcript(video_id: str) -> str:
                     https_url=os.getenv("PROXY_URL"),
                 )
             )
         transcript = ytt_api.fetch(video_id)
         print(f"Transcript: {transcript}")
         return transcript

 import os
 from youtube_transcript_api import YouTubeTranscriptApi
 from youtube_transcript_api.proxies import GenericProxyConfig
+from langchain_core.tools import tool
+@tool
+def youtube_transcript(video_url: str) -> str:
     """
+    Extracts the transcript from a YouTube video url
+    Args:
+        video_url: The url of the YouTube video
+    Returns:
+        The transcript of the YouTube video
     """
+    print(f"Extracting transcript from: {video_url}")
     try:
         ytt_api = YouTubeTranscriptApi()
         if os.getenv("PROXY_URL"):
                     https_url=os.getenv("PROXY_URL"),
                 )
             )
+        video_id = get_video_id(video_url)
         transcript = ytt_api.fetch(video_id)
         print(f"Transcript: {transcript}")
         return transcript