Spaces:

mabelwang21
/

Agents_Final_Assignment

Sleeping

App Files Files Community

mabelwang21 committed on May 11, 2025

Commit

e656aa6

1 Parent(s): b1d7643

update agent class with langgraph

Browse files

Files changed (1) hide show

agent.py +205 -95

agent.py CHANGED Viewed

@@ -1,118 +1,228 @@
-from smolagents import ToolCallingAgent, tool
-from langchain_community.tools import DuckDuckGoSearchRun
-from langchain_community.utilities import WikipediaAPIWrapper
-from langchain.tools import BaseTool
 from PIL import Image
 import pytesseract
-import fitz
-import ast
-import os
-# -------------------- TOOL DEFINITIONS --------------------
 @tool
-def web_search(query: str) -> str:
-    """
-    Search the web using DuckDuckGo.
-    Args:
-        query (str): The search query string.
-    Returns:
-        str: Summary of search results.
-    """
-    search = DuckDuckGoSearchRun()
-    return search.run(query)
 @tool
 def wikipedia_search(query: str) -> str:
-    """
-    Look up a topic on Wikipedia and return relevant content.
-    Args:
-        query (str): The topic or term to search on Wikipedia.
-    Returns:
-        str: Extracted Wikipedia content.
-    """
-    wiki = WikipediaQueryRun()
-    return wiki.run(query)
 @tool
 def image_recognition(image_path: str) -> str:
-    """
-    Perform OCR on an image to extract text.
-    Args:
-        image_path (str): Path to the image file.
-    Returns:
-        str: Extracted text from the image.
-    """
-    img = Image.open(image_path)
-    return pytesseract.image_to_string(img)
 @tool
 def read_pdf(pdf_path: str) -> str:
-    """
-    Extract all text from a PDF document.
-    Args:
-        pdf_path (str): Path to the PDF file.
-    Returns:
-        str: Text content of the PDF.
-    """
-    doc = fitz.open(pdf_path)
-    return "".join(page.get_text() for page in doc)
 @tool
-def calculate(expr: str) -> float:
-    """
-    Evaluate a simple math expression.
-    Args:
-        expr (str): The math expression to evaluate.
-    Returns:
-        float: Result of the expression.
-    """
-    def _eval(node):
-        if isinstance(node, ast.BinOp):
-            left = _eval(node.left)
-            right = _eval(node.right)
-            if isinstance(node.op, ast.Add): return left + right
-            if isinstance(node.op, ast.Sub): return left - right
-            if isinstance(node.op, ast.Mult): return left * right
-            if isinstance(node.op, ast.Div): return left / right
-            if isinstance(node.op, ast.Pow): return left ** right
-        elif isinstance(node, ast.UnaryOp):
-            operand = _eval(node.operand)
-            if isinstance(node.op, ast.UAdd): return +operand
-            if isinstance(node.op, ast.USub): return -operand
-        elif isinstance(node, ast.Num):
-            return node.n
-        else:
-            raise TypeError(f"Unsupported type: {node}")
-    parsed = ast.parse(expr, mode='eval').body
-    return _eval(parsed)
-# -------------------- AGENT CLASS --------------------
-tools = [web_search, wikipedia_search, image_recognition, read_pdf, calculate]
-HF_TOKEN = os.getenv("HF_API_TOKEN")
 class MyAgent:
-    def __init__(self):
-        from smolagents import HfApiModel
-        self.agent = ToolCallingAgent(
-            tools=tools,
-            model=HfApiModel("Qwen/Qwen2.5-Coder-32B-Instruct", token=HF_TOKEN)  # or another supported model
-        )
-    def __call__(self, question: str) -> str:
-        try:
-            result = self.agent.run(question)
-            return f"FINAL ANSWER: {result.strip()}"
-        except Exception as e:
-            return f"FINAL ANSWER: ERROR - {e}"

+import os
+import ast
+import re
+import operator as op
+from pathlib import Path
+from typing import List, TypedDict, Annotated, Optional
+from langchain.tools import tool
+from langchain_community.document_loaders import (
+    CSVLoader,
+    YoutubeLoader,
+)
+from langchain.chat_models import init_chat_model
+from langchain.agents import initialize_agent, AgentType
+from langchain_community.retrievers import BM25Retriever
+from langchain.tools import Tool
+from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage
+from langgraph.graph.message import add_messages
+from langgraph.graph import START, StateGraph
+from langgraph.prebuilt import ToolNode, tools_condition
+from youtube_transcript_api import YouTubeTranscriptApi
 from PIL import Image
 import pytesseract
+import fitz  # PyMuPDF
+# === System Prompt ===
+SYSTEM_PROMPT = """
+You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER].
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number nor use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
+""".strip()
 @tool
+def calculate(expr: str) -> str:
+    """Evaluate a simple math expression and return the result."""
+    _OPERATORS = {
+        ast.Add: op.add,
+        ast.Sub: op.sub,
+        ast.Mult: op.mul,
+        ast.Div: op.truediv,
+        ast.Pow: op.pow,
+        ast.USub: op.neg,
+    }
+    def _eval(node):
+        if isinstance(node, ast.Num):
+            return node.n
+        elif isinstance(node, ast.BinOp):
+            return _OPERATORS[type(node.op)](_eval(node.left), _eval(node.right))
+        elif isinstance(node, ast.UnaryOp):
+            return _OPERATORS[type(node.op)](_eval(node.operand))
+        else:
+            raise ValueError(f"Unsupported expression: {ast.dump(node)}")
+    try:
+        parsed = ast.parse(expr, mode='eval').body
+        result = _eval(parsed)
+        return str(result)
+    except Exception as e:
+        return f"Error calculating expression: {e}"
+@tool
+def web_search(query: str) -> str:
+    """Search the web for current information using DuckDuckGo."""
+    try:
+        from langchain.utilities import DuckDuckGoSearchRun
+        return DuckDuckGoSearchRun().run(query)
+    except Exception as e:
+        return f"Error performing web search: {e}"
 @tool
 def wikipedia_search(query: str) -> str:
+    """Search Wikipedia for a general-topic query."""
+    try:
+        from langchain.utilities import WikipediaAPIWrapper
+        return WikipediaAPIWrapper().run(query)
+    except Exception as e:
+        return f"Error searching Wikipedia: {e}"
 @tool
 def image_recognition(image_path: str) -> str:
+    """Analyze and extract text from an image using Tesseract OCR."""
+    try:
+        img = Image.open(image_path)
+        return pytesseract.image_to_string(img)
+    except Exception as e:
+        return f"Error processing image: {e}"
 @tool
 def read_pdf(pdf_path: str) -> str:
+    """Read and extract text from a PDF document."""
+    try:
+        doc = fitz.open(pdf_path)
+        return "".join(page.get_text() for page in doc)
+    except Exception as e:
+        return f"Error reading PDF: {e}"
 @tool
+def read_csv(csv_path: str) -> str:
+    """Read and extract text from a CSV file, row by row."""
+    try:
+        loader = CSVLoader(csv_path, encoding='utf-8')
+        docs = loader.load()
+        return "\n".join(doc.page_content for doc in docs)
+    except Exception as e:
+        return f"Error reading CSV: {e}"
+@tool
+def read_spreadsheet(spreadsheet_path: str) -> str:
+    """Read a spreadsheet into a DataFrame and return CSV text."""
+    try:
+        import pandas as pd
+        df = pd.read_excel(spreadsheet_path)
+        return df.to_csv(index=False)
+    except Exception as e:
+        return f"Error reading spreadsheet: {e}"
+@tool
+def transcribe_audio(audio_path: str) -> str:
+    """Transcribe audio file (e.g., MP3) using Whisper."""
+    try:
+        docs = AudioLoader(audio_path).load()
+        transcripts = WhisperLoader().load(docs)
+        return "\n".join(doc.page_content for doc in transcripts)
+    except Exception as e:
+        return f"Error transcribing audio: {e}"
+@tool
+def youtube_transcript_tool(video_url: str) -> str:
+    """Download the transcript of a YouTube video using LangChain YoutubeLoader."""
+    try:
+        loader = YoutubeLoader.from_youtube_url(video_url)
+        docs = loader.load()
+        return "\n".join(doc.page_content for doc in docs)
+    except Exception as e:
+        return f"Error fetching YouTube transcript: {e}"
+@tool
+def youtube_transcript_api(video_url_or_id: str) -> str:
+    """Download transcript from YouTube using youtube-transcript-api."""
+    try:
+        match = re.search(r"(?:v=|youtu\.be/)([A-Za-z0-9_-]{11})", video_url_or_id)
+        vid = match.group(1) if match else video_url_or_id
+        entries = YouTubeTranscriptApi.get_transcript(vid)
+        return " ".join(segment["text"] for segment in entries)
+    except Exception as e:
+        return f"Error fetching transcript via API: {e}"
+#o3_mini = init_chat_model("openai:o3-mini", temperature=0)
+#claude_sonnet = init_chat_model("anthropic:claude-3-5-sonnet-latest", temperature=0)
+#gemini_2_flash = init_chat_model("google_vertexai:gemini-2.0-flash", temperature=0)
+_ = os.getenv("ANTHROPIC_API_KEY")
+tools = [
+            calculate, web_search, wikipedia_search, image_recognition,
+            read_pdf, read_csv, read_spreadsheet, transcribe_audio,
+            youtube_transcript_tool, youtube_transcript_api
+        ]
+class AgentState(TypedDict):
+    """State dictionary passed between the nodes of the agent graph."""
+    # Optional path of a file that accompanies the question (e.g. PDF/PNG).
+    input_file: Optional[str]  # Contains file path (PDF/PNG)
+    # Conversation history; `add_messages` is attached as the reducer for
+    # message updates returned by graph nodes.
+    messages: Annotated[list[AnyMessage], add_messages]
+# === Agent Class ===
 class MyAgent:
+    def __init__(
+        self,
+        model_name: str = "anthropic:claude-3-5-sonnet-latest",
+        temperature: float = 0.0
+    ):
+        # Initialize LLM
+        self.llm = init_chat_model(model_name, temperature=temperature)
+        # Base tools: use provided tools or default list
+        self.tools = tools
+        # Human-readable tool descriptions
+        self.textual_tool_desc = "\n".join(t.__doc__.strip() for t in self.tools)
+        # Define assistant node
+        def assistant_node(state: AgentState) -> dict:
+            sys_msg = SystemMessage(
+                content="\n".join([
+                    SYSTEM_PROMPT,
+                    "\nTools available:\n" + self.textual_tool_desc
+                ])
+            )
+            msgs = [sys_msg] + state["messages"]
+            response = self.llm(msgs)
+            return {"messages": state["messages"] + [response], "input_file": state.get("input_file")}
+        # Condition to invoke tools: check if last LLM message mentions a tool invocation
+        def needs_tool(state: AgentState) -> bool:
+            last = state["messages"][-1].content.lower()
+            return any(f"{t.__name__.lower()}(" in last for t in self.tools)
+        # Build the state graph
+        builder = StateGraph(AgentState)
+        builder.add_node("assistant", assistant_node)
+        builder.add_node("tools", ToolNode(self.tools))
+        builder.add_edge(START, "assistant")
+        builder.add_conditional_edges("assistant", needs_tool)
+        builder.add_edge("tools", "assistant")
+        self.react_graph = builder.compile()
+    def __call__(
+        self,
+        user_input: str,
+        input_file: Optional[str] = None,
+    ) -> str:
+        state = AgentState()
+        state["messages"] = [HumanMessage(content=user_input)]
+        state["input_file"] = input_file
+        out = self.react_graph(state)
+        # Return only the final LLM message content
+        return out["messages"][-1].content.strip()
+# CLI entrypoint
+if __name__ == "__main__":
+    import fire
+    fire.Fire(MyAgent)