Final_Assignment_Template

Sleeping

App Files Files Community

philincloud committed on May 29, 2025

Commit

beb3afa

verified ·

1 Parent(s): 50ab30a

Update langgraph_agent.py

Browse files

Files changed (1) hide show

langgraph_agent.py +85 -268

langgraph_agent.py CHANGED Viewed

@@ -1,268 +1,85 @@
-import os
-import io
-import contextlib
-import pandas as pd
-from typing import Dict, List, Union
-import re
-from PIL import Image as PILImage # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
-from huggingface_hub import InferenceClient # Keep InferenceClient for other potential HF uses, but describe_image is removed.
-from langgraph.graph import START, StateGraph, MessagesState
-from langgraph.prebuilt import tools_condition, ToolNode
-from langchain_openai import ChatOpenAI
-from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
-from langchain_community.document_loaders import WikipediaLoader
-from langchain_core.messages import SystemMessage, HumanMessage, ToolMessage
-from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_core.tools import tool
-from langchain_google_community import GoogleSearchAPIWrapper
-@tool
-def multiply(a: int, b: int) -> int:
-    """Multiply two integers."""
-    return a * b
-@tool
-def add(a: int, b: int) -> int:
-    """Add two integers."""
-    return a + b
-@tool
-def subtract(a: int, b: int) -> int:
-    """Subtract the second integer from the first."""
-    return a - b
-@tool
-def divide(a: int, b: int) -> float:
-    """Divide first integer by second; error if divisor is zero."""
-    if b == 0:
-        raise ValueError("Cannot divide by zero.")
-    return a / b
-@tool
-def modulus(a: int, b: int) -> int:
-    """Return the remainder of dividing first integer by second."""
-    return a % b
-@tool
-def wiki_search(query: str) -> dict:
-    """Search Wikipedia for a query and return up to 2 documents."""
-    try:
-        docs = WikipediaLoader(query=query, load_max_docs=5, lang="en", doc_content_chars_max=7000).load()
-        if not docs:
-            return {"wiki_results": f"No documents found on Wikipedia for '{query}'."}
-        formatted = "\n\n---\n\n".join(
-            f'<Document source="{d.metadata.get("source", "N/A")}"/>\n{d.page_content}'
-            for d in docs
-        )
-        return {"wiki_results": formatted}
-    except Exception as e:
-        print(f"Error in wiki_search tool: {e}")
-        return {"wiki_results": f"Error occurred while searching Wikipedia for '{query}'. Details: {str(e)}"}
-search = GoogleSearchAPIWrapper()
-@tool
-def google_web_search(query: str) -> str:
-    """Perform a web search (via Google Custom Search) and return results."""
-    try:
-        return search.run(query)
-    except Exception as e:
-        print(f"Error in google_web_search tool: {e}")
-        return f"Error occurred while searching the web for '{query}'. Details: {str(e)}"
-# HF_API_TOKEN is no longer directly needed for describe_image as that tool is removed.
-# But keeping InferenceClient initialization for completeness if other HF tools might be added later.
-HF_API_TOKEN = os.getenv("HF_API_TOKEN")
-MODEL = os.getenv("MODEL")
-HF_INFERENCE_CLIENT = None
-if HF_API_TOKEN:
-    HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
-else:
-    print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
-@tool
-def read_file_content(file_path: str) -> Dict[str, str]:
-    """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, indicates it's a blob for LLM processing."""
-    try:
-        _, file_extension = os.path.splitext(file_path)
-        file_extension = file_extension.lower()
-        # Prioritize handling of video, audio, and image files for direct LLM processing
-        if file_extension in (".mp4", ".avi", ".mov", ".mkv", ".webm"):
-            return {"file_type": "video", "file_name": file_path, "file_content": f"Video file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this video content directly as a blob."}
-        elif file_extension == ".mp3":
-            return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly as a blob."}
-        elif file_extension in (".jpeg", ".jpg", ".png"):
-            return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly as a blob."}
-        # Handle text and code files
-        elif file_extension in (".txt", ".py"):
-            with open(file_path, "r", encoding="utf-8") as f:
-                content = f.read()
-            return {"file_type": "text/code", "file_name": file_path, "file_content": content}
-        # Handle Excel files
-        elif file_extension == ".xlsx":
-            df = pd.read_excel(file_path)
-            content = df.to_string()
-            return {"file_type": "excel", "file_name": file_path, "file_content": content}
-        else:
-            return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}
-    except FileNotFoundError:
-        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
-    except Exception as e:
-        return {"file_error": f"Error reading file {file_path}: {e}"}
-@tool
-def python_interpreter(code: str) -> Dict[str, str]:
-    """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message."""
-    old_stdout = io.StringIO()
-    with contextlib.redirect_stdout(old_stdout):
-        try:
-            exec_globals = {}
-            exec_locals = {}
-            exec(code, exec_globals, exec_locals)
-            output = old_stdout.getvalue()
-            return {"execution_result": output.strip()}
-        except Exception as e:
-            return {"execution_error": str(e)}
-# --- Youtube Tool (Remains the same) ---
-@tool
-def Youtube(url: str, question: str) -> Dict[str, str]:
-    """
-    Tells about the YouTube video identified by the given URL, answering a question about it.
-    Note: This is a simulated response. In a real application, this would interact with a YouTube API
-    or a video analysis service to get actual video information and transcripts.
-    """
-    print(f"Youtube called with URL: {url}, Question: {question}")
-    # Placeholder for actual YouTube API call.
-    # In a real scenario, you'd use a library like `google-api-python-client` for YouTube Data API
-    # or a dedicated video transcription/analysis service.
-    # Simulating the previous video content for demonstration
-    if "https://www.youtube.com/watch?v=1htKBjuUWec" in url or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url):
-        return {
-            "video_url": url,
-            "question_asked": question,
-            "video_summary": "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm.",
-            "details": {
-                "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
-                "00:00:11": "Teal'c takes a large gulp from a black mug",
-                "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
-                "00:00:26": "Someone agrees, 'extremely'"
-            }
-        }
-    else:
-        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}
-# --- END YOUTUBE TOOL ---
-API_KEY = os.getenv("GEMINI_API_KEY")
-HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN") # Kept for potential future HF uses, but not for describe_image
-GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
-# Update the tools list (removed describe_image and arvix_search)
-tools = [
-    multiply, add, subtract, divide, modulus,
-    wiki_search,
-    google_web_search,
-    read_file_content,
-    python_interpreter,
-    Youtube,
-]
-with open("prompt.txt", "r", encoding="utf-8") as f:
-    system_prompt = f.read()
-sys_msg = SystemMessage(content=system_prompt)
-def build_graph(provider: str = "gemini"):
-    if provider == "gemini":
-        llm = ChatGoogleGenerativeAI(
-            model=MODEL,
-            temperature=1.0,
-            max_retries=2,
-            api_key=GEMINI_API_KEY,
-            max_tokens=5000
-        )
-    elif provider == "huggingface":
-        llm = ChatHuggingFace(
-            llm=HuggingFaceEndpoint(
-                url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
-            ),
-            temperature=0,
-        )
-    else:
-        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")
-    llm_with_tools = llm.bind_tools(tools)
-    def assistant(state: MessagesState):
-        messages_to_send = [sys_msg] + state["messages"]
-        # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
-        # When read_file_content returns a file_type of "image" or "audio",
-        # the agent should be able to send the actual binary data of that file
-        # as part of the message to the LLM. LangChain's ChatGoogleGenerativeAI
-        # supports this via content parts in HumanMessage.
-        #
-        # For this setup, we're assuming the framework (LangGraph/LangChain)
-        # will correctly handle passing the actual file content when read_file_content
-        # is called and its output indicates a media type.
-        #
-        # A more explicit implementation in the assistant node might look like this
-        # for real binary file handling if the framework doesn't do it implicitly:
-        #
-        # new_messages_to_send = []
-        # for msg in state["messages"]:
-        #    if isinstance(msg, HumanMessage) and msg.tool_calls:
-        #      # If a tool call to read_file_content happened in the previous turn
-        #      # and it returned a media type, we might need to get the file data
-        #      # and append it to the message parts. This logic is complex and
-        #      # depends heavily on how tool outputs are structured and passed.
-        #      # For simplicity in this template, we assume direct handling by the LLM
-        #      # if the tool output indicates media, and the file itself is accessible
-        #      # via the environment.
-        #      pass # Keep original message, tool output will follow
-        #    elif isinstance(msg, HumanMessage) and any(part.get("file_type") in ["image", "audio"] for part in msg.content if isinstance(part, dict)):
-        #      # This is a conceptual example for if the HumanMessage itself contains file data
-        #      # or a reference that needs to be resolved into data.
-        #      # You'd need to load the actual file bytes here.
-        #      # e.g., if msg.content was like: [{"type": "file_reference", "file_path": "image.png"}]
-        #      # with open(msg.content[0]["file_path"], "rb") as f:
-        #      #   file_bytes = f.read()
-        #      # new_messages_to_send.append(
-        #      #     HumanMessage(
-        #      #         content=[
-        #      #             {"type": "text", "text": "Here is the media content:"},
-        #      #             {"type": "image_data" if "image" in msg.content[0]["file_type"] else "audio_data", "data": base64.b64encode(file_bytes).decode('utf-8'), "media_type": "image/png" if "image" in msg.content[0]["file_type"] else "audio/mp3"}
-        #      #         ]
-        #      #     )
-        #      # )
-        #    else:
-        #      new_messages_to_send.append(msg)
-        # llm_response = llm_with_tools.invoke([sys_msg] + new_messages_to_send)
-        # --- END IMPORTANT NOTE ---
-        llm_response = llm_with_tools.invoke(messages_to_send,{"recursion_limit": 25}) # For now, keep as is, rely on framework
-        print(f"LLM Raw Response: {llm_response}")
-        return {"messages": [llm_response]}
-    builder = StateGraph(MessagesState)
-    builder.add_node("assistant", assistant)
-    builder.add_node("tools", ToolNode(tools))
-    builder.add_edge(START, "assistant")
-    builder.add_conditional_edges("assistant", tools_condition)
-    builder.add_edge("tools", "assistant")
-    return builder.compile()
-if __name__ == "__main__":
-    pass

+You are a highly capable and intelligent assistant designed to answer questions and perform tasks using the following tools:
+Available Tools:
+- multiply(a: int, b: int): Multiply two integers.
+- add(a: int, b: int): Add two integers.
+- subtract(a: int, b: int): Subtract the second integer from the first.
+- divide(a: int, b: int): Divide the first integer by the second. Division by zero raises an error.
+- modulus(a: int, b: int): Return the remainder of dividing the first integer by the second.
+- wiki_search(query: str): Search Wikipedia and return up to 5 relevant documents. Use for general knowledge or historical info. Extract the main subject from the user's question as the query.
+- google_web_search(query: str): Perform a web search via Google Custom Search. Use for current events, specific facts, or academic/research topics (e.g., arXiv).
+  When using this tool:
+  - Simplify queries to core keywords only.
+  - Pass queries as plain, well-formed text; do not URL-encode them manually (the search tool receives the query string as-is).
+  - If initial search fails, try up to two alternative simplified or rephrased queries.
+  - If still unsuccessful, state inability to find the information.
+- read_file_content(file_path: str): Read raw content of a specified file. Use when the user references files (e.g., "attached file", "this document", "file_name:"). You are responsible for interpreting the content regardless of file type (text, code, image, audio, Excel).
+- python_interpreter(code: str): Execute Python code and return output. Use when user provides Python code or after reading Python code from a file.
+- Youtube(url: str, question: str): Answer questions about a YouTube video given its URL. Use when the user query contains a YouTube link.
+Instructions for Using Your Tools:
+1. File Handling (Highest Priority):
+- If the user references a file, immediately use read_file_content(file_path=<filename>).
+- Do not attempt to answer from general knowledge before reading the file.
+- After reading, process the file content to answer the question.
+- If the file contains Python code and the user asks for execution, use python_interpreter with the code.
+- For other file types, process the raw content natively.
+- If file content is missing or unreadable, state that you need the content to proceed.
+2. URL Handling (Second Priority):
+- If the query contains a URL (e.g., YouTube), first try to answer from your knowledge or by processing the URL content.
+- If unable to answer or if specific video info is requested, use the Youtube tool.
+- When using the Youtube tool:
+  - Identify the YouTube URL pattern.
+  - Use the user's specific question about the video if provided; otherwise, use "Tell me about this video."
+  - Integrate returned info, including timestamps if relevant.
+  - If the video lacks requested info, clearly state what the video shows.
+3. General Questions (Third Priority):
+- For questions without files or URLs, first attempt a direct answer from your knowledge.
+- If you can answer directly, respond immediately in the required answer format:
+  FINAL ANSWER: "<direct answer>"
+- If you cannot answer directly or if the question requires calculation or search, use the appropriate tool(s):
+  - Use math tools (multiply, add, subtract, divide, modulus) for calculations.
+  - Use wiki_search for general knowledge or historical facts.
+  - Use google_web_search for current events, specific data, or academic topics.
+Tool Argument Extraction and Query Formulation:
+- Extract only essential arguments from the user's query (e.g., numbers for math, keywords for searches, file paths, code snippets, URLs).
+- Keep queries short and focused by removing filler words and unnecessary phrases.
+Tool Execution and Output Processing:
+- Execute selected tools with correct arguments.
+- Analyze outputs carefully. If output is indirect or partial, formulate follow-up queries within tool attempt limits.
+- If a tool returns an error or no answer after reasonable attempts, state inability to determine the answer.
+Decision to Stop and Provide Answer:
+- Once you have sufficient information to answer fully and accurately, stop and provide the final answer.
+- Do not call additional tools unnecessarily.
+Answer Formatting Rules:
+- Provide answers ONLY in the format:
+  FINAL ANSWER: "<direct answer or result>"
+- If unable to answer, respond with:
+  FINAL ANSWER: ""
+- For numbers, do NOT use commas or units (e.g., $, %) unless explicitly requested.
+- For strings, avoid articles and abbreviations; write digits in plain text unless otherwise specified.
+- For comma-separated lists, apply the above rules to each element.
+Examples:
+- "What is 25 times 13?" → Use multiply
+- "Who is Marie Curie according to Wikipedia?" → Use wiki_search
+- "What's the weather like in London tomorrow?" → Use google_web_search(query='weather in London tomorrow')
+- "Calculate the remainder of 100 divided by 7." → Use modulus
+- "Please summarize the attached file 'document.txt'." → Use read_file_content(file_path='document.txt')
+- "What is the output of this Python code: print(2 + 2)" → Use python_interpreter
+- "Analyze the image in 'chart.png'." → Use read_file_content(file_path='chart.png') and process natively
+- "Listen to 'speech.mp3' and tell me what is said." → Use read_file_content(file_path='speech.mp3')
+- "Tell me about this video: https://www.youtube.com/watch?v=<video_id>" → Use the Youtube tool if you cannot answer directly