Final_Assignment_Template

Runtime error

App Files Files Community

jensenwiedler committed on May 18, 2025

Commit

ccce173

1 Parent(s): 3945599

basic agent with 30 score

Browse files

Files changed (9) hide show

.DS_Store +0 -0
.gitignore +3 -0
agent/__pycache__/__init__.cpython-312.pyc +0 -0
agent/__pycache__/graph.cpython-312.pyc +0 -0
agent/__pycache__/tools.cpython-312.pyc +0 -0
agent/graph.py +27 -10
agent/tools.py +185 -21
app.py +28 -11
requirements.txt +5 -1

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.env
+node_modules
+whisper-large-v3

agent/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (185 Bytes). View file

agent/__pycache__/graph.cpython-312.pyc ADDED Viewed

Binary file (2.15 kB). View file

agent/__pycache__/tools.cpython-312.pyc CHANGED Viewed

Binary files a/agent/__pycache__/tools.cpython-312.pyc and b/agent/__pycache__/tools.cpython-312.pyc differ

agent/graph.py CHANGED Viewed

@@ -1,23 +1,40 @@
 from langgraph.graph import StateGraph, MessagesState, START, END
-from langchain_core.messages import HumanMessage, AIMessage
 from langgraph.prebuilt import ToolNode, tools_condition
-from tools import TOOLS
 class State(MessagesState):
-    file_name: str
-def retriever(state: State):
-    if state.file_name:
-        # Simulate file retrieval
-        return {"file_content": f"Retrieved content from {state.file_name}"}
 def call_model(state: State):
-    return {"messages": [AIMessage(content="Hello! How can I assist you today?")]}
 def build_agent():
-    graph_builder = StateGraph(MessagesState)
     graph_builder.add_node("call_model", call_model)
     graph_builder.add_node("tools", ToolNode(TOOLS))

+from typing import Annotated, Optional
 from langgraph.graph import StateGraph, MessagesState, START, END
+from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
 from langgraph.prebuilt import ToolNode, tools_condition
+from langchain_ollama import ChatOllama
+from agent.tools import TOOLS
 class State(MessagesState):
+    # Path of the task's attachment on disk; call_model mentions it in the
+    # system prompt only when it is non-empty.
+    file_path: str
+# Chat model served through Ollama; the commented-out line below is an
+# alternative model.
+model = ChatOllama(model="qwen3:32b")
+#model = ChatOllama(model="llama3.2:3b")
+# Same model with the TOOLS list bound, used by call_model.
+model_with_tools = model.bind_tools(TOOLS)
 def call_model(state: State):
+    """
+    Run one LLM turn of the agent graph.
+    Builds the system prompt (answer-format rules plus tool instructions),
+    mentions the attached file when state["file_path"] is set, and invokes
+    the tool-bound model on the system prompt followed by state["messages"].
+    Args:
+        state: Graph state holding the message history and an optional file_path.
+    Returns:
+        A state update whose "messages" list contains the model's reply.
+    """
+    # The answer marker must be spelled exactly "FINAL ANSWER:" everywhere in
+    # the prompt: app.py extracts the answer by splitting on that exact text.
+    system_prompt = """
+    You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
+    YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+    If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+    If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+    Instructions for the tools:
+    - If you need information from the web, you must use both the web_search and wikipedia_search tools, unless the question mentions wikipedia. Then, you must use only the wikipedia_search tool.
+    Do not forget to use the FINAL ANSWER: [YOUR FINAL ANSWER] template!!!
+    """
+    # .get() tolerates a state that was created without a file_path key.
+    file_path = state.get("file_path")
+    if file_path:
+        system_prompt += f"\n\nYou have acces to a file at {file_path}. You can use it to answer the question. Use this file path as input to relevant tools."
+    result = model_with_tools.invoke([SystemMessage(content=system_prompt)] + state["messages"])
+    return {"messages": [result]}
 def build_agent():
+    graph_builder = StateGraph(State)
     graph_builder.add_node("call_model", call_model)
     graph_builder.add_node("tools", ToolNode(TOOLS))

agent/tools.py CHANGED Viewed

@@ -3,39 +3,60 @@ from langchain_core.tools import tool
 from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 @tool
 def wikipedia_search(query: str) -> str:
     """
-    Search Wikipedia for a given query and return max 2 results.
     Args:
         query: The search query.
     """
     # Simulate a search operation
-    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
     formatted_docs = "\n\n---\n\n".join(
         [
-            f'<Document title="{doc.metadata["title"]}"/>\n{doc.page_content}\n</Document>'
-            for doc in search_docs
         ])
     return formatted_docs
 @tool
 def youtube_transcript(url: str) -> str:
     """"Returns the transcript of a YouTube video given its URL.
     Args:
         url: The YouTube video URL.
     """
-    try:
-        transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
-        return f"Video Transcript: {transcripts[0].page_content}"
-    except Exception as e:
-        return "No transcript available for this video. Error: {e}"
-wrapper = DuckDuckGoSearchAPIWrapper(max_results=5)
-search = DuckDuckGoSearchResults(output_format="list", api_wrapper=wrapper)
 @tool
 def web_search(query: str) -> str:
@@ -46,13 +67,17 @@ def web_search(query: str) -> str:
         query: The search query.
     """
     # Simulate a web search operation
-    query = "obama"
-    search_results = search.invoke(query)
-    formatted_result = "\n\n---\n\n".join([
-        f"- {result['title']}: {result['link']} \n {result['snippet']}"
-        for result in search_results
-    ])
-    return f"Web search results for '{query}'"
 @tool
 def add_numbers(numbers: List[float]) -> float:
@@ -76,5 +101,144 @@ def multiply_numbers(numbers: List[float]) -> float:
     return result
-TOOLS = [wikipedia_search, web_search, youtube_transcript, add_numbers, multiply_numbers]

 from langchain_community.document_loaders import WikipediaLoader, YoutubeLoader
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
+from langchain_ollama import ChatOllama
+from langchain_sandbox import PyodideSandbox
+import base64
+from langchain_core.messages import HumanMessage, SystemMessage
+import torch
+from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+from docling.document_converter import DocumentConverter
+from langchain_tavily import TavilySearch
+# Shared docling converter, reused by wikipedia_search and read_file_content.
+doc_converter = DocumentConverter()
 @tool
 def wikipedia_search(query: str) -> str:
     """
+    Search Wikipedia for a given query and return max 1 result.
     Args:
         query: The search query.
     """
+    # Look the page up, then convert its source URL with docling to obtain
+    # the page as markdown.
+    pages = WikipediaLoader(query=query, load_max_docs=1).load()
+    markdown_pages = [
+        doc_converter.convert(page.metadata["source"]).document.export_to_markdown()
+        for page in pages
+    ]
+    rendered = []
+    for page, markdown in zip(pages, markdown_pages):
+        # Start at the "From Wikipedia" marker when it is present; otherwise
+        # keep the whole document (find() returns -1, clamped to 0).
+        offset = max(markdown.find("From Wikipedia"), 0)
+        rendered.append(
+            f'<Document title="{page.metadata["title"]}"/>\n{markdown[offset:]}\n</Document>'
+        )
+    return "\n\n---\n\n".join(rendered)
 @tool
 def youtube_transcript(url: str) -> str:
+    """Returns the transcript of a YouTube video given its URL.
+    This is a text-based tool and should not be used for visual information of the video.
     Args:
         url: The YouTube video URL.
     """
+    # Try loading the transcript a few times; any failure (including an empty
+    # result, which raises IndexError below) is logged and retried.
+    max_tries = 3
+    for _ in range(max_tries):
+        try:
+            transcripts = YoutubeLoader.from_youtube_url(url, add_video_info=False).load()
+            return f"Video Transcript: {transcripts[0].page_content}"
+        except Exception as e:
+            print(f"Attempt failed: {e}")
+    # If all attempts fail, return an error message
+    return "No transcript available. This video might not have a transcript or the URL is invalid."
 @tool
 def web_search(query: str) -> str:
         query: The search query.
     """
     # Simulate a web search operation
+    tavily_search = TavilySearch(max_results=3)
+    search_docs = tavily_search.invoke(query)
+     # Format
+    formatted_search_docs = "\n\n---\n\n".join(
+        [
+            f'<Document href="{doc["url"]}">\n{doc["content"]}\n</Document>'
+            for doc in search_docs["results"]
+        ]
+    )
+    return f"Web search results for '{query}':\n\n{formatted_search_docs}"
 @tool
 def add_numbers(numbers: List[float]) -> float:
     return result
+# Vision-capable chat model that image_question_answering sends images to.
+vision_llm = ChatOllama(model="gemma3:27b")
+# might be better to use supervisor method..
+@tool
+def image_question_answering(img_path: str, question: str) -> str:
+    """
+    Given an image path and a question, return the answer to the question based on the image. Just pass the initial question from the human as a query.
+    Args:
+        img_path: The path to the image.
+        question: The question to ask about the image.
+    """
+    # Local import so this block does not depend on the file's import section.
+    import mimetypes
+
+    system_prompt = """
+    You are a helpful assistant that can answer questions about images.
+    You need to think step by step carefully, provide your thinking process and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
+    """
+    try:
+        # Read image and encode as base64
+        with open(img_path, "rb") as image_file:
+            image_bytes = image_file.read()
+        image_base64 = base64.b64encode(image_bytes).decode("utf-8")
+        # Label the data URL with the file's real image type; fall back to PNG
+        # when the extension is unknown or not an image type.
+        mime_type, _ = mimetypes.guess_type(img_path)
+        if not mime_type or not mime_type.startswith("image/"):
+            mime_type = "image/png"
+        # Prepare the prompt including the base64 image data. The caller's
+        # question is forwarded unchanged.
+        message = [
+            SystemMessage(content=system_prompt),
+            HumanMessage(
+                content=[
+                    {
+                        "type": "text",
+                        "text": question,
+                    },
+                    {
+                        "type": "image_url",
+                        "image_url": {
+                            "url": f"data:{mime_type};base64,{image_base64}"
+                        },
+                    },
+                ]
+            )
+        ]
+        # Call the vision-capable model
+        response = vision_llm.invoke(message)
+        return response.content
+    except Exception as e:
+        # Report the failure as the tool result instead of raising.
+        error_msg = f"Error image questioning: {str(e)}"
+        print(error_msg)
+        return error_msg
+# Pick an available torch backend instead of assuming Apple "mps", so that
+# model.to(device) also works on CUDA or CPU-only hosts.
+if torch.backends.mps.is_available():
+    device = "mps"
+elif torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+# Local checkpoint directory (listed in .gitignore, so it must be provided
+# separately).
+checkpoint = "./whisper-large-v3"
+model = AutoModelForSpeechSeq2Seq.from_pretrained(
+    checkpoint, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
+)
+model.to(device)
+processor = AutoProcessor.from_pretrained(checkpoint)
+# Speech-recognition pipeline used by speech_to_text.
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model=model,
+    tokenizer=processor.tokenizer,
+    feature_extractor=processor.feature_extractor,
+    torch_dtype=torch.float32,
+    device=device,
+)
+@tool
+def speech_to_text(audio_path: str) -> str:
+    """
+    Convert speech to text using a given audio file. Not for youtube links.
+    Args:
+        audio_path: The path to the audio file.
+    """
+    try:
+        try:
+            result = pipe(audio_path)
+        except Exception:
+            # Second attempt with timestamps enabled (presumably required for
+            # long recordings — TODO confirm). A failure here is caught by the
+            # outer handler instead of escaping the tool.
+            result = pipe(audio_path, return_timestamps=True)
+        return result["text"].strip()
+    except Exception as e:
+        return f"Error processing audio file: {str(e)}"
+@tool
+def read_file_content(path: str) -> str:
+    """
+    Read the content of a file (pdf, docx, xlsx, etc.) but also from a URL (like arxiv or websites) and returns it as markdown.
+    Args:
+        path: The path to the file, or a URL.
+    """
+    try:
+        # Convert the source with the shared docling converter and export it
+        # as markdown.
+        doc = doc_converter.convert(path).document
+        markdown = doc.export_to_markdown()
+        return f"File Content:\n\n{markdown}"
+    except Exception as e:
+        # Report the failure as the tool result instead of raising.
+        return f"Error reading file: {str(e)}"
+# Sandbox instance that run_python_code executes code in.
+sandbox = PyodideSandbox(
+    # Allow Pyodide to install python packages that
+    # might be required.
+    allow_net=True,
+)
+@tool
+async def run_python_code(input_type: str, input: str) -> str:
+    """
+    Run Python code in a sandboxed environment. You can provide either a code snippet or a file path.
+    1. If input_type is "code", input should be a string containing the Python code to run.
+    2. If input_type is "file", input should be a string containing the path to the file.
+    Args:
+        input_type: The type of input, code or file.
+        input: The Python code to run or the path to the file.
+    """
+    try:
+        # Reject unknown modes first so the rest handles only the two valid ones.
+        if input_type != "code" and input_type != "file":
+            return "Invalid input type. Please provide 'code' or 'file' as input_type."
+        source = input
+        if input_type == "file":
+            # In file mode `input` is a path; load the script text from it.
+            with open(input, "r") as script:
+                source = script.read()
+        outcome = await sandbox.execute(source)
+        return f"Result execution: result: {outcome.result}, stdout: {outcome.stdout}, stderr: {outcome.stderr}, status: {outcome.status}"
+    except Exception as e:
+        return f"Error executing Python code: {str(e)}"
+@tool
+def reverse_string(input: str) -> str:
+    """
+    Reverse a given string.
+    Args:
+        input: The string to reverse.
+    """
+    # Walk the characters back to front and glue them together again.
+    return "".join(reversed(input))
+# Every tool exposed to the agent; agent/graph.py binds this list to the model
+# and wraps it in the graph's ToolNode.
+TOOLS = [wikipedia_search, web_search, youtube_transcript, add_numbers, multiply_numbers , image_question_answering, speech_to_text, read_file_content, run_python_code, reverse_string]

app.py CHANGED Viewed

@@ -1,13 +1,16 @@
-from io import BytesIO
 import os
 import gradio as gr
 import requests
-import inspect
 import pandas as pd
 from langchain_core.messages import HumanMessage
 from agent.graph import build_agent
 # (Keep Constants as is)
 # --- Constants ---
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
@@ -17,22 +20,31 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class BasicAgent:
     def __init__(self):
         self.agent = build_agent()
-    def __call__(self, question: str, task_id: str, file_name="") -> str:
         messages = [HumanMessage(content=question)]
         if file_name:
-            task_id = "cca530fc-4052-43b2-b130-b30968d8aa44"
             response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
             response.raise_for_status()
             file_data = response.content
-            #file_data = BytesIO(file_data)
-        state = self.agent.invoke({"messages": messages, "file_name": file_name})
         answer = state["messages"][-1].content
         return answer
-def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
@@ -94,7 +106,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
-            submitted_answer = agent(question_text, file_name=file_name)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
@@ -207,4 +219,9 @@ if __name__ == "__main__":
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
-    demo.launch(debug=True, share=False)

+from dotenv import load_dotenv
 import os
 import gradio as gr
 import requests
+import asyncio
 import pandas as pd
 from langchain_core.messages import HumanMessage
 from agent.graph import build_agent
+# Load variables from a local .env file (if present) into the environment.
+load_dotenv()
 # (Keep Constants as is)
 # --- Constants ---
 # Base URL of the scoring service; BasicAgent downloads task files from it.
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 class BasicAgent:
+    """Callable wrapper that runs the compiled agent graph on one question."""
     def __init__(self):
         self.agent = build_agent()
+    async def __call__(self, question: str, task_id: str, file_name="") -> str:
+        """
+        Answer a single question, downloading its attachment first if it has one.
+        Args:
+            question: The question text, sent to the agent as a human message.
+            task_id: Task identifier used to download the attachment.
+            file_name: Name of the attachment; empty when the task has none.
+        Returns:
+            The text after the last "FINAL ANSWER: " marker in the agent's
+            final message (the whole message if the marker is absent).
+        Raises:
+            requests.HTTPError: If the attachment download fails.
+            ValueError: If file_name has no usable file-name component.
+        """
         messages = [HumanMessage(content=question)]
+        file_path = ""
         if file_name:
             response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
             response.raise_for_status()
             file_data = response.content
+            # file_name is supplied by the caller; keep only its last path
+            # component so the download cannot be written outside the
+            # working directory.
+            file_path = os.path.basename(file_name)
+            if not file_path:
+                raise ValueError(f"Invalid attachment name: {file_name!r}")
+            with open(file_path, "wb") as f:
+                f.write(file_data)
+        state = {"messages": messages, "file_path": file_path}
+        print(f"question: {question}")
+        state = await self.agent.ainvoke(state)
+        for msg in state["messages"]:
+            msg.pretty_print()
         answer = state["messages"][-1].content
+        try:
+            # Keep only what follows the last answer marker.
+            answer = answer.split("FINAL ANSWER: ")[-1].strip()
+        except Exception as e:
+            # Reached when the message content is not a plain string.
+            print(f"Error parsing answer: {e}")
+            answer = "AGENT ERROR: Unable to parse answer."
+        print(f"answer: {answer}")
         return answer
+async def run_and_submit_all( profile: gr.OAuthProfile | None):
     """
     Fetches all questions, runs the BasicAgent on them, submits all answers,
     and displays the results.
             print(f"Skipping item with missing task_id or question: {item}")
             continue
         try:
+            submitted_answer = await agent(question_text, task_id=task_id, file_name=file_name)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
     print("-"*(60 + len(" App Starting ")) + "\n")
     print("Launching Gradio Interface for Basic Agent Evaluation...")
+    demo.launch(debug=True, share=False)
+    # agent = BasicAgent()
+    # res = asyncio.run(agent("Hello, how are you?", "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3", "99c9cc74-fdc8-46c6-8f8d-3ce2d3bfeea3.mp3"))
+    # print(res)

requirements.txt CHANGED Viewed

@@ -1,4 +1,5 @@
 gradio
 requests
 langgraph
 langchain
@@ -6,4 +7,7 @@ langchain-community
 wikipedia
 youtube-transcript-api
 duckduckgo-search
-docling

 gradio
+gradio[oauth]
 requests
 langgraph
 langchain
 wikipedia
 youtube-transcript-api
 duckduckgo-search
+langchain-docling
+langchain-sandbox
+langchain-ollama
+langchain-tavily