TommasoBB commited on
Commit
00a50e0
·
verified ·
1 Parent(s): 81917a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +318 -7
app.py CHANGED
@@ -1,23 +1,327 @@
1
  import os
2
  import gradio as gr
 
3
  import requests
4
  import inspect
5
  import pandas as pd
 
 
 
 
 
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  # --- Basic Agent Definition ---
 
12
  # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
 
 
 
 
 
 
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
@@ -75,12 +379,19 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
75
  print(f"Running agent on {len(questions_data)} questions...")
76
  for item in questions_data:
77
  task_id = item.get("task_id")
78
- question_text = item.get("question")
 
79
  if not task_id or question_text is None:
80
  print(f"Skipping item with missing task_id or question: {item}")
81
  continue
 
 
 
 
 
 
82
  try:
83
- submitted_answer = agent(question_text)
84
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
  except Exception as e:
 
import os
import inspect
from typing import TypedDict, List, Dict, Any, Optional

import gradio as gr
import pandas as pd
import requests
from gradio_client import file
from smolagents import CodeAgent, HfApiModel
from langgraph.graph import StateGraph, START, END
# BUG FIX: `HumanMessage` is not part of langgraph — `langgraph.messages`
# does not exist and raises ModuleNotFoundError at import time. The class
# is provided by langchain-core (a dependency of langgraph).
from langchain_core.messages import HumanMessage

import tools
# (Keep Constants as is)
# --- Constants ---
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"

# --- Models ---
# NOTE(review): depending on the installed smolagents version, HfApiModel
# may take `model_id` rather than `repo_id`, and may be callable rather than
# exposing `.invoke()` — confirm against the pinned smolagents release.
# Vision model for image analysis / OCR
vision_model = HfApiModel(repo_id="FireRedTeam/FireRed-OCR", max_new_tokens=2048, temperature=0.3)
# Dedicated model for math problems
math_model = HfApiModel(repo_id="Qwen/Qwen2.5-Math-1.5B", max_new_tokens=2048, temperature=0.3)
# BUG FIX: `model` is invoked by classify() and handle_file() but was never
# defined anywhere in the file, so those nodes raised NameError at runtime.
# Define a general-purpose instruction model for them.
model = HfApiModel(repo_id="Qwen/Qwen2.5-7B-Instruct", max_new_tokens=2048, temperature=0.3)
# --- Agent state ---
class AgentState(TypedDict):
    """Shared state flowing through the LangGraph workflow.

    Every node receives the full state and returns a dict of channel
    updates; only keys declared here are accepted by the graph.
    """
    question: str
    task_id: Optional[str]
    file_name: Optional[str]
    # Classification flags set by the `classify` node.
    is_searching: Optional[bool]
    have_file: Optional[bool]
    is_math: Optional[bool]
    have_image: Optional[bool]
    # BUG FIX: the handler nodes write these keys as state updates, but they
    # were missing from the schema. LangGraph raises on updates to channels
    # that are not declared, so every handler node failed at runtime.
    search_results: Optional[str]
    image_description: Optional[str]
    transcribed_text: Optional[str]
    extracted_info: Optional[str]
    math_solution: Optional[str]
    final_answer: Optional[str]  # The final answer produced by the agent
    messages: List[Dict[str, Any]]  # Track conversation with LLM for analysis
# --- Graph nodes ---

def read(state: "AgentState") -> dict:
    """Entry node: log the incoming question.

    Returns an empty dict — no state channels are updated.
    (BUG FIX: the return annotation was `str`, but LangGraph nodes
    return a dict of state updates.)
    """
    question = state["question"]
    print(f"Agent is reading the question: {question[:50]}...")
    return {}
def _response_to_dict(response) -> dict:
    """Best-effort conversion of an LLM response into a dict.

    Accepts an already-decoded dict, or an object with a `.content` string
    (e.g. a chat message) containing a JSON object possibly surrounded by
    prose. Returns {} when nothing parseable is found.
    """
    import json
    import re

    if isinstance(response, dict):
        return response
    text = getattr(response, "content", None)
    if text is None:
        text = str(response)
    match = re.search(r"\{.*\}", text, re.DOTALL)
    if match:
        try:
            return json.loads(match.group(0))
        except json.JSONDecodeError:
            pass
    return {}


def classify(state: "AgentState") -> dict:
    """Agent classifies the question to decide which tools to use.

    Asks the LLM for a JSON object of boolean flags and stores them in the
    state, appending a transcript entry to `messages`.
    """
    question = state["question"].lower()

    # Prompt for the LLM to classify the question.
    prompt = f"""
    You are an agent that classifies questions to determine which tools to use.
    Classify the following question into the categories: 'need to be searched on web/wikipidia', 'has a file in the question', 'is a math problem', 'has an image in the question'.
    Question: {question}
    Return a JSON object with boolean fields for each category, for example:
    {{
        "is_searching": true,
        "have_file": false,
        "is_math": false,
        "have_image": false
    }}
    """
    messages = [HumanMessage(content=prompt)]
    # NOTE(review): `model` must be defined at module level — it was missing
    # from the original file (NameError here).
    response = model.invoke(messages)
    # BUG FIX: the raw response is not guaranteed to be a dict; parse the
    # JSON payload defensively instead of calling .get() on it directly.
    data = _response_to_dict(response)
    is_searching = data.get("is_searching", False)
    have_file = data.get("have_file", False)
    is_math = data.get("is_math", False)
    have_image = data.get("have_image", False)
    print(f"Classification result: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}")
    # BUG FIX: typo `mew_messages` -> `new_messages`.
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Classify the question to determine which tools to use."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Classification result: is_searching={is_searching}, have_file={have_file}, is_math={is_math}, have_image={have_image}"},
    ]

    return {
        "is_searching": is_searching,
        "have_file": have_file,
        "is_math": is_math,
        "have_image": have_image,
        "messages": new_messages,
    }
def handele_search(state: "AgentState") -> dict:
    """Agent performs a web search when classified as needing search.

    NOTE(review): the misspelled name `handele_search` is kept because the
    graph registers this exact identifier as the "handle_search" node.
    (BUG FIX: the return annotation was `str`; LangGraph nodes return a
    dict of state updates.)
    """
    question = state["question"]
    print(f"Agent is performing a web search for: {question[:50]}...")
    search_results = tools.WebSearchTool().run(question)
    print(f"Search results: {search_results[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Perform a web search if classified as needing search."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Search results: {search_results[:100]}..."},
    ]
    return {
        "search_results": search_results,
        "messages": new_messages,
    }
def handle_image(state: "AgentState") -> dict:
    """Agent handles an image if classified as having an image.

    Downloads the image as base64 and sends it to a vision-capable model
    using a multimodal message format. Returns state updates with the image
    description, any transcribed text, and a transcript entry.
    (BUG FIX: return annotation corrected from `str` to `dict`.)
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    # Use ImageReaderTool to download the image as base64
    image_reader = tools.ImageReaderTool()
    image_data_uri = image_reader(task_id) if task_id and file_name else ""

    if not image_data_uri or image_data_uri.startswith("Failed"):
        print(f"Could not download image for task {task_id}")
        new_messages = state.get("messages", []) + [
            {"role": "assistant", "content": f"[Could not download image '{file_name}' for analysis.]"}
        ]
        return {
            "image_description": "",
            "transcribed_text": "",
            "messages": new_messages,
        }

    # Build multimodal message with image for a vision-capable model
    prompt_text = f"""Analyze the attached image in detail.
Describe the content of the image and transcribe all text visible in it.

Question: {question}

Return a JSON object with the following fields:
{{
    "image_description": "A detailed description of the image content.",
    "transcribed_text": "All text visible in the image transcribed here."
}}"""

    # Multimodal message: the vision model receives both text and image
    messages = [
        HumanMessage(content=[
            {"type": "text", "text": prompt_text},
            {"type": "image_url", "image_url": {"url": image_data_uri}},
        ])
    ]
    # Use the dedicated vision model (FireRed-OCR) for image analysis
    response = vision_model.invoke(messages)
    # BUG FIX: the response is not guaranteed to be a dict; pull the JSON
    # object out of the text content, falling back to the raw text as the
    # description when parsing fails.
    if isinstance(response, dict):
        data = response
    else:
        import json
        import re
        text = getattr(response, "content", str(response))
        m = re.search(r"\{.*\}", text, re.DOTALL)
        try:
            data = json.loads(m.group(0)) if m else {"image_description": text}
        except json.JSONDecodeError:
            data = {"image_description": text}
    image_description = data.get("image_description", "")
    transcribed_text = data.get("transcribed_text", "")
    print(f"Image description: {image_description[:100]}...")
    print(f"Transcribed text: {transcribed_text[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Analyze and describe the image if classified as having an image."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Image description: {image_description[:100]}..., Transcribed text: {transcribed_text[:100]}..."},
    ]
    return {
        "image_description": image_description,
        "transcribed_text": transcribed_text,
        "messages": new_messages,
    }
def handle_file(state: "AgentState") -> dict:
    """Agent processes the file if classified as having a file.

    Uses the FileReaderTool to download and read the file from the API, then
    asks the LLM to extract the information relevant to the question.
    (BUG FIX: return annotation corrected from `str` to `dict`.)
    """
    question = state["question"]
    task_id = state.get("task_id", "")
    file_name = state.get("file_name", "")

    # Use the file_reader tool to fetch the file content
    file_reader = tools.FileReaderTool()
    file_content = file_reader(task_id) if task_id and file_name else ""

    # Build prompt with the retrieved file content
    file_context = ""
    if file_content:
        file_context = f"\n\n--- Attached file: {file_name} ---\n{file_content}\n--- End of file ---"
    elif file_name:
        file_context = f"\n\n[Note: A file '{file_name}' was referenced but could not be retrieved.]"

    prompt = f"""You are an agent that can read and extract information from files.
Below is the content of the attached file retrieved from the API. Read it carefully and extract any relevant information that could help answer the question.

Question: {question}{file_context}

Return a JSON object with the following field:
{{
    "extracted_info": "The relevant extracted information from the file."
}}"""
    messages = [HumanMessage(content=prompt)]
    # NOTE(review): `model` must be defined at module level — it was missing
    # from the original file (NameError here).
    response = model.invoke(messages)
    # BUG FIX: the response is not guaranteed to be a dict; parse the JSON
    # payload out of the text content defensively.
    if isinstance(response, dict):
        data = response
    else:
        import json
        import re
        text = getattr(response, "content", str(response))
        m = re.search(r"\{.*\}", text, re.DOTALL)
        try:
            data = json.loads(m.group(0)) if m else {"extracted_info": text}
        except json.JSONDecodeError:
            data = {"extracted_info": text}
    extracted_info = data.get("extracted_info", "")
    print(f"Extracted file info: {extracted_info[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Read and extract information from the attached file."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Extracted info: {extracted_info[:100]}..."},
    ]
    return {
        "extracted_info": extracted_info,
        "messages": new_messages,
    }
def handle_math(state: "AgentState") -> dict:
    """Agent handles a math problem if classified as a math problem.

    Sends the question to the dedicated math model and stores its
    step-by-step solution in the state.
    (BUG FIX: return annotation corrected from `str` to `dict`.)
    """
    question = state["question"]
    print(f"Agent is handling a math problem: {question[:50]}...")
    messages = [HumanMessage(content=f"Solve the following math problem step by step:\n\n{question}")]
    response = math_model.invoke(messages)
    # BUG FIX: the prompt requests a free-form step-by-step solution, not a
    # JSON object, so `response.get("solution")` was always empty (or failed
    # outright on a non-dict response). Use the response text itself.
    if isinstance(response, dict):
        solution = response.get("solution", "")
    else:
        solution = getattr(response, "content", None) or str(response)
    print(f"Math solution: {solution[:100]}...")
    new_messages = state.get("messages", []) + [
        {"role": "system", "content": "Handle the question if classified as a math problem."},
        {"role": "user", "content": question},
        {"role": "assistant", "content": f"Math solution: {solution[:100]}..."},
    ]
    return {
        "math_solution": solution,
        "messages": new_messages,
    }
def answer(state: "AgentState") -> dict:
    """Synthesize a final answer from all gathered context in messages.

    Collects every assistant-role transcript entry as context, asks a
    general model using the GAIA answer template, and extracts the text
    after "FINAL ANSWER:".
    """
    question = state["question"]
    messages_history = state.get("messages", [])

    # Build context summary from all assistant messages
    context_parts = [msg["content"] for msg in messages_history if msg.get("role") == "assistant"]
    context = "\n".join(context_parts) if context_parts else "No additional context gathered."

    prompt = f"""You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.

Question: {question}

Context gathered:
{context}
"""
    messages = [HumanMessage(content=prompt)]
    # Use the general model for final answer synthesis.
    # BUG FIX: "Qwen3.5-35B-A3B" is not a valid Hugging Face repo id (it has
    # no namespace). NOTE(review): confirm the exact repo id available to
    # your HF inference provider/token.
    general_model = HfApiModel(repo_id="Qwen/Qwen3-30B-A3B", max_new_tokens=2048, temperature=0.3)
    response = general_model.invoke(messages)
    raw_response = response.content if hasattr(response, 'content') else str(response)

    # Extract the final answer after "FINAL ANSWER:" if present
    if "FINAL ANSWER:" in raw_response:
        final_answer = raw_response.split("FINAL ANSWER:")[-1].strip()
    else:
        final_answer = raw_response.strip()

    print(f"Final answer: {final_answer[:100]}...")
    return {"final_answer": final_answer}
def route_after_classify(state: "AgentState") -> str:
    """Routing function: pick the next node from the classification flags.

    Flags are checked in priority order (image > file > math > search);
    when none is set the flow goes straight to the final `answer` node.
    """
    priority = (
        ("have_image", "handle_image"),
        ("have_file", "handle_file"),
        ("is_math", "handle_math"),
        ("is_searching", "handle_search"),
    )
    for flag, node in priority:
        if state.get(flag):
            return node
    # Default: go straight to answer
    return "answer"
+
# --- Workflow graph wiring ---
agent_graph = StateGraph(AgentState)

# Register each node under the name the edges refer to (note: the
# "handle_search" node is backed by the misspelled `handele_search`).
_node_registry = {
    "read": read,
    "classify": classify,
    "handle_search": handele_search,
    "handle_image": handle_image,
    "handle_file": handle_file,
    "handle_math": handle_math,
    "answer": answer,
}
for _node_name, _node_fn in _node_registry.items():
    agent_graph.add_node(_node_name, _node_fn)

# Linear prefix: entry -> read -> classify.
agent_graph.add_edge(START, "read")
agent_graph.add_edge("read", "classify")
# classify fans out to exactly one handler (or directly to answer); the
# router returns the target node's name.
agent_graph.add_conditional_edges(
    "classify",
    route_after_classify,
)

# Every handler funnels into the final answer node.
for _handler_name in ("handle_search", "handle_image", "handle_file", "handle_math"):
    agent_graph.add_edge(_handler_name, "answer")
agent_graph.add_edge("answer", END)

compiled_agent = agent_graph.compile()
+
# --- Basic Agent Definition ---

# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
class BasicAgent:
    """Thin wrapper that runs the compiled LangGraph workflow on a question."""

    def __init__(self):
        # Tool instances are created once and reused across all questions.
        self.file_reader = tools.FileReaderTool()
        self.image_reader = tools.ImageReaderTool()
        self.web_search = tools.WebSearchTool()
        self.tools = [self.file_reader, self.image_reader, self.web_search]
        self.vision_model = vision_model  # FireRedTeam/FireRed-OCR for image tasks
        print("Agent initialized.")

    def __call__(self, question: str, task_id: str = "", file_name: str = "") -> str:
        """Answer `question`; `task_id`/`file_name` locate an optional attachment.

        Runs the compiled LangGraph workflow and returns its final answer
        (or a placeholder string when the graph produced none).
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        # Run the LangGraph workflow
        result_state = compiled_agent.invoke({
            "question": question,
            "task_id": task_id,
            "file_name": file_name,
            "messages": [],
            "is_searching": False,
            "have_file": False,
            "is_math": False,
            "have_image": False,
            "final_answer": "",
        })

        # BUG FIX: .get()'s default only applies when the key is missing; the
        # state initializes final_answer to "", so an empty answer slipped
        # through. `or` also covers the empty-string case.
        final_answer = result_state.get("final_answer") or "No answer produced."
        print(f"Agent returning answer: {final_answer[:100]}...")
        return final_answer
 
326
  def run_and_submit_all( profile: gr.OAuthProfile | None):
327
  """
 
379
  print(f"Running agent on {len(questions_data)} questions...")
380
  for item in questions_data:
381
  task_id = item.get("task_id")
382
+ # Handle both "Question" (dataset format) and "question" (API format)
383
+ question_text = item.get("question") or item.get("Question")
384
  if not task_id or question_text is None:
385
  print(f"Skipping item with missing task_id or question: {item}")
386
  continue
387
+
388
+ # Check for attached file
389
+ file_name = item.get("file_name", "")
390
+ if file_name:
391
+ print(f"Task {task_id} has attached file: {file_name}")
392
+
393
  try:
394
+ submitted_answer = agent(question_text, task_id=task_id, file_name=file_name)
395
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
396
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
397
  except Exception as e: