Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,7 @@ import requests
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import time
|
|
|
|
| 7 |
|
| 8 |
# Import smol-agent and tool components
|
| 9 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
|
@@ -13,39 +14,62 @@ from unstructured.partition.auto import partition
|
|
| 13 |
# --- Constants ---
|
| 14 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 15 |
|
| 16 |
-
# --- Tool Definition ---
|
| 17 |
@tool
|
| 18 |
def file_reader(file_path: str) -> str:
|
| 19 |
-
"""
|
| 20 |
-
|
| 21 |
-
This tool supports various file types
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
Args:
|
| 25 |
file_path (str): The local path or web URL of the file to be read.
|
| 26 |
"""
|
|
|
|
| 27 |
try:
|
|
|
|
| 28 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
|
|
|
| 29 |
response = requests.get(file_path, timeout=20)
|
| 30 |
response.raise_for_status()
|
| 31 |
-
with open(
|
| 32 |
f.write(response.content)
|
| 33 |
-
|
| 34 |
-
os.remove("temp_file") # Clean up
|
| 35 |
else:
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
return "\n\n".join([str(el) for el in elements])
|
| 38 |
except Exception as e:
|
|
|
|
|
|
|
|
|
|
| 39 |
return f"Error reading or processing file '{file_path}': {e}"
|
| 40 |
|
| 41 |
-
|
|
|
|
| 42 |
class GaiaSmolAgent:
|
| 43 |
def __init__(self):
|
| 44 |
"""
|
| 45 |
Initializes the optimized agent.
|
| 46 |
-
|
| 47 |
-
Optimization 2: Use a single, powerful agent with a detailed system prompt
|
| 48 |
-
to eliminate the slow two-step (plan -> execute) process.
|
| 49 |
"""
|
| 50 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 51 |
api_key = os.getenv("GEMINI_API_KEY")
|
|
@@ -62,14 +86,14 @@ class GaiaSmolAgent:
|
|
| 62 |
|
| 63 |
# Store the sophisticated system prompt as an instance variable.
|
| 64 |
self.system_prompt = """
|
| 65 |
-
You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools.
|
| 66 |
|
| 67 |
**Available Tools:**
|
| 68 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 69 |
-
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL.
|
| 70 |
|
| 71 |
**Your Thought Process:**
|
| 72 |
-
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed.
|
| 73 |
2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
|
| 74 |
3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
|
| 75 |
4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
|
|
@@ -81,25 +105,36 @@ class GaiaSmolAgent:
|
|
| 81 |
- Do not ask for clarification. Directly proceed to solve the problem.
|
| 82 |
"""
|
| 83 |
|
| 84 |
-
# Initialize the agent
|
| 85 |
self.agent = CodeAgent(
|
| 86 |
model=model,
|
| 87 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
| 88 |
add_base_tools=True, # Provides the python interpreter and the final_answer function
|
|
|
|
| 89 |
)
|
| 90 |
-
|
|
|
|
| 91 |
|
| 92 |
-
def __call__(self, question: str) -> str:
|
| 93 |
"""
|
| 94 |
Directly runs the agent to generate and execute a plan to answer the question.
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
"""
|
| 97 |
print(f"Optimized Agent received question: {question[:100]}...")
|
|
|
|
| 98 |
try:
|
| 99 |
-
# Combine the system prompt with the
|
| 100 |
-
full_prompt = f"{self.system_prompt}\n\nUser Question: \"{question}\""
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
except Exception as e:
|
| 104 |
print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
|
| 105 |
print(traceback.format_exc()) # Print full traceback for easier debugging
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import traceback
|
| 6 |
import time
|
| 7 |
+
import mimetypes
|
| 8 |
|
| 9 |
# Import smol-agent and tool components
|
| 10 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
|
|
|
| 14 |
# --- Constants ---
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
|
| 17 |
+
# --- Tool Definition (Updated for Multimodality) ---
@tool
def file_reader(file_path: str) -> str:
    """
    Reads the content of a file and returns its text content.

    This tool supports various file types, including text (PDF, TXT, CSV)
    and can perform Optical Character Recognition (OCR) on images (PNG, JPG).
    It can be used with either a local path or a web URL.
    For non-text/image formats like audio or video, it will return a message
    indicating the file type, as it cannot analyze their content directly.

    Args:
        file_path (str): The local path or web URL of the file to be read.
    """
    import tempfile
    from urllib.parse import urlparse

    temp_file_path = None
    try:
        # Handle web URLs by downloading to a unique temp file. Keep the
        # URL's extension: mimetypes.guess_type and 'unstructured' both key
        # off the suffix, so an extension-less name ("temp_downloaded_file")
        # would silently bypass the unsupported-format guard below.
        if file_path.startswith(("http://", "https://")):
            suffix = os.path.splitext(urlparse(file_path).path)[1]
            response = requests.get(file_path, timeout=20)
            response.raise_for_status()
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                f.write(response.content)
                temp_file_path = f.name
            local_path = temp_file_path
        else:
            local_path = file_path

        # Gracefully handle unsupported file types (e.g. audio, video).
        mime_type, _ = mimetypes.guess_type(local_path)
        supported_apps = ("application/pdf", "application/zip")
        if mime_type and not (
            mime_type.startswith(("text/", "image/")) or mime_type in supported_apps
        ):
            return (
                f"File is of a non-visual, non-text format ({mime_type}). "
                "Content analysis is not supported by this tool."
            )

        # Use 'unstructured', which has built-in OCR for images.
        # This will extract text from images where possible.
        elements = partition(local_path)
        return "\n\n".join(str(el) for el in elements)
    except Exception as e:
        return f"Error reading or processing file '{file_path}': {e}"
    finally:
        # Single cleanup point for the downloaded temp file — runs on the
        # unsupported-format return, the success return, and any exception,
        # replacing the three duplicated cleanup sites.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
|
| 65 |
|
| 66 |
+
|
| 67 |
+
# --- Agent Class (Updated with Native Memory Management) ---
|
| 68 |
class GaiaSmolAgent:
|
| 69 |
def __init__(self):
|
| 70 |
"""
|
| 71 |
Initializes the optimized agent.
|
| 72 |
+
Now uses the agent's native conversation memory capabilities.
|
|
|
|
|
|
|
| 73 |
"""
|
| 74 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 75 |
api_key = os.getenv("GEMINI_API_KEY")
|
|
|
|
| 86 |
|
| 87 |
# Store the sophisticated system prompt as an instance variable.
|
| 88 |
self.system_prompt = """
|
| 89 |
+
You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools. You will have access to the conversation history, so use it for context.
|
| 90 |
|
| 91 |
**Available Tools:**
|
| 92 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 93 |
+
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL. It can read text and extract text from images (OCR).
|
| 94 |
|
| 95 |
**Your Thought Process:**
|
| 96 |
+
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed, considering the previous turns in the conversation.
|
| 97 |
2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
|
| 98 |
3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
|
| 99 |
4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
|
|
|
|
| 105 |
- Do not ask for clarification. Directly proceed to solve the problem.
|
| 106 |
"""
|
| 107 |
|
| 108 |
+
# Initialize the agent with the updated file_reader tool and memory settings.
|
| 109 |
self.agent = CodeAgent(
|
| 110 |
model=model,
|
| 111 |
tools=[file_reader, DuckDuckGoSearchTool()],
|
| 112 |
add_base_tools=True, # Provides the python interpreter and the final_answer function
|
| 113 |
+
planning_interval=3 # Re-plan every 3 steps, considering memory.
|
| 114 |
)
|
| 115 |
+
|
| 116 |
+
print("Optimized GaiaSmolAgent initialized successfully with native memory and multimodal capabilities.")
|
| 117 |
|
| 118 |
+
def __call__(self, question: str, reset_memory: bool = False) -> str:
|
| 119 |
"""
|
| 120 |
Directly runs the agent to generate and execute a plan to answer the question.
|
| 121 |
+
It leverages the agent's built-in memory, controlled by the `reset` parameter.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
question (str): The user's question.
|
| 125 |
+
reset_memory (bool): If True, the agent's conversation memory will be cleared
|
| 126 |
+
before running. Maps to the agent's `reset` parameter.
|
| 127 |
"""
|
| 128 |
print(f"Optimized Agent received question: {question[:100]}...")
|
| 129 |
+
|
| 130 |
try:
|
| 131 |
+
# Combine the system prompt with the current question. The agent will handle the history.
|
| 132 |
+
full_prompt = f"{self.system_prompt}\n\nCURRENT TASK:\nUser Question: \"{question}\""
|
| 133 |
+
|
| 134 |
+
# Use the agent's `reset` parameter to control conversation memory.
|
| 135 |
+
# `reset=False` keeps the memory from previous calls.
|
| 136 |
+
final_answer = self.agent.run(full_prompt, reset=reset_memory)
|
| 137 |
+
|
| 138 |
except Exception as e:
|
| 139 |
print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
|
| 140 |
print(traceback.format_exc()) # Print full traceback for easier debugging
|