MickyWin22 committed on
Commit
0fe12f3
·
verified ·
1 Parent(s): 35d7582

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -24
app.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import pandas as pd
5
  import traceback
6
  import time
 
7
 
8
  # Import smol-agent and tool components
9
  from smolagents import CodeAgent, LiteLLMModel, tool
@@ -13,39 +14,62 @@ from unstructured.partition.auto import partition
13
  # --- Constants ---
14
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
 
16
- # --- Tool Definition ---
17
  @tool
18
  def file_reader(file_path: str) -> str:
19
- """Reads the content of a file and returns its text content.
20
-
21
- This tool supports various file types like PDF, TXT, CSV, etc., from either
22
- a local path or a web URL.
 
 
 
23
 
24
  Args:
25
  file_path (str): The local path or web URL of the file to be read.
26
  """
 
27
  try:
 
28
  if file_path.startswith("http://") or file_path.startswith("https://"):
 
29
  response = requests.get(file_path, timeout=20)
30
  response.raise_for_status()
31
- with open("temp_file", "wb") as f:
32
  f.write(response.content)
33
- elements = partition("temp_file")
34
- os.remove("temp_file") # Clean up
35
  else:
36
- elements = partition(file_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  return "\n\n".join([str(el) for el in elements])
38
  except Exception as e:
 
 
 
39
  return f"Error reading or processing file '{file_path}': {e}"
40
 
41
- # --- Agent Class ---
 
42
  class GaiaSmolAgent:
43
  def __init__(self):
44
  """
45
  Initializes the optimized agent.
46
- Optimization 1: Use a faster LLM (Gemini 1.5 Flash) to reduce latency.
47
- Optimization 2: Use a single, powerful agent with a detailed system prompt
48
- to eliminate the slow two-step (plan -> execute) process.
49
  """
50
  print("Initializing Optimized GaiaSmolAgent...")
51
  api_key = os.getenv("GEMINI_API_KEY")
@@ -62,14 +86,14 @@ class GaiaSmolAgent:
62
 
63
  # Store the sophisticated system prompt as an instance variable.
64
  self.system_prompt = """
65
- You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools.
66
 
67
  **Available Tools:**
68
  - `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
69
- - `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL.
70
 
71
  **Your Thought Process:**
72
- 1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed.
73
  2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
74
  3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
75
  4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
@@ -81,25 +105,36 @@ class GaiaSmolAgent:
81
  - Do not ask for clarification. Directly proceed to solve the problem.
82
  """
83
 
84
- # Initialize the agent without the 'system_prompt' argument to prevent the TypeError.
85
  self.agent = CodeAgent(
86
  model=model,
87
  tools=[file_reader, DuckDuckGoSearchTool()],
88
  add_base_tools=True, # Provides the python interpreter and the final_answer function
 
89
  )
90
- print("Optimized GaiaSmolAgent initialized successfully.")
 
91
 
92
- def __call__(self, question: str) -> str:
93
  """
94
  Directly runs the agent to generate and execute a plan to answer the question.
95
- This simplified single-call approach is faster and more efficient.
 
 
 
 
 
96
  """
97
  print(f"Optimized Agent received question: {question[:100]}...")
 
98
  try:
99
- # Combine the system prompt with the actual question to give the agent full context.
100
- full_prompt = f"{self.system_prompt}\n\nUser Question: \"{question}\""
101
- # The agent now internally handles the reasoning, code generation, and execution in one step.
102
- final_answer = self.agent.run(full_prompt)
 
 
 
103
  except Exception as e:
104
  print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
105
  print(traceback.format_exc()) # Print full traceback for easier debugging
 
4
  import pandas as pd
5
  import traceback
6
  import time
7
+ import mimetypes
8
 
9
  # Import smol-agent and tool components
10
  from smolagents import CodeAgent, LiteLLMModel, tool
 
14
  # --- Constants ---
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
# --- Tool Definition (Updated for Multimodality) ---
@tool
def file_reader(file_path: str) -> str:
    """
    Reads the content of a file and returns its text content.
    This tool supports various file types, including text (PDF, TXT, CSV)
    and can perform Optical Character Recognition (OCR) on images (PNG, JPG).
    It can be used with either a local path or a web URL.
    For non-text/image formats like audio or video, it will return a message
    indicating the file type, as it cannot analyze their content directly.

    Args:
        file_path (str): The local path or web URL of the file to be read.
    """
    # Path of the temporary download, if any; cleaned up in the single
    # `finally` block below (replaces three duplicated removal sites in the
    # earlier revision, which could still leak the file on an unforeseen path).
    temp_file_path = None
    try:
        # Handle web URLs by downloading the file first.
        if file_path.startswith(("http://", "https://")):
            temp_file_path = "temp_downloaded_file"
            response = requests.get(file_path, timeout=20)
            response.raise_for_status()
            with open(temp_file_path, "wb") as f:
                f.write(response.content)
            local_path = temp_file_path
        else:
            local_path = file_path

        # Gracefully handle unsupported file types (e.g., audio, video):
        # only text, images, PDFs, and ZIP archives are passed to `partition`.
        mime_type, _ = mimetypes.guess_type(local_path)
        if mime_type and not (
            mime_type.startswith(("text/", "image/"))
            or mime_type in ("application/pdf", "application/zip")
        ):
            return f"File is of a non-visual, non-text format ({mime_type}). Content analysis is not supported by this tool."

        # 'unstructured' has built-in OCR for images, so this extracts text
        # from images where possible as well as from text-based documents.
        elements = partition(local_path)
        return "\n\n".join(str(el) for el in elements)
    except Exception as e:
        # Report the failure as a string so the agent can react to it instead
        # of crashing the tool-call loop.
        return f"Error reading or processing file '{file_path}': {e}"
    finally:
        # Single cleanup point: runs on success, early return, and error alike.
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
65
 
66
+
67
+ # --- Agent Class (Updated with Native Memory Management) ---
68
  class GaiaSmolAgent:
69
  def __init__(self):
70
  """
71
  Initializes the optimized agent.
72
+ Now uses the agent's native conversation memory capabilities.
 
 
73
  """
74
  print("Initializing Optimized GaiaSmolAgent...")
75
  api_key = os.getenv("GEMINI_API_KEY")
 
86
 
87
  # Store the sophisticated system prompt as an instance variable.
88
  self.system_prompt = """
89
+ You are an expert-level research assistant AI. Your sole purpose is to answer the user's question by breaking it down into logical steps and using the provided tools. You will have access to the conversation history, so use it for context.
90
 
91
  **Available Tools:**
92
  - `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
93
+ - `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL. It can read text and extract text from images (OCR).
94
 
95
  **Your Thought Process:**
96
+ 1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed, considering the previous turns in the conversation.
97
  2. **Formulate a Plan:** Think step-by-step about which tools to use in what order. For example, you might need to search for a URL first, then read the content of that URL.
98
  3. **Execute & Analyze:** Call the necessary tools. Carefully examine the output of each tool to extract the required facts. You can write Python code to process the data returned by the tools.
99
  4. **Synthesize the Answer:** Once you have gathered sufficient information, formulate a final, concise answer to the original question.
 
105
  - Do not ask for clarification. Directly proceed to solve the problem.
106
  """
107
 
108
+ # Initialize the agent with the updated file_reader tool and memory settings.
109
  self.agent = CodeAgent(
110
  model=model,
111
  tools=[file_reader, DuckDuckGoSearchTool()],
112
  add_base_tools=True, # Provides the python interpreter and the final_answer function
113
+ planning_interval=3 # Re-plan every 3 steps, considering memory.
114
  )
115
+
116
+ print("Optimized GaiaSmolAgent initialized successfully with native memory and multimodal capabilities.")
117
 
118
+ def __call__(self, question: str, reset_memory: bool = False) -> str:
119
  """
120
  Directly runs the agent to generate and execute a plan to answer the question.
121
+ It leverages the agent's built-in memory, controlled by the `reset` parameter.
122
+
123
+ Args:
124
+ question (str): The user's question.
125
+ reset_memory (bool): If True, the agent's conversation memory will be cleared
126
+ before running. Maps to the agent's `reset` parameter.
127
  """
128
  print(f"Optimized Agent received question: {question[:100]}...")
129
+
130
  try:
131
+ # Combine the system prompt with the current question. The agent will handle the history.
132
+ full_prompt = f"{self.system_prompt}\n\nCURRENT TASK:\nUser Question: \"{question}\""
133
+
134
+ # Use the agent's `reset` parameter to control conversation memory.
135
+ # `reset=False` keeps the memory from previous calls.
136
+ final_answer = self.agent.run(full_prompt, reset=reset_memory)
137
+
138
  except Exception as e:
139
  print(f"FATAL AGENT ERROR: An exception occurred during agent execution: {e}")
140
  print(traceback.format_exc()) # Print full traceback for easier debugging