Update app.py
Browse files
app.py
CHANGED
|
@@ -5,34 +5,42 @@ import pandas as pd
|
|
| 5 |
import traceback
|
| 6 |
import time
|
| 7 |
import mimetypes
|
|
|
|
| 8 |
|
| 9 |
# Import smol-agent and tool components
|
| 10 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
| 11 |
from smolagents import DuckDuckGoSearchTool
|
| 12 |
from unstructured.partition.auto import partition
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# --- Constants ---
|
| 15 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 16 |
|
| 17 |
-
# --- Tool Definition (
|
| 18 |
@tool
|
| 19 |
def file_reader(file_path: str) -> str:
|
| 20 |
"""
|
| 21 |
-
Reads the content of a file and returns
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
| 27 |
|
| 28 |
Args:
|
| 29 |
file_path (str): The local path or web URL of the file to be read.
|
|
|
|
|
|
|
| 30 |
"""
|
| 31 |
temp_file_path = None
|
| 32 |
try:
|
| 33 |
-
#
|
| 34 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
| 35 |
-
temp_file_path =
|
| 36 |
response = requests.get(file_path, timeout=20)
|
| 37 |
response.raise_for_status()
|
| 38 |
with open(temp_file_path, "wb") as f:
|
|
@@ -41,44 +49,62 @@ def file_reader(file_path: str) -> str:
|
|
| 41 |
else:
|
| 42 |
local_path = file_path
|
| 43 |
|
| 44 |
-
# Gracefully handle unsupported file types (e.g., audio, video)
|
| 45 |
mime_type, _ = mimetypes.guess_type(local_path)
|
| 46 |
-
|
| 47 |
-
if temp_file_path and os.path.exists(temp_file_path):
|
| 48 |
-
os.remove(temp_file_path)
|
| 49 |
-
return f"File is of a non-visual, non-text format ({mime_type}). Content analysis is not supported by this tool."
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
|
|
|
|
|
|
| 59 |
return "\n\n".join([str(el) for el in elements])
|
|
|
|
| 60 |
except Exception as e:
|
| 61 |
-
|
|
|
|
|
|
|
| 62 |
if temp_file_path and os.path.exists(temp_file_path):
|
| 63 |
os.remove(temp_file_path)
|
| 64 |
-
return f"Error reading or processing file '{file_path}': {e}"
|
| 65 |
|
| 66 |
|
| 67 |
-
# --- Agent Class (Updated with
|
| 68 |
class GaiaSmolAgent:
|
| 69 |
def __init__(self):
|
| 70 |
"""
|
| 71 |
Initializes the optimized agent.
|
| 72 |
-
Now uses the agent's native conversation memory
|
| 73 |
"""
|
| 74 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 75 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 76 |
if not api_key:
|
| 77 |
raise ValueError("API key 'GEMINI_API_KEY' not found in environment secrets.")
|
| 78 |
|
| 79 |
-
# Use a
|
| 80 |
model = LiteLLMModel(
|
| 81 |
-
model_id="gemini/gemini-1.5-
|
| 82 |
api_key=api_key,
|
| 83 |
temperature=0.0,
|
| 84 |
timeout=120.0, # Add a timeout to prevent hanging
|
|
@@ -90,7 +116,7 @@ class GaiaSmolAgent:
|
|
| 90 |
|
| 91 |
**Available Tools:**
|
| 92 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 93 |
-
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL. It can read text
|
| 94 |
|
| 95 |
**Your Thought Process:**
|
| 96 |
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed, considering the previous turns in the conversation.
|
|
@@ -113,7 +139,7 @@ class GaiaSmolAgent:
|
|
| 113 |
planning_interval=3 # Re-plan every 3 steps, considering memory.
|
| 114 |
)
|
| 115 |
|
| 116 |
-
print("Optimized GaiaSmolAgent initialized successfully with native memory and multimodal capabilities.")
|
| 117 |
|
| 118 |
def __call__(self, question: str, reset_memory: bool = False) -> str:
|
| 119 |
"""
|
|
|
|
| 5 |
import traceback
|
| 6 |
import time
|
| 7 |
import mimetypes
|
| 8 |
+
from tempfile import NamedTemporaryFile
|
| 9 |
|
| 10 |
# Import smol-agent and tool components
|
| 11 |
from smolagents import CodeAgent, LiteLLMModel, tool
|
| 12 |
from smolagents import DuckDuckGoSearchTool
|
| 13 |
from unstructured.partition.auto import partition
|
| 14 |
|
| 15 |
+
# Imports for advanced file processing
|
| 16 |
+
import speech_recognition as sr
|
| 17 |
+
from moviepy.editor import VideoFileClip
|
| 18 |
+
|
| 19 |
# --- Constants ---
|
| 20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 21 |
|
| 22 |
+
# --- Tool Definition (Upgraded for Full Multimodality) ---
|
| 23 |
@tool
|
| 24 |
def file_reader(file_path: str) -> str:
|
| 25 |
"""
|
| 26 |
+
Reads and analyzes the content of a file and returns relevant text-based information.
|
| 27 |
+
Supports:
|
| 28 |
+
- Text files (PDF, TXT, CSV)
|
| 29 |
+
- Images (PNG, JPG) with OCR
|
| 30 |
+
- Audio (MP3, WAV) via speech recognition
|
| 31 |
+
- Video (MP4, MOV) via speech recognition on audio track
|
| 32 |
+
Can be used with a local file path or a web URL.
|
| 33 |
|
| 34 |
Args:
|
| 35 |
file_path (str): The local path or web URL of the file to be read.
|
| 36 |
+
Returns:
|
| 37 |
+
str: Extracted or transcribed content as text.
|
| 38 |
"""
|
| 39 |
temp_file_path = None
|
| 40 |
try:
|
| 41 |
+
# Download the file if it's a URL
|
| 42 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
| 43 |
+
temp_file_path = NamedTemporaryFile(delete=False).name
|
| 44 |
response = requests.get(file_path, timeout=20)
|
| 45 |
response.raise_for_status()
|
| 46 |
with open(temp_file_path, "wb") as f:
|
|
|
|
| 49 |
else:
|
| 50 |
local_path = file_path
|
| 51 |
|
|
|
|
| 52 |
mime_type, _ = mimetypes.guess_type(local_path)
|
| 53 |
+
recognizer = sr.Recognizer()
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
+
if mime_type:
|
| 56 |
+
# Handle audio files
|
| 57 |
+
if mime_type.startswith("audio/"):
|
| 58 |
+
with sr.AudioFile(local_path) as source:
|
| 59 |
+
audio = recognizer.record(source)
|
| 60 |
+
# Using whisper for robust speech recognition
|
| 61 |
+
return recognizer.recognize_whisper(audio)
|
| 62 |
|
| 63 |
+
# Handle video files by extracting audio
|
| 64 |
+
elif mime_type.startswith("video/"):
|
| 65 |
+
# Use a temporary file for the extracted audio
|
| 66 |
+
with NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
|
| 67 |
+
audio_temp_path = audio_temp.name
|
| 68 |
+
|
| 69 |
+
clip = VideoFileClip(local_path)
|
| 70 |
+
clip.audio.write_audiofile(audio_temp_path, codec='pcm_s16le')
|
| 71 |
+
|
| 72 |
+
with sr.AudioFile(audio_temp_path) as source:
|
| 73 |
+
audio = recognizer.record(source)
|
| 74 |
+
|
| 75 |
+
# Clean up the temporary audio file
|
| 76 |
+
os.remove(audio_temp_path)
|
| 77 |
+
|
| 78 |
+
# Using whisper for robust speech recognition
|
| 79 |
+
return recognizer.recognize_whisper(audio)
|
| 80 |
|
| 81 |
+
# Default to handling text and images with OCR if not audio/video
|
| 82 |
+
elements = partition(local_path)
|
| 83 |
return "\n\n".join([str(el) for el in elements])
|
| 84 |
+
|
| 85 |
except Exception as e:
|
| 86 |
+
return f"Error reading or processing file '{file_path}': {e}"
|
| 87 |
+
finally:
|
| 88 |
+
# Clean up the downloaded file if it exists
|
| 89 |
if temp_file_path and os.path.exists(temp_file_path):
|
| 90 |
os.remove(temp_file_path)
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
+
# --- Agent Class (Updated with More Powerful Model and Tools) ---
|
| 94 |
class GaiaSmolAgent:
|
| 95 |
def __init__(self):
|
| 96 |
"""
|
| 97 |
Initializes the optimized agent.
|
| 98 |
+
Now uses a more powerful model and the agent's native conversation memory.
|
| 99 |
"""
|
| 100 |
print("Initializing Optimized GaiaSmolAgent...")
|
| 101 |
api_key = os.getenv("GEMINI_API_KEY")
|
| 102 |
if not api_key:
|
| 103 |
raise ValueError("API key 'GEMINI_API_KEY' not found in environment secrets.")
|
| 104 |
|
| 105 |
+
# Use a more powerful, "clever" model for better reasoning.
|
| 106 |
model = LiteLLMModel(
|
| 107 |
+
model_id="gemini/gemini-1.5-pro-latest",
|
| 108 |
api_key=api_key,
|
| 109 |
temperature=0.0,
|
| 110 |
timeout=120.0, # Add a timeout to prevent hanging
|
|
|
|
| 116 |
|
| 117 |
**Available Tools:**
|
| 118 |
- `duck_duck_go_search(query: str) -> str`: Use this to find information, file URLs, or anything on the web.
|
| 119 |
+
- `file_reader(file_path: str) -> str`: Use this to read the contents of a file from a local path or a web URL. It can read text, extract text from images (OCR), and transcribe audio from audio/video files.
|
| 120 |
|
| 121 |
**Your Thought Process:**
|
| 122 |
1. **Deconstruct the Goal:** Carefully analyze the question to understand what information is needed, considering the previous turns in the conversation.
|
|
|
|
| 139 |
planning_interval=3 # Re-plan every 3 steps, considering memory.
|
| 140 |
)
|
| 141 |
|
| 142 |
+
print("Optimized GaiaSmolAgent initialized successfully with native memory and full multimodal capabilities.")
|
| 143 |
|
| 144 |
def __call__(self, question: str, reset_memory: bool = False) -> str:
|
| 145 |
"""
|