Update app.py
Browse files
app.py
CHANGED
|
@@ -14,12 +14,15 @@ from unstructured.partition.auto import partition
|
|
| 14 |
|
| 15 |
# Imports for advanced file processing
|
| 16 |
import speech_recognition as sr
|
| 17 |
-
from
|
| 18 |
|
| 19 |
# --- Constants ---
|
| 20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 21 |
|
| 22 |
-
# ---
|
|
|
|
|
|
|
|
|
|
| 23 |
@tool
|
| 24 |
def file_reader(file_path: str) -> str:
|
| 25 |
"""
|
|
@@ -37,6 +40,7 @@ def file_reader(file_path: str) -> str:
|
|
| 37 |
str: Extracted or transcribed content as text.
|
| 38 |
"""
|
| 39 |
temp_file_path = None
|
|
|
|
| 40 |
try:
|
| 41 |
# Download the file if it's a URL
|
| 42 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
|
@@ -57,25 +61,20 @@ def file_reader(file_path: str) -> str:
|
|
| 57 |
if mime_type.startswith("audio/"):
|
| 58 |
with sr.AudioFile(local_path) as source:
|
| 59 |
audio = recognizer.record(source)
|
| 60 |
-
# Using whisper for robust speech recognition
|
| 61 |
return recognizer.recognize_whisper(audio)
|
| 62 |
|
| 63 |
-
# Handle video files by extracting audio
|
| 64 |
elif mime_type.startswith("video/"):
|
| 65 |
-
# Use a temporary file for the extracted audio
|
| 66 |
with NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
|
| 67 |
audio_temp_path = audio_temp.name
|
| 68 |
|
| 69 |
-
|
| 70 |
-
|
|
|
|
| 71 |
|
| 72 |
with sr.AudioFile(audio_temp_path) as source:
|
| 73 |
audio = recognizer.record(source)
|
| 74 |
|
| 75 |
-
# Clean up the temporary audio file
|
| 76 |
-
os.remove(audio_temp_path)
|
| 77 |
-
|
| 78 |
-
# Using whisper for robust speech recognition
|
| 79 |
return recognizer.recognize_whisper(audio)
|
| 80 |
|
| 81 |
# Default to handling text and images with OCR if not audio/video
|
|
@@ -88,6 +87,10 @@ def file_reader(file_path: str) -> str:
|
|
| 88 |
# Clean up the downloaded file if it exists
|
| 89 |
if temp_file_path and os.path.exists(temp_file_path):
|
| 90 |
os.remove(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
|
| 93 |
# --- Agent Class (Updated with More Powerful Model and Tools) ---
|
|
|
|
| 14 |
|
| 15 |
# Imports for advanced file processing
|
| 16 |
import speech_recognition as sr
|
| 17 |
+
from pydub import AudioSegment
|
| 18 |
|
| 19 |
# --- Constants ---
|
| 20 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 21 |
|
| 22 |
+
# --- Constants ---
|
| 23 |
+
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 24 |
+
|
| 25 |
+
# --- Tool Definition (Upgraded for Full Multimodality with pydub) ---
|
| 26 |
@tool
|
| 27 |
def file_reader(file_path: str) -> str:
|
| 28 |
"""
|
|
|
|
| 40 |
str: Extracted or transcribed content as text.
|
| 41 |
"""
|
| 42 |
temp_file_path = None
|
| 43 |
+
audio_temp_path = None
|
| 44 |
try:
|
| 45 |
# Download the file if it's a URL
|
| 46 |
if file_path.startswith("http://") or file_path.startswith("https://"):
|
|
|
|
| 61 |
if mime_type.startswith("audio/"):
|
| 62 |
with sr.AudioFile(local_path) as source:
|
| 63 |
audio = recognizer.record(source)
|
|
|
|
| 64 |
return recognizer.recognize_whisper(audio)
|
| 65 |
|
| 66 |
+
# Handle video files by extracting audio with pydub
|
| 67 |
elif mime_type.startswith("video/"):
|
|
|
|
| 68 |
with NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
|
| 69 |
audio_temp_path = audio_temp.name
|
| 70 |
|
| 71 |
+
# Extract audio using pydub
|
| 72 |
+
video_audio = AudioSegment.from_file(local_path, format=mime_type.split('/')[1])
|
| 73 |
+
video_audio.export(audio_temp_path, format="wav")
|
| 74 |
|
| 75 |
with sr.AudioFile(audio_temp_path) as source:
|
| 76 |
audio = recognizer.record(source)
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
return recognizer.recognize_whisper(audio)
|
| 79 |
|
| 80 |
# Default to handling text and images with OCR if not audio/video
|
|
|
|
| 87 |
# Clean up the downloaded file if it exists
|
| 88 |
if temp_file_path and os.path.exists(temp_file_path):
|
| 89 |
os.remove(temp_file_path)
|
| 90 |
+
# Clean up the temporary audio file
|
| 91 |
+
if audio_temp_path and os.path.exists(audio_temp_path):
|
| 92 |
+
os.remove(audio_temp_path)
|
| 93 |
+
|
| 94 |
|
| 95 |
|
| 96 |
# --- Agent Class (Updated with More Powerful Model and Tools) ---
|