MickyWin22 committed on
Commit
5065308
·
verified ·
1 Parent(s): 3f76d2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -14,12 +14,15 @@ from unstructured.partition.auto import partition
14
 
15
  # Imports for advanced file processing
16
  import speech_recognition as sr
17
- from moviepy.editor import VideoFileClip
18
 
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
- # --- Tool Definition (Upgraded for Full Multimodality) ---
 
 
 
23
  @tool
24
  def file_reader(file_path: str) -> str:
25
  """
@@ -37,6 +40,7 @@ def file_reader(file_path: str) -> str:
37
  str: Extracted or transcribed content as text.
38
  """
39
  temp_file_path = None
 
40
  try:
41
  # Download the file if it's a URL
42
  if file_path.startswith("http://") or file_path.startswith("https://"):
@@ -57,25 +61,20 @@ def file_reader(file_path: str) -> str:
57
  if mime_type.startswith("audio/"):
58
  with sr.AudioFile(local_path) as source:
59
  audio = recognizer.record(source)
60
- # Using whisper for robust speech recognition
61
  return recognizer.recognize_whisper(audio)
62
 
63
- # Handle video files by extracting audio
64
  elif mime_type.startswith("video/"):
65
- # Use a temporary file for the extracted audio
66
  with NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
67
  audio_temp_path = audio_temp.name
68
 
69
- clip = VideoFileClip(local_path)
70
- clip.audio.write_audiofile(audio_temp_path, codec='pcm_s16le')
 
71
 
72
  with sr.AudioFile(audio_temp_path) as source:
73
  audio = recognizer.record(source)
74
 
75
- # Clean up the temporary audio file
76
- os.remove(audio_temp_path)
77
-
78
- # Using whisper for robust speech recognition
79
  return recognizer.recognize_whisper(audio)
80
 
81
  # Default to handling text and images with OCR if not audio/video
@@ -88,6 +87,10 @@ def file_reader(file_path: str) -> str:
88
  # Clean up the downloaded file if it exists
89
  if temp_file_path and os.path.exists(temp_file_path):
90
  os.remove(temp_file_path)
 
 
 
 
91
 
92
 
93
  # --- Agent Class (Updated with More Powerful Model and Tools) ---
 
14
 
15
  # Imports for advanced file processing
16
  import speech_recognition as sr
17
+ from pydub import AudioSegment
18
 
19
  # --- Constants ---
20
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
21
 
22
+ # --- Constants ---
23
+ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
24
+
25
+ # --- Tool Definition (Upgraded for Full Multimodality with pydub) ---
26
  @tool
27
  def file_reader(file_path: str) -> str:
28
  """
 
40
  str: Extracted or transcribed content as text.
41
  """
42
  temp_file_path = None
43
+ audio_temp_path = None
44
  try:
45
  # Download the file if it's a URL
46
  if file_path.startswith("http://") or file_path.startswith("https://"):
 
61
  if mime_type.startswith("audio/"):
62
  with sr.AudioFile(local_path) as source:
63
  audio = recognizer.record(source)
 
64
  return recognizer.recognize_whisper(audio)
65
 
66
+ # Handle video files by extracting audio with pydub
67
  elif mime_type.startswith("video/"):
 
68
  with NamedTemporaryFile(suffix=".wav", delete=False) as audio_temp:
69
  audio_temp_path = audio_temp.name
70
 
71
+ # Extract audio using pydub
72
+ video_audio = AudioSegment.from_file(local_path, format=mime_type.split('/')[1])
73
+ video_audio.export(audio_temp_path, format="wav")
74
 
75
  with sr.AudioFile(audio_temp_path) as source:
76
  audio = recognizer.record(source)
77
 
 
 
 
 
78
  return recognizer.recognize_whisper(audio)
79
 
80
  # Default to handling text and images with OCR if not audio/video
 
87
  # Clean up the downloaded file if it exists
88
  if temp_file_path and os.path.exists(temp_file_path):
89
  os.remove(temp_file_path)
90
+ # Clean up the temporary audio file
91
+ if audio_temp_path and os.path.exists(audio_temp_path):
92
+ os.remove(audio_temp_path)
93
+
94
 
95
 
96
  # --- Agent Class (Updated with More Powerful Model and Tools) ---