Csuarezg committed on
Commit
38d5f80
·
verified ·
1 Parent(s): 2f56333

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -7
app.py CHANGED
@@ -9,6 +9,34 @@ import logging
9
  from typing import List, Dict, Optional, TypedDict, Annotated
10
  import numpy as np
11
  import base64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # Core ML/AI imports
14
  from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage, ToolMessage
@@ -26,7 +54,15 @@ import wikipedia
26
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
27
  import speech_recognition as sr
28
  from PIL import Image
29
- from transformers import pipeline
 
 
 
 
 
 
 
 
30
 
31
  # Audio processing - NEW IMPORTS
32
  try:
@@ -185,12 +221,19 @@ class GAIAAgent:
185
  def __init__(self, parent_agent):
186
  self.parent_agent = parent_agent
187
  print("🔧 Initializing Enhanced FileAnalyzerTool...")
188
- try:
189
- self.image_analyzer = pipeline("image-classification", model="google/vit-base-patch16-224")
190
- self.text_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
191
- print("✅ Image analysis models loaded successfully")
192
- except Exception as e:
193
- print(f"⚠️ Could not load image analysis models: {e}")
 
 
 
 
 
 
 
194
  self.image_analyzer = None
195
  self.text_generator = None
196
 
@@ -199,6 +242,12 @@ class GAIAAgent:
199
  print("✅ Audio processing (pydub) available")
200
  else:
201
  print("⚠️ pydub not available - MP3 conversion will be limited")
 
 
 
 
 
 
202
 
203
  def analyze(self, file_path: str, file_type: str) -> str:
204
  try:
@@ -1318,6 +1367,16 @@ if __name__ == "__main__":
1318
  else:
1319
  print("⚠️ pydub not available - consider adding to requirements.txt")
1320
 
 
 
 
 
 
 
 
 
 
 
1321
  print("="*50 + "\n")
1322
  print("🌟 Launching Enhanced GAIA Agent Interface...")
1323
  demo.launch(debug=True, share=False)
 
9
  from typing import List, Dict, Optional, TypedDict, Annotated
10
  import numpy as np
11
  import base64
12
+ import subprocess
13
+ import sys
14
+
15
+ # Configure ffmpeg for pydub in HuggingFace Spaces
16
def setup_ffmpeg():
    """Make sure an ``ffmpeg`` executable is available, installing it if needed.

    First probes for an existing binary by running ``ffmpeg -version``. If the
    probe fails for any reason, falls back to installing the package with
    ``apt-get update`` followed by ``apt-get install -y ffmpeg``.

    This function is invoked at module import time, so it must never raise:
    every failure is reported on stdout and turned into a ``False`` result.

    Returns:
        bool: ``True`` if ffmpeg was already present or was installed
        successfully, ``False`` otherwise.
    """
    # Upper bounds so a stalled process cannot hang application start-up.
    probe_timeout = 30
    install_timeout = 600

    try:
        # Check if ffmpeg is already available
        subprocess.run(
            ['ffmpeg', '-version'],
            capture_output=True,
            check=True,
            timeout=probe_timeout,
        )
    except (subprocess.SubprocessError, OSError):
        # Covers a non-zero exit, a timeout, a missing binary and a
        # non-executable binary; in all of these cases try to install.
        pass
    else:
        print("✅ ffmpeg already available")
        return True

    try:
        # Try to install ffmpeg using apt-get (works in HF Spaces)
        print("📦 Installing ffmpeg...")
        for command in (['apt-get', 'update'],
                        ['apt-get', 'install', '-y', 'ffmpeg']):
            subprocess.run(
                command,
                capture_output=True,
                check=True,
                timeout=install_timeout,
            )
    except subprocess.CalledProcessError as e:
        # apt-get ran but exited with a non-zero status.
        print(f"⚠️ Could not install ffmpeg: {e}")
        return False
    except Exception as e:
        # apt-get missing, not permitted, timed out, etc. Deliberately broad:
        # an optional dependency must not abort the import of the module.
        print(f"⚠️ ffmpeg setup failed: {e}")
        return False

    print("✅ ffmpeg installed successfully")
    return True
37
+
38
+ # Setup ffmpeg early
39
+ FFMPEG_AVAILABLE = setup_ffmpeg()
40
 
41
  # Core ML/AI imports
42
  from langchain_core.messages import HumanMessage, SystemMessage, AnyMessage, ToolMessage
 
54
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
55
  import speech_recognition as sr
56
  from PIL import Image
57
+
58
+ # Transformers with error handling
59
+ try:
60
+ from transformers import pipeline
61
+ TRANSFORMERS_AVAILABLE = True
62
+ print("✅ Transformers library loaded successfully")
63
+ except ImportError:
64
+ TRANSFORMERS_AVAILABLE = False
65
+ print("⚠️ Transformers not available - image analysis will be limited")
66
 
67
  # Audio processing - NEW IMPORTS
68
  try:
 
221
  def __init__(self, parent_agent):
222
  self.parent_agent = parent_agent
223
  print("🔧 Initializing Enhanced FileAnalyzerTool...")
224
+
225
+ # Initialize image analysis models if transformers is available
226
+ if TRANSFORMERS_AVAILABLE:
227
+ try:
228
+ self.image_analyzer = pipeline("image-classification", model="google/vit-base-patch16-224")
229
+ self.text_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
230
+ print("✅ Image analysis models loaded successfully")
231
+ except Exception as e:
232
+ print(f"⚠️ Could not load image analysis models: {e}")
233
+ self.image_analyzer = None
234
+ self.text_generator = None
235
+ else:
236
+ print("⚠️ Transformers not available - image analysis models disabled")
237
  self.image_analyzer = None
238
  self.text_generator = None
239
 
 
242
  print("✅ Audio processing (pydub) available")
243
  else:
244
  print("⚠️ pydub not available - MP3 conversion will be limited")
245
+
246
+ # Check ffmpeg availability
247
+ if FFMPEG_AVAILABLE:
248
+ print("✅ ffmpeg available for audio conversion")
249
+ else:
250
+ print("⚠️ ffmpeg not available - some audio formats may not work")
251
 
252
  def analyze(self, file_path: str, file_type: str) -> str:
253
  try:
 
1367
  else:
1368
  print("⚠️ pydub not available - consider adding to requirements.txt")
1369
 
1370
+ if FFMPEG_AVAILABLE:
1371
+ print("✅ ffmpeg available for audio conversion!")
1372
+ else:
1373
+ print("⚠️ ffmpeg not available - some audio formats may not work")
1374
+
1375
+ if TRANSFORMERS_AVAILABLE:
1376
+ print("✅ Transformers available for image analysis!")
1377
+ else:
1378
+ print("⚠️ transformers not available - consider adding to requirements.txt")
1379
+
1380
  print("="*50 + "\n")
1381
  print("🌟 Launching Enhanced GAIA Agent Interface...")
1382
  demo.launch(debug=True, share=False)