TarSh8654 committed on
Commit
ccafac2
·
verified ·
1 Parent(s): 8a86ca9

Rename summarizer_tool.py to modified_summarizer_tool.py

Browse files
summarizer_tool.py → modified_summarizer_tool.py RENAMED
@@ -16,12 +16,12 @@ import tempfile
16
  import json # Added for handling JSON output consistently
17
 
18
  # --- Langchain Imports ---
19
- # Ensure these are correct based on Langchain's modularization
20
  from langchain_community.vectorstores import FAISS
21
  from langchain_community.embeddings import HuggingFaceEmbeddings
22
- from langchain.text_splitter import RecursiveCharacterTextSplitter # This one is still in langchain
23
  from langchain_community.document_loaders import PyPDFLoader
24
  from langchain.chains import RetrievalQA
 
25
 
26
  # --- Other Imports ---
27
  from gtts import gTTS
@@ -31,21 +31,19 @@ from datasets import load_dataset, Audio # Added for dataset loading
31
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
 
33
  # --- Global Cache for Pipelines ---
34
- # This prevents reloading the same model multiple times
35
  _pipeline_cache = {}
36
 
37
  def get_pipeline(task_name, model_name=None, **kwargs):
38
  """
39
  Retrieves a Hugging Face pipeline, caching it for efficiency.
40
  """
41
- # Create a unique key for the cache based on task, model, and kwargs
42
  cache_key = f"{task_name}-{model_name}-{hash(frozenset(kwargs.items()))}"
43
  if cache_key not in _pipeline_cache:
44
  logging.info(f"Loading pipeline for task '{task_name}' with model '{model_name}'...")
45
  if model_name:
46
  _pipeline_cache[cache_key] = pipeline(task_name, model=model_name, **kwargs)
47
  else:
48
- _pipeline_cache[cache_key] = pipeline(task_name, **kwargs) # Uses default model for task
49
  logging.info(f"Pipeline '{task_name}' loaded.")
50
  return _pipeline_cache[cache_key]
51
 
@@ -54,19 +52,17 @@ def get_pipeline(task_name, model_name=None, **kwargs):
54
  class AllInOneDispatcher:
55
  def __init__(self):
56
  logging.info("Initializing AllInOneDispatcher...")
57
- self.memory = [] # For storing interaction history (optional)
58
 
59
- # Define default models for various tasks.
60
- # These will be loaded on demand via get_pipeline.
61
  self.default_models = {
62
  "sentiment-analysis": "distilbert-base-uncased-finetuned-sst-2-english",
63
  "summarization": "sshleifer/distilbart-cnn-12-6",
64
- "text-generation": "gpt2",
65
  "translation_en_to_fr": "Helsinki-NLP/opus-mt-en-fr",
66
  "image-classification": "google/vit-base-patch16-224",
67
  "object-detection": "facebook/detr-resnet-50",
68
- "automatic-speech-recognition": "openai/whisper-tiny.en", # For English ASR
69
- # Add other models/tasks as needed
70
  }
71
  logging.info("AllInOneDispatcher initialized.")
72
 
@@ -78,11 +74,9 @@ class AllInOneDispatcher:
78
  return get_pipeline(task, model_name=final_model_name)
79
 
80
  def _is_file(self, path):
81
- """Checks if the given path exists and is a file."""
82
  return os.path.exists(path) and os.path.isfile(path)
83
 
84
  def handle_text(self, text: str, task: str = "sentiment-analysis", **kwargs):
85
- """Processes text input for a given NLP task."""
86
  if not isinstance(text, str):
87
  raise TypeError("Text input must be a string.")
88
  logging.info(f"Handling text for task: {task}")
@@ -92,7 +86,6 @@ class AllInOneDispatcher:
92
  return result
93
 
94
  def handle_image(self, path: str, task: str = "image-classification", **kwargs):
95
- """Processes image file input for a given computer vision task."""
96
  if not self._is_file(path):
97
  raise FileNotFoundError(f"Image file not found: {path}")
98
  logging.info(f"Handling image for task: {task}")
@@ -106,23 +99,20 @@ class AllInOneDispatcher:
106
  return result
107
 
108
  def handle_audio(self, path: str, task: str = "automatic-speech-recognition", **kwargs):
109
- """Processes audio file input for a given audio task."""
110
  if not self._is_file(path):
111
  raise FileNotFoundError(f"Audio file not found: {path}")
112
  logging.info(f"Handling audio for task: {task}")
113
-
114
- # Whisper models expect audio in a specific format (16kHz, mono, float32)
115
  try:
116
  audio = AudioSegment.from_file(path)
117
- audio = audio.set_channels(1).set_frame_rate(16000) # Convert to mono, 16kHz
118
 
119
  buffer = io.BytesIO()
120
- audio.export(buffer, format="wav") # Export to WAV in memory
121
- buffer.seek(0) # Rewind buffer
122
 
123
- array, sampling_rate = sf.read(buffer) # Read with soundfile
124
  if array.dtype != np.float32:
125
- array = array.astype(np.float32) # Ensure float32
126
 
127
  except Exception as e:
128
  logging.error(f"Error preparing audio file for processing: {e}")
@@ -134,11 +124,6 @@ class AllInOneDispatcher:
134
  return result
135
 
136
  def handle_video(self, path: str):
137
- """
138
- Processes video file input. This is a limited implementation:
139
- Extracts first few frames for image analysis and audio for ASR.
140
- Requires OpenCV (cv2) and system-wide ffmpeg.
141
- """
142
  if not self._is_file(path):
143
  raise FileNotFoundError(f"Video file not found: {path}")
144
  logging.info(f"Handling video: {path}")
@@ -157,42 +142,36 @@ class AllInOneDispatcher:
157
  ret, frame = cap.read()
158
  if not ret:
159
  break
160
- frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))) # Convert BGR to RGB for PIL
161
- if len(frames) >= 5: break # Process only first 5 frames for efficiency
162
  cap.release()
163
 
164
- # Extract audio from video
165
  audio_temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
166
  try:
167
- # Using os.system for ffmpeg call requires ffmpeg to be in PATH
168
- # This is a common way but can be less robust than a Python wrapper.
169
- # Hugging Face Spaces typically has ffmpeg.
170
  os.system(f"ffmpeg -i \"{path}\" -q:a 0 -map a \"{audio_temp_path}\" -y")
171
  if not os.path.exists(audio_temp_path) or os.path.getsize(audio_temp_path) == 0:
172
  raise RuntimeError("FFmpeg failed to extract audio or extracted empty audio.")
173
  except Exception as e:
174
  logging.error(f"FFmpeg audio extraction failed: {e}")
175
- audio_temp_path = None # Indicate failure
176
 
177
  image_result = None
178
  audio_result = None
179
 
180
  if frames:
181
  try:
182
- # Process the first frame for image classification
183
  image_result = self.handle_image(frames[0], task="image-classification")
184
  except Exception as e:
185
  logging.warning(f"Failed to process video frame for image classification: {e}")
186
 
187
  if audio_temp_path:
188
  try:
189
- # Process the extracted audio for ASR
190
  audio_result = self.handle_audio(audio_temp_path, task="automatic-speech-recognition")
191
  except Exception as e:
192
  logging.warning(f"Failed to process extracted audio from video: {e}")
193
  finally:
194
  if os.path.exists(audio_temp_path):
195
- os.remove(audio_temp_path) # Clean up temp audio file
196
 
197
  result = {"image_analysis": image_result, "audio_analysis": audio_result}
198
  self.memory.append({"task": "video_analysis", "input": path, "output": result})
@@ -204,7 +183,6 @@ class AllInOneDispatcher:
204
  raise FileNotFoundError(f"PDF file not found: {path}")
205
  logging.info(f"Handling PDF: {path}")
206
 
207
- # RAG components
208
  try:
209
  loader = PyPDFLoader(path)
210
  docs = loader.load()
@@ -212,8 +190,14 @@ class AllInOneDispatcher:
212
  split_docs = splitter.split_documents(docs)
213
  embeddings = HuggingFaceEmbeddings()
214
  vectorstore = FAISS.from_documents(split_docs, embeddings)
215
- # Using a text-generation pipeline as the LLM for RetrievalQA
216
- qa_llm = self._get_task_pipeline("text-generation", model_name="gpt2") # Using a smaller model for RAG LLM
 
 
 
 
 
 
217
  qa_chain = RetrievalQA.from_chain_type(llm=qa_llm, retriever=vectorstore.as_retriever())
218
  result = qa_chain.run("Summarize this document")
219
  self.memory.append({"task": "pdf_summarization", "input": path, "output": result})
@@ -223,7 +207,6 @@ class AllInOneDispatcher:
223
  raise ValueError(f"Could not process PDF: {e}. Ensure PDF is valid and Langchain dependencies are met.")
224
 
225
  def handle_tts(self, text: str, lang: str = 'en'):
226
- """Converts text to speech and returns the path to the generated audio file."""
227
  if not isinstance(text, str):
228
  raise TypeError("Text input for TTS must be a string.")
229
  logging.info(f"Handling TTS for text: '{text[:50]}...'")
@@ -234,15 +217,9 @@ class AllInOneDispatcher:
234
  return temp_path
235
 
236
  def process_dataset_from_hub(self, dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int = 5):
237
- """
238
- Loads a dataset from Hugging Face Hub, processes a specified column
239
- for a given task, and returns results for a limited number of samples.
240
- """
241
  logging.info(f"Attempting to load dataset '{dataset_name}' (subset: {subset_name}, split: {split})...")
242
 
243
  try:
244
- # Load dataset. Using streaming=True for potentially very large datasets
245
- # and then taking a few examples. trust_remote_code is important for some datasets.
246
  if subset_name.strip():
247
  dataset = load_dataset(dataset_name, subset_name, split=split, streaming=True, trust_remote_code=True)
248
  else:
@@ -253,7 +230,7 @@ class AllInOneDispatcher:
253
  processed_results = []
254
  for i, example in enumerate(dataset):
255
  if i >= num_samples:
256
- break # Stop after processing desired number of samples
257
 
258
  if column_to_process not in example:
259
  processed_results.append({
@@ -264,23 +241,16 @@ class AllInOneDispatcher:
264
  continue
265
 
266
  input_data_for_processing = example[column_to_process]
267
- temp_file_to_clean = None # To track temporary files for cleanup
268
 
269
- # Determine the actual data type and prepare for self.process
270
- # Hugging Face datasets often load audio/image as specific objects/dicts
271
  if isinstance(input_data_for_processing, str):
272
- # It's already a string, assume text or a path
273
  pass
274
  elif isinstance(input_data_for_processing, dict) and 'array' in input_data_for_processing and 'sampling_rate' in input_data_for_processing:
275
- # This is an audio object from datasets library
276
- # Save to a temporary WAV file for self.handle_audio
277
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
278
  sf.write(tmp_audio.name, input_data_for_processing['array'], input_data_for_processing['sampling_rate'])
279
  input_data_for_processing = tmp_audio.name
280
  temp_file_to_clean = tmp_audio.name
281
  elif isinstance(input_data_for_processing, Image.Image):
282
- # This is a PIL Image object from datasets library
283
- # Save to a temporary PNG file for self.handle_image
284
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_image:
285
  input_data_for_processing.save(tmp_image.name)
286
  input_data_for_processing = tmp_image.name
@@ -291,10 +261,9 @@ class AllInOneDispatcher:
291
  "status": "error",
292
  "reason": f"Unsupported data type in column '{column_to_process}': {type(input_data_for_processing)}"
293
  })
294
- continue # Skip to next sample
295
 
296
  try:
297
- # Call the general process method of the dispatcher
298
  single_result = self.process(input_data_for_processing, task=task)
299
  processed_results.append({
300
  "sample_index": i,
@@ -311,7 +280,7 @@ class AllInOneDispatcher:
311
  })
312
  finally:
313
  if temp_file_to_clean and os.path.exists(temp_file_to_clean):
314
- os.remove(temp_file_to_clean) # Clean up temporary file
315
 
316
  return processed_results
317
 
@@ -321,20 +290,6 @@ class AllInOneDispatcher:
321
 
322
 
323
  def process(self, input_data, task=None, **kwargs):
324
- """
325
- Main entry point for the AI tool. Tries to determine input type and
326
- dispatches to the appropriate processing function.
327
-
328
- Args:
329
- input_data: Can be raw text (str) or a file path (str) for image/audio/video/pdf.
330
- task (str, optional): The specific AI task to perform.
331
- Required for non-text inputs.
332
- For text, it defaults to "sentiment-analysis".
333
- **kwargs: Additional arguments to pass to the specific handler or pipeline.
334
-
335
- Returns:
336
- The result from the AI model, or a file path for TTS.
337
- """
338
  if not isinstance(input_data, str):
339
  raise TypeError("Input data must be a string (raw text or file path).")
340
 
@@ -348,17 +303,15 @@ class AllInOneDispatcher:
348
  if not task: task = "automatic-speech-recognition"
349
  return self.handle_audio(input_data, task=task, **kwargs)
350
  elif file_extension in ['mp4', 'mov', 'avi', 'mkv']:
351
- # Video processing is a separate, more complex handler
352
  return self.handle_video(input_data)
353
  elif file_extension == 'pdf':
354
  return self.handle_pdf(input_data)
355
  else:
356
  raise ValueError(f"Unsupported file type: .{file_extension}. Or specify task for this file.")
357
  else:
358
- # Assume it's raw text if not a file path
359
  if task == "tts":
360
  return self.handle_tts(input_data, **kwargs)
361
- if not task: task = "sentiment-analysis" # Default text task
362
  return self.handle_text(input_data, task=task, **kwargs)
363
 
364
  # --- Example Usage (for local testing only - will be skipped when imported by app.py) ---
@@ -385,7 +338,7 @@ if __name__ == "__main__":
385
  tts_path = dispatcher.process(tts_text, task="tts", lang="en")
386
  print(f"TTS audio saved to: {tts_path}")
387
  if os.path.exists(tts_path):
388
- os.remove(tts_path) # Clean up generated audio
389
 
390
  # Image Examples (requires dummy image or real path)
391
  dummy_image_path = "dummy_image_for_test.png"
@@ -432,7 +385,6 @@ if __name__ == "__main__":
432
  os.remove(dummy_audio_path)
433
 
434
  # PDF Example (requires a dummy PDF or real path)
435
- # Note: Creating a dummy PDF programmatically is complex.
436
  # For testing, you'd need to place a small PDF file in the same directory.
437
  # dummy_pdf_path = "dummy.pdf"
438
  # if os.path.exists(dummy_pdf_path):
@@ -460,4 +412,3 @@ if __name__ == "__main__":
460
  print(f"Error during dataset processing example: {e}")
461
 
462
  logging.info("Local example usage complete.")
463
-
 
16
  import json # Added for handling JSON output consistently
17
 
18
  # --- Langchain Imports ---
 
19
  from langchain_community.vectorstores import FAISS
20
  from langchain_community.embeddings import HuggingFaceEmbeddings
21
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
22
  from langchain_community.document_loaders import PyPDFLoader
23
  from langchain.chains import RetrievalQA
24
+ from langchain_community.llms import HuggingFacePipeline # <--- ADD THIS LINE
25
 
26
  # --- Other Imports ---
27
  from gtts import gTTS
 
31
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
32
 
33
  # --- Global Cache for Pipelines ---
 
34
  _pipeline_cache = {}
35
 
36
  def get_pipeline(task_name, model_name=None, **kwargs):
37
  """
38
  Retrieves a Hugging Face pipeline, caching it for efficiency.
39
  """
 
40
  cache_key = f"{task_name}-{model_name}-{hash(frozenset(kwargs.items()))}"
41
  if cache_key not in _pipeline_cache:
42
  logging.info(f"Loading pipeline for task '{task_name}' with model '{model_name}'...")
43
  if model_name:
44
  _pipeline_cache[cache_key] = pipeline(task_name, model=model_name, **kwargs)
45
  else:
46
+ _pipeline_cache[cache_key] = pipeline(task_name, **kwargs)
47
  logging.info(f"Pipeline '{task_name}' loaded.")
48
  return _pipeline_cache[cache_key]
49
 
 
52
  class AllInOneDispatcher:
53
  def __init__(self):
54
  logging.info("Initializing AllInOneDispatcher...")
55
+ self.memory = []
56
 
 
 
57
  self.default_models = {
58
  "sentiment-analysis": "distilbert-base-uncased-finetuned-sst-2-english",
59
  "summarization": "sshleifer/distilbart-cnn-12-6",
60
+ "text-generation": "gpt2", # Keep gpt2 for general text generation
61
  "translation_en_to_fr": "Helsinki-NLP/opus-mt-en-fr",
62
  "image-classification": "google/vit-base-patch16-224",
63
  "object-detection": "facebook/detr-resnet-50",
64
+ "automatic-speech-recognition": "openai/whisper-tiny.en",
65
+ "rag-llm": "gpt2" # New default for the RAG LLM
66
  }
67
  logging.info("AllInOneDispatcher initialized.")
68
 
 
74
  return get_pipeline(task, model_name=final_model_name)
75
 
76
  def _is_file(self, path):
 
77
  return os.path.exists(path) and os.path.isfile(path)
78
 
79
  def handle_text(self, text: str, task: str = "sentiment-analysis", **kwargs):
 
80
  if not isinstance(text, str):
81
  raise TypeError("Text input must be a string.")
82
  logging.info(f"Handling text for task: {task}")
 
86
  return result
87
 
88
  def handle_image(self, path: str, task: str = "image-classification", **kwargs):
 
89
  if not self._is_file(path):
90
  raise FileNotFoundError(f"Image file not found: {path}")
91
  logging.info(f"Handling image for task: {task}")
 
99
  return result
100
 
101
  def handle_audio(self, path: str, task: str = "automatic-speech-recognition", **kwargs):
 
102
  if not self._is_file(path):
103
  raise FileNotFoundError(f"Audio file not found: {path}")
104
  logging.info(f"Handling audio for task: {task}")
 
 
105
  try:
106
  audio = AudioSegment.from_file(path)
107
+ audio = audio.set_channels(1).set_frame_rate(16000)
108
 
109
  buffer = io.BytesIO()
110
+ audio.export(buffer, format="wav")
111
+ buffer.seek(0)
112
 
113
+ array, sampling_rate = sf.read(buffer)
114
  if array.dtype != np.float32:
115
+ array = array.astype(np.float32)
116
 
117
  except Exception as e:
118
  logging.error(f"Error preparing audio file for processing: {e}")
 
124
  return result
125
 
126
  def handle_video(self, path: str):
 
 
 
 
 
127
  if not self._is_file(path):
128
  raise FileNotFoundError(f"Video file not found: {path}")
129
  logging.info(f"Handling video: {path}")
 
142
  ret, frame = cap.read()
143
  if not ret:
144
  break
145
+ frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
146
+ if len(frames) >= 5: break
147
  cap.release()
148
 
 
149
  audio_temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
150
  try:
 
 
 
151
  os.system(f"ffmpeg -i \"{path}\" -q:a 0 -map a \"{audio_temp_path}\" -y")
152
  if not os.path.exists(audio_temp_path) or os.path.getsize(audio_temp_path) == 0:
153
  raise RuntimeError("FFmpeg failed to extract audio or extracted empty audio.")
154
  except Exception as e:
155
  logging.error(f"FFmpeg audio extraction failed: {e}")
156
+ audio_temp_path = None
157
 
158
  image_result = None
159
  audio_result = None
160
 
161
  if frames:
162
  try:
 
163
  image_result = self.handle_image(frames[0], task="image-classification")
164
  except Exception as e:
165
  logging.warning(f"Failed to process video frame for image classification: {e}")
166
 
167
  if audio_temp_path:
168
  try:
 
169
  audio_result = self.handle_audio(audio_temp_path, task="automatic-speech-recognition")
170
  except Exception as e:
171
  logging.warning(f"Failed to process extracted audio from video: {e}")
172
  finally:
173
  if os.path.exists(audio_temp_path):
174
+ os.remove(audio_temp_path)
175
 
176
  result = {"image_analysis": image_result, "audio_analysis": audio_result}
177
  self.memory.append({"task": "video_analysis", "input": path, "output": result})
 
183
  raise FileNotFoundError(f"PDF file not found: {path}")
184
  logging.info(f"Handling PDF: {path}")
185
 
 
186
  try:
187
  loader = PyPDFLoader(path)
188
  docs = loader.load()
 
190
  split_docs = splitter.split_documents(docs)
191
  embeddings = HuggingFaceEmbeddings()
192
  vectorstore = FAISS.from_documents(split_docs, embeddings)
193
+
194
+ # --- FIX STARTS HERE ---
195
+ # Get the text generation pipeline
196
+ text_gen_pipeline = self._get_task_pipeline("text-generation", model_name=self.default_models["rag-llm"])
197
+ # Wrap it with Langchain's HuggingFacePipeline
198
+ qa_llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
199
+ # --- FIX ENDS HERE ---
200
+
201
  qa_chain = RetrievalQA.from_chain_type(llm=qa_llm, retriever=vectorstore.as_retriever())
202
  result = qa_chain.run("Summarize this document")
203
  self.memory.append({"task": "pdf_summarization", "input": path, "output": result})
 
207
  raise ValueError(f"Could not process PDF: {e}. Ensure PDF is valid and Langchain dependencies are met.")
208
 
209
  def handle_tts(self, text: str, lang: str = 'en'):
 
210
  if not isinstance(text, str):
211
  raise TypeError("Text input for TTS must be a string.")
212
  logging.info(f"Handling TTS for text: '{text[:50]}...'")
 
217
  return temp_path
218
 
219
  def process_dataset_from_hub(self, dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int = 5):
 
 
 
 
220
  logging.info(f"Attempting to load dataset '{dataset_name}' (subset: {subset_name}, split: {split})...")
221
 
222
  try:
 
 
223
  if subset_name.strip():
224
  dataset = load_dataset(dataset_name, subset_name, split=split, streaming=True, trust_remote_code=True)
225
  else:
 
230
  processed_results = []
231
  for i, example in enumerate(dataset):
232
  if i >= num_samples:
233
+ break
234
 
235
  if column_to_process not in example:
236
  processed_results.append({
 
241
  continue
242
 
243
  input_data_for_processing = example[column_to_process]
244
+ temp_file_to_clean = None
245
 
 
 
246
  if isinstance(input_data_for_processing, str):
 
247
  pass
248
  elif isinstance(input_data_for_processing, dict) and 'array' in input_data_for_processing and 'sampling_rate' in input_data_for_processing:
 
 
249
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
250
  sf.write(tmp_audio.name, input_data_for_processing['array'], input_data_for_processing['sampling_rate'])
251
  input_data_for_processing = tmp_audio.name
252
  temp_file_to_clean = tmp_audio.name
253
  elif isinstance(input_data_for_processing, Image.Image):
 
 
254
  with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_image:
255
  input_data_for_processing.save(tmp_image.name)
256
  input_data_for_processing = tmp_image.name
 
261
  "status": "error",
262
  "reason": f"Unsupported data type in column '{column_to_process}': {type(input_data_for_processing)}"
263
  })
264
+ continue
265
 
266
  try:
 
267
  single_result = self.process(input_data_for_processing, task=task)
268
  processed_results.append({
269
  "sample_index": i,
 
280
  })
281
  finally:
282
  if temp_file_to_clean and os.path.exists(temp_file_to_clean):
283
+ os.remove(temp_file_to_clean)
284
 
285
  return processed_results
286
 
 
290
 
291
 
292
  def process(self, input_data, task=None, **kwargs):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
293
  if not isinstance(input_data, str):
294
  raise TypeError("Input data must be a string (raw text or file path).")
295
 
 
303
  if not task: task = "automatic-speech-recognition"
304
  return self.handle_audio(input_data, task=task, **kwargs)
305
  elif file_extension in ['mp4', 'mov', 'avi', 'mkv']:
 
306
  return self.handle_video(input_data)
307
  elif file_extension == 'pdf':
308
  return self.handle_pdf(input_data)
309
  else:
310
  raise ValueError(f"Unsupported file type: .{file_extension}. Or specify task for this file.")
311
  else:
 
312
  if task == "tts":
313
  return self.handle_tts(input_data, **kwargs)
314
+ if not task: task = "sentiment-analysis"
315
  return self.handle_text(input_data, task=task, **kwargs)
316
 
317
  # --- Example Usage (for local testing only - will be skipped when imported by app.py) ---
 
338
  tts_path = dispatcher.process(tts_text, task="tts", lang="en")
339
  print(f"TTS audio saved to: {tts_path}")
340
  if os.path.exists(tts_path):
341
+ os.remove(tts_path)
342
 
343
  # Image Examples (requires dummy image or real path)
344
  dummy_image_path = "dummy_image_for_test.png"
 
385
  os.remove(dummy_audio_path)
386
 
387
  # PDF Example (requires a dummy PDF or real path)
 
388
  # For testing, you'd need to place a small PDF file in the same directory.
389
  # dummy_pdf_path = "dummy.pdf"
390
  # if os.path.exists(dummy_pdf_path):
 
412
  print(f"Error during dataset processing example: {e}")
413
 
414
  logging.info("Local example usage complete.")