Spaces:

TarSh8654
/

Modified_tool

Sleeping

App Files Files Community

TarSh8654 committed on Jul 14, 2025

Commit

ccafac2

verified ·

1 Parent(s): 8a86ca9

Rename summarizer_tool.py to modified_summarizer_tool.py

Browse files

Files changed (1) hide show

summarizer_tool.py → modified_summarizer_tool.py +30 -79

summarizer_tool.py → modified_summarizer_tool.py RENAMED Viewed

@@ -16,12 +16,12 @@ import tempfile
 import json # Added for handling JSON output consistently
 # --- Langchain Imports ---
-# Ensure these are correct based on Langchain's modularization
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.text_splitter import RecursiveCharacterTextSplitter # This one is still in langchain
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.chains import RetrievalQA
 # --- Other Imports ---
 from gtts import gTTS
@@ -31,21 +31,19 @@ from datasets import load_dataset, Audio # Added for dataset loading
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # --- Global Cache for Pipelines ---
-# This prevents reloading the same model multiple times
 _pipeline_cache = {}
 def get_pipeline(task_name, model_name=None, **kwargs):
     """
     Retrieves a Hugging Face pipeline, caching it for efficiency.
     """
-    # Create a unique key for the cache based on task, model, and kwargs
     cache_key = f"{task_name}-{model_name}-{hash(frozenset(kwargs.items()))}"
     if cache_key not in _pipeline_cache:
         logging.info(f"Loading pipeline for task '{task_name}' with model '{model_name}'...")
         if model_name:
             _pipeline_cache[cache_key] = pipeline(task_name, model=model_name, **kwargs)
         else:
-            _pipeline_cache[cache_key] = pipeline(task_name, **kwargs)  # Uses default model for task
         logging.info(f"Pipeline '{task_name}' loaded.")
     return _pipeline_cache[cache_key]
@@ -54,19 +52,17 @@ def get_pipeline(task_name, model_name=None, **kwargs):
 class AllInOneDispatcher:
     def __init__(self):
         logging.info("Initializing AllInOneDispatcher...")
-        self.memory = [] # For storing interaction history (optional)
-        # Define default models for various tasks.
-        # These will be loaded on demand via get_pipeline.
         self.default_models = {
             "sentiment-analysis": "distilbert-base-uncased-finetuned-sst-2-english",
             "summarization": "sshleifer/distilbart-cnn-12-6",
-            "text-generation": "gpt2",
             "translation_en_to_fr": "Helsinki-NLP/opus-mt-en-fr",
             "image-classification": "google/vit-base-patch16-224",
             "object-detection": "facebook/detr-resnet-50",
-            "automatic-speech-recognition": "openai/whisper-tiny.en", # For English ASR
-            # Add other models/tasks as needed
         }
         logging.info("AllInOneDispatcher initialized.")
@@ -78,11 +74,9 @@ class AllInOneDispatcher:
         return get_pipeline(task, model_name=final_model_name)
     def _is_file(self, path):
-        """Checks if the given path exists and is a file."""
         return os.path.exists(path) and os.path.isfile(path)
     def handle_text(self, text: str, task: str = "sentiment-analysis", **kwargs):
-        """Processes text input for a given NLP task."""
         if not isinstance(text, str):
             raise TypeError("Text input must be a string.")
         logging.info(f"Handling text for task: {task}")
@@ -92,7 +86,6 @@ class AllInOneDispatcher:
         return result
     def handle_image(self, path: str, task: str = "image-classification", **kwargs):
-        """Processes image file input for a given computer vision task."""
         if not self._is_file(path):
             raise FileNotFoundError(f"Image file not found: {path}")
         logging.info(f"Handling image for task: {task}")
@@ -106,23 +99,20 @@ class AllInOneDispatcher:
         return result
     def handle_audio(self, path: str, task: str = "automatic-speech-recognition", **kwargs):
-        """Processes audio file input for a given audio task."""
         if not self._is_file(path):
             raise FileNotFoundError(f"Audio file not found: {path}")
         logging.info(f"Handling audio for task: {task}")
-        # Whisper models expect audio in a specific format (16kHz, mono, float32)
         try:
             audio = AudioSegment.from_file(path)
-            audio = audio.set_channels(1).set_frame_rate(16000) # Convert to mono, 16kHz
             buffer = io.BytesIO()
-            audio.export(buffer, format="wav") # Export to WAV in memory
-            buffer.seek(0) # Rewind buffer
-            array, sampling_rate = sf.read(buffer) # Read with soundfile
             if array.dtype != np.float32:
-                array = array.astype(np.float32) # Ensure float32
         except Exception as e:
             logging.error(f"Error preparing audio file for processing: {e}")
@@ -134,11 +124,6 @@ class AllInOneDispatcher:
         return result
     def handle_video(self, path: str):
-        """
-        Processes video file input. This is a limited implementation:
-        Extracts first few frames for image analysis and audio for ASR.
-        Requires OpenCV (cv2) and system-wide ffmpeg.
-        """
         if not self._is_file(path):
             raise FileNotFoundError(f"Video file not found: {path}")
         logging.info(f"Handling video: {path}")
@@ -157,42 +142,36 @@ class AllInOneDispatcher:
             ret, frame = cap.read()
             if not ret:
                 break
-            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))) # Convert BGR to RGB for PIL
-            if len(frames) >= 5: break # Process only first 5 frames for efficiency
         cap.release()
-        # Extract audio from video
         audio_temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
         try:
-            # Using os.system for ffmpeg call requires ffmpeg to be in PATH
-            # This is a common way but can be less robust than a Python wrapper.
-            # Hugging Face Spaces typically has ffmpeg.
             os.system(f"ffmpeg -i \"{path}\" -q:a 0 -map a \"{audio_temp_path}\" -y")
             if not os.path.exists(audio_temp_path) or os.path.getsize(audio_temp_path) == 0:
                 raise RuntimeError("FFmpeg failed to extract audio or extracted empty audio.")
         except Exception as e:
             logging.error(f"FFmpeg audio extraction failed: {e}")
-            audio_temp_path = None # Indicate failure
         image_result = None
         audio_result = None
         if frames:
             try:
-                # Process the first frame for image classification
                 image_result = self.handle_image(frames[0], task="image-classification")
             except Exception as e:
                 logging.warning(f"Failed to process video frame for image classification: {e}")
         if audio_temp_path:
             try:
-                # Process the extracted audio for ASR
                 audio_result = self.handle_audio(audio_temp_path, task="automatic-speech-recognition")
             except Exception as e:
                 logging.warning(f"Failed to process extracted audio from video: {e}")
             finally:
                 if os.path.exists(audio_temp_path):
-                    os.remove(audio_temp_path) # Clean up temp audio file
         result = {"image_analysis": image_result, "audio_analysis": audio_result}
         self.memory.append({"task": "video_analysis", "input": path, "output": result})
@@ -204,7 +183,6 @@ class AllInOneDispatcher:
             raise FileNotFoundError(f"PDF file not found: {path}")
         logging.info(f"Handling PDF: {path}")
-        # RAG components
         try:
             loader = PyPDFLoader(path)
             docs = loader.load()
@@ -212,8 +190,14 @@ class AllInOneDispatcher:
             split_docs = splitter.split_documents(docs)
             embeddings = HuggingFaceEmbeddings()
             vectorstore = FAISS.from_documents(split_docs, embeddings)
-            # Using a text-generation pipeline as the LLM for RetrievalQA
-            qa_llm = self._get_task_pipeline("text-generation", model_name="gpt2") # Using a smaller model for RAG LLM
             qa_chain = RetrievalQA.from_chain_type(llm=qa_llm, retriever=vectorstore.as_retriever())
             result = qa_chain.run("Summarize this document")
             self.memory.append({"task": "pdf_summarization", "input": path, "output": result})
@@ -223,7 +207,6 @@ class AllInOneDispatcher:
             raise ValueError(f"Could not process PDF: {e}. Ensure PDF is valid and Langchain dependencies are met.")
     def handle_tts(self, text: str, lang: str = 'en'):
-        """Converts text to speech and returns the path to the generated audio file."""
         if not isinstance(text, str):
             raise TypeError("Text input for TTS must be a string.")
         logging.info(f"Handling TTS for text: '{text[:50]}...'")
@@ -234,15 +217,9 @@ class AllInOneDispatcher:
         return temp_path
     def process_dataset_from_hub(self, dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int = 5):
-        """
-        Loads a dataset from Hugging Face Hub, processes a specified column
-        for a given task, and returns results for a limited number of samples.
-        """
         logging.info(f"Attempting to load dataset '{dataset_name}' (subset: {subset_name}, split: {split})...")
         try:
-            # Load dataset. Using streaming=True for potentially very large datasets
-            # and then taking a few examples. trust_remote_code is important for some datasets.
             if subset_name.strip():
                 dataset = load_dataset(dataset_name, subset_name, split=split, streaming=True, trust_remote_code=True)
             else:
@@ -253,7 +230,7 @@ class AllInOneDispatcher:
             processed_results = []
             for i, example in enumerate(dataset):
                 if i >= num_samples:
-                    break # Stop after processing desired number of samples
                 if column_to_process not in example:
                     processed_results.append({
@@ -264,23 +241,16 @@ class AllInOneDispatcher:
                     continue
                 input_data_for_processing = example[column_to_process]
-                temp_file_to_clean = None # To track temporary files for cleanup
-                # Determine the actual data type and prepare for self.process
-                # Hugging Face datasets often load audio/image as specific objects/dicts
                 if isinstance(input_data_for_processing, str):
-                    # It's already a string, assume text or a path
                     pass
                 elif isinstance(input_data_for_processing, dict) and 'array' in input_data_for_processing and 'sampling_rate' in input_data_for_processing:
-                    # This is an audio object from datasets library
-                    # Save to a temporary WAV file for self.handle_audio
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
                         sf.write(tmp_audio.name, input_data_for_processing['array'], input_data_for_processing['sampling_rate'])
                         input_data_for_processing = tmp_audio.name
                         temp_file_to_clean = tmp_audio.name
                 elif isinstance(input_data_for_processing, Image.Image):
-                    # This is a PIL Image object from datasets library
-                    # Save to a temporary PNG file for self.handle_image
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_image:
                         input_data_for_processing.save(tmp_image.name)
                         input_data_for_processing = tmp_image.name
@@ -291,10 +261,9 @@ class AllInOneDispatcher:
                         "status": "error",
                         "reason": f"Unsupported data type in column '{column_to_process}': {type(input_data_for_processing)}"
                     })
-                    continue # Skip to next sample
                 try:
-                    # Call the general process method of the dispatcher
                     single_result = self.process(input_data_for_processing, task=task)
                     processed_results.append({
                         "sample_index": i,
@@ -311,7 +280,7 @@ class AllInOneDispatcher:
                     })
                 finally:
                     if temp_file_to_clean and os.path.exists(temp_file_to_clean):
-                        os.remove(temp_file_to_clean) # Clean up temporary file
             return processed_results
@@ -321,20 +290,6 @@ class AllInOneDispatcher:
     def process(self, input_data, task=None, **kwargs):
-        """
-        Main entry point for the AI tool. Tries to determine input type and
-        dispatches to the appropriate processing function.
-        Args:
-            input_data: Can be raw text (str) or a file path (str) for image/audio/video/pdf.
-            task (str, optional): The specific AI task to perform.
-                                  Required for non-text inputs.
-                                  For text, it defaults to "sentiment-analysis".
-            **kwargs: Additional arguments to pass to the specific handler or pipeline.
-        Returns:
-            The result from the AI model, or a file path for TTS.
-        """
         if not isinstance(input_data, str):
             raise TypeError("Input data must be a string (raw text or file path).")
@@ -348,17 +303,15 @@ class AllInOneDispatcher:
                 if not task: task = "automatic-speech-recognition"
                 return self.handle_audio(input_data, task=task, **kwargs)
             elif file_extension in ['mp4', 'mov', 'avi', 'mkv']:
-                # Video processing is a separate, more complex handler
                 return self.handle_video(input_data)
             elif file_extension == 'pdf':
                 return self.handle_pdf(input_data)
             else:
                 raise ValueError(f"Unsupported file type: .{file_extension}. Or specify task for this file.")
         else:
-            # Assume it's raw text if not a file path
             if task == "tts":
                 return self.handle_tts(input_data, **kwargs)
-            if not task: task = "sentiment-analysis" # Default text task
             return self.handle_text(input_data, task=task, **kwargs)
 # --- Example Usage (for local testing only - will be skipped when imported by app.py) ---
@@ -385,7 +338,7 @@ if __name__ == "__main__":
     tts_path = dispatcher.process(tts_text, task="tts", lang="en")
     print(f"TTS audio saved to: {tts_path}")
     if os.path.exists(tts_path):
-        os.remove(tts_path) # Clean up generated audio
     # Image Examples (requires dummy image or real path)
     dummy_image_path = "dummy_image_for_test.png"
@@ -432,7 +385,6 @@ if __name__ == "__main__":
             os.remove(dummy_audio_path)
     # PDF Example (requires a dummy PDF or real path)
-    # Note: Creating a dummy PDF programmatically is complex.
     # For testing, you'd need to place a small PDF file in the same directory.
     # dummy_pdf_path = "dummy.pdf"
     # if os.path.exists(dummy_pdf_path):
@@ -460,4 +412,3 @@ if __name__ == "__main__":
         print(f"Error during dataset processing example: {e}")
     logging.info("Local example usage complete.")

 import json # Added for handling JSON output consistently
 # --- Langchain Imports ---
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.chains import RetrievalQA
+from langchain_community.llms import HuggingFacePipeline # Wraps a transformers pipeline so it can be passed as the LLM to RetrievalQA
 # --- Other Imports ---
 from gtts import gTTS
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # --- Global Cache for Pipelines ---
 _pipeline_cache = {}
 def get_pipeline(task_name, model_name=None, **kwargs):
     """
     Retrieves a Hugging Face pipeline, caching it for efficiency.
     """
     cache_key = f"{task_name}-{model_name}-{hash(frozenset(kwargs.items()))}"
     if cache_key not in _pipeline_cache:
         logging.info(f"Loading pipeline for task '{task_name}' with model '{model_name}'...")
         if model_name:
             _pipeline_cache[cache_key] = pipeline(task_name, model=model_name, **kwargs)
         else:
+            _pipeline_cache[cache_key] = pipeline(task_name, **kwargs)
         logging.info(f"Pipeline '{task_name}' loaded.")
     return _pipeline_cache[cache_key]
 class AllInOneDispatcher:
     def __init__(self):
         logging.info("Initializing AllInOneDispatcher...")
+        self.memory = []
         self.default_models = {
             "sentiment-analysis": "distilbert-base-uncased-finetuned-sst-2-english",
             "summarization": "sshleifer/distilbart-cnn-12-6",
+            "text-generation": "gpt2", # Keep gpt2 for general text generation
             "translation_en_to_fr": "Helsinki-NLP/opus-mt-en-fr",
             "image-classification": "google/vit-base-patch16-224",
             "object-detection": "facebook/detr-resnet-50",
+            "automatic-speech-recognition": "openai/whisper-tiny.en",
+            "rag-llm": "gpt2" # New default for the RAG LLM
         }
         logging.info("AllInOneDispatcher initialized.")
         return get_pipeline(task, model_name=final_model_name)
     def _is_file(self, path):
         return os.path.exists(path) and os.path.isfile(path)
     def handle_text(self, text: str, task: str = "sentiment-analysis", **kwargs):
         if not isinstance(text, str):
             raise TypeError("Text input must be a string.")
         logging.info(f"Handling text for task: {task}")
         return result
     def handle_image(self, path: str, task: str = "image-classification", **kwargs):
         if not self._is_file(path):
             raise FileNotFoundError(f"Image file not found: {path}")
         logging.info(f"Handling image for task: {task}")
         return result
     def handle_audio(self, path: str, task: str = "automatic-speech-recognition", **kwargs):
         if not self._is_file(path):
             raise FileNotFoundError(f"Audio file not found: {path}")
         logging.info(f"Handling audio for task: {task}")
         try:
             audio = AudioSegment.from_file(path)
+            audio = audio.set_channels(1).set_frame_rate(16000)
             buffer = io.BytesIO()
+            audio.export(buffer, format="wav")
+            buffer.seek(0)
+            array, sampling_rate = sf.read(buffer)
             if array.dtype != np.float32:
+                array = array.astype(np.float32)
         except Exception as e:
             logging.error(f"Error preparing audio file for processing: {e}")
         return result
     def handle_video(self, path: str):
         if not self._is_file(path):
             raise FileNotFoundError(f"Video file not found: {path}")
         logging.info(f"Handling video: {path}")
             ret, frame = cap.read()
             if not ret:
                 break
+            frames.append(Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)))
+            if len(frames) >= 5: break
         cap.release()
         audio_temp_path = tempfile.NamedTemporaryFile(delete=False, suffix=".wav").name
         try:
             os.system(f"ffmpeg -i \"{path}\" -q:a 0 -map a \"{audio_temp_path}\" -y")
             if not os.path.exists(audio_temp_path) or os.path.getsize(audio_temp_path) == 0:
                 raise RuntimeError("FFmpeg failed to extract audio or extracted empty audio.")
         except Exception as e:
             logging.error(f"FFmpeg audio extraction failed: {e}")
+            audio_temp_path = None
         image_result = None
         audio_result = None
         if frames:
             try:
                 image_result = self.handle_image(frames[0], task="image-classification")
             except Exception as e:
                 logging.warning(f"Failed to process video frame for image classification: {e}")
         if audio_temp_path:
             try:
                 audio_result = self.handle_audio(audio_temp_path, task="automatic-speech-recognition")
             except Exception as e:
                 logging.warning(f"Failed to process extracted audio from video: {e}")
             finally:
                 if os.path.exists(audio_temp_path):
+                    os.remove(audio_temp_path)
         result = {"image_analysis": image_result, "audio_analysis": audio_result}
         self.memory.append({"task": "video_analysis", "input": path, "output": result})
             raise FileNotFoundError(f"PDF file not found: {path}")
         logging.info(f"Handling PDF: {path}")
         try:
             loader = PyPDFLoader(path)
             docs = loader.load()
             split_docs = splitter.split_documents(docs)
             embeddings = HuggingFaceEmbeddings()
             vectorstore = FAISS.from_documents(split_docs, embeddings)
+            # --- FIX STARTS HERE ---
+            # Get the text generation pipeline
+            text_gen_pipeline = self._get_task_pipeline("text-generation", model_name=self.default_models["rag-llm"])
+            # Wrap it with Langchain's HuggingFacePipeline
+            qa_llm = HuggingFacePipeline(pipeline=text_gen_pipeline)
+            # --- FIX ENDS HERE ---
             qa_chain = RetrievalQA.from_chain_type(llm=qa_llm, retriever=vectorstore.as_retriever())
             result = qa_chain.run("Summarize this document")
             self.memory.append({"task": "pdf_summarization", "input": path, "output": result})
             raise ValueError(f"Could not process PDF: {e}. Ensure PDF is valid and Langchain dependencies are met.")
     def handle_tts(self, text: str, lang: str = 'en'):
         if not isinstance(text, str):
             raise TypeError("Text input for TTS must be a string.")
         logging.info(f"Handling TTS for text: '{text[:50]}...'")
         return temp_path
     def process_dataset_from_hub(self, dataset_name: str, subset_name: str, split: str, column_to_process: str, task: str, num_samples: int = 5):
         logging.info(f"Attempting to load dataset '{dataset_name}' (subset: {subset_name}, split: {split})...")
         try:
             if subset_name.strip():
                 dataset = load_dataset(dataset_name, subset_name, split=split, streaming=True, trust_remote_code=True)
             else:
             processed_results = []
             for i, example in enumerate(dataset):
                 if i >= num_samples:
+                    break
                 if column_to_process not in example:
                     processed_results.append({
                     continue
                 input_data_for_processing = example[column_to_process]
+                temp_file_to_clean = None
                 if isinstance(input_data_for_processing, str):
                     pass
                 elif isinstance(input_data_for_processing, dict) and 'array' in input_data_for_processing and 'sampling_rate' in input_data_for_processing:
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_audio:
                         sf.write(tmp_audio.name, input_data_for_processing['array'], input_data_for_processing['sampling_rate'])
                         input_data_for_processing = tmp_audio.name
                         temp_file_to_clean = tmp_audio.name
                 elif isinstance(input_data_for_processing, Image.Image):
                     with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp_image:
                         input_data_for_processing.save(tmp_image.name)
                         input_data_for_processing = tmp_image.name
                         "status": "error",
                         "reason": f"Unsupported data type in column '{column_to_process}': {type(input_data_for_processing)}"
                     })
+                    continue
                 try:
                     single_result = self.process(input_data_for_processing, task=task)
                     processed_results.append({
                         "sample_index": i,
                     })
                 finally:
                     if temp_file_to_clean and os.path.exists(temp_file_to_clean):
+                        os.remove(temp_file_to_clean)
             return processed_results
     def process(self, input_data, task=None, **kwargs):
         if not isinstance(input_data, str):
             raise TypeError("Input data must be a string (raw text or file path).")
                 if not task: task = "automatic-speech-recognition"
                 return self.handle_audio(input_data, task=task, **kwargs)
             elif file_extension in ['mp4', 'mov', 'avi', 'mkv']:
                 return self.handle_video(input_data)
             elif file_extension == 'pdf':
                 return self.handle_pdf(input_data)
             else:
                 raise ValueError(f"Unsupported file type: .{file_extension}. Or specify task for this file.")
         else:
             if task == "tts":
                 return self.handle_tts(input_data, **kwargs)
+            if not task: task = "sentiment-analysis"
             return self.handle_text(input_data, task=task, **kwargs)
 # --- Example Usage (for local testing only - will be skipped when imported by app.py) ---
     tts_path = dispatcher.process(tts_text, task="tts", lang="en")
     print(f"TTS audio saved to: {tts_path}")
     if os.path.exists(tts_path):
+        os.remove(tts_path)
     # Image Examples (requires dummy image or real path)
     dummy_image_path = "dummy_image_for_test.png"
             os.remove(dummy_audio_path)
     # PDF Example (requires a dummy PDF or real path)
     # For testing, you'd need to place a small PDF file in the same directory.
     # dummy_pdf_path = "dummy.pdf"
     # if os.path.exists(dummy_pdf_path):
         print(f"Error during dataset processing example: {e}")
     logging.info("Local example usage complete.")