Spaces:

garvitcpp
/

Sum-it-up

Sleeping

App Files Files Community

garvitcpp commited on Dec 8, 2024

Commit

ac4bd9a

verified ·

1 Parent(s): 5245cbd

Update src/summarizer.py

Browse files

Files changed (1) hide show

src/summarizer.py +70 -47

src/summarizer.py CHANGED Viewed

@@ -1,47 +1,70 @@
-from transformers import pipeline
-import time
-class TextSummarizer:
-    def __init__(self, model_name="facebook/bart-large-cnn"):
-        """
-        Initialize summarization pipeline
-        Args:
-            model_name (str): Hugging Face model for summarization
-        """
-        try:
-            self.summarizer = pipeline("summarization", model=model_name)
-        except Exception as e:
-            raise RuntimeError(f"Failed to load summarization model: {e}")
-    def generate_summary(self, text, max_length=400, min_length=100):
-        """
-        Generate summary for given text
-        Args:
-            text (str): Input text to summarize
-            max_length (int): Maximum length of summary
-            min_length (int): Minimum length of summary
-        Returns:
-            str: Generated summary
-        """
-        try:
-            # Validate input text
-            if not text or len(text.strip()) == 0:
-                return "No text provided for summarization."
-            # Ensure min_length is less than max_length
-            min_length = min(min_length, max_length)
-            # Generate summary
-            summary = self.summarizer(
-                text,
-                max_length=max_length,
-                min_length=min_length,
-                do_sample=False
-            )[0]['summary_text']
-            return summary
-        except Exception as e:
-            return f"Error during summarization: {e}"

+from transformers import pipeline
+import torch
+import logging
+class TextSummarizer:
+    """Wrap a Hugging Face summarization pipeline behind a plain-text API."""
+    # Width, in characters, of the windows long inputs are split into.
+    # NOTE: the model's input limit is counted in tokens, not characters, so
+    # this is only an approximation; generate_summary also asks the pipeline
+    # to truncate any chunk that still tokenizes too long.
+    MAX_CHUNK_CHARS = 1024
+    def __init__(self, model_name="facebook/bart-large-cnn"):
+        """
+        Initialize summarization pipeline
+        Args:
+            model_name (str): Hugging Face model for summarization
+        Raises:
+            RuntimeError: If the pipeline cannot be created; the original
+                exception is attached as the cause
+        """
+        try:
+            # Pipeline device index: 0 is the first CUDA GPU, -1 is the CPU
+            device = 0 if torch.cuda.is_available() else -1
+            logging.info(f"Using device: {'cuda' if device == 0 else 'cpu'}")
+            # float32 is full (not reduced) precision; it is requested
+            # explicitly on both CPU and GPU
+            self.summarizer = pipeline(
+                "summarization",
+                model=model_name,
+                device=device,
+                torch_dtype=torch.float32
+            )
+            logging.info("Summarization pipeline initialized successfully")
+        except Exception as e:
+            logging.error("Failed to load summarization model: %s", e)
+            # Chain the cause so the underlying traceback is not lost
+            raise RuntimeError(f"Failed to load summarization model: {e}") from e
+    def generate_summary(self, text, max_length=400, min_length=100):
+        """
+        Generate summary for given text
+        Long inputs are cut into MAX_CHUNK_CHARS-character chunks, each
+        non-blank chunk is summarized separately, and the partial summaries
+        are joined with single spaces.
+        Args:
+            text (str): Input text to summarize
+            max_length (int): Maximum length of summary; shared evenly
+                between the chunks that are summarized
+            min_length (int): Minimum length of summary; shared the same way
+                and never allowed to exceed the per-chunk maximum
+        Returns:
+            str: Generated summary, a fixed notice when no text is given, or
+                an "Error during summarization: ..." message on failure
+        """
+        try:
+            # Validate input text
+            if not text or not text.strip():
+                return "No text provided for summarization."
+            # Ensure min_length never exceeds max_length
+            min_length = min(min_length, max_length)
+            step = self.MAX_CHUNK_CHARS
+            # Drop whitespace-only windows up front so they do not take a
+            # share of the length budget without producing any summary
+            chunks = [
+                text[start:start + step]
+                for start in range(0, len(text), step)
+                if text[start:start + step].strip()
+            ]
+            # Share the budget across the chunks actually summarized. Floor
+            # division reaches 0 once there are more chunks than max_length,
+            # so keep the per-chunk maximum at 1 or more.
+            chunk_max = max(1, max_length // len(chunks))
+            chunk_min = min(min_length // len(chunks), chunk_max)
+            summaries = [
+                self.summarizer(
+                    chunk,
+                    max_length=chunk_max,
+                    min_length=chunk_min,
+                    do_sample=False,
+                    # A chunk bounded in characters can still tokenize past
+                    # the model's input limit; let the pipeline clip it
+                    truncation=True
+                )[0]['summary_text']
+                for chunk in chunks
+            ]
+            return " ".join(summaries)
+        except Exception as e:
+            logging.error("Error during summarization: %s", e)
+            return f"Error during summarization: {e}"