| from transformers import pipeline |
| import torch |
| import logging |
|
|
class TextSummarizer:
    """Summarize text with a Hugging Face ``summarization`` pipeline.

    Long inputs are cut into chunks of at most ``MAX_CHUNK_CHARS`` characters,
    each chunk is summarized on its own, and the partial summaries are joined
    with single spaces.
    """

    # Upper bound on the size of one chunk, measured in characters (not in
    # model tokens).
    MAX_CHUNK_CHARS = 1024

    # Floor applied to the per-chunk ``max_length`` once the caller's budget
    # has been divided across many chunks. Without it the integer division
    # reaches 0 for long inputs and the pipeline call fails.
    MIN_CHUNK_SUMMARY_LENGTH = 16

    def __init__(self, model_name="facebook/bart-large-cnn"):
        """
        Initialize summarization pipeline

        Args:
            model_name (str): Hugging Face model for summarization

        Raises:
            RuntimeError: If the pipeline cannot be created. The original
                exception is attached as ``__cause__``.
        """
        try:
            # Pipeline device convention used here: 0 selects the first CUDA
            # device, -1 selects the CPU.
            device = 0 if torch.cuda.is_available() else -1
            logging.info("Using device: %s", "cuda" if device == 0 else "cpu")

            self.summarizer = pipeline(
                "summarization",
                model=model_name,
                device=device,
                torch_dtype=torch.float32,
            )
            logging.info("Summarization pipeline initialized successfully")

        except Exception as e:
            logging.error("Failed to load summarization model: %s", e)
            # Chain the cause so the underlying traceback is not lost.
            raise RuntimeError(f"Failed to load summarization model: {e}") from e

    @staticmethod
    def _split_into_chunks(text, max_chunk_length):
        """Split *text* into non-empty, stripped chunks of bounded length.

        A chunk boundary is moved back to the nearest preceding whitespace so
        that words are not cut in half. A run of non-whitespace characters
        longer than ``max_chunk_length`` is hard-split at the limit.

        Args:
            text (str): Text to split.
            max_chunk_length (int): Maximum chunk size in characters (>= 1).

        Returns:
            list: Chunks in original order; whitespace-only spans are dropped.
        """
        chunks = []
        text_length = len(text)
        start = 0
        while start < text_length:
            end = min(start + max_chunk_length, text_length)
            # Only look for a better boundary when the cut would land inside
            # a word (i.e. the character just after the cut is not a space).
            if end < text_length and not text[end].isspace():
                cut = end
                while cut > start and not text[cut - 1].isspace():
                    cut -= 1
                # cut == start means the window holds no whitespace at all;
                # keep the hard split so the loop always makes progress.
                if cut > start:
                    end = cut
            piece = text[start:end].strip()
            if piece:
                chunks.append(piece)
            start = end
        return chunks

    def generate_summary(self, text, max_length=400, min_length=100):
        """
        Generate summary for given text

        The length budget is divided evenly across the chunks that are
        actually summarized. For very long inputs the per-chunk maximum is
        floored at ``MIN_CHUNK_SUMMARY_LENGTH`` (or ``max_length`` if that is
        smaller), so the combined summary may exceed ``max_length``.

        Args:
            text (str): Input text to summarize
            max_length (int): Maximum length of summary
            min_length (int): Minimum length of summary

        Returns:
            str: Generated summary. A fixed message is returned for empty
            input, and an ``"Error during summarization: ..."`` message is
            returned (not raised) if summarization fails.
        """
        try:
            if not text or not text.strip():
                return "No text provided for summarization."

            # The minimum may never exceed the maximum.
            min_length = min(min_length, max_length)

            chunks = self._split_into_chunks(text, self.MAX_CHUNK_CHARS)
            # Count only the chunks that will really be summarized, so that
            # skipped whitespace-only spans do not shrink the budget.
            num_chunks = len(chunks)

            # Never let the per-chunk maximum drop to 0; a single chunk still
            # receives exactly the caller's max_length.
            chunk_max = max(
                max_length // num_chunks,
                min(max_length, self.MIN_CHUNK_SUMMARY_LENGTH),
                1,
            )
            # min_length <= max_length, so this is always <= chunk_max.
            chunk_min = max(min_length // num_chunks, 0)

            summaries = [
                self.summarizer(
                    chunk,
                    max_length=chunk_max,
                    min_length=chunk_min,
                    do_sample=False,
                )[0]["summary_text"]
                for chunk in chunks
            ]

            return " ".join(summaries)

        except Exception as e:
            # Best-effort contract: callers receive an error string instead
            # of an exception.
            logging.error("Error during summarization: %s", e)
            return f"Error during summarization: {e}"