from transformers import pipeline
import torch
import logging


class TextSummarizer:
    def __init__(self, model_name="facebook/bart-large-cnn"):
        """
        Initialize the summarization pipeline.

        Args:
            model_name (str): Hugging Face model for summarization
        """
        try:
            # Configure device: 0 selects the first GPU, -1 falls back to CPU
            device = 0 if torch.cuda.is_available() else -1
            logging.info(f"Using device: {'cuda' if device == 0 else 'cpu'}")

            # Initialize the pipeline with an explicit device and dtype
            self.summarizer = pipeline(
                "summarization",
                model=model_name,
                device=device,
                torch_dtype=torch.float32,
            )
            logging.info("Summarization pipeline initialized successfully")
        except Exception as e:
            logging.error(f"Failed to load summarization model: {str(e)}")
            raise RuntimeError(f"Failed to load summarization model: {str(e)}")

    def generate_summary(self, text, max_length=400, min_length=100):
        """
        Generate a summary for the given text.

        Args:
            text (str): Input text to summarize
            max_length (int): Maximum length of the summary
            min_length (int): Minimum length of the summary

        Returns:
            str: Generated summary
        """
        try:
            # Validate input text
            if not text or len(text.strip()) == 0:
                return "No text provided for summarization."

            # Ensure min_length does not exceed max_length
            min_length = min(min_length, max_length)

            # Split long inputs into chunks. BART accepts at most 1024 tokens;
            # slicing by characters is a rough approximation of that limit.
            max_chunk_length = 1024
            chunks = [text[i:i + max_chunk_length]
                      for i in range(0, len(text), max_chunk_length)]

            # Distribute the length budget across chunks, guarding against
            # a zero-length budget when there are many chunks
            per_chunk_max = max(1, max_length // len(chunks))
            per_chunk_min = min(min_length // len(chunks), per_chunk_max)

            summaries = []
            for chunk in chunks:
                if chunk.strip():
                    summary = self.summarizer(
                        chunk,
                        max_length=per_chunk_max,
                        min_length=per_chunk_min,
                        do_sample=False,
                    )[0]["summary_text"]
                    summaries.append(summary)

            return " ".join(summaries)
        except Exception as e:
            logging.error(f"Error during summarization: {str(e)}")
            return f"Error during summarization: {str(e)}"
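

# Minimal usage sketch (an assumption, not part of the original class): the
# sample text and the chosen max_length/min_length values are placeholders
# for illustration only.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    # Uses the default facebook/bart-large-cnn model defined above
    summarizer = TextSummarizer()

    sample_text = (
        "The transformer architecture has become the dominant approach in natural "
        "language processing. It relies on self-attention to model relationships "
        "between tokens, which allows efficient parallel training on large corpora. "
        "Pretrained encoder-decoder models such as BART can be fine-tuned for "
        "abstractive summarization, producing fluent summaries of long documents."
    )

    summary = summarizer.generate_summary(sample_text, max_length=130, min_length=30)
    print(summary)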