Spaces:
Running
Running
| """ | |
| Base Summarizer Class | |
| Defines the interface for all summarization models | |
| Implements Strategy Design Pattern for interchangeable algorithms | |
| """ | |
| from abc import ABC, abstractmethod | |
| from typing import Dict, Any, Optional, List | |
| import time | |
| import logging | |
| # Setup logging | |
| logging.basicConfig(level=logging.INFO) | |
| logger = logging.getLogger(__name__) | |
| class BaseSummarizer(ABC): | |
| """ | |
| Abstract base class for all summarization models. | |
| Implements common functionality and defines interface. | |
| Design Pattern: Strategy Pattern | |
| - Allows switching between different summarization algorithms | |
| - Ensures consistent interface across models | |
| """ | |
| def __init__(self, model_name: str, model_type: str): | |
| """ | |
| Initialize base summarizer | |
| Args: | |
| model_name: Name of the model (e.g., "TextRank", "BART") | |
| model_type: Type of summarization ("Extractive" or "Abstractive") | |
| """ | |
| self.model_name = model_name | |
| self.model_type = model_type | |
| self.is_initialized = False | |
| self.stats = { | |
| 'total_summarizations': 0, | |
| 'total_processing_time': 0.0, | |
| 'average_processing_time': 0.0 | |
| } | |
| logger.info(f"Initializing {model_name} ({model_type}) summarizer") | |
| def summarize(self, text: str, **kwargs) -> str: | |
| """ | |
| Generate summary from input text. | |
| Must be implemented by all subclasses. | |
| Args: | |
| text: Input text to summarize | |
| **kwargs: Additional parameters specific to each model | |
| Returns: | |
| Generated summary string | |
| """ | |
| pass | |
| def summarize_with_metrics(self, text: str, **kwargs) -> Dict[str, Any]: | |
| """ | |
| Summarize text and return detailed metrics | |
| Args: | |
| text: Input text to summarize | |
| **kwargs: Model-specific parameters | |
| Returns: | |
| Dictionary containing summary and metadata | |
| """ | |
| start_time = time.time() | |
| # Generate summary | |
| summary = self.summarize(text, **kwargs) | |
| # Calculate metrics | |
| processing_time = time.time() - start_time | |
| self._update_stats(processing_time) | |
| return { | |
| 'summary': summary, | |
| 'metadata': { | |
| 'model_name': self.model_name, | |
| 'model_type': self.model_type, | |
| 'processing_time': processing_time, | |
| 'input_length': len(text.split()), | |
| 'summary_length': len(summary.split()), | |
| 'compression_ratio': len(summary.split()) / len(text.split()) if len(text.split()) > 0 else 0, | |
| 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S') | |
| } | |
| } | |
| def batch_summarize(self, texts: List[str], **kwargs) -> List[Dict[str, Any]]: | |
| """ | |
| Summarize multiple texts | |
| Args: | |
| texts: List of texts to summarize | |
| **kwargs: Model-specific parameters | |
| Returns: | |
| List of dictionaries with summaries and metadata | |
| """ | |
| logger.info(f"Batch summarizing {len(texts)} texts with {self.model_name}") | |
| results = [] | |
| for idx, text in enumerate(texts): | |
| logger.info(f"Processing text {idx + 1}/{len(texts)}") | |
| result = self.summarize_with_metrics(text, **kwargs) | |
| result['metadata']['batch_index'] = idx | |
| results.append(result) | |
| return results | |
| def _update_stats(self, processing_time: float): | |
| """Update internal statistics""" | |
| self.stats['total_summarizations'] += 1 | |
| self.stats['total_processing_time'] += processing_time | |
| self.stats['average_processing_time'] = ( | |
| self.stats['total_processing_time'] / self.stats['total_summarizations'] | |
| ) | |
| def get_model_info(self) -> Dict[str, Any]: | |
| """ | |
| Get detailed model information | |
| Returns: | |
| Dictionary with model specifications | |
| """ | |
| return { | |
| 'name': self.model_name, | |
| 'type': self.model_type, | |
| 'statistics': self.stats.copy(), | |
| 'is_initialized': self.is_initialized | |
| } | |
| def reset_stats(self): | |
| """Reset usage statistics""" | |
| self.stats = { | |
| 'total_summarizations': 0, | |
| 'total_processing_time': 0.0, | |
| 'average_processing_time': 0.0 | |
| } | |
| logger.info(f"Statistics reset for {self.model_name}") | |
| def validate_input(self, text: str, min_length: int = 10) -> bool: | |
| """ | |
| Validate input text | |
| Args: | |
| text: Input text | |
| min_length: Minimum number of words required | |
| Returns: | |
| Boolean indicating if input is valid | |
| Raises: | |
| ValueError: If input is invalid | |
| """ | |
| if not text or not isinstance(text, str): | |
| raise ValueError("Input text must be a non-empty string") | |
| word_count = len(text.split()) | |
| if word_count < min_length: | |
| raise ValueError( | |
| f"Input text too short. Minimum {min_length} words required, got {word_count}" | |
| ) | |
| return True | |
| def __repr__(self) -> str: | |
| """String representation of the summarizer""" | |
| return (f"{self.__class__.__name__}(model_name='{self.model_name}', " | |
| f"model_type='{self.model_type}', " | |
| f"total_summarizations={self.stats['total_summarizations']})") | |
| class SummarizerFactory: | |
| """ | |
| Factory Pattern for creating summarizer instances | |
| Centralizes model instantiation logic | |
| """ | |
| _models = {} | |
| def register_model(cls, model_class, name: str): | |
| """Register a new summarizer model""" | |
| cls._models[name.lower()] = model_class | |
| logger.info(f"Registered model: {name}") | |
| def create_summarizer(cls, model_name: str, **kwargs): | |
| """ | |
| Create a summarizer instance | |
| Args: | |
| model_name: Name of the model to create | |
| **kwargs: Model-specific initialization parameters | |
| Returns: | |
| Instance of requested summarizer | |
| Raises: | |
| ValueError: If model not found | |
| """ | |
| model_name_lower = model_name.lower() | |
| if model_name_lower not in cls._models: | |
| available = ', '.join(cls._models.keys()) | |
| raise ValueError( | |
| f"Model '{model_name}' not found. Available models: {available}" | |
| ) | |
| model_class = cls._models[model_name_lower] | |
| return model_class(**kwargs) | |
| def list_available_models(cls) -> List[str]: | |
| """Get list of available models""" | |
| return list(cls._models.keys()) |