# Spaces: Sleeping  (hosting-page status text captured with this file; not part of the code)
# utils/summarizer.py
import logging
from typing import List

from transformers import pipeline
# ========== Load Summarization Pipeline ==========
# One shared pipeline object, built when this module is imported and reused
# by every summarize_text() call.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")
# ========== Text Helpers ==========
def clean_text(text: str) -> str:
    """
    Collapse all whitespace in the text into single spaces.

    Every run of whitespace (spaces, tabs, line breaks) becomes exactly one
    space, and leading/trailing whitespace is removed.

    Params:
    - text: raw input text.

    Returns:
    The normalized single-line string ("" when the input is empty or blank).
    """
    # str.split() with no separator splits on any whitespace run and drops
    # empty pieces, so re-joining with " " collapses runs of any length.
    return " ".join(text.split())


def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
    """
    Break long text into sentence-aligned chunks for summarization.

    The text is split on ". " and the pieces are packed greedily, in order,
    into chunks of at most max_chunk_len characters. A single sentence that
    is longer than max_chunk_len is kept whole in a chunk of its own.

    Params:
    - text: text to split (normally already whitespace-normalized).
    - max_chunk_len: maximum number of characters per chunk.

    Returns:
    The list of non-empty chunks, in original order. Empty or blank text
    yields an empty list.
    """
    pieces = text.split(". ")
    last_index = len(pieces) - 1

    chunks: List[str] = []
    current: List[str] = []  # sentences collected for the chunk being built
    current_len = 0  # length of " ".join(current) plus one trailing space

    for index, piece in enumerate(pieces):
        sentence = piece.strip()
        if not sentence:
            continue

        # Splitting on ". " removed the period from every piece but the last,
        # so put it back. The last piece only gets one when it does not
        # already end a sentence, which avoids a doubled trailing period.
        if index < last_index or not sentence.endswith((".", "!", "?")):
            sentence += "."

        # Flush only a non-empty chunk: an over-long first sentence must not
        # produce an empty leading chunk.
        if current and current_len + len(sentence) > max_chunk_len:
            chunks.append(" ".join(current))
            current = []
            current_len = 0

        current.append(sentence)
        current_len += len(sentence) + 1

    if current:
        chunks.append(" ".join(current))
    return chunks


# ========== Summarization Functions ==========
def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """
    Generate an executive summary of the text.

    The text is whitespace-normalized, split into chunks, and each chunk is
    summarized separately with the module-level summarizer pipeline.

    Params:
    - text: the text to summarize.
    - as_paragraph: True -> one summary paragraph per chunk, separated by
      blank lines; False -> one bullet point per summary sentence.
    - fallback: True -> if summarization raises, return fallback_summary(text);
      False -> return an "An error occurred: ..." message instead.

    Returns:
    The formatted summary, "No input provided." for empty or blank input, or
    the fallback summary / error message when summarization fails.
    """
    if not text.strip():
        return "No input provided."

    # NOTE(review): the leading characters of the header and bullet literals
    # below look like mis-decoded UTF-8 (an emoji and a bullet). They are kept
    # byte-for-byte here; confirm against the original file.
    try:
        summaries = []
        for chunk in split_text(clean_text(text)):
            # An empty chunk has nothing to summarize, so never send it to
            # the model.
            if not chunk.strip():
                continue
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"].strip())

        if as_paragraph:
            return "π Executive Summary:\n\n" + "\n\n".join(summaries)

        # Otherwise return bullet points: one per sentence of each summary,
        # each normalized to end with exactly one period.
        bullet_points = []
        for summary in summaries:
            for line in summary.split(". "):
                cleaned_line = line.strip().rstrip(".")
                if cleaned_line:
                    bullet_points.append(f"β’ {cleaned_line}.")
        return "π Executive Summary:\n" + "\n".join(bullet_points)
    except Exception as e:
        # Best-effort boundary: callers always get a string back, but the
        # underlying failure is logged instead of silently discarded.
        logging.getLogger(__name__).exception("Summarization failed")
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {e}"


# ========== Fallback Summary (manual) ==========
def fallback_summary(text: str, max_lines: int = 5) -> str:
    """
    Fallback: return the first few sentences as a pseudo-summary.

    The text is split on ". " and the first max_lines non-empty sentences
    are emitted as bullet points, one per line.

    Params:
    - text: the original (possibly multi-line) input text.
    - max_lines: maximum number of bullet points; a non-positive value
      yields no bullet points.

    Returns:
    A "(Fallback Summary)" header line followed by the bullet points.
    """
    # NOTE(review): the leading characters of the header and bullet literals
    # below look like mis-decoded UTF-8 (an emoji and a bullet). They are kept
    # byte-for-byte here; confirm against the original file.
    points = []
    for piece in text.split(". "):
        # Count only sentences that are actually emitted, so blank pieces do
        # not use up the max_lines budget.
        if len(points) >= max_lines:
            break
        # The input is raw text: collapse embedded line breaks and whitespace
        # runs so each bullet stays on a single line.
        sentence = " ".join(piece.split()).rstrip(".").rstrip()
        if sentence:
            points.append(f"β’ {sentence}")
    return "π (Fallback Summary)\n" + "\n".join(points)