synclm-demo / utils /summarizer.py
SCBconsulting's picture
Update utils/summarizer.py
8521187 verified
# utils/summarizer.py
from transformers import pipeline
from typing import List
# ========== Load Summarization Pipeline ==========
# Shared summarization pipeline, constructed once when this module is imported
# and reused by every summarize_text() call below.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# ========== Text Helpers ==========
def clean_text(text: str) -> str:
    """
    Collapse all whitespace in *text* into single spaces.

    Newlines, tabs and runs of repeated spaces each become exactly one space,
    and leading/trailing whitespace is removed.

    Params:
    - text: raw input text.

    Returns:
        The normalized single-line string ("" when *text* is blank).
    """
    # str.split() with no separator splits on any run of whitespace and drops
    # empty fields, so re-joining with one space normalizes everything in a
    # single pass (chained .replace() calls only collapse one level).
    return " ".join(text.split())
def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
    """
    Break long text into sentence-aligned chunks for safe summarization.

    Sentences are detected by the ". " separator and packed greedily so that
    each chunk is at most *max_chunk_len* characters long. A single sentence
    longer than the limit is never cut; it is emitted as its own (oversized)
    chunk.

    Params:
    - text: the text to split.
    - max_chunk_len: target maximum number of characters per chunk.

    Returns:
        Non-empty chunks in their original order; [] when *text* contains no
        sentences.
    """
    chunks: List[str] = []
    pending: List[str] = []  # sentences collected for the chunk being built
    pending_len = 0  # always equals len(" ".join(pending))

    for fragment in text.split(". "):
        sentence = fragment.strip()
        if not sentence:
            # Blank input or back-to-back separators produce empty fragments.
            continue
        # split() consumed the period; restore it unless the fragment already
        # ends with terminal punctuation (avoids "sentence..").
        if not sentence.endswith((".", "!", "?")):
            sentence += "."

        joined_len = pending_len + 1 + len(sentence) if pending else len(sentence)
        if pending and joined_len > max_chunk_len:
            # Adding this sentence would overflow: flush and start a new chunk.
            chunks.append(" ".join(pending))
            pending = [sentence]
            pending_len = len(sentence)
        else:
            # Either it fits, or the chunk is empty and the sentence must be
            # accepted even if it alone exceeds the limit.
            pending.append(sentence)
            pending_len = joined_len

    if pending:
        chunks.append(" ".join(pending))
    return chunks
# ========== Summarization Functions ==========
def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """
    Generate an executive summary of *text*.

    The text is cleaned with clean_text(), divided with split_text(), and each
    non-empty chunk is summarized by the module-level ``summarizer`` pipeline.

    Params:
    - text: the document to summarize.
    - as_paragraph: True -> one summary paragraph per chunk, separated by
      blank lines; False -> bullet points, one per summary sentence.
    - fallback: True -> if summarization raises, return fallback_summary(text);
      False -> return an "An error occurred: ..." message instead.

    Returns:
        The formatted summary, "No input provided." for missing or blank
        input, or the fallback / error text described above.
    """
    # `not text` also covers None, which would otherwise raise on .strip().
    if not text or not text.strip():
        return "No input provided."

    try:
        summaries = []
        for chunk in split_text(clean_text(text)):
            if not chunk.strip():
                # Never send an empty chunk to the model.
                continue
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"].strip())
    except Exception as e:
        # Deliberate best-effort boundary: any failure while preparing or
        # running the model degrades to the manual fallback or an error string.
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {str(e)}"

    # Escape sequences (U+1F4C4 page emoji, U+2022 bullet) keep the emitted
    # text correct regardless of how this source file is re-encoded.
    if as_paragraph:
        return "\U0001F4C4 Executive Summary:\n\n" + "\n\n".join(summaries)

    # Otherwise -> one bullet per sentence of each chunk summary.
    bullet_points = []
    for summary in summaries:
        for line in summary.split(". "):
            cleaned_line = line.strip().rstrip(".")
            if cleaned_line:
                bullet_points.append(f"\u2022 {cleaned_line}.")
    return "\U0001F4C4 Executive Summary:\n" + "\n".join(bullet_points)
# ========== Fallback Summary (manual) ==========
def fallback_summary(text: str, max_lines: int = 5) -> str:
    """
    Fallback: return the first few sentences of *text* as a pseudo-summary.

    No model is involved: the text is split on ". " and the first *max_lines*
    non-empty sentences are returned as bullet points (trailing periods
    removed).

    Params:
    - text: the document to summarize.
    - max_lines: maximum number of bullet points to return; values below 0
      are treated as 0.

    Returns:
        A "(Fallback Summary)" header line followed by one bullet per line.
    """
    # Normalize whitespace first so an embedded newline cannot split one
    # bullet across several output lines.
    normalized = " ".join(text.split())
    sentences = [
        part.strip().rstrip(".").rstrip() for part in normalized.split(". ")
    ]
    # Drop empty fragments BEFORE slicing so they do not use up the quota.
    selected = [sentence for sentence in sentences if sentence][: max(max_lines, 0)]
    # Escape sequences (U+2022 bullet, U+1F4C4 page emoji) keep the emitted
    # text correct regardless of how this source file is re-encoded.
    points = [f"\u2022 {sentence}" for sentence in selected]
    return "\U0001F4C4 (Fallback Summary)\n" + "\n".join(points)