Spaces:
Sleeping
Sleeping
File size: 2,758 Bytes
cb4344f 2c2937b f42b944 2c2937b f42b944 2c2937b 8521187 f42b944 cb4344f f42b944 cb4344f 8521187 f42b944 8521187 cb4344f 8521187 cb4344f 2c2937b 8521187 5514a9f 8521187 cb4344f 8521187 5514a9f 8521187 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# utils/summarizer.py
import logging
from typing import List

from transformers import pipeline
# ========== Load Summarization Pipeline ==========
# Built once, when this module is imported, and reused by every
# summarize_text() call below.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
# ========== Text Helpers ==========
def clean_text(text: str) -> str:
    """Collapse every run of whitespace in *text* to a single space.

    Line breaks, tabs and repeated spaces are each replaced by one space,
    and leading/trailing whitespace is removed.

    Args:
        text: Raw input text.

    Returns:
        The normalized, single-line text ("" for empty or blank input).
    """
    # str.split() without a separator splits on any whitespace run and drops
    # empty pieces, so re-joining leaves exactly one space between words.
    return " ".join(text.split())
def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
    """Break *text* into sentence-aligned chunks for summarization.

    The text is split on ". " and the sentences are greedily packed into
    chunks, separated by single spaces, so that each chunk is at most
    ``max_chunk_len`` characters long. A single sentence longer than the
    limit is never cut; it becomes a chunk of its own.

    Args:
        text: Input text, ideally already whitespace-normalized.
        max_chunk_len: Maximum chunk length in characters.

    Returns:
        The list of non-empty chunks, in order. Empty or blank input yields
        an empty list.
    """
    pieces = text.split(". ")
    final_index = len(pieces) - 1

    chunks: List[str] = []
    current = ""
    for index, piece in enumerate(pieces):
        sentence = piece.strip()
        if not sentence:
            continue

        # Splitting on ". " removed the period from every piece except the
        # last one, so restore it. The last piece keeps whatever terminator
        # it already has, which avoids producing "..".
        if index < final_index or not sentence.endswith((".", "!", "?")):
            sentence += "."

        if not current:
            # Never flush an empty chunk, even if this sentence is over-long.
            current = sentence
        elif len(current) + 1 + len(sentence) > max_chunk_len:
            chunks.append(current)
            current = sentence
        else:
            current = f"{current} {sentence}"

    if current:
        chunks.append(current)
    return chunks
# ========== Summarization Functions ==========
def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """Generate an executive summary of *text*.

    The text is whitespace-normalized, split into chunks, and each chunk is
    summarized independently with the module-level ``summarizer`` pipeline.

    Args:
        text: The text to summarize.
        as_paragraph: If True, return the chunk summaries as paragraphs
            separated by blank lines; if False, return one bullet point per
            sentence of the summaries.
        fallback: If True, return ``fallback_summary(text)`` when
            summarization raises; if False, return an error message string.

    Returns:
        The formatted summary, "No input provided." for empty or blank
        input, or the fallback / error text when summarization fails.
    """
    if not text.strip():
        return "No input provided."

    try:
        chunks = split_text(clean_text(text))

        summaries = []
        for chunk in chunks:
            # Skip blank chunks so the model is never called on empty input.
            if not chunk.strip():
                continue
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"].strip())

        # NOTE(review): the leading glyphs in the header and bullet literals
        # below look like mis-decoded UTF-8 (probably an emoji and a bullet);
        # confirm against the original file before changing them.
        if as_paragraph:
            return "π Executive Summary:\n\n" + "\n\n".join(summaries)

        # Otherwise: one bullet per sentence of each chunk summary.
        bullet_points = []
        for summary in summaries:
            for line in summary.split(". "):
                cleaned_line = line.strip().rstrip(".")
                if cleaned_line:
                    bullet_points.append(f"β’ {cleaned_line}.")
        return "π Executive Summary:\n" + "\n".join(bullet_points)
    except Exception as exc:
        # Deliberate best-effort boundary: never raise to the caller, but
        # record the traceback so model failures are not silently hidden.
        logging.getLogger(__name__).exception("Summarization failed")
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {exc}"
# ========== Fallback Summary (manual) ==========
def fallback_summary(text: str, max_lines: int = 5) -> str:
    """Return the first few sentences of *text* as a bullet-list pseudo-summary.

    Whitespace (including line breaks) is collapsed first so that every
    bullet stays on one line, then the text is split on ". ". Blank
    fragments are discarded before the first ``max_lines`` sentences are
    taken, so up to ``max_lines`` real sentences are returned when available.

    Args:
        text: The text to excerpt.
        max_lines: Maximum number of bullet points to return.

    Returns:
        A header line followed by one bullet per selected sentence, with
        trailing periods stripped.
    """
    normalized = " ".join(text.split())

    # Filter before slicing: empty fragments must not use up the max_lines
    # budget.
    stripped = (fragment.strip().rstrip(".") for fragment in normalized.split(". "))
    sentences = [sentence for sentence in stripped if sentence]

    # NOTE(review): the leading glyphs in these literals look like mis-decoded
    # UTF-8 (probably a bullet and an emoji); confirm against the original
    # file before changing them.
    points = [f"β’ {sentence}" for sentence in sentences[:max_lines]]
    return "π (Fallback Summary)\n" + "\n".join(points)
|