Spaces:
Sleeping
Sleeping
Update utils/summarizer.py
Browse files- utils/summarizer.py +23 -12
utils/summarizer.py
CHANGED
|
@@ -1,13 +1,16 @@
|
|
| 1 |
# utils/summarizer.py
|
| 2 |
|
| 3 |
from transformers import pipeline
|
|
|
|
| 4 |
|
| 5 |
-
# Load
|
| 6 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 7 |
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
"""
|
| 10 |
-
Breaks long
|
| 11 |
"""
|
| 12 |
sentences = text.split('. ')
|
| 13 |
chunks = []
|
|
@@ -25,25 +28,33 @@ def split_text(text, max_chunk_len=800):
|
|
| 25 |
|
| 26 |
return chunks
|
| 27 |
|
| 28 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
"""
|
| 30 |
-
Generate a readable executive summary using bullet points.
|
| 31 |
"""
|
| 32 |
if not text.strip():
|
| 33 |
return "No input provided."
|
| 34 |
|
| 35 |
-
|
| 36 |
-
chunks = split_text(
|
| 37 |
|
| 38 |
bullet_points = []
|
| 39 |
|
| 40 |
for chunk in chunks:
|
| 41 |
result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
|
| 42 |
summary = result[0]["summary_text"].strip()
|
| 43 |
-
|
| 44 |
-
for line in
|
| 45 |
-
|
| 46 |
-
if
|
| 47 |
-
bullet_points.append(f"• {
|
| 48 |
|
| 49 |
return "📄 Executive Summary:\n" + "\n".join(bullet_points)
|
|
|
|
| 1 |
# utils/summarizer.py
|
| 2 |
|
| 3 |
from transformers import pipeline
|
| 4 |
+
from typing import List
|
| 5 |
|
| 6 |
+
# ========== Load Summarization Pipeline ==========
|
| 7 |
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
| 8 |
|
| 9 |
+
# ========== Helper Functions ==========
|
| 10 |
+
|
| 11 |
+
def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
|
| 12 |
"""
|
| 13 |
+
✂️ Breaks long text into smaller chunks for safe summarization.
|
| 14 |
"""
|
| 15 |
sentences = text.split('. ')
|
| 16 |
chunks = []
|
|
|
|
| 28 |
|
| 29 |
return chunks
|
| 30 |
|
| 31 |
+
def clean_text(text: str) -> str:
    """
    🧹 Remove excessive whitespace and line breaks.

    Every run of whitespace (spaces, tabs, newlines, carriage returns) is
    collapsed into a single space, and leading/trailing whitespace is
    dropped.

    Args:
        text: Raw input text.

    Returns:
        The text on a single line, with words separated by exactly one space.
        An empty or whitespace-only input yields an empty string.
    """
    # str.split() with no separator splits on any whitespace run and discards
    # empty fields, so re-joining with a single space normalizes all spacing
    # in one pass, however long the runs are.
    return " ".join(text.split())
|
| 36 |
+
|
| 37 |
+
# ========== Summarization Function ==========
|
| 38 |
+
|
| 39 |
+
def summarize_text(text: str) -> str:
    """
    📄 Generate a readable executive summary using bullet points.

    The input is normalized with ``clean_text``, divided with ``split_text``,
    and each chunk is passed to the module-level ``summarizer`` pipeline.
    Each chunk summary is split on ". " and every non-empty piece becomes
    one bullet.

    Args:
        text: Raw text to summarize.

    Returns:
        "No input provided." when ``text`` is None, empty, or whitespace-only;
        otherwise the "📄 Executive Summary:" header followed by one bullet
        per line.
    """
    # Treat None like blank input instead of failing on None.strip().
    if not text or not text.strip():
        return "No input provided."

    cleaned = clean_text(text)
    chunks = split_text(cleaned)

    bullet_points = []

    for chunk in chunks:
        result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
        summary = result[0]["summary_text"].strip()
        for line in summary.split('. '):
            # Drop the trailing period (and any space left in front of it)
            # so exactly one terminator is appended below.
            cleaned_line = line.strip().rstrip('.').rstrip()
            if not cleaned_line:
                continue
            # A sentence that already ends in "?" or "!" keeps its own
            # punctuation rather than becoming "…?." or "…!.".
            terminator = "" if cleaned_line.endswith(("?", "!")) else "."
            bullet_points.append(f"• {cleaned_line}{terminator}")

    return "📄 Executive Summary:\n" + "\n".join(bullet_points)
|