File size: 2,758 Bytes
cb4344f
 
2c2937b
f42b944
2c2937b
f42b944
2c2937b
 
8521187
 
 
 
 
 
 
f42b944
 
cb4344f
f42b944
cb4344f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8521187
f42b944
8521187
cb4344f
8521187
 
 
 
 
cb4344f
2c2937b
 
 
8521187
 
 
 
 
 
 
 
 
 
 
 
5514a9f
8521187
 
 
 
 
 
 
 
cb4344f
8521187
5514a9f
8521187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# utils/summarizer.py

import logging
from typing import List

from transformers import pipeline

# ========== Load Summarization Pipeline ==========
# Shared summarization pipeline: built once when this module is imported and
# reused by summarize_text() for every chunk it processes.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ========== Text Helpers ==========

def clean_text(text: str) -> str:
    """
    🧹 Collapse every run of whitespace into a single space.

    Line breaks, tabs and repeated spaces all count as whitespace; leading and
    trailing whitespace is removed.

    Params:
    - text: raw input text

    Returns:
    - the text on a single line, words separated by exactly one space
    """
    # split() with no separator splits on any whitespace run and drops empty
    # pieces, so one join normalises runs of any length. A single
    # replace("  ", " ") pass only halves a run and leaves doubled spaces.
    return " ".join(text.split())

def split_text(text: str, max_chunk_len: int = 800) -> List[str]:
    """
    ✂️ Break long text into smaller chunks for safe summarization.

    Sentences (delimited by ". ") are packed greedily, in order, into chunks
    of at most `max_chunk_len` characters. A single sentence that is longer
    than the limit is kept whole as its own chunk.

    Params:
    - text: the text to split
    - max_chunk_len: maximum chunk length in characters

    Returns:
    - list of non-empty chunks; an empty list for empty or blank text
    """
    pieces = text.split('. ')
    last_index = len(pieces) - 1
    chunks: List[str] = []
    current = ""

    for index, piece in enumerate(pieces):
        sentence = piece.strip()
        if not sentence:
            continue

        # split('. ') removed the period from every piece except the last, so
        # put it back. The last piece keeps its own ending and only gets a
        # period when it has no terminal punctuation (avoids "text..").
        if index < last_index or not sentence.endswith(('.', '!', '?')):
            sentence += "."

        candidate = f"{current} {sentence}" if current else sentence
        if len(candidate) <= max_chunk_len:
            current = candidate
        else:
            # Only flush a chunk that actually has content; an oversized
            # first sentence must not produce an empty leading chunk.
            if current:
                chunks.append(current)
            current = sentence

    if current:
        chunks.append(current)

    return chunks

# ========== Summarization Functions ==========

def summarize_text(text: str, as_paragraph: bool = False, fallback: bool = True) -> str:
    """
    📄 Generate an executive summary.

    The text is cleaned with clean_text(), split with split_text(), and each
    chunk is summarized separately by the module-level `summarizer` pipeline.

    Params:
    - text: the text to summarize
    - as_paragraph: True → one summary paragraph per chunk, separated by blank
      lines; False → bullet points, one per summary sentence
    - fallback: True → if summarization fails, return fallback_summary(text);
      False → return an "An error occurred: ..." message instead

    Returns:
    - the formatted summary, "No input provided." for empty or blank input,
      or the fallback / error text described above
    """
    # Treat None like empty input instead of failing on .strip().
    if not text or not text.strip():
        return "No input provided."

    try:
        summaries = []
        for chunk in split_text(clean_text(text)):
            if not chunk:
                continue  # never hand an empty chunk to the model
            result = summarizer(chunk, max_length=130, min_length=30, do_sample=False)
            summaries.append(result[0]["summary_text"].strip())

        if as_paragraph:
            return "📄 Executive Summary:\n\n" + "\n\n".join(summaries)

        # Otherwise → one bullet per sentence of every chunk summary.
        bullet_points = []
        for summary in summaries:
            for part in summary.split('. '):
                sentence = part.strip().rstrip('.')
                if sentence:
                    bullet_points.append(f"• {sentence}.")

        return "📄 Executive Summary:\n" + "\n".join(bullet_points)

    except Exception as e:
        # Deliberate best-effort boundary: keep returning a string to the
        # caller, but record the failure so it is not silently lost.
        logging.getLogger(__name__).warning("Summarization failed", exc_info=True)
        if fallback:
            return fallback_summary(text)
        return f"An error occurred: {e}"

# ========== Fallback Summary (manual) ==========

def fallback_summary(text: str, max_lines: int = 5) -> str:
    """
    🧭 Fallback: return the first few sentences as a pseudo-summary.

    No model is involved; the text is split on ". " and the leading sentences
    are listed as bullet points.

    Params:
    - text: text to take the sentences from
    - max_lines: maximum number of bullet points to return (values below 0
      are treated as 0)

    Returns:
    - a "(Fallback Summary)" header line followed by one "• sentence" line
      per selected sentence
    """
    # Collapse line breaks and repeated whitespace first so every bullet stays
    # on one line and sentences separated by a newline are still split.
    normalized = " ".join(text.split())

    # Drop blank / dot-only fragments *before* truncating so they neither use
    # up the max_lines budget nor produce empty bullets.
    sentences = [
        fragment.strip().rstrip(".")
        for fragment in normalized.split(". ")
        if fragment.strip(" .")
    ]
    points = [f"• {sentence}" for sentence in sentences[: max(max_lines, 0)]]
    return "📄 (Fallback Summary)\n" + "\n".join(points)