File size: 903 Bytes
92ddc05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from transformers import pipeline
from config import SUMMARIZATION_MODEL
import textwrap

# Build the summarization pipeline once at import time. The model is whatever
# config.SUMMARIZATION_MODEL names (original note says Pegasus, chosen for
# abstractive summarization -- confirm against config).
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)

# Default maximum number of characters per chunk produced by chunk_text().
MAX_CHARS = 800  # Adjust based on model token limit

def chunk_text(text, max_chunk_size=MAX_CHARS):
    """Split *text* into a list of lines no wider than *max_chunk_size*.

    Splitting is delegated to the standard ``textwrap`` machinery with its
    default settings, so breaks fall on whitespace where possible.

    Args:
        text: The string to split.
        max_chunk_size: Maximum width, in characters, of each returned piece.

    Returns:
        A list of strings, one per chunk.
    """
    wrapper = textwrap.TextWrapper(width=max_chunk_size)
    return wrapper.wrap(text)

def summarize_text(text):
    """Summarize *text* chunk by chunk and return the combined result.

    The input is split with ``chunk_text`` and each chunk is passed to the
    module-level ``summarizer`` pipeline. The per-chunk summaries are joined
    with single spaces, in chunk order.

    This is best-effort: if summarizing a chunk raises, the failure is not
    propagated. An ``[Error: ...]`` placeholder carrying the exception text
    takes that chunk's place in the output.

    Args:
        text: The string to summarize.

    Returns:
        A single string made of the per-chunk summaries (or error
        placeholders) separated by spaces.
    """
    # Options shared by every pipeline call.
    generation_options = {
        "max_length": 60,      # Force concise summary
        "min_length": 20,
        "do_sample": False,    # No randomness
        "clean_up_tokenization_spaces": True,
    }

    def _summarize_piece(piece):
        # One chunk in, one summary (or error placeholder) out.
        try:
            outputs = summarizer(piece, **generation_options)
            return outputs[0]['summary_text']
        except Exception as err:
            return f"[Error: {err}]"

    pieces = chunk_text(text)
    return " ".join(_summarize_piece(piece) for piece in pieces)