# HuggingNLP_Suite_1/utils/summarizer.py
# (Hugging Face Hub page residue removed: author SathvikGanta, commit 92ddc05, 903 bytes)
from transformers import pipeline
from config import SUMMARIZATION_MODEL
import textwrap
# Use Pegasus model for better abstractive summarization.
# NOTE: this instantiates (and may download) the model at import time.
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
# Character-based chunk limit used as a rough proxy for the model's token
# limit; adjust if SUMMARIZATION_MODEL changes (chars != tokens).
MAX_CHARS = 800 # Adjust based on model token limit
def chunk_text(text, max_chunk_size=MAX_CHARS):
    """Split *text* into pieces of at most *max_chunk_size* characters.

    Delegates to textwrap.wrap, which breaks on whitespace where possible
    and returns an empty list for empty/whitespace-only input.
    """
    pieces = textwrap.wrap(text, max_chunk_size)
    return pieces
def summarize_text(text):
    """Summarize *text* by chunking it and summarizing each chunk.

    The text is split into character-bounded chunks (see chunk_text) to stay
    within the model's input limit; each chunk is summarized independently and
    the partial summaries are joined with spaces.

    Parameters:
        text: The input string to summarize. Empty or whitespace-only input
            returns an empty string.

    Returns:
        A single string containing the concatenated chunk summaries. Chunks
        that fail to summarize contribute an "[Error: ...]" marker instead of
        crashing the whole call (deliberate best-effort behavior).
    """
    # Guard empty input explicitly instead of relying on wrap() returning [].
    if not text or not text.strip():
        return ""
    chunks = chunk_text(text)
    summaries = []
    for chunk in chunks:
        # Skip chunks that are effectively empty — the model would only
        # produce noise or warnings for them.
        if not chunk.strip():
            continue
        try:
            summary = summarizer(
                chunk,
                max_length=60,   # Force concise summary
                min_length=20,
                do_sample=False,  # Deterministic (greedy) decoding
                clean_up_tokenization_spaces=True
            )[0]['summary_text']
            summaries.append(summary)
        except Exception as e:
            # Best-effort: record the failure inline rather than aborting
            # the remaining chunks.
            summaries.append(f"[Error: {e}]")
    return " ".join(summaries)