SathvikGanta committed on
Commit
92ddc05
·
verified ·
1 Parent(s): 4a3d963

Create utils/summarizer.py

Browse files
Files changed (1) hide show
  1. utils/summarizer.py +28 -0
utils/summarizer.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from config import SUMMARIZATION_MODEL
3
+ import textwrap
4
+
5
# Build the Hugging Face summarization pipeline once, at import time, and keep
# it in a module-level name so it is constructed a single time per process.
# The checkpoint is whatever config.SUMMARIZATION_MODEL names.
# NOTE(review): the original note said this is a Pegasus model for better
# abstractive summarization -- confirm against config.
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
7
+
8
MAX_CHARS = 800  # Per-chunk character budget; adjust based on model token limit.


def chunk_text(text, max_chunk_size=MAX_CHARS):
    """Split *text* into chunks of at most *max_chunk_size* characters.

    Splitting is delegated to ``textwrap.wrap``, so:

    * runs of whitespace (including newlines) are collapsed to single spaces;
    * breaks fall on whitespace where possible, and the whitespace at a break
      is dropped;
    * a single word longer than *max_chunk_size* is cut into pieces.

    Args:
        text: The string to split. ``None`` or an empty string yields ``[]``.
        max_chunk_size: Maximum length, in characters, of each chunk.

    Returns:
        A list of chunk strings; empty when *text* has no non-whitespace
        content.

    Raises:
        ValueError: If *max_chunk_size* is not positive (raised by
            ``textwrap.wrap``).
    """
    # textwrap.wrap() calls str methods on its argument, so None would raise
    # AttributeError; treat "nothing to split" uniformly as an empty list.
    if not text:
        return []
    return textwrap.wrap(text, max_chunk_size)
12
+
13
def summarize_text(text):
    """Summarize *text* chunk by chunk and join the partial summaries.

    The input is split with ``chunk_text`` and each chunk is passed to the
    module-level ``summarizer`` pipeline with deterministic decoding
    (``do_sample=False``) and a 20-60 length window. The per-chunk summaries
    are joined with single spaces.

    This is best-effort: if summarizing a chunk raises, the failure is logged
    and a ``"[Error: ...]"`` placeholder takes that chunk's place in the
    output, so one bad chunk does not discard the rest.

    Args:
        text: The text to summarize. ``None``, empty, or whitespace-only input
            returns ``""`` without invoking the model.

    Returns:
        The space-joined chunk summaries (and any error placeholders).
    """
    import logging  # Local import so this block does not rely on a file-level one.

    # Nothing to summarize: return early rather than letting None reach
    # chunk_text()/textwrap and raise outside the try block.
    if not text or not text.strip():
        return ""

    logger = logging.getLogger(__name__)
    summaries = []
    for chunk in chunk_text(text):
        try:
            result = summarizer(
                chunk,
                max_length=60,  # Force concise summary
                min_length=20,
                do_sample=False,  # No randomness
                clean_up_tokenization_spaces=True,
                # Chunks are sized in characters, not tokens; truncate at the
                # tokenizer so an unusually token-dense chunk cannot exceed
                # the model's input limit.
                truncation=True,
            )
            summaries.append(result[0]["summary_text"])
        except Exception as e:
            # Keep the placeholder callers already receive, but also leave a
            # traceback in the logs so failures are not visible only as
            # text inside the summary.
            logger.exception("Summarization failed for a chunk")
            summaries.append(f"[Error: {e}]")
    return " ".join(summaries)