dev2004v committed on
Commit
08a6381
·
verified ·
1 Parent(s): fdd0d4b

Update app/core/summarizer.py

Browse files
Files changed (1) hide show
  1. app/core/summarizer.py +24 -5
app/core/summarizer.py CHANGED
@@ -1,10 +1,29 @@
1
- import os
2
  from transformers import pipeline
3
 
4
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/huggingface"
5
-
6
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
  def get_summary(text: str) -> str:
9
- result = summarizer(text, max_length=150, min_length=30, do_sample=False)
10
- return result[0]["summary_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from transformers import pipeline
2
 
3
+ # Load the summarization pipeline
 
4
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
5
 
6
  def get_summary(text: str) -> str:
7
+ max_chunk_chars = 3000 # ~1024 tokens, safe limit for BART
8
+ overlap = 200 # overlap to avoid cutting in middle of sentence
9
+ summaries = []
10
+
11
+ # Generate chunks
12
+ i = 0
13
+ while i < len(text):
14
+ end = i + max_chunk_chars
15
+ chunk = text[i:end]
16
+ if len(chunk.strip()) > 50: # avoid empty/short segments
17
+ try:
18
+ summary = summarizer(
19
+ chunk,
20
+ max_length=150,
21
+ min_length=30,
22
+ do_sample=False
23
+ )
24
+ summaries.append(summary[0]['summary_text'])
25
+ except Exception as e:
26
+ summaries.append(f"[Error summarizing chunk: {e}]")
27
+ i += max_chunk_chars - overlap
28
+
29
+ return "\n\n".join(summaries)