dev2004v committed on
Commit
c4b095b
·
verified ·
1 Parent(s): 48c2147

Update app/core/summarizer.py

Browse files
Files changed (1) hide show
  1. app/core/summarizer.py +27 -23
app/core/summarizer.py CHANGED
@@ -1,29 +1,33 @@
1
  from transformers import pipeline
2
 
3
- # Load the summarization pipeline
4
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
5
 
6
- def get_summary(text: str) -> str:
7
- max_chunk_chars = 3000 # ~1024 tokens, safe limit for BART
8
- overlap = 200 # overlap to avoid cutting in middle of sentence
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  summaries = []
 
 
 
 
 
 
 
10
 
11
- # Generate chunks
12
- i = 0
13
- while i < len(text):
14
- end = i + max_chunk_chars
15
- chunk = text[i:end]
16
- if len(chunk.strip()) > 50: # avoid empty/short segments
17
- try:
18
- summary = summarizer(
19
- chunk,
20
- max_length=150,
21
- min_length=30,
22
- do_sample=False
23
- )
24
- summaries.append(summary[0]['summary_text'])
25
- except Exception as e:
26
- summaries.append(f"[Error summarizing chunk: {e}]")
27
- i += max_chunk_chars - overlap
28
-
29
- return "\n\n".join(summaries)
 
1
  from transformers import pipeline
2
 
 
3
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
4
 
5
def chunk_text(text, max_len=700, max_chunks=3):
    """Split *text* into whitespace-normalized chunks of roughly *max_len* characters.

    Words (as produced by ``str.split()``) are accumulated until the
    space-joined chunk reaches at least ``max_len`` characters; the chunk is
    then closed and a new one is started. Words are never split, so a chunk
    may exceed ``max_len`` by up to the length of its last word.

    Args:
        text: The input string to split.
        max_len: Character length at which the current chunk is closed.
        max_chunks: Maximum number of chunks to return. Defaults to 3, the
            limit that was previously hard-coded. Pass ``None`` for no limit.

    Returns:
        A list of at most ``max_chunks`` chunk strings, in input order.
        Empty or whitespace-only input yields an empty list.
    """
    if max_chunks is not None and max_chunks <= 0:
        return []

    chunks = []
    current = []
    current_len = 0  # len(" ".join(current)), maintained incrementally

    for word in text.split():
        # Every word except the first in a chunk is preceded by one space.
        current_len += len(word) + (1 if current else 0)
        current.append(word)
        if current_len >= max_len:
            chunks.append(" ".join(current))
            # Stop as soon as the cap is reached rather than chunking text
            # whose chunks would be thrown away anyway.
            if max_chunks is not None and len(chunks) >= max_chunks:
                return chunks
            current = []
            current_len = 0

    # Keep the trailing partial chunk (the cap cannot be exceeded here,
    # because reaching it returns from inside the loop).
    if current:
        chunks.append(" ".join(current))

    return chunks
20
+
21
def get_summary(text):
    """Summarize *text* chunk by chunk and return the summaries joined by newlines.

    The input length is printed, the text is split with ``chunk_text``, and
    each chunk is passed to the module-level ``summarizer`` with
    ``max_length=150``, ``min_length=30`` and ``do_sample=False``. If
    summarizing a chunk raises, the error is printed and ``"..."`` is used as
    that chunk's summary, so one failing chunk does not abort the others.
    """
    print("Input length:", len(text))

    def _summarize_piece(piece):
        # Best-effort per chunk: report the failure and fall back to a placeholder.
        try:
            output = summarizer(piece, max_length=150, min_length=30, do_sample=False)
            return output[0]['summary_text']
        except Exception as e:
            print("Summarization failed:", e)
            return "..."

    pieces = chunk_text(text)
    return "\n".join([_summarize_piece(piece) for piece in pieces])