dev2004v committed on
Commit
b744819
·
verified ·
1 Parent(s): 85059d4

Update app/core/summarizer.py

Browse files
Files changed (1) hide show
  1. app/core/summarizer.py +30 -8
app/core/summarizer.py CHANGED
@@ -1,8 +1,19 @@
 
1
  from transformers import pipeline
2
 
 
 
 
 
 
3
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
4
 
5
- def chunk_text(text, max_len=700):
 
 
 
 
 
6
  words = text.split()
7
  chunks = []
8
  current = []
@@ -16,18 +27,29 @@ def chunk_text(text, max_len=700):
16
  if current:
17
  chunks.append(" ".join(current))
18
 
19
- return chunks[:3] # Limit to first 3 chunks
 
 
 
20
 
21
- def get_summary(text):
22
- print("Input length:", len(text))
 
 
 
 
 
23
 
24
  summaries = []
25
- for chunk in chunk_text(text):
 
 
26
  try:
 
27
  result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
28
  summaries.append(result[0]['summary_text'])
29
  except Exception as e:
30
- print("Summarization failed:", e)
31
- summaries.append("...")
32
 
33
- return "\n".join(summaries)
 
1
+ import logging
2
  from transformers import pipeline
3
 
4
+ # Setup logging
5
+ logging.basicConfig(level=logging.INFO)
6
+ logger = logging.getLogger("AutoTLDR")
7
+
8
+ # Load summarizer
9
  summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
 
11
+ # Configuration
12
+ MAX_TEXT_LENGTH = 6000 # max total input length
13
+ MAX_CHUNKS = 3 # max number of chunks
14
+ CHUNK_SIZE = 700 # approx number of characters per chunk
15
+
16
+ def chunk_text(text, max_len=CHUNK_SIZE):
17
  words = text.split()
18
  chunks = []
19
  current = []
 
27
  if current:
28
  chunks.append(" ".join(current))
29
 
30
+ return chunks[:MAX_CHUNKS] # Only take the first MAX_CHUNKS chunks
31
+
32
+ def get_summary(text: str) -> str:
33
+ logger.info(f"Input text length: {len(text)} characters")
34
 
35
+ if not text or len(text.strip()) < 100:
36
+ logger.warning("Input too short to summarize.")
37
+ return "The text is too short to summarize."
38
+
39
+ if len(text) > MAX_TEXT_LENGTH:
40
+ logger.error(f"Text too long ({len(text)} chars). Aborting.")
41
+ return f"Input too long to summarize (max {MAX_TEXT_LENGTH} characters allowed)."
42
 
43
  summaries = []
44
+ chunks = chunk_text(text)
45
+
46
+ for idx, chunk in enumerate(chunks):
47
  try:
48
+ logger.info(f"Summarizing chunk {idx + 1}/{len(chunks)} (length: {len(chunk)})")
49
  result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
50
  summaries.append(result[0]['summary_text'])
51
  except Exception as e:
52
+ logger.exception(f"Error summarizing chunk {idx + 1}")
53
+ summaries.append("[Summarization failed for this section.]")
54
 
55
+ return "\n\n".join(summaries)