dev2004v committed on
Commit
6c145e9
·
verified ·
1 Parent(s): b744819

Update app/core/summarizer.py

Browse files
Files changed (1) hide show
  1. app/core/summarizer.py +22 -27
app/core/summarizer.py CHANGED
@@ -1,19 +1,8 @@
1
  import logging
2
- from transformers import pipeline
3
 
4
- # Setup logging
5
- logging.basicConfig(level=logging.INFO)
6
- logger = logging.getLogger("AutoTLDR")
7
 
8
- # Load summarizer
9
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
-
11
- # Configuration
12
- MAX_TEXT_LENGTH = 6000 # max total input length
13
- MAX_CHUNKS = 3 # max number of chunks
14
- CHUNK_SIZE = 700 # approx number of characters per chunk
15
-
16
- def chunk_text(text, max_len=CHUNK_SIZE):
17
  words = text.split()
18
  chunks = []
19
  current = []
@@ -27,29 +16,35 @@ def chunk_text(text, max_len=CHUNK_SIZE):
27
  if current:
28
  chunks.append(" ".join(current))
29
 
30
- return chunks[:MAX_CHUNKS] # Only take the first 3 chunks
31
 
32
- def get_summary(text: str) -> str:
33
- logger.info(f"Input text length: {len(text)} characters")
 
 
34
 
35
- if not text or len(text.strip()) < 100:
36
- logger.warning("Input too short to summarize.")
37
- return "The text is too short to summarize."
38
 
39
- if len(text) > MAX_TEXT_LENGTH:
40
- logger.error(f"Text too long ({len(text)} chars). Aborting.")
41
- return f"Input too long to summarize (max {MAX_TEXT_LENGTH} characters allowed)."
42
 
43
  summaries = []
44
  chunks = chunk_text(text)
45
 
46
- for idx, chunk in enumerate(chunks):
 
47
  try:
48
- logger.info(f"Summarizing chunk {idx + 1}/{len(chunks)} (length: {len(chunk)})")
49
  result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
50
  summaries.append(result[0]['summary_text'])
 
51
  except Exception as e:
52
- logger.exception(f"Error summarizing chunk {idx + 1}")
53
- summaries.append("[Summarization failed for this section.]")
 
 
 
 
54
 
55
- return "\n\n".join(summaries)
 
1
  import logging
 
2
 
3
+ MAX_INPUT_CHARS = 6000 # Limit text length
 
 
4
 
5
+ def chunk_text(text, max_len=700):
 
 
 
 
 
 
 
 
6
  words = text.split()
7
  chunks = []
8
  current = []
 
16
  if current:
17
  chunks.append(" ".join(current))
18
 
19
+ return chunks[:3] # Limit to first 3 chunks
20
 
21
def get_summary(text, max_chars=None):
    """Summarize ``text`` by splitting it into chunks and running each
    through the BART summarization pipeline.

    Args:
        text: Raw input text to summarize.
        max_chars: Optional per-call length limit. Defaults to the
            module-level ``MAX_INPUT_CHARS`` when not given (keeps the
            original behavior; the parameter is a backward-compatible
            generalization).

    Returns:
        The per-chunk summaries joined with newlines. Chunks that fail
        to summarize are replaced with ``"..."`` (best-effort behavior
        carried over from the original).

    Raises:
        ValueError: If the input exceeds the allowed length.
    """
    logging.info("=== AutoTLDR Incoming Request ===")
    # NOTE(review): logging raw user input may leak sensitive content —
    # confirm this is acceptable before production use.
    logging.info("Raw input (first 500 chars):\n%s", text[:500])
    logging.info("Total input length: %d characters", len(text))

    limit = MAX_INPUT_CHARS if max_chars is None else max_chars
    if len(text) > limit:
        logging.warning("Input too long! Aborting.")
        raise ValueError("Text too long to summarize. Please try a shorter page.")

    # Import and build the pipeline lazily so module import stays cheap,
    # but cache it on the function object: the original rebuilt (and
    # re-downloaded/re-loaded) the BART model on EVERY call.
    summarizer = getattr(get_summary, "_summarizer", None)
    if summarizer is None:
        from transformers import pipeline
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        get_summary._summarizer = summarizer

    summaries = []
    chunks = chunk_text(text)

    for i, chunk in enumerate(chunks):
        logging.info("Summarizing chunk %d/%d (length: %d)", i + 1, len(chunks), len(chunk))
        try:
            result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
            logging.info("Chunk %d summary: %s", i + 1, result[0]['summary_text'])
        except Exception:
            # Best-effort: one bad chunk should not kill the whole summary.
            logging.exception("Error summarizing chunk %d", i + 1)
            summaries.append("...")

    final_summary = "\n".join(summaries)
    logging.info("=== Final Summary Output ===")
    # Pass as an argument, not as the format string: a '%' in the summary
    # text would otherwise break the logging call.
    logging.info("%s", final_summary)

    return final_summary