dev2004v committed on
Commit
6c145e9
·
verified ·
1 Parent(s): b744819

Update app/core/summarizer.py

Browse files
Files changed (1) hide show
  1. app/core/summarizer.py +22 -27
app/core/summarizer.py CHANGED
@@ -1,19 +1,8 @@
1
  import logging
2
- from transformers import pipeline
3
 
4
- # Setup logging
5
- logging.basicConfig(level=logging.INFO)
6
- logger = logging.getLogger("AutoTLDR")
7
 
8
- # Load summarizer
9
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
10
-
11
- # Configuration
12
- MAX_TEXT_LENGTH = 6000 # max total input length
13
- MAX_CHUNKS = 3 # max number of chunks
14
- CHUNK_SIZE = 700 # approx number of characters per chunk
15
-
16
- def chunk_text(text, max_len=CHUNK_SIZE):
17
  words = text.split()
18
  chunks = []
19
  current = []
@@ -27,29 +16,35 @@ def chunk_text(text, max_len=CHUNK_SIZE):
27
  if current:
28
  chunks.append(" ".join(current))
29
 
30
- return chunks[:MAX_CHUNKS] # Only take the first 3 chunks
31
 
32
- def get_summary(text: str) -> str:
33
- logger.info(f"Input text length: {len(text)} characters")
 
 
34
 
35
- if not text or len(text.strip()) < 100:
36
- logger.warning("Input too short to summarize.")
37
- return "The text is too short to summarize."
38
 
39
- if len(text) > MAX_TEXT_LENGTH:
40
- logger.error(f"Text too long ({len(text)} chars). Aborting.")
41
- return f"Input too long to summarize (max {MAX_TEXT_LENGTH} characters allowed)."
42
 
43
  summaries = []
44
  chunks = chunk_text(text)
45
 
46
- for idx, chunk in enumerate(chunks):
 
47
  try:
48
- logger.info(f"Summarizing chunk {idx + 1}/{len(chunks)} (length: {len(chunk)})")
49
  result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
50
  summaries.append(result[0]['summary_text'])
 
51
  except Exception as e:
52
- logger.exception(f"Error summarizing chunk {idx + 1}")
53
- summaries.append("[Summarization failed for this section.]")
 
 
 
 
54
 
55
- return "\n\n".join(summaries)
 
1
  import logging
 
2
 
3
+ MAX_INPUT_CHARS = 6000 # Limit text length
 
 
4
 
5
+ def chunk_text(text, max_len=700):
 
 
 
 
 
 
 
 
6
  words = text.split()
7
  chunks = []
8
  current = []
 
16
  if current:
17
  chunks.append(" ".join(current))
18
 
19
+ return chunks[:3] # Limit to first 3 chunks
20
 
21
def get_summary(text, max_chars=None):
    """Summarize ``text`` by splitting it into chunks and running each
    through the BART summarization pipeline.

    Args:
        text: Raw input text to summarize.
        max_chars: Optional per-call length limit. Defaults to the
            module-level ``MAX_INPUT_CHARS`` when not given (keeps the
            original behavior; the parameter is a backward-compatible
            generalization).

    Returns:
        The per-chunk summaries joined with newlines. Chunks that fail
        to summarize are replaced with ``"..."`` (best-effort behavior
        carried over from the original).

    Raises:
        ValueError: If the input exceeds the allowed length.
    """
    logging.info("=== AutoTLDR Incoming Request ===")
    # NOTE(review): logging raw user input may leak sensitive content —
    # confirm this is acceptable before production use.
    logging.info("Raw input (first 500 chars):\n%s", text[:500])
    logging.info("Total input length: %d characters", len(text))

    limit = MAX_INPUT_CHARS if max_chars is None else max_chars
    if len(text) > limit:
        logging.warning("Input too long! Aborting.")
        raise ValueError("Text too long to summarize. Please try a shorter page.")

    # Import and build the pipeline lazily so module import stays cheap,
    # but cache it on the function object: the original rebuilt (and
    # re-downloaded/re-loaded) the BART model on EVERY call.
    summarizer = getattr(get_summary, "_summarizer", None)
    if summarizer is None:
        from transformers import pipeline
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        get_summary._summarizer = summarizer

    summaries = []
    chunks = chunk_text(text)

    for i, chunk in enumerate(chunks):
        logging.info("Summarizing chunk %d/%d (length: %d)", i + 1, len(chunks), len(chunk))
        try:
            result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
            summaries.append(result[0]['summary_text'])
            logging.info("Chunk %d summary: %s", i + 1, result[0]['summary_text'])
        except Exception:
            # Best-effort: one bad chunk should not kill the whole summary.
            logging.exception("Error summarizing chunk %d", i + 1)
            summaries.append("...")

    final_summary = "\n".join(summaries)
    logging.info("=== Final Summary Output ===")
    # Pass as an argument, not as the format string: a '%' in the summary
    # text would otherwise break the logging call.
    logging.info("%s", final_summary)

    return final_summary