"""AutoTLDR summarizer: split text into chunks and summarize each with BART.

The input is word-split into at most a few ~700-character chunks, each chunk
is summarized independently by a HuggingFace pipeline, and the per-chunk
summaries are joined (newline-separated) into the final result.
"""

import logging
from functools import lru_cache

logger = logging.getLogger(__name__)

MAX_INPUT_CHARS = 6000  # Limit text length


def chunk_text(text, max_len=700, max_chunks=3):
    """Split *text* on whitespace into chunks of roughly *max_len* characters.

    A chunk is flushed as soon as its joined length reaches *max_len*, so a
    chunk may slightly exceed that limit (words are never split). At most
    *max_chunks* chunks are returned; any remaining text is silently dropped.

    Args:
        text: Input string; empty/whitespace-only input yields an empty list.
        max_len: Soft character limit per chunk.
        max_chunks: Cap on the number of chunks returned (was hard-coded to 3).

    Returns:
        List of chunk strings, at most *max_chunks* long.
    """
    chunks = []
    current = []
    # Running value of len(" ".join(current)) — avoids re-joining the chunk
    # on every word, which made the original O(n^2) per chunk.
    current_len = 0
    for word in text.split():
        current_len += len(word) + (1 if current else 0)  # +1 for the space
        current.append(word)
        if current_len >= max_len:
            chunks.append(" ".join(current))
            current = []
            current_len = 0
    if current:
        chunks.append(" ".join(current))
    return chunks[:max_chunks]


@lru_cache(maxsize=1)
def _get_summarizer():
    """Build the HF summarization pipeline once and cache it.

    The original code reconstructed the pipeline (a full model load) on every
    call to get_summary; caching preserves behavior while removing that cost.
    """
    # Deferred third-party import: keeps module import cheap and optional.
    from transformers import pipeline

    return pipeline("summarization", model="philschmid/bart-large-cnn-samsum")


def get_summary(text):
    """Summarize *text*, returning one summary line per processed chunk.

    Chunks that fail to summarize are replaced by a "..." placeholder rather
    than aborting the whole request (best-effort, matching original behavior).

    Args:
        text: Raw text to summarize.

    Returns:
        Newline-joined per-chunk summaries (empty string for empty input).

    Raises:
        ValueError: If *text* is longer than MAX_INPUT_CHARS.
    """
    logger.info("=== AutoTLDR Incoming Request ===")
    logger.info("Raw input (first 500 chars):\n%s", text[:500])
    logger.info("Total input length: %d characters", len(text))
    if len(text) > MAX_INPUT_CHARS:
        logger.warning("Input too long! Aborting.")
        raise ValueError("Text too long to summarize. Please try a shorter page.")

    summarizer = _get_summarizer()
    summaries = []
    chunks = chunk_text(text)
    for i, chunk in enumerate(chunks, start=1):
        logger.info("Summarizing chunk %d/%d (length: %d)", i, len(chunks), len(chunk))
        try:
            result = summarizer(chunk, max_length=120, min_length=20, do_sample=False)
        except Exception:
            # Best-effort: log the failure and keep the remaining chunks.
            logger.exception("Error summarizing chunk %d", i)
            summaries.append("...")
        else:
            summaries.append(result[0]["summary_text"])
            logger.info("Chunk %d summary: %s", i, result[0]["summary_text"])
    final_summary = "\n".join(summaries)
    logger.info("=== Final Summary Output ===")
    logger.info(final_summary)
    return final_summary