File size: 1,624 Bytes
b744819
b0a67bf
6c145e9
b744819
6c145e9
c4b095b
 
 
 
 
 
 
 
 
 
 
 
 
6c145e9
b744819
6c145e9
 
 
 
c4b095b
6c145e9
 
 
b744819
6c145e9
db17d73
c4b095b
08a6381
b744819
 
6c145e9
 
c4b095b
123d640
c4b095b
6c145e9
c4b095b
6c145e9
 
 
 
 
 
08a6381
6c145e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import logging

MAX_INPUT_CHARS = 6000  # Limit text length

def chunk_text(text, max_len=700):
    """Split *text* into whitespace-delimited chunks of roughly *max_len* chars.

    Words are accumulated until the joined chunk reaches at least ``max_len``
    characters; a chunk therefore ends on a word boundary and may slightly
    exceed ``max_len``. Returns at most the first 3 chunks (the downstream
    summarizer only processes three). An empty/whitespace-only input yields [].

    Args:
        text: Input text to split.
        max_len: Threshold (in characters) at which a chunk is flushed.

    Returns:
        list[str]: Up to three space-joined chunks, in original word order.
    """
    words = text.split()
    chunks = []
    current = []
    # Track len(" ".join(current)) incrementally instead of re-joining every
    # iteration, which would make the loop quadratic in the input length.
    current_len = 0

    for word in words:
        # The joined length grows by the word, plus one separating space
        # when the chunk already has content.
        current_len += len(word) if not current else len(word) + 1
        current.append(word)
        if current_len >= max_len:
            chunks.append(" ".join(current))
            current = []
            current_len = 0

    if current:
        chunks.append(" ".join(current))

    return chunks[:3]  # Limit to first 3 chunks

def _get_summarizer():
    """Build the Hugging Face summarization pipeline once and reuse it.

    Constructing the pipeline loads the full BART model, which is expensive;
    the original code rebuilt it on every request. The instance is memoized
    on the function object so repeated calls are cheap. The import stays
    lazy so this module can be imported without ``transformers`` installed.
    """
    if not hasattr(_get_summarizer, "_cache"):
        from transformers import pipeline
        _get_summarizer._cache = pipeline(
            "summarization", model="philschmid/bart-large-cnn-samsum"
        )
    return _get_summarizer._cache

def get_summary(text):
    """Summarize *text* chunk-by-chunk and return the joined summary.

    The text is split via ``chunk_text`` (at most 3 chunks); each chunk is
    summarized independently and the chunk summaries are joined with
    newlines. A chunk that fails to summarize is replaced by "..." rather
    than aborting the whole request (best-effort behavior).

    Args:
        text: Raw page text to summarize.

    Returns:
        str: Newline-joined per-chunk summaries.

    Raises:
        ValueError: If ``len(text)`` exceeds ``MAX_INPUT_CHARS``.
    """
    logging.info("=== AutoTLDR Incoming Request ===")
    # Lazy %-style args avoid formatting work when the log level is disabled.
    logging.info("Raw input (first 500 chars):\n%s", text[:500])
    logging.info("Total input length: %d characters", len(text))

    if len(text) > MAX_INPUT_CHARS:
        logging.warning("Input too long! Aborting.")
        raise ValueError("Text too long to summarize. Please try a shorter page.")

    summarizer = _get_summarizer()

    summaries = []
    chunks = chunk_text(text)

    for i, chunk in enumerate(chunks):
        logging.info("Summarizing chunk %d/%d (length: %d)", i + 1, len(chunks), len(chunk))
        try:
            result = summarizer(chunk, max_length=120, min_length=20, do_sample=False)
            summaries.append(result[0]['summary_text'])
            logging.info("Chunk %d summary: %s", i + 1, result[0]['summary_text'])
        except Exception:
            # Best-effort: log the traceback and emit a placeholder so the
            # remaining chunks still get summarized.
            logging.exception("Error summarizing chunk %d", i + 1)
            summaries.append("...")

    final_summary = "\n".join(summaries)
    logging.info("=== Final Summary Output ===")
    logging.info(final_summary)

    return final_summary