File size: 1,624 Bytes
b744819
b0a67bf
6c145e9
b744819
6c145e9
c4b095b
 
 
 
 
 
 
 
 
 
 
 
 
6c145e9
b744819
6c145e9
 
 
 
c4b095b
6c145e9
 
 
b744819
6c145e9
db17d73
c4b095b
08a6381
b744819
 
6c145e9
 
c4b095b
123d640
c4b095b
6c145e9
c4b095b
6c145e9
 
 
 
 
 
08a6381
6c145e9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import logging

MAX_INPUT_CHARS = 6000  # Limit text length

def chunk_text(text, max_len=700):
    """Split *text* into whitespace-delimited chunks of roughly *max_len* chars.

    Words are accumulated until the joined chunk reaches at least ``max_len``
    characters; a chunk therefore ends on a word boundary and may slightly
    exceed ``max_len``. Returns at most the first 3 chunks (the downstream
    summarizer only processes three). An empty/whitespace-only input yields [].

    Args:
        text: Input text to split.
        max_len: Threshold (in characters) at which a chunk is flushed.

    Returns:
        list[str]: Up to three space-joined chunks, in original word order.
    """
    words = text.split()
    chunks = []
    current = []
    # Track len(" ".join(current)) incrementally instead of re-joining every
    # iteration, which would make the loop quadratic in the input length.
    current_len = 0

    for word in words:
        # The joined length grows by the word, plus one separating space
        # when the chunk already has content.
        current_len += len(word) if not current else len(word) + 1
        current.append(word)
        if current_len >= max_len:
            chunks.append(" ".join(current))
            current = []
            current_len = 0

    if current:
        chunks.append(" ".join(current))

    return chunks[:3]  # Limit to first 3 chunks

def _get_summarizer():
    """Build the Hugging Face summarization pipeline once and reuse it.

    Constructing the pipeline loads the full BART model, which is expensive;
    the original code rebuilt it on every request. The instance is memoized
    on the function object so repeated calls are cheap. The import stays
    lazy so this module can be imported without ``transformers`` installed.
    """
    if not hasattr(_get_summarizer, "_cache"):
        from transformers import pipeline
        _get_summarizer._cache = pipeline(
            "summarization", model="philschmid/bart-large-cnn-samsum"
        )
    return _get_summarizer._cache

def get_summary(text):
    """Summarize *text* chunk-by-chunk and return the joined summary.

    The text is split via ``chunk_text`` (at most 3 chunks); each chunk is
    summarized independently and the chunk summaries are joined with
    newlines. A chunk that fails to summarize is replaced by "..." rather
    than aborting the whole request (best-effort behavior).

    Args:
        text: Raw page text to summarize.

    Returns:
        str: Newline-joined per-chunk summaries.

    Raises:
        ValueError: If ``len(text)`` exceeds ``MAX_INPUT_CHARS``.
    """
    logging.info("=== AutoTLDR Incoming Request ===")
    # Lazy %-style args avoid formatting work when the log level is disabled.
    logging.info("Raw input (first 500 chars):\n%s", text[:500])
    logging.info("Total input length: %d characters", len(text))

    if len(text) > MAX_INPUT_CHARS:
        logging.warning("Input too long! Aborting.")
        raise ValueError("Text too long to summarize. Please try a shorter page.")

    summarizer = _get_summarizer()

    summaries = []
    chunks = chunk_text(text)

    for i, chunk in enumerate(chunks):
        logging.info("Summarizing chunk %d/%d (length: %d)", i + 1, len(chunks), len(chunk))
        try:
            result = summarizer(chunk, max_length=120, min_length=20, do_sample=False)
            summaries.append(result[0]['summary_text'])
            logging.info("Chunk %d summary: %s", i + 1, result[0]['summary_text'])
        except Exception:
            # Best-effort: log the traceback and emit a placeholder so the
            # remaining chunks still get summarized.
            logging.exception("Error summarizing chunk %d", i + 1)
            summaries.append("...")

    final_summary = "\n".join(summaries)
    logging.info("=== Final Summary Output ===")
    logging.info(final_summary)

    return final_summary