Spaces:

Azidan
/

textSum

Running

App Files Files Community

Azidan committed 6 days ago

Commit

bb331f0

verified ·

1 Parent(s): aba9518

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -12

app.py CHANGED Viewed

@@ -7,7 +7,8 @@ import tempfile
 # =========================
 # Model setup (CPU-safe)
 # =========================
-MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 summarizer = pipeline(
     "summarization",
@@ -16,10 +17,10 @@ summarizer = pipeline(
     device=-1  # CPU only
 )
-# Better AI advice generator - flan-t5-base is still quite CPU friendly
 advice_generator = pipeline(
     "text2text-generation",
-    model="google/flan-t5-base",
     device=-1  # CPU only
 )
@@ -101,28 +102,32 @@ def extract_possible_headings(text: str) -> str:
         return "### Extracted Possible Headings/Subtitles\n\n" + "\n- ".join([''] + headings) + "\n\n---\n\n"
     return ""
-def summarize_long_text(text: str) -> str:
     """Summarize long text in chunks + add AI study advice.
     Now with longer summaries per chunk and formatted as bullet points."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     # Extract possible headings first
     headings_section = extract_possible_headings(text)
     chunks = chunk_text(text)
-    summaries = []
-    for i, chunk in enumerate(chunks, 1):
         try:
             summary = summarizer(
                 chunk,
-                max_length=250,  # Increased for longer summaries
-                min_length=80,   # Increased for more detail
                 do_sample=False
             )[0]["summary_text"]
             cleaned = clean_text(summary)
-            summaries.append(f"**Chunk {i} Summary:** {cleaned}")
         except Exception:
             pass  # skip problematic chunks
@@ -131,8 +136,10 @@ def summarize_long_text(text: str) -> str:
     for s in summaries:
         summary_md += f"- {s}\n"
     ai_advice = generate_ai_advice(summary_md)  # Use the bulleted summary for advice generation
     return headings_section + summary_md + ai_advice
 def read_pdf(file) -> str:
@@ -156,9 +163,10 @@ def create_download_file(content: str) -> str:
 # =========================
 # Main handler
 # =========================
-def process_input(text: str, file):
     input_text = ""
     if file is not None:
         input_text = read_pdf(file)
     elif text.strip():
@@ -166,7 +174,7 @@ def process_input(text: str, file):
     else:
         return "Please paste some text or upload a PDF.", None
-    result = summarize_long_text(input_text)
     download_path = create_download_file(result)
     return result, download_path
@@ -182,7 +190,8 @@ with gr.Blocks() as demo:
         "• Runs on CPU – works on free hardware\n"
         "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
         "• Includes **5 AI-generated study tips** tailored to the content\n"
-        "• Download result as .txt file"
     )
     with gr.Row():

 # =========================
 # Model setup (CPU-safe)
 # =========================
+# Use smaller, faster models to speed up processing
+MODEL_NAME = "sshleifer/distilbart-cnn-6-6"  # Smaller than 12-6, faster on CPU
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 summarizer = pipeline(
     "summarization",
     device=-1  # CPU only
 )
+# Use smaller flan-t5-small for faster advice generation
 advice_generator = pipeline(
     "text2text-generation",
+    model="google/flan-t5-small",
     device=-1  # CPU only
 )
         return "### Extracted Possible Headings/Subtitles\n\n" + "\n- ".join([''] + headings) + "\n\n---\n\n"
     return ""
+def summarize_long_text(text: str, progress=gr.Progress()) -> str:
     """Summarize long text in chunks + add AI study advice.
     Now with longer summaries per chunk and formatted as bullet points."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
+    progress(0, desc="Extracting headings...")
     # Extract possible headings first
     headings_section = extract_possible_headings(text)
+    progress(0.1, desc="Chunking text...")
     chunks = chunk_text(text)
+    summaries = []
+    progress(0.2, desc="Summarizing chunks...")
+    for i in progress.tqdm(range(len(chunks))):
+        chunk = chunks[i]
         try:
             summary = summarizer(
                 chunk,
+                max_length=200,  # Reduced slightly for speed (compromise between length and time)
+                min_length=60,   # Reduced for speed
                 do_sample=False
             )[0]["summary_text"]
             cleaned = clean_text(summary)
+            summaries.append(f"**Chunk {i+1} Summary:** {cleaned}")
         except Exception:
             pass  # skip problematic chunks
     for s in summaries:
         summary_md += f"- {s}\n"
+    progress(0.8, desc="Generating AI advice...")
     ai_advice = generate_ai_advice(summary_md)  # Use the bulleted summary for advice generation
+    progress(1, desc="Done!")
     return headings_section + summary_md + ai_advice
 def read_pdf(file) -> str:
 # =========================
 # Main handler
 # =========================
+def process_input(text: str, file, progress=gr.Progress()):
     input_text = ""
+    progress(0, desc="Reading input...")
     if file is not None:
         input_text = read_pdf(file)
     elif text.strip():
     else:
         return "Please paste some text or upload a PDF.", None
+    result = summarize_long_text(input_text, progress)
     download_path = create_download_file(result)
     return result, download_path
         "• Runs on CPU – works on free hardware\n"
         "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
         "• Includes **5 AI-generated study tips** tailored to the content\n"
+        "• Download result as .txt file\n"
+        "**Note**: Processing may take time for long documents on CPU (initial model load + inference). Please be patient!"
     )
     with gr.Row():