Spaces:

Azidan
/

textSum

Running

App Files Files Community

Azidan committed 7 days ago

Commit

aba9518

verified ·

1 Parent(s): 641953a

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -11

app.py CHANGED Viewed

@@ -88,38 +88,59 @@ def generate_ai_advice(summary: str) -> str:
     advice_md += "\n**Pro tip**: Combine these with spaced repetition (Anki / Quizlet) for long-term retention!"
     return advice_md
 def summarize_long_text(text: str) -> str:
-    """Summarize long text in chunks + add AI study advice."""
     if not text or len(text.strip()) == 0:
         return "No text provided."
     chunks = chunk_text(text)
     summaries = []
-    for chunk in chunks:
         try:
             summary = summarizer(
                 chunk,
-                max_length=150,
-                min_length=40,
                 do_sample=False
             )[0]["summary_text"]
-            summaries.append(summary)
         except Exception:
             pass  # skip problematic chunks
-    merged = " ".join(summaries)
-    cleaned_summary = clean_text(merged)
-    ai_advice = generate_ai_advice(cleaned_summary)
-    return cleaned_summary + ai_advice
 def read_pdf(file) -> str:
     """Safely extract text from PDF."""
     try:
         reader = PdfReader(file)
         pages = [page.extract_text() or "" for page in reader.pages]
-        return " ".join(pages)
     except Exception as e:
         return f"PDF read error: {str(e)}"
@@ -159,7 +180,8 @@ with gr.Blocks() as demo:
         "• Handles very long documents (thousands of words)\n"
         "• Supports **PDF** upload or direct paste\n"
         "• Runs on CPU – works on free hardware\n"
-        "• Gives you **5 AI-generated study tips** tailored to the content\n"
         "• Download result as .txt file"
     )

     advice_md += "\n**Pro tip**: Combine these with spaced repetition (Anki / Quizlet) for long-term retention!"
     return advice_md
# Line openers that suggest a heading: a section number ("3 ", "3. ") or a
# capitalised word followed by another capitalised word ("Data Analysis").
# Compiled once here instead of being looked up for every line.
_HEADING_START = re.compile(r"\d+\.?\s|[A-Z][a-z]+\s[A-Z]")


def extract_possible_headings(text: str) -> str:
    """Collect lines that look like titles or subtitles into a Markdown list.

    A line is kept when, after stripping whitespace, it is non-empty, shorter
    than 80 characters, and either entirely upper-case, or starts with a
    section number, or starts with two capitalised words. This is only a
    heuristic, so ordinary short sentences can slip through.

    Repeated lines are listed once, in order of first appearance, so text
    that recurs throughout the input (for example a running header) does not
    flood the list.

    Args:
        text: Raw text with its line breaks preserved.

    Returns:
        A Markdown section (heading, bullet list, horizontal rule), or an
        empty string when ``text`` is empty or no line qualifies.
    """
    if not text:
        return ""
    candidates = (line.strip() for line in text.split("\n"))
    # dict.fromkeys drops repeats while keeping first-seen order.
    headings = list(dict.fromkeys(
        c for c in candidates
        if c and len(c) < 80 and (c.isupper() or _HEADING_START.match(c))
    ))
    if not headings:
        return ""
    # Each bullet is preceded by its own newline, which reproduces the
    # original layout exactly (header, blank lines, then the list).
    bullets = "".join(f"\n- {h}" for h in headings)
    return "### Extracted Possible Headings/Subtitles\n\n" + bullets + "\n\n---\n\n"
 def summarize_long_text(text: str) -> str:
     """Summarize long text chunk by chunk and append AI study advice.

     The result is Markdown: an optional list of detected headings, one
     bullet per successfully summarized chunk, then the study advice.

     Args:
         text: The full document text.

     Returns:
         The Markdown report; ``"No text provided."`` when ``text`` is empty
         or whitespace-only; or a short notice (after any detected headings)
         when no chunk could be summarized.
     """
     if not text or not text.strip():
         return "No text provided."
     import logging  # function-scope so this block needs no file-level import
     log = logging.getLogger(__name__)
     # Headings are detected on the raw text, before it is split into chunks.
     headings_section = extract_possible_headings(text)
     bullets = []
     for i, chunk in enumerate(chunk_text(text), 1):
         try:
             summary = summarizer(
                 chunk,
                 max_length=250,  # Increased for longer summaries
                 min_length=80,   # Increased for more detail
                 do_sample=False,
             )[0]["summary_text"]
             cleaned = clean_text(summary)
         except Exception:
             # Best effort: one bad chunk must not abort the whole document,
             # but record the failure instead of hiding it.
             log.exception("Skipping chunk %d: summarization failed", i)
             continue
         bullets.append(f"- **Chunk {i} Summary:** {cleaned}\n")
     if not bullets:
         # Avoid returning an empty summary section followed by advice that
         # was generated from no content at all.
         return headings_section + "No summary could be generated for this text."
     summary_md = "### Detailed Summary (in Bullet Points)\n\n" + "".join(bullets)
     # NOTE(review): the advice generator receives the formatted Markdown,
     # section header and "Chunk N Summary" labels included - confirm that
     # generate_ai_advice is not skewed by those extra words.
     ai_advice = generate_ai_advice(summary_md)
     return headings_section + summary_md + ai_advice
 def read_pdf(file) -> str:
     """Return the text of every page of a PDF, joined with newlines.

     Newlines (rather than spaces) separate the pages so that line breaks
     survive for later heading detection. A page whose extraction yields
     nothing contributes an empty string. Any failure is reported as a
     ``"PDF read error: ..."`` string instead of being raised.
     """
     try:
         document = PdfReader(file)
         return "\n".join(page.extract_text() or "" for page in document.pages)
     except Exception as exc:
         return f"PDF read error: {exc!s}"
         "• Handles very long documents (thousands of words)\n"
         "• Supports **PDF** upload or direct paste\n"
         "• Runs on CPU – works on free hardware\n"
+        "• Gives you **longer, bullet-point summaries** with possible headings/subtitles\n"
+        "• Includes **5 AI-generated study tips** tailored to the content\n"
         "• Download result as .txt file"
     )