Spaces:

Azidan
/

text-sum

Sleeping

App Files Files Community

Azidan committed on Jan 12

Commit

82b870b

verified ·

1 Parent(s): b512372

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -34

app.py CHANGED Viewed

@@ -6,40 +6,33 @@ from docx import Document
 import io
 import os
-# Load the summarization model once
 # Device index handed to the pipeline: 0 when CUDA is available, otherwise -1
 # (the print below reports any non-zero value as "CPU").
 device = 0 if torch.cuda.is_available() else -1
 print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
 # Module-level summarization pipeline; summarize() calls this object directly.
 summarizer = pipeline(
     "summarization",
-    model="sshleifer/distilbart-cnn-12-6",  # Fast & good for CPU
     device=device
 )
 # Return the plain text of an uploaded .pdf, .docx or .txt file.
 #
 # file_path: path of the uploaded file, or None when nothing was uploaded.
 # Returns "" for None, the extracted text on success, and otherwise a
 # human-readable message (unsupported extension or read error). Failures are
 # reported through the returned string; this function does not raise.
 def extract_text(file_path):
     if file_path is None:
         return ""
-    # file_path is a string (temp path) or NamedString-like object; convert to str
-    file_path = str(file_path)  # Ensure it's a plain string
     # The parser is chosen from the lower-cased extension of the base name.
     filename = os.path.basename(file_path).lower()
     try:
         if filename.endswith('.pdf'):
             with pdfplumber.open(file_path) as pdf:
                 # A page whose extract_text() is falsy contributes an empty line.
                 return "\n".join(page.extract_text() or "" for page in pdf.pages)
         elif filename.endswith('.docx'):
             doc = Document(file_path)
             # Whitespace-only paragraphs are dropped.
             return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
         elif filename.endswith('.txt'):
             # errors="replace" keeps reading past bytes that are not valid UTF-8.
             with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                 return f.read()
         else:
             return "Unsupported file. Please use .pdf, .docx, or .txt"
     except Exception as e:
         # Any failure while opening/parsing is turned into a message string.
         return f"Error reading file: {str(e)}"
@@ -54,13 +47,20 @@ def summarize(input_text, file_path, detail_level):
     words = len(text.split())
     if words < 100:
-        return text  # Too short → return as-is
-    # Convert slider to target ratio
     target_ratio = detail_level
     target_length = int(words * target_ratio)
-    max_l = max(500, min(1400, target_length + 250))
-    min_l = max(300, int(target_length * 0.65))
     try:
         result = summarizer(
@@ -75,34 +75,20 @@ def summarize(input_text, file_path, detail_level):
         )
         return result[0]['summary_text']
     except Exception as e:
-        return f"Error during summarization: {str(e)}\n(Try shorter text or lower detail level)"
-# Create Gradio interface
 # Gradio UI wired to summarize(). The three inputs are listed in the same order
 # as summarize's parameters (input_text, file_path, detail_level).
 interface = gr.Interface(
     fn=summarize,
     inputs=[
-        gr.Textbox(
-            lines=12,
-            placeholder="Paste your lecture text here (or use the upload below)...",
-            label="Lecture Text (Paste)"
-        ),
-        gr.File(
-            file_types=[".pdf", ".docx", ".txt"],
-            label="Upload Lecture File"
-        ),
-        gr.Slider(
-            minimum=0.15,
-            maximum=0.60,
-            value=0.32,
-            step=0.01,
-            label="Detail Level (higher = longer, more detailed summary)"
-        )
     ],
     outputs=gr.Textbox(label="Generated Summary"),
     title="Lecture Summarizer",
-    description="Upload a lecture file (PDF/DOCX/TXT) or paste text. Adjust the slider for shorter or more detailed summaries.",
     flagging_mode="never",
 )
-# Launch with theme
 # Runs at import time: the script starts the app as soon as it is executed.
 interface.launch(theme="soft")

 import io
 import os
+# Load model
 # Device index handed to the pipeline: 0 when CUDA is available, otherwise -1
 # (the print below reports any non-zero value as "CPU").
 device = 0 if torch.cuda.is_available() else -1
 print(f"Using device: {'GPU' if device == 0 else 'CPU'}")
 # Module-level summarization pipeline; summarize() calls this object directly.
 summarizer = pipeline(
     "summarization",
+    model="sshleifer/distilbart-cnn-12-6",
     device=device
 )
 # Return the plain text of an uploaded .pdf, .docx or .txt file.
 #
 # file_path: path of the uploaded file, or None when nothing was uploaded.
 # Returns "" for None, the extracted text on success, and otherwise a
 # human-readable message (unsupported extension or read error). Failures are
 # reported through the returned string; this function does not raise.
 def extract_text(file_path):
     if file_path is None:
         return ""
+    file_path = str(file_path)
     # The parser is chosen from the lower-cased extension of the base name.
     filename = os.path.basename(file_path).lower()
     try:
         if filename.endswith('.pdf'):
             with pdfplumber.open(file_path) as pdf:
                 # A page whose extract_text() is falsy contributes an empty line.
                 return "\n".join(page.extract_text() or "" for page in pdf.pages)
         elif filename.endswith('.docx'):
             doc = Document(file_path)
             # Whitespace-only paragraphs are dropped.
             return "\n".join(para.text for para in doc.paragraphs if para.text.strip())
         elif filename.endswith('.txt'):
             # errors="replace" keeps reading past bytes that are not valid UTF-8.
             with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                 return f.read()
         else:
             return "Unsupported file. Please use .pdf, .docx, or .txt"
     except Exception as e:
         # Any failure while opening/parsing is turned into a message string.
         return f"Error reading file: {str(e)}"
     words = len(text.split())
     if words < 100:
+        return text
     target_ratio = detail_level
     target_length = int(words * target_ratio)
+    # Safeguards: cap lengths to prevent min > max
+    max_l = max(500, min(1400, target_length + 250))  # Hard cap at 1400 (model limit-ish)
+    min_l = max(100, int(target_length * 0.65))
+    # Force min_l < max_l if overflow
+    if min_l >= max_l:
+        min_l = max_l - 100  # Reasonable fallback
+        if min_l < 100:
+            min_l = 100
     try:
         result = summarizer(
         )
         return result[0]['summary_text']
     except Exception as e:
+        return f"Error during summarization: {str(e)}\n(Try shorter text, lower detail level, or paste instead of upload.)"
+# Interface
 # Gradio UI wired to summarize(). The three inputs are listed in the same order
 # as summarize's parameters (input_text, file_path, detail_level); the slider
 # value is the ratio summarize() multiplies the word count by.
 interface = gr.Interface(
     fn=summarize,
     inputs=[
+        gr.Textbox(lines=12, placeholder="Paste your lecture text here...", label="Lecture Text (Paste)"),
+        gr.File(file_types=[".pdf", ".docx", ".txt"], label="Upload Lecture File"),
+        gr.Slider(0.15, 0.60, value=0.32, step=0.01, label="Detail Level (higher = longer summary)")
     ],
     outputs=gr.Textbox(label="Generated Summary"),
     title="Lecture Summarizer",
+    description="Upload PDF/DOCX/TXT lecture or paste text. Adjust slider for detail. For very long files, use lower detail or chunk text.",
     flagging_mode="never",
 )
 # Runs at import time: the script starts the app as soon as it is executed.
 interface.launch(theme="soft")