Spaces:

tejovanth
/

examplethree

Sleeping

App Files Community

tejovanth committed on Apr 18, 2025

Commit

d65c22a

verified ·

1 Parent(s): bbd6510

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -13

app.py CHANGED Viewed

@@ -5,21 +5,20 @@ from transformers import pipeline
 import time, logging
 import re
 import tempfile
-import os
-# === Setup ===
 logging.basicConfig(level=logging.ERROR)
 device = -1  # CPU
 print("⚠️ CPU-only mode. Expect ~20–30s for large documents.")
-# === Load summarization model ===
 try:
     summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
 except Exception as e:
     print(f"❌ Model loading failed: {e}")
     exit(1)
-# === Text Preprocessing ===
 def smart_chunk(text, max_chunk_len=2000):
     sentences = re.split(r'(?<=[.!?]) +', text)
     chunks, current_chunk = [], ""
@@ -33,7 +32,7 @@ def smart_chunk(text, max_chunk_len=2000):
         chunks.append(current_chunk.strip())
     return chunks
-# === Summarization per file ===
 def summarize_file_bytes(file_bytes, filename):
     start_time = time.time()
     try:
@@ -48,7 +47,7 @@ def summarize_file_bytes(file_bytes, filename):
     if not text:
         return f"{filename}: ❌ No text found.", ""
-    text = text[:300000]
     chunks = smart_chunk(text)
     summaries, line_count = [], 0
@@ -69,19 +68,19 @@ def summarize_file_bytes(file_bytes, filename):
     summary_text = f"📄 **{filename}**\n**Characters**: {len(text)} | **Time**: {total_time:.2f}s\n\n" + "\n\n".join(summaries)
     return summary_text, summary_text
-# === Gradio Wrapper ===
-def summarize_multiple_files(files):
     all_summaries = []
     combined_text = ""
-    for file_obj in files:
         file_bytes = file_obj.read()
         filename = file_obj.name.split("/")[-1]
-        summary, raw = summarize_file_bytes(file_bytes, filename)
         all_summaries.append(summary)
-        combined_text += f"\n\n{raw}\n" + "="*60 + "\n"
-    # Write summary to temp .txt file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as f:
         f.write(combined_text)
         summary_file_path = f.name
@@ -91,7 +90,7 @@ def summarize_multiple_files(files):
 # === Gradio Interface ===
 demo = gr.Interface(
     fn=summarize_multiple_files,
-    inputs=gr.File(label="📄 Upload PDF or TXT files", file_types=[".pdf", ".txt"], type="file", file_count="multiple"),
     outputs=[
         gr.Textbox(label="📝 Summary", lines=30, max_lines=100),
         gr.File(label="📥 Download Summary as .txt")
@@ -100,6 +99,7 @@ demo = gr.Interface(
     description="Summarizes multiple PDFs or TXTs into at least 15 lines each. Download final output as .txt. CPU-optimized."
 )
 if __name__ == "__main__":
     try:
         demo.launch(share=False, server_port=7860)
@@ -110,3 +110,4 @@ if __name__ == "__main__":

 import time, logging
 import re
 import tempfile
+# === Setup Logging and Device ===
 logging.basicConfig(level=logging.ERROR)
 device = -1  # CPU
 print("⚠️ CPU-only mode. Expect ~20–30s for large documents.")
+# === Load the Summarization Model ===
 try:
     summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
 except Exception as e:
     print(f"❌ Model loading failed: {e}")
     exit(1)
+# === Sentence-Smart Chunking ===
 def smart_chunk(text, max_chunk_len=2000):
     sentences = re.split(r'(?<=[.!?]) +', text)
     chunks, current_chunk = [], ""
         chunks.append(current_chunk.strip())
     return chunks
+# === Summarization for a Single File ===
 def summarize_file_bytes(file_bytes, filename):
     start_time = time.time()
     try:
     if not text:
         return f"{filename}: ❌ No text found.", ""
+    text = text[:300000]  # Trim to model-safe size
     chunks = smart_chunk(text)
     summaries, line_count = [], 0
     summary_text = f"📄 **{filename}**\n**Characters**: {len(text)} | **Time**: {total_time:.2f}s\n\n" + "\n\n".join(summaries)
     return summary_text, summary_text
+# === Function for Multiple Files ===
+def summarize_multiple_files(file_objs):
     all_summaries = []
     combined_text = ""
+    for file_obj in file_objs:
         file_bytes = file_obj.read()
         filename = file_obj.name.split("/")[-1]
+        summary, raw_text = summarize_file_bytes(file_bytes, filename)
         all_summaries.append(summary)
+        combined_text += f"\n\n{raw_text}\n" + "="*60 + "\n"
+    # Save combined summary to a temp .txt file
     with tempfile.NamedTemporaryFile(delete=False, suffix=".txt", mode="w", encoding="utf-8") as f:
         f.write(combined_text)
         summary_file_path = f.name
 # === Gradio Interface ===
 demo = gr.Interface(
     fn=summarize_multiple_files,
+    inputs=gr.File(label="📄 Upload PDF or TXT files", file_types=[".pdf", ".txt"], type="binary", file_count="multiple"),
     outputs=[
         gr.Textbox(label="📝 Summary", lines=30, max_lines=100),
         gr.File(label="📥 Download Summary as .txt")
     description="Summarizes multiple PDFs or TXTs into at least 15 lines each. Download final output as .txt. CPU-optimized."
 )
+# === Run the App ===
 if __name__ == "__main__":
     try:
         demo.launch(share=False, server_port=7860)