Spaces:

tejovanth
/

example_five

Sleeping

App Files Files Community

tejovanth committed on Apr 25, 2025

Commit

2e40204

verified ·

1 Parent(s): ef48701

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -8

app.py CHANGED Viewed

@@ -1,24 +1,34 @@
 import gradio as gr
-import fitz
 import torch
 from transformers import pipeline
 import time, logging, re
 import matplotlib
-matplotlib.use('Agg')  # Use non-interactive backend for headless environments
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
 logging.basicConfig(level=logging.ERROR)
 device = -1  # CPU-only
 print("⚠️ CPU-only. Expect ~20–30s for 300,000 chars.")
 try:
     summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
 except Exception as e:
-    print(f"❌ Model loading failed: {str(e)}")
     exit(1)
 def visualize_chunk_status(chunk_data):
     status_colors = {'summarized': 'green', 'skipped': 'orange', 'error': 'red'}
     labels = [f"C{i['chunk']}" for i in chunk_data]
@@ -34,9 +44,10 @@ def visualize_chunk_status(chunk_data):
     buf = io.BytesIO()
     plt.savefig(buf, format='png')
     buf.seek(0)
-    plt.close(fig)  # Release memory
     return Image.open(buf)
 def summarize_file(file_bytes):
     start = time.time()
     chunk_info = []
@@ -85,19 +96,56 @@ def summarize_file(file_bytes):
     image = visualize_chunk_status(chunk_info)
     return final_summary, image
-demo = gr.Interface(
     fn=summarize_file,
     inputs=gr.File(label="📄 Upload PDF", type="binary"),
     outputs=[
         gr.Textbox(label="📝 Summarized Output"),
         gr.Image(label="📊 Visual Process Flow", type="pil")
     ],
-    title="AI-Powered PDF Summarizer",
     description="Summarizes long PDFs (up to 300,000 characters) and visualizes chunk-level automation status."
 )
 if __name__ == "__main__":
     try:
-        demo.launch(share=False, server_port=7860)
     except Exception as e:
-        print(f"❌ Gradio launch failed: {str(e)}")

 import gradio as gr
+import fitz  # PyMuPDF
 import torch
 from transformers import pipeline
 import time, logging, re
 import matplotlib
+matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
+# Logging and setup
 logging.basicConfig(level=logging.ERROR)
 device = -1  # CPU-only
 print("⚠️ CPU-only. Expect ~20–30s for 300,000 chars.")
+# Load summarizer
 try:
     summarizer = pipeline("summarization", model="t5-small", device=device, torch_dtype=torch.float32)
 except Exception as e:
+    print(f"❌ Summarizer model loading failed: {str(e)}")
     exit(1)
+# Load the question-answering model once at import time. The Q&A feature
+# depends on it, so a load failure is reported and startup is aborted.
+try:
+    qa_pipeline = pipeline(
+        "question-answering",
+        model="distilbert-base-uncased-distilled-squad",
+        device=device,
+    )
+except Exception as e:
+    print(f"❌ QA model loading failed: {e}")
+    # The bare `exit` helper is injected by the `site` module for interactive
+    # use and is not guaranteed to exist; SystemExit is always available.
+    raise SystemExit(1)
+# Function: Visualize chunk processing status
 def visualize_chunk_status(chunk_data):
     status_colors = {'summarized': 'green', 'skipped': 'orange', 'error': 'red'}
     labels = [f"C{i['chunk']}" for i in chunk_data]
     buf = io.BytesIO()
     plt.savefig(buf, format='png')
     buf.seek(0)
+    plt.close(fig)
     return Image.open(buf)
+# Function: Summarization
 def summarize_file(file_bytes):
     start = time.time()
     chunk_info = []
     image = visualize_chunk_status(chunk_info)
     return final_summary, image
+# Function: QA from PDF
+def answer_question(file_bytes, question):
+    """Answer a natural-language question from the text of an uploaded PDF.
+
+    Args:
+        file_bytes: Raw PDF content (the Q&A interface passes the upload
+            from ``gr.File(type="binary")``). Presumably ``None`` or empty
+            when nothing was uploaded -- confirm against the Gradio version
+            in use.
+        question: The user's question. ``None`` or a blank string is
+            rejected with a warning.
+
+    Returns:
+        A string: either the answer and its score formatted as Markdown, or
+        a warning/error message. Exceptions raised during text extraction
+        or inference are caught and reported in the returned string.
+    """
+    # Validate the cheap inputs first so a blank question never costs a
+    # full PDF parse, and a None question does not raise on `.strip()`.
+    if not question or not question.strip():
+        return "⚠️ Please enter a valid question."
+    if not file_bytes:
+        return "⚠️ Please upload a PDF file."
+    try:
+        # The context manager closes the document even if extraction fails.
+        with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+            text = "".join(page.get_text("text") for page in doc)
+        # Collapse every run of whitespace into a single space.
+        text = re.sub(r"\s+", " ", text).strip()
+        # Keep only ASCII characters (code points below 128).
+        text = text.encode("ascii", errors="ignore").decode("ascii")
+        # Cap the context at 300,000 characters.
+        context = text[:300000]
+    except Exception as e:
+        return f"❌ Text extraction failed: {e}"
+    # A PDF with no extractable text would otherwise surface as an opaque
+    # pipeline error; report it explicitly instead.
+    if not context:
+        return "⚠️ No extractable text found in this PDF."
+    try:
+        result = qa_pipeline(question=question, context=context)
+        return f"**Answer**: {result['answer']}\n\n**Score**: {result['score']:.2f}"
+    except Exception as e:
+        return f"❌ QA failed: {e}"
+# Gradio UI for Summarizer
+summarizer_ui = gr.Interface(
     fn=summarize_file,
     inputs=gr.File(label="📄 Upload PDF", type="binary"),
     outputs=[
         gr.Textbox(label="📝 Summarized Output"),
         gr.Image(label="📊 Visual Process Flow", type="pil")
     ],
+    title="📝 AI-Powered PDF Summarizer",
     description="Summarizes long PDFs (up to 300,000 characters) and visualizes chunk-level automation status."
 )
+# Q&A interface: a PDF upload plus a free-text question go to
+# answer_question, and its reply is shown in a single text box.
+qa_ui = gr.Interface(
+    title="📚 PDF Q&A Assistant",
+    description="Ask natural language questions based on the uploaded PDF content.",
+    fn=answer_question,
+    inputs=[
+        gr.File(type="binary", label="📄 Upload PDF"),
+        gr.Textbox(label="❓ Ask a Question"),
+    ],
+    outputs=gr.Textbox(label="🔍 Answer"),
+)
+# Combine both in tabs
 if __name__ == "__main__":
     try:
+        gr.TabbedInterface(
+            [summarizer_ui, qa_ui],
+            ["📝 Summarizer", "❓ Q&A Assistant"]
+        ).launch(server_port=7860)
     except Exception as e:
+        print(f"❌ Gradio launch failed: {str(e)}")