Spaces:

tejovanth
/

examplefour

Runtime error

App Files Files Community

tejovanth committed on Apr 25, 2025

Commit

ac73cea

verified ·

1 Parent(s): 681c001

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -121

app.py CHANGED Viewed

@@ -1,138 +1,81 @@
-import gradio as gr
-import fitz  # PyMuPDF
-import torch
-from transformers import pipeline
-import time, logging, re
-import matplotlib
-matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 import io
 from PIL import Image
-logging.basicConfig(level=logging.ERROR)
-# Set device (CPU or GPU)
-device = 0 if torch.cuda.is_available() else -1
-print(f"🔧 Using {'GPU' if device == 0 else 'CPU'}")
-# Load model
-try:
-    summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", device=device)
-except Exception as e:
-    print(f"❌ Model loading failed: {str(e)}")
-    exit(1)
-def visualize_chunk_status(chunk_data):
-    status_colors = {'summarized': 'green', 'skipped': 'orange', 'error': 'red'}
-    labels = [f"C{i['chunk']}" for i in chunk_data]
-    colors = [status_colors.get(i['status'], 'gray') for i in chunk_data]
-    times = [i.get('time', 0.1) for i in chunk_data]
-    fig, ax = plt.subplots(figsize=(10, 2.5))
-    ax.barh(labels, times, color=colors)
-    ax.set_xlabel("Time (s)")
-    ax.set_title("📊 Chunk Processing Status")
-    plt.tight_layout()
-    buf = io.BytesIO()
-    plt.savefig(buf, format='png')
-    buf.seek(0)
-    plt.close(fig)
-    return Image.open(buf)
-def create_summary_flowchart(summaries):
-    filtered = [
-        s for s in summaries
-        if s.startswith("**Chunk") and "Skipped" not in s and "Error" not in s
-    ]
-    if not filtered:
-        return None
-    fig_height = max(2, len(filtered) * 0.8 + 1)
-    fig, ax = plt.subplots(figsize=(6, fig_height))
     ax.axis('off')
-    ypos = list(range(len(filtered) * 2, 0, -2))
-    boxprops = dict(boxstyle="round,pad=0.5", facecolor="lightblue", edgecolor="black")
-    for i, (y, summary) in enumerate(zip(ypos, filtered)):
-        summary_text = summary.split("**Chunk")[1]
-        summary_text = summary_text.replace("**:", ":").split("\n", 1)[-1].strip()
-        if len(summary_text) > 120:
-            summary_text = summary_text[:120] + "..."
-        ax.text(0.5, y, summary_text, ha='center', va='center', bbox=boxprops, fontsize=9)
-        if i < len(filtered) - 1:
-            ax.annotate('', xy=(0.5, y - 1.2), xytext=(0.5, y - 0.3),
-                        arrowprops=dict(arrowstyle="->", lw=1.5))
     plt.tight_layout()
     buf = io.BytesIO()
     fig.savefig(buf, format='png', bbox_inches='tight')
-    buf.seek(0)
     plt.close(fig)
     return Image.open(buf)
-def summarize_file(file_bytes):
-    start = time.time()
-    chunk_info = []
-    summaries = []
-    try:
-        doc = fitz.open(stream=file_bytes, filetype="pdf")
-        text = "".join(page.get_text("text") for page in doc)
-        text = re.sub(r"\$\s*([^$]+)\s*\$", r"\1", text)
-        text = re.sub(r"\\cap", "intersection", text)
-        text = re.sub(r"\s+", " ", text).strip()
-        text = "".join(c for c in text if ord(c) < 128)
-    except Exception as e:
-        return f"❌ Text extraction failed: {str(e)}", None, None
-    if not text.strip():
-        return "❌ No text found", None, None
-    chunks = [text[i:i+1500] for i in range(0, min(len(text), 30000), 1500)]
-    for i, chunk in enumerate(chunks):
-        chunk_start = time.time()
-        chunk_result = {'chunk': i + 1, 'status': '', 'time': 0}
-        if sum(1 for c in chunk if not c.isalnum()) / len(chunk) > 0.5:
-            summaries.append(f"**Chunk {i+1}**: Skipped (equation-heavy)")
-            chunk_result['status'] = 'skipped'
-        else:
-            try:
-                summary = summarizer(chunk, max_length=80, min_length=15, do_sample=False)[0]['summary_text']
-                summaries.append(f"**Chunk {i+1}**:\n{summary}")
-                chunk_result['status'] = 'summarized'
-            except Exception as e:
-                summaries.append(f"**Chunk {i+1}**: ❌ Error: {str(e)}")
-                chunk_result['status'] = 'error'
-        chunk_result['time'] = time.time() - chunk_start
-        chunk_info.append(chunk_result)
-    final_summary = f"**Processed chunks**: {len(chunks)}\n**Time**: {time.time() - start:.2f}s\n\n" + "\n\n".join(summaries)
-    process_img = visualize_chunk_status(chunk_info)
-    flow_img = create_summary_flowchart(summaries)
-    return final_summary, process_img, flow_img
-demo = gr.Interface(
-    fn=summarize_file,
-    inputs=gr.File(label="📄 Upload PDF", type="binary"),
-    outputs=[
-        gr.Textbox(label="📝 Summary", lines=20),
-        gr.Image(label="📊 Chunk Status", type="pil"),
-        gr.Image(label="🔁 Flow Summary", type="pil")
-    ],
-    title="📘 PDF Summarizer with Visual Flow",
-    description="Summarizes up to 30,000 characters from a PDF. Includes chunk status and flowchart visualizations."
-)
-if __name__ == "__main__":
-    try:
-        demo.launch(share=False, server_port=7860)
-    except Exception as e:
-        print(f"❌ Gradio launch failed: {str(e)}")

 import matplotlib.pyplot as plt
+from matplotlib.patches import FancyBboxPatch, Circle, FancyArrowPatch
 import io
 from PIL import Image
+def create_process_flowchart():  # Draw a fixed 10-node process flowchart with matplotlib and return it as a PIL image (takes no parameters)
+    fig, ax = plt.subplots(figsize=(8, 10))  # 8x10 inch figure with a single axes
     ax.axis('off')  # hide axis lines, ticks and labels
+    # Node table: (label, y, shape, fill color); y decreases as we move down, and every node is centred on x=0.5
+    nodes = [
+        ("Start", 9, "circle", "lightgreen"),
+        ("Load PDF", 8, "box", "lightblue"),
+        ("Extract Text", 7, "box", "lightblue"),
+        ("Text Valid?", 6, "diamond", "lightyellow"),
+        ("Split into Chunks", 5, "box", "lightblue"),
+        ("Process Chunks", 4, "box", "lightblue"),
+        ("Chunk Eligible?", 3, "diamond", "lightyellow"),
+        ("Summarize Chunk", 2.5, "box", "lightblue"),  # NOTE(review): only 0.5 away from each neighbour, unlike the 1.0 spacing elsewhere
+        ("Generate Visualizations", 2, "box", "lightblue"),
+        ("End", 1, "circle", "lightcoral")
+    ]
+    # Draw nodes: each shape gets a patch plus its label centred at (0.5, y)
+    for label, y, shape, color in nodes:
+        if shape == "circle":
+            node = Circle((0.5, y), 0.4, facecolor=color, edgecolor="black")  # NOTE(review): radius is in data units and no equal aspect is set, so this likely renders as an ellipse - confirm
+            ax.add_patch(node)
+            ax.text(0.5, y, label, ha='center', va='center', fontsize=10)
+        elif shape == "box":
+            node = FancyBboxPatch((0.3, y-0.3), 0.4, 0.6, boxstyle="round,pad=0.3",
+                                 facecolor=color, edgecolor="black")  # NOTE(review): pad=0.3 appears to be added around the 0.4x0.6 rectangle in data units, which would make boxes overlap their neighbours - verify the rendered output
+            ax.add_patch(node)
+            ax.text(0.5, y, label, ha='center', va='center', fontsize=10)
+        elif shape == "diamond":
+            points = [(0.5, y+0.4), (0.7, y), (0.5, y-0.4), (0.3, y)]  # top, right, bottom, left vertices
+            node = plt.Polygon(points, facecolor=color, edgecolor="black")
+            ax.add_patch(node)
+            ax.text(0.5, y, label, ha='center', va='center', fontsize=10)
+    # Draw arrows: entries are (start_y, end_y) or (start_y, end_y, x, label); x defaults to 0.5 and label to ""
+    arrows = [
+        (9, 8),  # Start -> Load PDF
+        (8, 7),  # Load PDF -> Extract Text
+        (7, 6),  # Extract Text -> Text Valid?
+        (6, 5),  # Text Valid? -> Split into Chunks (Yes)
+        (6, 1, 0.7, "No"),  # Text Valid? -> End (No)
+        (5, 4),  # Split into Chunks -> Process Chunks
+        (4, 3),  # Process Chunks -> Chunk Eligible?
+        (3, 2.5),  # Chunk Eligible? -> Summarize Chunk (Yes); NOTE(review): with the 0.4 insets applied below this runs from y=2.6 to y=2.9, i.e. upward
+        (3, 2, 0.3, "No"),  # Chunk Eligible? -> Generate Visualizations (No)
+        (2.5, 2),  # Summarize Chunk -> Generate Visualizations; NOTE(review): runs from y=2.1 to y=2.4 after the insets, also upward
+        (2, 1)   # Generate Visualizations -> End
+    ]
+    for start_y, end_y, *extras in arrows:  # extras holds the optional (x, label) pair
+        x_offset = extras[0] if extras else 0.5
+        label = extras[1] if len(extras) > 1 else ""
+        arrow = FancyArrowPatch((x_offset, start_y-0.4), (x_offset, end_y+0.4),
+                               arrowstyle="->", mutation_scale=20, lw=1.5)  # vertical arrow; tail and head are inset 0.4 from the node centres
+        ax.add_patch(arrow)
+        if label:
+            ax.text(x_offset+0.1, (start_y+end_y)/2, label, fontsize=8, va='center')  # branch label just right of the arrow, at its vertical midpoint
+    plt.xlim(0, 1)  # fix the data range explicitly
+    plt.ylim(0, 10)
     plt.tight_layout()
+    # Save to an in-memory PNG buffer
     buf = io.BytesIO()
     fig.savefig(buf, format='png', bbox_inches='tight')
     plt.close(fig)
+    buf.seek(0)  # rewind so PIL reads from the start of the PNG data
     return Image.open(buf)
+# Generate and save the flowchart. NOTE(review): this runs at import time with no __main__ guard, and the removed lines above deleted the Gradio interface and demo.launch() - presumably why the Space reports a runtime error; confirm.
+flowchart = create_process_flowchart()  # PIL image of the rendered chart
+flowchart.save('summary_process_flowchart.png')  # relative path: written to the current working directory