tejovanth committed on
Commit
d4654eb
·
verified ·
1 Parent(s): 0a3441b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -8
app.py CHANGED
@@ -4,7 +4,7 @@ import torch
4
  from transformers import pipeline
5
  import time, logging, re
6
  import matplotlib
7
- matplotlib.use('Agg') # Use non-interactive backend for headless environments
8
  import matplotlib.pyplot as plt
9
  import io
10
  from PIL import Image
@@ -34,7 +34,30 @@ def visualize_chunk_status(chunk_data):
34
  buf = io.BytesIO()
35
  plt.savefig(buf, format='png')
36
  buf.seek(0)
37
- plt.close(fig) # Release memory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  return Image.open(buf)
39
 
40
  def summarize_file(file_bytes):
@@ -49,10 +72,10 @@ def summarize_file(file_bytes):
49
  text = re.sub(r"\s+", " ", text).strip()
50
  text = "".join(c for c in text if ord(c) < 128)
51
  except Exception as e:
52
- return f"❌ Text extraction failed: {str(e)}", None
53
 
54
  if not text.strip():
55
- return "❌ No text found", None
56
 
57
  text = text[:300000]
58
  chunks = [text[i:i+2000] for i in range(0, len(text), 2000)]
@@ -82,18 +105,20 @@ def summarize_file(file_bytes):
82
  chunk_info.append(chunk_result)
83
 
84
  final_summary = f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n" + "\n\n".join(summaries)
85
- image = visualize_chunk_status(chunk_info)
86
- return final_summary, image
 
87
 
88
  demo = gr.Interface(
89
  fn=summarize_file,
90
  inputs=gr.File(label="πŸ“„ Upload PDF", type="binary"),
91
  outputs=[
92
  gr.Textbox(label="πŸ“ Summarized Output"),
93
- gr.Image(label="πŸ“Š Visual Process Flow", type="pil")
 
94
  ],
95
  title="AI-Powered PDF Summarizer",
96
- description="Summarizes long PDFs (up to 300,000 characters) and visualizes chunk-level automation status."
97
  )
98
 
99
  if __name__ == "__main__":
@@ -102,3 +127,4 @@ if __name__ == "__main__":
102
  except Exception as e:
103
  print(f"❌ Gradio launch failed: {str(e)}")
104
 
 
 
4
  from transformers import pipeline
5
  import time, logging, re
6
  import matplotlib
7
+ matplotlib.use('Agg')
8
  import matplotlib.pyplot as plt
9
  import io
10
  from PIL import Image
 
34
  buf = io.BytesIO()
35
  plt.savefig(buf, format='png')
36
  buf.seek(0)
37
+ plt.close(fig)
38
+ return Image.open(buf)
39
+
40
+ def create_summary_flowchart(summaries):
41
+ fig, ax = plt.subplots(figsize=(6, len(summaries) * 0.8 + 1))
42
+ ax.axis('off')
43
+
44
+ ypos = list(range(len(summaries) * 2, 0, -2))
45
+ boxprops = dict(boxstyle="round,pad=0.5", facecolor="lightblue", edgecolor="black")
46
+
47
+ for i, (y, summary) in enumerate(zip(ypos, summaries)):
48
+ summary_text = summary.split("**Chunk")[1] if summary.startswith("**Chunk") else summary
49
+ summary_text = summary_text.strip().replace("**:", ":")[:120] + ("..." if len(summary) > 120 else "")
50
+ ax.text(0.5, y, summary_text, ha='center', va='center', bbox=boxprops, fontsize=9, wrap=True)
51
+
52
+ if i < len(summaries) - 1:
53
+ ax.annotate('', xy=(0.5, y - 1), xytext=(0.5, y - 0.2),
54
+ arrowprops=dict(arrowstyle="->", lw=1.5))
55
+
56
+ buf = io.BytesIO()
57
+ plt.tight_layout()
58
+ plt.savefig(buf, format='png')
59
+ buf.seek(0)
60
+ plt.close(fig)
61
  return Image.open(buf)
62
 
63
  def summarize_file(file_bytes):
 
72
  text = re.sub(r"\s+", " ", text).strip()
73
  text = "".join(c for c in text if ord(c) < 128)
74
  except Exception as e:
75
+ return f"❌ Text extraction failed: {str(e)}", None, None
76
 
77
  if not text.strip():
78
+ return "❌ No text found", None, None
79
 
80
  text = text[:300000]
81
  chunks = [text[i:i+2000] for i in range(0, len(text), 2000)]
 
105
  chunk_info.append(chunk_result)
106
 
107
  final_summary = f"**Chars**: {len(text)}\n**Time**: {time.time()-start:.2f}s\n\n" + "\n\n".join(summaries)
108
+ process_img = visualize_chunk_status(chunk_info)
109
+ flow_img = create_summary_flowchart(summaries)
110
+ return final_summary, process_img, flow_img
111
 
112
  demo = gr.Interface(
113
  fn=summarize_file,
114
  inputs=gr.File(label="πŸ“„ Upload PDF", type="binary"),
115
  outputs=[
116
  gr.Textbox(label="πŸ“ Summarized Output"),
117
+ gr.Image(label="πŸ“Š Chunk Status", type="pil"),
118
+ gr.Image(label="πŸ” Flowchart Summary", type="pil")
119
  ],
120
  title="AI-Powered PDF Summarizer",
121
+ description="Summarizes long PDFs (up to 300,000 characters) and visualizes chunk processing + flow of content."
122
  )
123
 
124
  if __name__ == "__main__":
 
127
  except Exception as e:
128
  print(f"❌ Gradio launch failed: {str(e)}")
129
 
130
+