DeepLearning101 committed on
Commit
087fd05
·
verified ·
1 Parent(s): c2c01d9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -125
app.py CHANGED
@@ -6,6 +6,8 @@ import shutil
6
  import base64
7
  import json
8
  import re
 
 
9
  from pdf2image import convert_from_path
10
  from PIL import Image
11
  from dotenv import load_dotenv
@@ -46,6 +48,106 @@ class NotebookLMTool:
46
  except:
47
  return []
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def process_pdf(self, pdf_file, progress=gr.Progress()):
50
  if not self.client:
51
  raise ValueError("請先輸入 Google API Key!")
@@ -53,10 +155,12 @@ class NotebookLMTool:
53
  if pdf_file is None:
54
  return None, None, None, ""
55
 
56
- # 初始化 Token 計數器
57
  total_input_tokens = 0
58
  total_output_tokens = 0
59
-
 
 
60
  # 1. 準備環境
61
  temp_dir = tempfile.mkdtemp()
62
  img_output_dir = os.path.join(temp_dir, "cleaned_images")
@@ -67,134 +171,94 @@ class NotebookLMTool:
67
  prs.slide_width = Inches(16)
68
  prs.slide_height = Inches(9)
69
 
70
- # 2. PDF 轉圖片
71
- progress(0.1, desc="正在將 PDF 轉為圖片...")
72
  try:
73
- images = convert_from_path(pdf_file)
 
74
  except Exception as e:
75
  raise ValueError(f"PDF 轉換失敗: {str(e)}")
76
 
77
- full_text_log = ""
78
- cleaned_images_paths = []
79
- gallery_preview = []
 
80
 
81
- # 3. 逐頁處理
82
- for i, img in enumerate(images):
83
- progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
84
- slide = prs.slides.add_slide(prs.slide_layouts[6])
85
-
86
- # ==========================================
87
- # 步驟 A: 背景處理 (去字 + 嚴格保留原圖)
88
- # ==========================================
89
- save_name = f"slide_{i+1:02d}.png"
90
- final_bg_path = os.path.join(img_output_dir, save_name)
91
- bg_success = False
92
 
93
- try:
94
- # Prompt 優化:強調 "Strictly preserve" 和 "Do not add"
95
- clean_prompt = """
96
- Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
97
- CRITICAL INSTRUCTION:
98
- 1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
99
- 2. Do NOT add any new objects, decorations, or hallucinations.
100
- 3. Do NOT change the aspect ratio or style.
101
- 4. Simply fill the gaps left by removed text with the surrounding background texture naturally.
102
- Output ONLY the image.
103
- """
104
-
105
- resp_img = self.client.models.generate_content(
106
- model="gemini-2.5-flash-image",
107
- contents=[clean_prompt, img],
108
- config=types.GenerateContentConfig(response_modalities=["IMAGE"])
109
- )
 
 
 
110
 
111
- # --- Token 統計 (圖片生成) ---
112
- if resp_img.usage_metadata:
113
- total_input_tokens += resp_img.usage_metadata.prompt_token_count
114
- total_output_tokens += resp_img.usage_metadata.candidates_token_count
115
-
116
- # 處理圖片資料
117
- image_data = None
118
- if hasattr(resp_img, 'parts') and resp_img.parts:
119
- for part in resp_img.parts:
120
- if part.inline_data: image_data = part.inline_data.data; break
121
- if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
122
- image_data = resp_img.bytes
123
-
124
- if image_data:
125
- if isinstance(image_data, str): image_data = base64.b64decode(image_data)
126
- with open(final_bg_path, "wb") as f: f.write(image_data)
127
- cleaned_images_paths.append(final_bg_path)
128
- bg_success = True
129
- else:
130
- print(f"Page {i+1}: AI returned text instead of image.")
131
-
132
- except Exception as e:
133
- print(f"Page {i+1} Clean Error: {e}")
134
-
135
- # 失敗處理:使用原圖,但不要貼入 PPT 避免疊字,僅存檔供參考
136
- if bg_success:
137
  try:
138
- slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
139
- gallery_preview.append((final_bg_path, f"Page {i+1} Cleaned"))
140
  except: pass
141
- else:
142
- img.save(final_bg_path)
143
- gallery_preview.append((final_bg_path, f"Page {i+1} (Original - Clean Failed)"))
144
-
145
- # ==========================================
146
- # 步驟 B: 文字與佈局還原 (Layout Analysis)
147
- # ==========================================
148
- try:
149
- layout_prompt = """
150
- Analyze this slide. Return a JSON list of all text blocks.
151
- Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
152
- """
153
 
154
- resp_layout = self.client.models.generate_content(
155
- model="gemini-2.5-flash",
156
- contents=[layout_prompt, img],
157
- config=types.GenerateContentConfig(response_mime_type="application/json")
158
- )
159
-
160
- # --- Token 統計 (文字分析) ---
161
- if resp_layout.usage_metadata:
162
- total_input_tokens += resp_layout.usage_metadata.prompt_token_count
163
- total_output_tokens += resp_layout.usage_metadata.candidates_token_count
164
 
165
- blocks = self._extract_json(resp_layout.text)
 
 
 
 
166
 
167
- for block in blocks:
168
- text_content = block.get("text", "")
169
- if not text_content: continue
170
- full_text_log += f"[P{i+1}] {text_content}\n"
171
-
172
- # 座標與樣式還原
173
- box = block.get("box_2d", [0, 0, 100, 100])
174
- ymin, xmin, ymax, xmax = box
175
- left = Inches((xmin / 1000) * 16)
176
- top = Inches((ymin / 1000) * 9)
177
- width = Inches(((xmax - xmin) / 1000) * 16)
178
- height = Inches(((ymax - ymin) / 1000) * 9)
179
-
180
- textbox = slide.shapes.add_textbox(left, top, width, height)
181
- tf = textbox.text_frame
182
- tf.word_wrap = True
183
- p = tf.paragraphs[0]
184
- p.text = text_content
185
- try: p.font.size = Pt(int(block.get("font_size", 18)))
186
- except: p.font.size = Pt(18)
187
- p.font.bold = block.get("is_bold", False)
188
- try:
189
- hex_c = block.get("color", "#000000").replace("#", "")
190
- if not bg_success and hex_c.upper() == "FFFFFF": hex_c = "000000"
191
- p.font.color.rgb = RGBColor.from_string(hex_c)
192
- except: pass
193
-
194
- except Exception as e:
195
- print(f"Layout Error Page {i+1}: {e}")
196
-
197
- # 4. 打包與統計
198
  progress(0.9, desc="正在打包檔案...")
199
  pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
200
  prs.save(pptx_path)
@@ -206,15 +270,13 @@ class NotebookLMTool:
206
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
207
  zf.write(pptx_path, "restored_slides.pptx")
208
  zf.write(txt_path, "content_log.txt")
209
- if os.path.exists(img_output_dir):
210
- for img_name in os.listdir(img_output_dir):
211
- zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
212
 
213
- # 格式化 Token 統計訊息
214
  token_stats = f"""
215
  ### 📊 Token 用量統計
216
- - **總輸入 (Prompt Tokens):** {total_input_tokens:,}
217
- - **總輸出 (Response Tokens):** {total_output_tokens:,}
218
  - **總計消耗:** {total_input_tokens + total_output_tokens:,}
219
  """
220
 
@@ -251,7 +313,7 @@ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft
251
 
252
  gr.Markdown("---")
253
  pdf_input = gr.File(label="上傳 PDF")
254
- btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
255
 
256
  with gr.Column():
257
  out_zip = gr.File(label="📦 下載完整包")
 
6
  import base64
7
  import json
8
  import re
9
+ import concurrent.futures
10
+ import time
11
  from pdf2image import convert_from_path
12
  from PIL import Image
13
  from dotenv import load_dotenv
 
48
  except:
49
  return []
50
 
51
+ # --- 單頁處理邏輯 (獨立出來以便平行運算) ---
52
def process_single_page(self, page_index, img, img_output_dir):
    """Clean one slide image and extract its text layout.

    The work for a single page is isolated in this method so the caller can
    run several pages concurrently. Two model calls are made:

    1. Background cleaning: the image model is asked to return the slide
       with its text removed; the returned image is written to
       ``img_output_dir``. If no image is returned, or the call raises,
       the original page image is saved under the same file name instead.
    2. Layout analysis: the text model is asked for a JSON list of text
       blocks, which is stored for the caller to place on the slide.

    A failure in either step is printed and noted in ``result["log"]``;
    it never prevents the other step from running.

    Args:
        page_index: Zero-based page number; used for the output file name
            and for log prefixes (displayed as ``page_index + 1``).
        img: Page image. It is passed to both model calls and must provide
            ``save(path)`` for the fallback path.
        img_output_dir: Directory the background PNG is written into. It
            is not created here.

    Returns:
        dict with the keys ``index`` (the given ``page_index``),
        ``bg_path`` (path of the saved background, or ``None`` if nothing
        could be written), ``blocks`` (list of dict text blocks, possibly
        empty), ``log`` (text for the content log), ``preview`` (a
        ``(path, caption)`` tuple or ``None``), ``tokens_in`` and
        ``tokens_out`` (token counts summed over both calls).
    """
    page_no = page_index + 1
    print(f"🚀 [Page {page_no}] 開始處理...", flush=True)

    # Result container handed back to the caller.
    result = {
        "index": page_index,
        "bg_path": None,
        "blocks": [],
        "log": "",
        "preview": None,
        "tokens_in": 0,
        "tokens_out": 0,
    }

    final_bg_path = os.path.join(img_output_dir, f"slide_{page_no:02d}.png")
    bg_success = False

    # 1. Background cleaning (text removal).
    try:
        clean_prompt = """
        Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
        CRITICAL INSTRUCTION:
        1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
        2. Do NOT add any new objects, decorations, or hallucinations.
        3. Output ONLY the image.
        """

        # Image generation/editing call.
        resp_img = self.client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[clean_prompt, img],
            config=types.GenerateContentConfig(response_modalities=["IMAGE"])
        )

        # Token accounting. A missing count is treated as 0 so that it
        # cannot raise here and discard an image that was returned.
        usage = resp_img.usage_metadata
        if usage:
            result["tokens_in"] += usage.prompt_token_count or 0
            result["tokens_out"] += usage.candidates_token_count or 0

        # Locate the image payload: first inline part, then raw bytes.
        image_data = None
        for part in getattr(resp_img, "parts", None) or []:
            if part.inline_data:
                image_data = part.inline_data.data
                break
        if image_data is None and getattr(resp_img, "bytes", None):
            image_data = resp_img.bytes

        if image_data:
            # A str payload is treated as base64 text and decoded.
            if isinstance(image_data, str):
                image_data = base64.b64decode(image_data)
            with open(final_bg_path, "wb") as f:
                f.write(image_data)
            bg_success = True
            result["bg_path"] = final_bg_path
            result["preview"] = (final_bg_path, f"Page {page_no} Cleaned")
        else:
            print(f"⚠️ [Page {page_no}] 去字失敗: 未回傳圖片", flush=True)

    except Exception as e:
        print(f"❌ [Page {page_no}] Clean Error: {e}", flush=True)

    # Fallback: keep the original page image as the background.
    if not bg_success:
        try:
            img.save(final_bg_path)
        except Exception as e:
            # Without a background the page is still worth returning:
            # bg_path/preview stay None and the layout step still runs.
            print(f"❌ [Page {page_no}] Fallback save error: {e}", flush=True)
        else:
            result["bg_path"] = final_bg_path  # the slide still needs a path
            result["preview"] = (final_bg_path, f"Page {page_no} (Original)")
        result["log"] += f"[P{page_no}] Warning: Background cleaning failed.\n"

    # 2. Text and layout analysis.
    try:
        layout_prompt = """
        Analyze this slide. Return a JSON list of all text blocks.
        Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
        """

        resp_layout = self.client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[layout_prompt, img],
            config=types.GenerateContentConfig(response_mime_type="application/json")
        )

        usage = resp_layout.usage_metadata
        if usage:
            result["tokens_in"] += usage.prompt_token_count or 0
            result["tokens_out"] += usage.candidates_token_count or 0

        parsed = self._extract_json(resp_layout.text)
        # Keep only dict entries: both the loop below and the caller use
        # .get() on each block, so anything else would raise.
        blocks = [b for b in parsed if isinstance(b, dict)] if isinstance(parsed, list) else []
        result["blocks"] = blocks

        # Log a short (20 character) excerpt of every non-empty block.
        for b in blocks:
            if b.get("text"):
                result["log"] += f"[P{page_no}] {str(b['text'])[:20]}...\n"

    except Exception as e:
        print(f"❌ [Page {page_no}] Layout Error: {e}", flush=True)
        result["log"] += f"[P{page_no}] Layout Analysis Failed.\n"

    print(f"✅ [Page {page_no}] 完成!", flush=True)
    return result
150
+
151
  def process_pdf(self, pdf_file, progress=gr.Progress()):
152
  if not self.client:
153
  raise ValueError("請先輸入 Google API Key!")
 
155
  if pdf_file is None:
156
  return None, None, None, ""
157
 
158
+ # 統計數據
159
  total_input_tokens = 0
160
  total_output_tokens = 0
161
+ full_text_log = ""
162
+ gallery_preview = []
163
+
164
  # 1. 準備環境
165
  temp_dir = tempfile.mkdtemp()
166
  img_output_dir = os.path.join(temp_dir, "cleaned_images")
 
171
  prs.slide_width = Inches(16)
172
  prs.slide_height = Inches(9)
173
 
174
+ # 2. PDF 轉圖片 (降低 DPI 加速)
175
+ progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
176
  try:
177
+ # dpi=150 足夠螢幕檢視,且大幅減少上傳時間
178
+ images = convert_from_path(pdf_file, dpi=150)
179
  except Exception as e:
180
  raise ValueError(f"PDF 轉換失敗: {str(e)}")
181
 
182
+ # 3. 平行處理 (Parallel Execution)
183
+ # 根據 CPU 核心數或 API 限制設定 workers,建議 3-5 避免 Rate Limit
184
+ max_workers = 4
185
+ results_map = {} # 用來存結果,確保順序正確
186
 
187
+ progress(0.2, desc="🚀 AI 多工處理中 (可能需要稍等)...")
188
+
189
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
190
+ # 提交所有任務
191
+ future_to_page = {
192
+ executor.submit(self.process_single_page, i, img, img_output_dir): i
193
+ for i, img in enumerate(images)
194
+ }
 
 
 
195
 
196
+ # 等待完成
197
+ for future in concurrent.futures.as_completed(future_to_page):
198
+ try:
199
+ res = future.result()
200
+ results_map[res["index"]] = res
201
+ # 更新 Token
202
+ total_input_tokens += res["tokens_in"]
203
+ total_output_tokens += res["tokens_out"]
204
+ except Exception as exc:
205
+ print(f"Page processing generated an exception: {exc}")
206
+
207
+ # 4. 依序組裝 PPTX (確保順序正確)
208
+ progress(0.8, desc="正在組裝 PPTX...")
209
+
210
+ cleaned_images_paths = [] # 用於 ZIP
211
+
212
+ for i in range(len(images)):
213
+ if i not in results_map:
214
+ print(f"Missing result for page {i}")
215
+ continue
216
 
217
+ res = results_map[i]
218
+
219
+ # 更新 Log 與 Preview
220
+ full_text_log += res["log"]
221
+ if res["preview"]: gallery_preview.append(res["preview"])
222
+ if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
223
+
224
+ # 建立 Slide
225
+ slide = prs.slides.add_slide(prs.slide_layouts[6])
226
+
227
+ # A. 貼背景
228
+ if res["bg_path"] and os.path.exists(res["bg_path"]):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  try:
230
+ slide.shapes.add_picture(res["bg_path"], 0, 0, width=prs.slide_width, height=prs.slide_height)
 
231
  except: pass
232
+
233
+ # B. 貼文字
234
+ for block in res["blocks"]:
235
+ text_content = block.get("text", "")
236
+ if not text_content: continue
 
 
 
 
 
 
 
237
 
238
+ # 座標轉換
239
+ box = block.get("box_2d", [0, 0, 100, 100])
240
+ ymin, xmin, ymax, xmax = box
241
+ left = Inches((xmin / 1000) * 16)
242
+ top = Inches((ymin / 1000) * 9)
243
+ width = Inches(((xmax - xmin) / 1000) * 16)
244
+ height = Inches(((ymax - ymin) / 1000) * 9)
 
 
 
245
 
246
+ textbox = slide.shapes.add_textbox(left, top, width, height)
247
+ tf = textbox.text_frame
248
+ tf.word_wrap = True
249
+ p = tf.paragraphs[0]
250
+ p.text = text_content
251
 
252
+ try: p.font.size = Pt(int(block.get("font_size", 18)))
253
+ except: p.font.size = Pt(18)
254
+ p.font.bold = block.get("is_bold", False)
255
+ try:
256
+ hex_c = block.get("color", "#000000").replace("#", "")
257
+ # 如果背景去字失敗,原圖背景可能很複雜,文字顏色可能需要調整 (這裡暫不處理,保持原色)
258
+ p.font.color.rgb = RGBColor.from_string(hex_c)
259
+ except: pass
260
+
261
+ # 5. 打包
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  progress(0.9, desc="正在打包檔案...")
263
  pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
264
  prs.save(pptx_path)
 
270
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
271
  zf.write(pptx_path, "restored_slides.pptx")
272
  zf.write(txt_path, "content_log.txt")
273
+ for img_path in cleaned_images_paths:
274
+ zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
 
275
 
 
276
  token_stats = f"""
277
  ### 📊 Token 用量統計
278
+ - **總輸入:** {total_input_tokens:,}
279
+ - **總輸出:** {total_output_tokens:,}
280
  - **總計消耗:** {total_input_tokens + total_output_tokens:,}
281
  """
282
 
 
313
 
314
  gr.Markdown("---")
315
  pdf_input = gr.File(label="上傳 PDF")
316
+ btn_process = gr.Button("🚀 開始還原 PPTX (平行加速版)", variant="primary")
317
 
318
  with gr.Column():
319
  out_zip = gr.File(label="📦 下載完整包")