DeepLearning101 commited on
Commit
17d162a
·
verified ·
1 Parent(s): 05abb73

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +69 -90
app.py CHANGED
@@ -36,22 +36,14 @@ class NotebookLMTool:
36
  return "⚠️ Key 無效"
37
 
38
  def _extract_json(self, text):
39
- """強化版 JSON 提取,處理 Markdown 代碼塊"""
40
  try:
41
- # 1. 嘗試抓取 ```json ... ```
42
  match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
43
- if match:
44
- return json.loads(match.group(1))
45
-
46
- # 2. 嘗試抓取純 [...] 陣列結構
47
  match = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
48
- if match:
49
- return json.loads(match.group(0))
50
-
51
- # 3. 嘗試直接 parse (假設整段就是 JSON)
52
  return json.loads(text)
53
  except:
54
- print(f"JSON Parse Error. Raw text snippet: {text[:100]}...")
55
  return []
56
 
57
  def process_pdf(self, pdf_file, progress=gr.Progress()):
@@ -59,7 +51,11 @@ class NotebookLMTool:
59
  raise ValueError("請先輸入 Google API Key!")
60
 
61
  if pdf_file is None:
62
- return None, None, None
 
 
 
 
63
 
64
  # 1. 準備環境
65
  temp_dir = tempfile.mkdtemp()
@@ -68,7 +64,6 @@ class NotebookLMTool:
68
 
69
  # 初始化 PPTX
70
  prs = Presentation()
71
- # 設定為 16:9 比例
72
  prs.slide_width = Inches(16)
73
  prs.slide_height = Inches(9)
74
 
@@ -86,34 +81,43 @@ class NotebookLMTool:
86
  # 3. 逐頁處理
87
  for i, img in enumerate(images):
88
  progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
89
-
90
- # 建立空白投影片
91
  slide = prs.slides.add_slide(prs.slide_layouts[6])
92
 
93
  # ==========================================
94
- # 步驟 A: 背景處理 (去字) - 關鍵修正
95
  # ==========================================
96
  save_name = f"slide_{i+1:02d}.png"
97
  final_bg_path = os.path.join(img_output_dir, save_name)
98
-
99
  bg_success = False
100
 
101
  try:
 
 
 
 
 
 
 
 
 
 
 
102
  resp_img = self.client.models.generate_content(
103
- model="gemini-2.5-flash-image",
104
- contents=[
105
- "Remove all text, titles, bullet points, and diagrams containing text from this slide. Preserve only the pure background styling, colors, logos, and non-text decorative elements. Output ONLY the image.",
106
- img
107
- ],
108
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
109
  )
110
 
 
 
 
 
 
111
  # 處理圖片資料
112
  image_data = None
113
  if hasattr(resp_img, 'parts') and resp_img.parts:
114
  for part in resp_img.parts:
115
- if part.inline_data:
116
- image_data = part.inline_data.data; break
117
  if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
118
  image_data = resp_img.bytes
119
 
@@ -122,116 +126,82 @@ class NotebookLMTool:
122
  with open(final_bg_path, "wb") as f: f.write(image_data)
123
  cleaned_images_paths.append(final_bg_path)
124
  bg_success = True
125
- print(f"Page {i+1}: Background cleaned successfully.")
126
  else:
127
- print(f"Page {i+1}: AI returned text instead of image: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
128
 
129
  except Exception as e:
130
- print(f"Page {i+1} Background Gen Error: {e}")
131
 
132
- # 策略:如果去字成功,貼上乾淨背景。
133
- # 如果失敗,不要貼原圖!否則字會重疊。寧可留白或貼一個提示圖。
134
  if bg_success:
135
  try:
136
  slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
137
- gallery_preview.append((final_bg_path, f"Page {i+1} Background (Cleaned)"))
138
- except Exception as e:
139
- print(f"PPTX Image Insert Error: {e}")
140
  else:
141
- # 失敗時,存原圖但標記失敗,這樣使用者在 ZIP 裡還是拿得到原圖,但 PPT 上不會亂
142
- img.save(final_bg_path)
143
- gallery_preview.append((final_bg_path, f"Page {i+1} (Cleaning Failed - Original Saved)"))
144
- full_text_log += f"[P{i+1}] Warning: Background cleaning failed. Slide background left blank to avoid text duplication.\n"
145
 
146
  # ==========================================
147
  # 步驟 B: 文字與佈局還原 (Layout Analysis)
148
  # ==========================================
149
  try:
150
- prompt = """
151
- Analyze this slide image to reconstruct it in PowerPoint.
152
- Identify all text blocks.
153
- Return a JSON list strictly. Each item must have:
154
- - "text": The exact content string.
155
- - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
156
- - "font_size": estimated font size (integer, e.g., 40 for big titles, 14 for body).
157
- - "color": estimated hex color code (e.g., "#000000", "#FFFFFF").
158
- - "is_bold": boolean.
159
-
160
- Example format:
161
- [{"text": "Introduction", "box_2d": [50, 50, 150, 400], "font_size": 32, "color": "#000000", "is_bold": true}]
162
  """
163
 
164
- # 使用 2.0 Flash 進行邏輯分析
165
  resp_layout = self.client.models.generate_content(
166
- model="gemini-2.5-flash",
167
- contents=[prompt, img],
168
  config=types.GenerateContentConfig(response_mime_type="application/json")
169
  )
 
 
 
 
 
170
 
171
  blocks = self._extract_json(resp_layout.text)
172
 
173
- # 將文字區塊寫入 PPTX
174
  for block in blocks:
175
  text_content = block.get("text", "")
176
  if not text_content: continue
 
177
 
178
- full_text_log += f"[P{i+1}] Text: {text_content}\n"
179
-
180
- # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
181
  box = block.get("box_2d", [0, 0, 100, 100])
182
  ymin, xmin, ymax, xmax = box
183
-
184
- # 轉換為英吋 (基於 16:9)
185
  left = Inches((xmin / 1000) * 16)
186
  top = Inches((ymin / 1000) * 9)
187
  width = Inches(((xmax - xmin) / 1000) * 16)
188
  height = Inches(((ymax - ymin) / 1000) * 9)
189
 
190
- # 建立文字方塊
191
  textbox = slide.shapes.add_textbox(left, top, width, height)
192
  tf = textbox.text_frame
193
  tf.word_wrap = True
194
-
195
  p = tf.paragraphs[0]
196
  p.text = text_content
197
-
198
- # 字體設定
199
- try:
200
- sz = int(block.get("font_size", 18))
201
- # 簡單的縮放修正,有時候 AI 估計的字偏小
202
- p.font.size = Pt(sz)
203
- except:
204
- p.font.size = Pt(18)
205
-
206
  p.font.bold = block.get("is_bold", False)
207
-
208
- # 顏色處理
209
  try:
210
- hex_color = block.get("color", "#000000").replace("#", "")
211
- # 避免白色背景配白色文字 (簡單防呆)
212
- if not bg_success and hex_color.upper() == "FFFFFF":
213
- hex_color = "000000"
214
- p.font.color.rgb = RGBColor.from_string(hex_color)
215
- except:
216
- pass
217
 
218
  except Exception as e:
219
- print(f"Layout Analysis Error Page {i+1}: {e}")
220
- full_text_log += f"[P{i+1}] Error parsing layout text.\n"
221
 
222
- # 4. 打包結果
223
  progress(0.9, desc="正在打包檔案...")
224
-
225
- # 儲存 PPTX
226
  pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
227
  prs.save(pptx_path)
228
 
229
- # 儲存文字記錄
230
  txt_path = os.path.join(temp_dir, "content_log.txt")
231
- with open(txt_path, "w", encoding="utf-8") as f:
232
- f.write(full_text_log)
233
 
234
- # 建立 ZIP
235
  zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
236
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
237
  zf.write(pptx_path, "restored_slides.pptx")
@@ -240,7 +210,15 @@ class NotebookLMTool:
240
  for img_name in os.listdir(img_output_dir):
241
  zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
242
 
243
- return zip_path, pptx_path, gallery_preview
 
 
 
 
 
 
 
 
244
 
245
  # Init
246
  tool = NotebookLMTool()
@@ -276,8 +254,9 @@ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft
276
  btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
277
 
278
  with gr.Column():
279
- out_zip = gr.File(label="📦 下載完整包 (含背景圖+Log)")
280
- out_pptx = gr.File(label="📊 直接下載可編輯 PPTX")
 
281
 
282
  gr.Markdown("### 🖼️ 背景去字效果預覽")
283
  out_gallery = gr.Gallery(columns=4)
@@ -287,7 +266,7 @@ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft
287
  btn_process.click(
288
  tool.process_pdf,
289
  inputs=[pdf_input],
290
- outputs=[out_zip, out_pptx, out_gallery]
291
  )
292
 
293
  if __name__ == "__main__":
 
36
  return "⚠️ Key 無效"
37
 
38
  def _extract_json(self, text):
39
+ """強化版 JSON 提取"""
40
  try:
 
41
  match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
42
+ if match: return json.loads(match.group(1))
 
 
 
43
  match = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
44
+ if match: return json.loads(match.group(0))
 
 
 
45
  return json.loads(text)
46
  except:
 
47
  return []
48
 
49
  def process_pdf(self, pdf_file, progress=gr.Progress()):
 
51
  raise ValueError("請先輸入 Google API Key!")
52
 
53
  if pdf_file is None:
54
+ return None, None, None, ""
55
+
56
+ # 初始化 Token 計數器
57
+ total_input_tokens = 0
58
+ total_output_tokens = 0
59
 
60
  # 1. 準備環境
61
  temp_dir = tempfile.mkdtemp()
 
64
 
65
  # 初始化 PPTX
66
  prs = Presentation()
 
67
  prs.slide_width = Inches(16)
68
  prs.slide_height = Inches(9)
69
 
 
81
  # 3. 逐頁處理
82
  for i, img in enumerate(images):
83
  progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
 
 
84
  slide = prs.slides.add_slide(prs.slide_layouts[6])
85
 
86
  # ==========================================
87
+ # 步驟 A: 背景處理 (去字 + 嚴格保留原圖)
88
  # ==========================================
89
  save_name = f"slide_{i+1:02d}.png"
90
  final_bg_path = os.path.join(img_output_dir, save_name)
 
91
  bg_success = False
92
 
93
  try:
94
+ # Prompt 優化:強調 "Strictly preserve" 和 "Do not add"
95
+ clean_prompt = """
96
+ Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
97
+ CRITICAL INSTRUCTION:
98
+ 1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
99
+ 2. Do NOT add any new objects, decorations, or hallucinations.
100
+ 3. Do NOT change the aspect ratio or style.
101
+ 4. Simply fill the gaps left by removed text with the surrounding background texture naturally.
102
+ Output ONLY the image.
103
+ """
104
+
105
  resp_img = self.client.models.generate_content(
106
+ model="gemini-2.0-flash-exp",
107
+ contents=[clean_prompt, img],
 
 
 
108
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
109
  )
110
 
111
+ # --- Token 統計 (圖片生成) ---
112
+ if resp_img.usage_metadata:
113
+ total_input_tokens += resp_img.usage_metadata.prompt_token_count
114
+ total_output_tokens += resp_img.usage_metadata.candidates_token_count
115
+
116
  # 處理圖片資料
117
  image_data = None
118
  if hasattr(resp_img, 'parts') and resp_img.parts:
119
  for part in resp_img.parts:
120
+ if part.inline_data: image_data = part.inline_data.data; break
 
121
  if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
122
  image_data = resp_img.bytes
123
 
 
126
  with open(final_bg_path, "wb") as f: f.write(image_data)
127
  cleaned_images_paths.append(final_bg_path)
128
  bg_success = True
 
129
  else:
130
+ print(f"Page {i+1}: AI returned text instead of image.")
131
 
132
  except Exception as e:
133
+ print(f"Page {i+1} Clean Error: {e}")
134
 
135
+ # 失敗處理:使用原圖,但不要貼入 PPT 避免疊字,僅存檔供參考
 
136
  if bg_success:
137
  try:
138
  slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
139
+ gallery_preview.append((final_bg_path, f"Page {i+1} Cleaned"))
140
+ except: pass
 
141
  else:
142
+ img.save(final_bg_path)
143
+ gallery_preview.append((final_bg_path, f"Page {i+1} (Original - Clean Failed)"))
 
 
144
 
145
  # ==========================================
146
  # 步驟 B: 文字與佈局還原 (Layout Analysis)
147
  # ==========================================
148
  try:
149
+ layout_prompt = """
150
+ Analyze this slide. Return a JSON list of all text blocks.
151
+ Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
 
 
 
 
 
 
 
 
 
152
  """
153
 
 
154
  resp_layout = self.client.models.generate_content(
155
+ model="gemini-2.0-flash",
156
+ contents=[layout_prompt, img],
157
  config=types.GenerateContentConfig(response_mime_type="application/json")
158
  )
159
+
160
+ # --- Token 統計 (文字分析) ---
161
+ if resp_layout.usage_metadata:
162
+ total_input_tokens += resp_layout.usage_metadata.prompt_token_count
163
+ total_output_tokens += resp_layout.usage_metadata.candidates_token_count
164
 
165
  blocks = self._extract_json(resp_layout.text)
166
 
 
167
  for block in blocks:
168
  text_content = block.get("text", "")
169
  if not text_content: continue
170
+ full_text_log += f"[P{i+1}] {text_content}\n"
171
 
172
+ # 座標與樣式還原
 
 
173
  box = block.get("box_2d", [0, 0, 100, 100])
174
  ymin, xmin, ymax, xmax = box
 
 
175
  left = Inches((xmin / 1000) * 16)
176
  top = Inches((ymin / 1000) * 9)
177
  width = Inches(((xmax - xmin) / 1000) * 16)
178
  height = Inches(((ymax - ymin) / 1000) * 9)
179
 
 
180
  textbox = slide.shapes.add_textbox(left, top, width, height)
181
  tf = textbox.text_frame
182
  tf.word_wrap = True
 
183
  p = tf.paragraphs[0]
184
  p.text = text_content
185
+ try: p.font.size = Pt(int(block.get("font_size", 18)))
186
+ except: p.font.size = Pt(18)
 
 
 
 
 
 
 
187
  p.font.bold = block.get("is_bold", False)
 
 
188
  try:
189
+ hex_c = block.get("color", "#000000").replace("#", "")
190
+ if not bg_success and hex_c.upper() == "FFFFFF": hex_c = "000000"
191
+ p.font.color.rgb = RGBColor.from_string(hex_c)
192
+ except: pass
 
 
 
193
 
194
  except Exception as e:
195
+ print(f"Layout Error Page {i+1}: {e}")
 
196
 
197
+ # 4. 打包與統計
198
  progress(0.9, desc="正在打包檔案...")
 
 
199
  pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
200
  prs.save(pptx_path)
201
 
 
202
  txt_path = os.path.join(temp_dir, "content_log.txt")
203
+ with open(txt_path, "w", encoding="utf-8") as f: f.write(full_text_log)
 
204
 
 
205
  zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
206
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
207
  zf.write(pptx_path, "restored_slides.pptx")
 
210
  for img_name in os.listdir(img_output_dir):
211
  zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
212
 
213
+ # 格式化 Token 統計訊息
214
+ token_stats = f"""
215
+ ### 📊 Token 用量統計
216
+ - **總輸入 (Prompt Tokens):** {total_input_tokens:,}
217
+ - **總輸出 (Response Tokens):** {total_output_tokens:,}
218
+ - **總計消耗:** {total_input_tokens + total_output_tokens:,}
219
+ """
220
+
221
+ return zip_path, pptx_path, gallery_preview, token_stats
222
 
223
  # Init
224
  tool = NotebookLMTool()
 
254
  btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
255
 
256
  with gr.Column():
257
+ out_zip = gr.File(label="📦 下載完整包")
258
+ out_pptx = gr.File(label="📊 直接下載 PPTX")
259
+ out_tokens = gr.Markdown("### 📊 等待處理...")
260
 
261
  gr.Markdown("### 🖼️ 背景去字效果預覽")
262
  out_gallery = gr.Gallery(columns=4)
 
266
  btn_process.click(
267
  tool.process_pdf,
268
  inputs=[pdf_input],
269
+ outputs=[out_zip, out_pptx, out_gallery, out_tokens]
270
  )
271
 
272
  if __name__ == "__main__":