DeepLearning101 committed on
Commit
05abb73
·
verified ·
1 Parent(s): 6ad1309

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -40
app.py CHANGED
@@ -36,18 +36,22 @@ class NotebookLMTool:
36
  return "⚠️ Key 無效"
37
 
38
  def _extract_json(self, text):
39
- """嘗試從回應中提取 JSON 字串"""
40
  try:
41
- # 尋找 ```json ... ``` 區塊
42
  match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
43
  if match:
44
  return json.loads(match.group(1))
45
- # 或者是直接的 JSON 結構
46
- match = re.search(r"\[.*\]", text, re.DOTALL)
 
47
  if match:
48
  return json.loads(match.group(0))
49
- return []
 
 
50
  except:
 
51
  return []
52
 
53
  def process_pdf(self, pdf_file, progress=gr.Progress()):
@@ -64,7 +68,7 @@ class NotebookLMTool:
64
 
65
  # 初始化 PPTX
66
  prs = Presentation()
67
- # 設定為 16:9 (寬 16 英吋, 高 9 英吋) - 這是 NotebookLM 常見比例
68
  prs.slide_width = Inches(16)
69
  prs.slide_height = Inches(9)
70
 
@@ -81,23 +85,24 @@ class NotebookLMTool:
81
 
82
  # 3. 逐頁處理
83
  for i, img in enumerate(images):
84
- progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在重建第 {i+1}/{len(images)} 頁...")
85
 
86
- # 建立空白投影片 (Layout 6 is usually blank)
87
  slide = prs.slides.add_slide(prs.slide_layouts[6])
88
 
89
- # --- 步驟 A: 圖片去字 (Clean Background) ---
 
 
90
  save_name = f"slide_{i+1:02d}.png"
91
  final_bg_path = os.path.join(img_output_dir, save_name)
92
 
93
- # 先儲存原圖備用
94
- img.save(final_bg_path)
95
 
96
  try:
97
  resp_img = self.client.models.generate_content(
98
- model="gemini-2.5-flash-image", # 或是 gemini-2.0-flash-exp
99
  contents=[
100
- "Remove all text, titles, and bullet points from this slide. Keep the background design, logos, and non-text graphics exactly as they are. Output ONLY the image.",
101
  img
102
  ],
103
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
@@ -116,35 +121,49 @@ class NotebookLMTool:
116
  if isinstance(image_data, str): image_data = base64.b64decode(image_data)
117
  with open(final_bg_path, "wb") as f: f.write(image_data)
118
  cleaned_images_paths.append(final_bg_path)
 
 
119
  else:
120
- print(f"Page {i+1}: Background gen failed, using original.")
121
 
122
  except Exception as e:
123
- print(f"Bg Gen Error Page {i+1}: {e}")
124
 
125
- # 將背景圖貼到 PPTX (佔滿全螢幕)
126
- try:
127
- slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
128
- gallery_preview.append((final_bg_path, f"Page {i+1} Background"))
129
- except Exception as e:
130
- print(f"PPTX Image Insert Error: {e}")
 
 
 
 
 
 
 
131
 
132
- # --- 步驟 B: 佈局分析 (Layout Analysis to JSON) ---
 
 
133
  try:
134
  prompt = """
135
- Analyze this slide image. Identify all text blocks.
 
136
  Return a JSON list strictly. Each item must have:
137
- - "text": The content string.
138
  - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
139
- - "font_size": estimated font size (integer, e.g., 24 for titles, 12 for body).
140
- - "color": estimated hex color code (e.g., "#000000").
141
  - "is_bold": boolean.
142
 
143
- Example: [{"text": "Title", "box_2d": [10, 10, 200, 500], "font_size": 40, "color": "#333333", "is_bold": true}]
 
144
  """
145
 
 
146
  resp_layout = self.client.models.generate_content(
147
- model="gemini-2.0-flash", # 使用 2.0 Flash 處理邏輯較強
148
  contents=[prompt, img],
149
  config=types.GenerateContentConfig(response_mime_type="application/json")
150
  )
@@ -156,14 +175,13 @@ class NotebookLMTool:
156
  text_content = block.get("text", "")
157
  if not text_content: continue
158
 
159
- full_text_log += f"[P{i+1}] {text_content}\n"
160
 
161
  # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
162
- # box_2d: [ymin, xmin, ymax, xmax]
163
  box = block.get("box_2d", [0, 0, 100, 100])
164
  ymin, xmin, ymax, xmax = box
165
 
166
- # 轉換為英吋
167
  left = Inches((xmin / 1000) * 16)
168
  top = Inches((ymin / 1000) * 9)
169
  width = Inches(((xmax - xmin) / 1000) * 16)
@@ -176,19 +194,30 @@ class NotebookLMTool:
176
 
177
  p = tf.paragraphs[0]
178
  p.text = text_content
179
- p.font.size = Pt(block.get("font_size", 18))
 
 
 
 
 
 
 
 
180
  p.font.bold = block.get("is_bold", False)
181
 
182
  # 顏色處理
183
  try:
184
  hex_color = block.get("color", "#000000").replace("#", "")
 
 
 
185
  p.font.color.rgb = RGBColor.from_string(hex_color)
186
  except:
187
- pass # Fallback to black
188
 
189
  except Exception as e:
190
  print(f"Layout Analysis Error Page {i+1}: {e}")
191
- full_text_log += f"[P{i+1}] Error parsing layout.\n"
192
 
193
  # 4. 打包結果
194
  progress(0.9, desc="正在打包檔案...")
@@ -202,13 +231,14 @@ class NotebookLMTool:
202
  with open(txt_path, "w", encoding="utf-8") as f:
203
  f.write(full_text_log)
204
 
205
- # 建立 ZIP (包含 PPTX, 文字檔, 與乾淨圖)
206
  zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
207
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
208
  zf.write(pptx_path, "restored_slides.pptx")
209
  zf.write(txt_path, "content_log.txt")
210
- for img_path in cleaned_images_paths:
211
- zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
 
212
 
213
  return zip_path, pptx_path, gallery_preview
214
 
@@ -246,10 +276,10 @@ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft
246
  btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
247
 
248
  with gr.Column():
249
- out_zip = gr.File(label="📦 下載完整包 (含 PPTX, 圖, 文)")
250
- out_pptx = gr.File(label="📊 直接下載 PPTX")
251
 
252
- gr.Markdown("### 🖼️ 背景分離預覽")
253
  out_gallery = gr.Gallery(columns=4)
254
 
255
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
 
36
  return "⚠️ Key 無效"
37
 
38
  def _extract_json(self, text):
39
+ """強化版 JSON 提取,處理 Markdown 代碼塊"""
40
  try:
41
+ # 1. 嘗試抓取 ```json ... ```
42
  match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
43
  if match:
44
  return json.loads(match.group(1))
45
+
46
+ # 2. 嘗試抓取純 [...] 陣列結構
47
+ match = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
48
  if match:
49
  return json.loads(match.group(0))
50
+
51
+ # 3. 嘗試直接 parse (假設整段就是 JSON)
52
+ return json.loads(text)
53
  except:
54
+ print(f"JSON Parse Error. Raw text snippet: {text[:100]}...")
55
  return []
56
 
57
  def process_pdf(self, pdf_file, progress=gr.Progress()):
 
68
 
69
  # 初始化 PPTX
70
  prs = Presentation()
71
+ # 設定為 16:9 比例
72
  prs.slide_width = Inches(16)
73
  prs.slide_height = Inches(9)
74
 
 
85
 
86
  # 3. 逐頁處理
87
  for i, img in enumerate(images):
88
+ progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
89
 
90
+ # 建立空白投影片
91
  slide = prs.slides.add_slide(prs.slide_layouts[6])
92
 
93
+ # ==========================================
94
+ # 步驟 A: 背景處理 (去字) - 關鍵修正
95
+ # ==========================================
96
  save_name = f"slide_{i+1:02d}.png"
97
  final_bg_path = os.path.join(img_output_dir, save_name)
98
 
99
+ bg_success = False
 
100
 
101
  try:
102
  resp_img = self.client.models.generate_content(
103
+ model="gemini-2.5-flash-image",
104
  contents=[
105
+ "Remove all text, titles, bullet points, and diagrams containing text from this slide. Preserve only the pure background styling, colors, logos, and non-text decorative elements. Output ONLY the image.",
106
  img
107
  ],
108
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
 
121
  if isinstance(image_data, str): image_data = base64.b64decode(image_data)
122
  with open(final_bg_path, "wb") as f: f.write(image_data)
123
  cleaned_images_paths.append(final_bg_path)
124
+ bg_success = True
125
+ print(f"Page {i+1}: Background cleaned successfully.")
126
  else:
127
+ print(f"Page {i+1}: AI returned text instead of image: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
128
 
129
  except Exception as e:
130
+ print(f"Page {i+1} Background Gen Error: {e}")
131
 
132
+ # 策略:如果去字成功,貼上乾淨背景。
133
+ # 如果失敗,不要貼原圖!否則字會重疊。寧可留白或貼一個提示圖。
134
+ if bg_success:
135
+ try:
136
+ slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
137
+ gallery_preview.append((final_bg_path, f"Page {i+1} Background (Cleaned)"))
138
+ except Exception as e:
139
+ print(f"PPTX Image Insert Error: {e}")
140
+ else:
141
+ # 失敗時,存原圖但標記失敗,這樣使用者在 ZIP 裡還是拿得到原圖,但 PPT 上不會亂
142
+ img.save(final_bg_path)
143
+ gallery_preview.append((final_bg_path, f"Page {i+1} (Cleaning Failed - Original Saved)"))
144
+ full_text_log += f"[P{i+1}] Warning: Background cleaning failed. Slide background left blank to avoid text duplication.\n"
145
 
146
+ # ==========================================
147
+ # 步驟 B: 文字與佈局還原 (Layout Analysis)
148
+ # ==========================================
149
  try:
150
  prompt = """
151
+ Analyze this slide image to reconstruct it in PowerPoint.
152
+ Identify all text blocks.
153
  Return a JSON list strictly. Each item must have:
154
+ - "text": The exact content string.
155
  - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
156
+ - "font_size": estimated font size (integer, e.g., 40 for big titles, 14 for body).
157
+ - "color": estimated hex color code (e.g., "#000000", "#FFFFFF").
158
  - "is_bold": boolean.
159
 
160
+ Example format:
161
+ [{"text": "Introduction", "box_2d": [50, 50, 150, 400], "font_size": 32, "color": "#000000", "is_bold": true}]
162
  """
163
 
164
+ # Use Gemini 2.5 Flash for the layout analysis
165
  resp_layout = self.client.models.generate_content(
166
+ model="gemini-2.5-flash",
167
  contents=[prompt, img],
168
  config=types.GenerateContentConfig(response_mime_type="application/json")
169
  )
 
175
  text_content = block.get("text", "")
176
  if not text_content: continue
177
 
178
+ full_text_log += f"[P{i+1}] Text: {text_content}\n"
179
 
180
  # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
 
181
  box = block.get("box_2d", [0, 0, 100, 100])
182
  ymin, xmin, ymax, xmax = box
183
 
184
+ # 轉換為英吋 (基於 16:9)
185
  left = Inches((xmin / 1000) * 16)
186
  top = Inches((ymin / 1000) * 9)
187
  width = Inches(((xmax - xmin) / 1000) * 16)
 
194
 
195
  p = tf.paragraphs[0]
196
  p.text = text_content
197
+
198
+ # 字體設定
199
+ try:
200
+ sz = int(block.get("font_size", 18))
201
+ # No scaling correction is applied here: the AI-estimated size is used as-is (it sometimes runs small)
202
+ p.font.size = Pt(sz)
203
+ except:
204
+ p.font.size = Pt(18)
205
+
206
  p.font.bold = block.get("is_bold", False)
207
 
208
  # 顏色處理
209
  try:
210
  hex_color = block.get("color", "#000000").replace("#", "")
211
+ # 避免白色背景配白色文字 (簡單防呆)
212
+ if not bg_success and hex_color.upper() == "FFFFFF":
213
+ hex_color = "000000"
214
  p.font.color.rgb = RGBColor.from_string(hex_color)
215
  except:
216
+ pass
217
 
218
  except Exception as e:
219
  print(f"Layout Analysis Error Page {i+1}: {e}")
220
+ full_text_log += f"[P{i+1}] Error parsing layout text.\n"
221
 
222
  # 4. 打包結果
223
  progress(0.9, desc="正在打包檔案...")
 
231
  with open(txt_path, "w", encoding="utf-8") as f:
232
  f.write(full_text_log)
233
 
234
+ # 建立 ZIP
235
  zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
236
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
237
  zf.write(pptx_path, "restored_slides.pptx")
238
  zf.write(txt_path, "content_log.txt")
239
+ if os.path.exists(img_output_dir):
240
+ for img_name in os.listdir(img_output_dir):
241
+ zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
242
 
243
  return zip_path, pptx_path, gallery_preview
244
 
 
276
  btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
277
 
278
  with gr.Column():
279
+ out_zip = gr.File(label="📦 下載完整包 (含背景圖+Log)")
280
+ out_pptx = gr.File(label="📊 直接下載可編輯 PPTX")
281
 
282
+ gr.Markdown("### 🖼️ 背景去字效果預覽")
283
  out_gallery = gr.Gallery(columns=4)
284
 
285
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)