DeepLearning101 committed on
Commit
6ad1309
·
verified ·
1 Parent(s): b42c65e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -62
app.py CHANGED
@@ -4,10 +4,17 @@ import tempfile
4
  import zipfile
5
  import shutil
6
  import base64
 
 
7
  from pdf2image import convert_from_path
8
  from PIL import Image
9
  from dotenv import load_dotenv
10
 
 
 
 
 
 
11
  # 使用 Google 新版 SDK
12
  from google import genai
13
  from google.genai import types
@@ -28,6 +35,21 @@ class NotebookLMTool:
28
  return "✅ API Key 已更新!"
29
  return "⚠️ Key 無效"
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  def process_pdf(self, pdf_file, progress=gr.Progress()):
32
  if not self.client:
33
  raise ValueError("請先輸入 Google API Key!")
@@ -35,118 +57,173 @@ class NotebookLMTool:
35
  if pdf_file is None:
36
  return None, None, None
37
 
38
- # 1. 準備暫存目錄
39
  temp_dir = tempfile.mkdtemp()
40
  img_output_dir = os.path.join(temp_dir, "cleaned_images")
41
  os.makedirs(img_output_dir, exist_ok=True)
42
 
 
 
 
 
 
 
43
  # 2. PDF 轉圖片
44
  progress(0.1, desc="正在將 PDF 轉為圖片...")
45
  try:
46
  images = convert_from_path(pdf_file)
47
  except Exception as e:
48
- raise ValueError(f"PDF 轉換失敗 (請確認 packages.txt 有加入 poppler-utils): {str(e)}")
49
 
50
- full_text = ""
51
  cleaned_images_paths = []
52
  gallery_preview = []
53
 
54
  # 3. 逐頁處理
55
  for i, img in enumerate(images):
56
- progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
57
-
58
- # --- 步驟 A: 提取文字 (OCR) ---
59
- # 使用標準 Flash 模型處理文字,速度最快
60
- try:
61
- resp_text = self.client.models.generate_content(
62
- model="gemini-2.5-flash",
63
- contents=["Extract all text content from this slide strictly.", img]
64
- )
65
- page_content = resp_text.text if resp_text.text else "[No Text Found]"
66
- except Exception as e:
67
- page_content = f"[OCR Error: {e}]"
68
 
69
- full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
 
70
 
71
- # --- 步驟 B: 圖片去字 (Image Generation) ---
72
- # 關鍵修改:必須使用 'gemini-2.0-flash-exp' 且該模型目前才支援 IMAGE 輸出
73
  save_name = f"slide_{i+1:02d}.png"
74
- final_path = os.path.join(img_output_dir, save_name)
 
 
 
75
 
76
  try:
77
  resp_img = self.client.models.generate_content(
78
- model="gemini-2.5-flash-image", # 修正:使用支援圖片輸出的實驗模型
79
  contents=[
80
- "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
81
  img
82
  ],
83
- config=types.GenerateContentConfig(
84
- response_modalities=["IMAGE"] # ✅ 修正:明確告知需要圖片模態
85
- )
86
  )
87
 
88
- # 處理圖片回傳 (解析 SDK 回應)
89
  image_data = None
90
-
91
- # 檢查 inline_data (Base64)
92
  if hasattr(resp_img, 'parts') and resp_img.parts:
93
  for part in resp_img.parts:
94
  if part.inline_data:
95
- image_data = part.inline_data.data
96
- break
97
-
98
- # 部分 SDK 版本可能直接放在 bytes
99
  if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
100
  image_data = resp_img.bytes
101
 
102
  if image_data:
103
- # 如果是 Base64 字串,需要解碼
104
- if isinstance(image_data, str):
105
- image_data = base64.b64decode(image_data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- with open(final_path, "wb") as f:
108
- f.write(image_data)
109
 
110
- cleaned_images_paths.append(final_path)
111
- gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
112
- print(f"Page {i+1}: Image generated successfully.")
113
- else:
114
- # 失敗回退:保留原圖並標記
115
- print(f"Page {i+1} Failed: No image data. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
116
- img.save(final_path)
117
- gallery_preview.append((final_path, f"Page {i+1} (Original - Gen Failed)"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  except Exception as e:
120
- print(f"Page {i+1} Error: {str(e)}")
121
- img.save(final_path)
122
- gallery_preview.append((final_path, f"Page {i+1} (Original - Error)"))
123
 
124
  # 4. 打包結果
125
- progress(0.9, desc="正在打包 ZIP...")
126
 
127
- txt_path = os.path.join(temp_dir, "extracted_text.txt")
 
 
 
 
 
128
  with open(txt_path, "w", encoding="utf-8") as f:
129
- f.write(full_text)
130
 
131
- zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
 
132
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
133
- zf.write(txt_path, "content.txt")
 
134
  for img_path in cleaned_images_paths:
135
- zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
136
 
137
- return zip_path, full_text, gallery_preview
138
 
139
  # Init
140
  tool = NotebookLMTool()
141
 
142
  # --- Gradio UI ---
143
- with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
144
- gr.Markdown("# 🛠️ NotebookLM 投影片 PDF 拆解神器")
145
  gr.Markdown("""
146
  <div align="center">
147
 
148
- # 🛠️ 上傳 NotebookLM 投影片 PDF,AI 自動幫你:**1. 抓出所有文字** | **2. 重繪乾淨背景圖**
149
  👉 歡迎 Star [GitHub](https://github.com/Deep-Learning-101/) ⭐ 覺得不錯 👈
 
150
  <h3>🧠 補腦專區:<a href="https://deep-learning-101.github.io/" target="_blank">Deep Learning 101</a></h3>
151
 
152
  | 🔥 技術傳送門 (Tech Stack) | 📚 必讀心法 (Must Read) |
@@ -166,13 +243,13 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
166
 
167
  gr.Markdown("---")
168
  pdf_input = gr.File(label="上傳 PDF")
169
- btn_process = gr.Button("🚀 開始拆解", variant="primary")
170
 
171
  with gr.Column():
172
- out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
173
- out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
174
 
175
- gr.Markdown("### 🖼️ 處理結果預覽")
176
  out_gallery = gr.Gallery(columns=4)
177
 
178
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
@@ -180,7 +257,7 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
180
  btn_process.click(
181
  tool.process_pdf,
182
  inputs=[pdf_input],
183
- outputs=[out_zip, out_text, out_gallery]
184
  )
185
 
186
  if __name__ == "__main__":
 
4
  import zipfile
5
  import shutil
6
  import base64
7
+ import json
8
+ import re
9
  from pdf2image import convert_from_path
10
  from PIL import Image
11
  from dotenv import load_dotenv
12
 
13
+ # PPTX 處理套件
14
+ from pptx import Presentation
15
+ from pptx.util import Inches, Pt
16
+ from pptx.dml.color import RGBColor
17
+
18
  # 使用 Google 新版 SDK
19
  from google import genai
20
  from google.genai import types
 
35
  return "✅ API Key 已更新!"
36
  return "⚠️ Key 無效"
37
 
def _extract_json(self, text):
    """Extract a JSON list from a model reply.

    The reply may be raw JSON, JSON wrapped in a Markdown code fence
    (with or without the ``json`` tag), or JSON embedded in free text.
    Candidates are tried in that order and the first one that decodes
    to a list is returned.

    Args:
        text: The reply text. ``None`` and non-string values are accepted.

    Returns:
        The decoded list, or ``[]`` when no candidate decodes to a list.
    """
    if not isinstance(text, str) or not text.strip():
        return []

    # 1. The whole reply (the normal case when a JSON MIME type is requested).
    candidates = [text]

    # 2. Every fenced block; non-greedy so several fences do not merge into one.
    candidates.extend(
        m.group(1) for m in re.finditer(r"```(?:json)?\s*(.*?)```", text, re.DOTALL)
    )

    # 3. The outermost [...] span, for a list buried in prose or in a wrapper object.
    start, end = text.find("["), text.rfind("]")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            continue
        # Callers iterate the result as a list of blocks; anything else is skipped.
        if isinstance(parsed, list):
            return parsed
    return []
52
+
def process_pdf(self, pdf_file, progress=gr.Progress()):
    """Rebuild an editable PPTX deck from a slide PDF.

    Each PDF page is rendered to an image. The model is asked for a
    text-free copy of that image, which becomes the slide background
    (the original page image is used when generation fails). A second
    model call returns the page's text blocks as JSON, and each block
    is placed on the slide as a text box.

    Args:
        pdf_file: The uploaded PDF, passed straight to ``convert_from_path``.
        progress: Gradio progress callback.

    Returns:
        ``(zip_path, pptx_path, gallery_preview)``, where ``gallery_preview``
        is a list of ``(image_path, caption)`` tuples. Returns
        ``(None, None, None)`` when no file was supplied.

    Raises:
        ValueError: If no API client is configured or the PDF cannot be
            converted to images.
    """
    if not self.client:
        raise ValueError("請先輸入 Google API Key!")

    if pdf_file is None:
        return None, None, None

    # 1. Working directory. It is kept on success because the returned
    #    paths point into it.
    temp_dir = tempfile.mkdtemp()
    img_output_dir = os.path.join(temp_dir, "cleaned_images")
    os.makedirs(img_output_dir, exist_ok=True)

    # New deck at 16 x 9 inches (16:9).
    prs = Presentation()
    prs.slide_width = Inches(16)
    prs.slide_height = Inches(9)

    # 2. PDF -> page images.
    progress(0.1, desc="正在將 PDF 轉為圖片...")
    try:
        images = convert_from_path(pdf_file)
    except Exception as e:
        # Nothing useful was produced, so do not leave the directory behind.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise ValueError(f"PDF 轉換失敗: {str(e)}") from e

    text_log = []  # joined once at the end instead of repeated string +=
    cleaned_images_paths = []
    gallery_preview = []
    total_pages = len(images)

    # 3. Page-by-page reconstruction.
    for i, img in enumerate(images):
        page_no = i + 1
        progress(0.1 + (0.8 * (i / total_pages)), desc=f"AI 正在重建第 {page_no}/{total_pages} 頁...")

        # Layout index 6 is used as the blank layout.
        slide = prs.slides.add_slide(prs.slide_layouts[6])

        # --- Step A: text-free background ---
        final_bg_path = os.path.join(img_output_dir, f"slide_{page_no:02d}.png")
        img.save(final_bg_path)  # original page, used as fallback background
        if self._clean_background(img, final_bg_path, page_no):
            cleaned_images_paths.append(final_bg_path)

        # Stretch the background over the whole slide.
        try:
            slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
            gallery_preview.append((final_bg_path, f"Page {page_no} Background"))
        except Exception as e:
            print(f"PPTX Image Insert Error: {e}")

        # --- Step B: layout analysis -> text boxes ---
        try:
            blocks = self._analyze_layout(img)
            self._add_text_blocks(slide, blocks, prs.slide_width, prs.slide_height, page_no, text_log)
        except Exception as e:
            print(f"Layout Analysis Error Page {page_no}: {e}")
            text_log.append(f"[P{page_no}] Error parsing layout.\n")

    # 4. Package the results.
    progress(0.9, desc="正在打包檔案...")

    pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
    prs.save(pptx_path)

    txt_path = os.path.join(temp_dir, "content_log.txt")
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("".join(text_log))

    # ZIP holds the PPTX, the text log and the successfully cleaned backgrounds.
    zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.write(pptx_path, "restored_slides.pptx")
        zf.write(txt_path, "content_log.txt")
        for img_path in cleaned_images_paths:
            zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))

    return zip_path, pptx_path, gallery_preview

def _clean_background(self, img, out_path, page_no):
    """Ask the model for a text-free copy of ``img`` and store it at ``out_path``.

    Returns True when a generated image replaced the file, False otherwise
    (in which case the file at ``out_path`` is left as it was).
    """
    try:
        resp_img = self.client.models.generate_content(
            model="gemini-2.5-flash-image",  # alternatively gemini-2.0-flash-exp
            contents=[
                "Remove all text, titles, and bullet points from this slide. Keep the background design, logos, and non-text graphics exactly as they are. Output ONLY the image.",
                img
            ],
            config=types.GenerateContentConfig(response_modalities=["IMAGE"])
        )

        # Take the first part that carries inline data.
        image_data = None
        for part in (getattr(resp_img, 'parts', None) or []):
            if part.inline_data:
                image_data = part.inline_data.data
                break
        # Fallback for responses that expose the payload as `.bytes`.
        if image_data is None and getattr(resp_img, 'bytes', None):
            image_data = resp_img.bytes

        if not image_data:
            print(f"Page {page_no}: Background gen failed, using original.")
            return False

        if isinstance(image_data, str):
            image_data = base64.b64decode(image_data)

        # Write next to the target and swap, so a failed write cannot
        # destroy the fallback image already stored at out_path.
        tmp_path = out_path + ".tmp"
        with open(tmp_path, "wb") as f:
            f.write(image_data)
        os.replace(tmp_path, out_path)
        return True
    except Exception as e:
        print(f"Bg Gen Error Page {page_no}: {e}")
        return False

def _analyze_layout(self, img):
    """Ask the model for the text blocks of ``img`` and return the parsed reply."""
    prompt = """
    Analyze this slide image. Identify all text blocks.
    Return a JSON list strictly. Each item must have:
    - "text": The content string.
    - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
    - "font_size": estimated font size (integer, e.g., 24 for titles, 12 for body).
    - "color": estimated hex color code (e.g., "#000000").
    - "is_bold": boolean.

    Example: [{"text": "Title", "box_2d": [10, 10, 200, 500], "font_size": 40, "color": "#333333", "is_bold": true}]
    """
    resp_layout = self.client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, img],
        config=types.GenerateContentConfig(response_mime_type="application/json")
    )
    return self._extract_json(resp_layout.text)

def _normalize_box(self, box):
    """Return ``(ymin, xmin, ymax, xmax)`` clamped to 0-1000 and ordered.

    A box that is not four numbers falls back to ``(0, 0, 100, 100)``.
    """
    try:
        ymin, xmin, ymax, xmax = (min(max(float(v), 0.0), 1000.0) for v in box)
    except (TypeError, ValueError):
        return 0.0, 0.0, 100.0, 100.0
    if ymax < ymin:
        ymin, ymax = ymax, ymin
    if xmax < xmin:
        xmin, xmax = xmax, xmin
    return ymin, xmin, ymax, xmax

def _add_text_blocks(self, slide, blocks, slide_width, slide_height, page_no, text_log):
    """Place each text block on ``slide`` and append its text to ``text_log``.

    Blocks are handled independently: a malformed block is reported and
    skipped without affecting the remaining blocks of the page.
    """
    if isinstance(blocks, dict):
        blocks = [blocks]  # a single block returned without the enclosing list
    elif not isinstance(blocks, list):
        blocks = []

    for block in blocks:
        if not isinstance(block, dict):
            continue
        text_content = block.get("text", "")
        if not text_content:
            continue
        text_content = str(text_content)
        text_log.append(f"[P{page_no}] {text_content}\n")

        try:
            # box_2d is [ymin, xmin, ymax, xmax] on a 0-1000 scale; scale it
            # by the real slide size (EMU) instead of repeating 16 and 9.
            ymin, xmin, ymax, xmax = self._normalize_box(block.get("box_2d", [0, 0, 100, 100]))
            left = int(slide_width * xmin / 1000)
            top = int(slide_height * ymin / 1000)
            width = max(int(slide_width * (xmax - xmin) / 1000), 1)
            height = max(int(slide_height * (ymax - ymin) / 1000), 1)

            textbox = slide.shapes.add_textbox(left, top, width, height)
            tf = textbox.text_frame
            tf.word_wrap = True

            p = tf.paragraphs[0]
            p.text = text_content

            # Model output is untrusted: fall back to 18 pt when the size is
            # missing, non-numeric or non-positive, and cap absurd values.
            try:
                font_size = float(block.get("font_size") or 18)
            except (TypeError, ValueError):
                font_size = 18.0
            if not 0 < font_size <= 4000:
                font_size = 18.0

            is_bold = block.get("is_bold", False)
            if not isinstance(is_bold, bool):
                is_bold = False

            rgb = None
            try:
                hex_color = str(block.get("color") or "#000000").replace("#", "")
                rgb = RGBColor.from_string(hex_color.upper())
            except (TypeError, ValueError):
                pass  # invalid colour: leave the font colour unset

            # Run-level properties override paragraph defaults, so set both.
            for font in [p.font] + [run.font for run in p.runs]:
                font.size = Pt(font_size)
                font.bold = is_bold
                if rgb is not None:
                    font.color.rgb = rgb
        except Exception as e:
            print(f"Text Block Error Page {page_no}: {e}")
214
 
215
  # Init
216
  tool = NotebookLMTool()
217
 
218
  # --- Gradio UI ---
219
+ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft()) as demo:
220
+ gr.Markdown("# 🛠️ NotebookLM 投影片 PDF 還原神器 (PPT.404)")
221
  gr.Markdown("""
222
  <div align="center">
223
 
224
+ # 🪄 上傳 PDF,AI 自動:**去字背景** + **版面分析** + **合成可編輯 PPTX**
225
  👉 歡迎 Star [GitHub](https://github.com/Deep-Learning-101/) ⭐ 覺得不錯 👈
226
+
227
  <h3>🧠 補腦專區:<a href="https://deep-learning-101.github.io/" target="_blank">Deep Learning 101</a></h3>
228
 
229
  | 🔥 技術傳送門 (Tech Stack) | 📚 必讀心法 (Must Read) |
 
243
 
244
  gr.Markdown("---")
245
  pdf_input = gr.File(label="上傳 PDF")
246
+ btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
247
 
248
  with gr.Column():
249
+ out_zip = gr.File(label="📦 下載完整包 (含 PPTX, 圖, 文)")
250
+ out_pptx = gr.File(label="📊 直接下載 PPTX")
251
 
252
+ gr.Markdown("### 🖼️ 背景分離預覽")
253
  out_gallery = gr.Gallery(columns=4)
254
 
255
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
 
257
  btn_process.click(
258
  tool.process_pdf,
259
  inputs=[pdf_input],
260
+ outputs=[out_zip, out_pptx, out_gallery]
261
  )
262
 
263
  if __name__ == "__main__":