DeepLearning101 committed on
Commit
3abb76d
·
verified ·
1 Parent(s): 087fd05

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -33
app.py CHANGED
@@ -8,6 +8,7 @@ import json
8
  import re
9
  import concurrent.futures
10
  import time
 
11
  from pdf2image import convert_from_path
12
  from PIL import Image
13
  from dotenv import load_dotenv
@@ -20,6 +21,7 @@ from pptx.dml.color import RGBColor
20
  # 使用 Google 新版 SDK
21
  from google import genai
22
  from google.genai import types
 
23
 
24
  load_dotenv()
25
 
@@ -48,12 +50,39 @@ class NotebookLMTool:
48
  except:
49
  return []
50
 
51
- # --- 單頁處理邏輯 (獨立出來以便平行運算) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def process_single_page(self, page_index, img, img_output_dir):
53
  """處理單一頁面的:去字(背景) + 文字分析(Layout)"""
54
  print(f"🚀 [Page {page_index+1}] 開始處理...", flush=True)
55
 
56
- # 結果容器
57
  result = {
58
  "index": page_index,
59
  "bg_path": None,
@@ -78,9 +107,9 @@ class NotebookLMTool:
78
  3. Output ONLY the image.
79
  """
80
 
81
- # 使用 2.0-flash-exp 進行繪圖
82
- resp_img = self.client.models.generate_content(
83
- model="gemini-2.5-flash-image",
84
  contents=[clean_prompt, img],
85
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
86
  )
@@ -113,9 +142,9 @@ class NotebookLMTool:
113
  # 失敗回退原圖
114
  if not bg_success:
115
  img.save(final_bg_path)
116
- result["bg_path"] = final_bg_path # 仍需路徑給 PPT 使用
117
  result["preview"] = (final_bg_path, f"Page {page_index+1} (Original)")
118
- result["log"] += f"[P{page_index+1}] Warning: Background cleaning failed.\n"
119
 
120
  # 2. 文字與佈局分析 (Layout Analysis)
121
  try:
@@ -124,8 +153,9 @@ class NotebookLMTool:
124
  Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
125
  """
126
 
127
- resp_layout = self.client.models.generate_content(
128
- model="gemini-2.5-flash",
 
129
  contents=[layout_prompt, img],
130
  config=types.GenerateContentConfig(response_mime_type="application/json")
131
  )
@@ -137,10 +167,6 @@ class NotebookLMTool:
137
  blocks = self._extract_json(resp_layout.text)
138
  result["blocks"] = blocks
139
 
140
- # 紀錄 Log
141
- for b in blocks:
142
- if b.get("text"): result["log"] += f"[P{page_index+1}] {b['text'][:20]}...\n"
143
-
144
  except Exception as e:
145
  print(f"❌ [Page {page_index+1}] Layout Error: {e}", flush=True)
146
  result["log"] += f"[P{page_index+1}] Layout Analysis Failed.\n"
@@ -174,54 +200,49 @@ class NotebookLMTool:
174
  # 2. PDF 轉圖片 (降低 DPI 加速)
175
  progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
176
  try:
177
- # dpi=150 足夠螢幕檢視,且大幅減少上傳時間
178
  images = convert_from_path(pdf_file, dpi=150)
179
  except Exception as e:
180
  raise ValueError(f"PDF 轉換失敗: {str(e)}")
181
 
182
- # 3. 平行處理 (Parallel Execution)
183
- # 根據 CPU 核心數或 API 限制設定 workers,建議 3-5 避免 Rate Limit
184
- max_workers = 4
185
- results_map = {} # 用來存結果,確保順序正確
186
 
187
- progress(0.2, desc="🚀 AI 多工處理中 (可能需要稍等)...")
188
 
189
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
190
- # 提交所有任務
191
- future_to_page = {
192
- executor.submit(self.process_single_page, i, img, img_output_dir): i
193
- for i, img in enumerate(images)
194
- }
 
195
 
196
  # 等待完成
197
  for future in concurrent.futures.as_completed(future_to_page):
198
  try:
199
  res = future.result()
200
  results_map[res["index"]] = res
201
- # 更新 Token
202
  total_input_tokens += res["tokens_in"]
203
  total_output_tokens += res["tokens_out"]
204
  except Exception as exc:
205
  print(f"Page processing generated an exception: {exc}")
206
 
207
- # 4. 依序組裝 PPTX (確保順序正確)
208
  progress(0.8, desc="正在組裝 PPTX...")
209
 
210
- cleaned_images_paths = [] # 用於 ZIP
211
 
212
  for i in range(len(images)):
213
  if i not in results_map:
214
- print(f"Missing result for page {i}")
215
  continue
216
-
217
  res = results_map[i]
218
 
219
- # 更新 Log 與 Preview
220
  full_text_log += res["log"]
221
  if res["preview"]: gallery_preview.append(res["preview"])
222
  if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
223
 
224
- # 建立 Slide
225
  slide = prs.slides.add_slide(prs.slide_layouts[6])
226
 
227
  # A. 貼背景
@@ -235,7 +256,6 @@ class NotebookLMTool:
235
  text_content = block.get("text", "")
236
  if not text_content: continue
237
 
238
- # 座標轉換
239
  box = block.get("box_2d", [0, 0, 100, 100])
240
  ymin, xmin, ymax, xmax = box
241
  left = Inches((xmin / 1000) * 16)
@@ -254,7 +274,6 @@ class NotebookLMTool:
254
  p.font.bold = block.get("is_bold", False)
255
  try:
256
  hex_c = block.get("color", "#000000").replace("#", "")
257
- # 如果背景去字失敗,原圖背景可能很複雜,文字顏色可能需要調整 (這裡暫不處理,保持原色)
258
  p.font.color.rgb = RGBColor.from_string(hex_c)
259
  except: pass
260
 
 
8
  import re
9
  import concurrent.futures
10
  import time
11
+ import random
12
  from pdf2image import convert_from_path
13
  from PIL import Image
14
  from dotenv import load_dotenv
 
21
  # 使用 Google 新版 SDK
22
  from google import genai
23
  from google.genai import types
24
+ from google.api_core import exceptions as google_exceptions
25
 
26
  load_dotenv()
27
 
 
50
  except:
51
  return []
52
 
53
+ def _call_gemini_with_retry(self, model_name, contents, config=None, retries=5):
54
+ """
55
+ 封裝 Gemini 呼叫,加入指數退避重試機制 (Exponential Backoff)
56
+ 專門處理 429 Resource Exhausted 錯誤
57
+ """
58
+ delay = 10 # 初始等待秒數
59
+
60
+ for attempt in range(retries):
61
+ try:
62
+ response = self.client.models.generate_content(
63
+ model=model_name,
64
+ contents=contents,
65
+ config=config
66
+ )
67
+ return response
68
+ except Exception as e:
69
+ # 檢查是否為 Rate Limit 相關錯誤 (包含 429 或 Service Unavailable)
70
+ error_str = str(e)
71
+ if "429" in error_str or "RESOURCE_EXHAUSTED" in error_str or "503" in error_str:
72
+ wait_time = delay + random.uniform(0, 5) # 加入隨機抖動避免同時重試
73
+ print(f"⚠️ 觸發 API 限制,暫停 {wait_time:.1f} 秒後重試 ({attempt+1}/{retries})...", flush=True)
74
+ time.sleep(wait_time)
75
+ delay *= 2 # 等待時間加倍 (10s -> 20s -> 40s...)
76
+ else:
77
+ raise e # 其他錯誤直接拋出
78
+
79
+ raise Exception("API 重試次數過多,請稍後再試。")
80
+
81
+ # --- 單頁處理邏輯 ---
82
  def process_single_page(self, page_index, img, img_output_dir):
83
  """處理單一頁面的:去字(背景) + 文字分析(Layout)"""
84
  print(f"🚀 [Page {page_index+1}] 開始處理...", flush=True)
85
 
 
86
  result = {
87
  "index": page_index,
88
  "bg_path": None,
 
107
  3. Output ONLY the image.
108
  """
109
 
110
+ # 使用帶重試機制的呼叫
111
+ resp_img = self._call_gemini_with_retry(
112
+ model_name="gemini-2.0-flash-exp",
113
  contents=[clean_prompt, img],
114
  config=types.GenerateContentConfig(response_modalities=["IMAGE"])
115
  )
 
142
  # 失敗回退原圖
143
  if not bg_success:
144
  img.save(final_bg_path)
145
+ result["bg_path"] = final_bg_path
146
  result["preview"] = (final_bg_path, f"Page {page_index+1} (Original)")
147
+ result["log"] += f"[P{page_index+1}] Warning: Background cleaning failed (Rate Limit or Error).\n"
148
 
149
  # 2. 文字與佈局分析 (Layout Analysis)
150
  try:
 
153
  Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
154
  """
155
 
156
+ # 使用帶重試機制的呼叫
157
+ resp_layout = self._call_gemini_with_retry(
158
+ model_name="gemini-2.0-flash",
159
  contents=[layout_prompt, img],
160
  config=types.GenerateContentConfig(response_mime_type="application/json")
161
  )
 
167
  blocks = self._extract_json(resp_layout.text)
168
  result["blocks"] = blocks
169
 
 
 
 
 
170
  except Exception as e:
171
  print(f"❌ [Page {page_index+1}] Layout Error: {e}", flush=True)
172
  result["log"] += f"[P{page_index+1}] Layout Analysis Failed.\n"
 
200
  # 2. PDF 轉圖片 (降低 DPI 加速)
201
  progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
202
  try:
 
203
  images = convert_from_path(pdf_file, dpi=150)
204
  except Exception as e:
205
  raise ValueError(f"PDF 轉換失敗: {str(e)}")
206
 
207
+ # 3. 平行處理 (Parallel Execution with Safety)
208
+ # 降低併發數以適應免費版 API
209
+ max_workers = 2
210
+ results_map = {}
211
 
212
+ progress(0.2, desc="🚀 AI 處理中 (已啟用速率保護)...")
213
 
214
  with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
215
+ # 提交任務,但加入微小延遲避免瞬間併發過高
216
+ future_to_page = {}
217
+ for i, img in enumerate(images):
218
+ time.sleep(1) # 讓請求稍微錯開
219
+ future = executor.submit(self.process_single_page, i, img, img_output_dir)
220
+ future_to_page[future] = i
221
 
222
  # 等待完成
223
  for future in concurrent.futures.as_completed(future_to_page):
224
  try:
225
  res = future.result()
226
  results_map[res["index"]] = res
 
227
  total_input_tokens += res["tokens_in"]
228
  total_output_tokens += res["tokens_out"]
229
  except Exception as exc:
230
  print(f"Page processing generated an exception: {exc}")
231
 
232
+ # 4. 依序組裝 PPTX
233
  progress(0.8, desc="正在組裝 PPTX...")
234
 
235
+ cleaned_images_paths = []
236
 
237
  for i in range(len(images)):
238
  if i not in results_map:
 
239
  continue
 
240
  res = results_map[i]
241
 
 
242
  full_text_log += res["log"]
243
  if res["preview"]: gallery_preview.append(res["preview"])
244
  if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
245
 
 
246
  slide = prs.slides.add_slide(prs.slide_layouts[6])
247
 
248
  # A. 貼背景
 
256
  text_content = block.get("text", "")
257
  if not text_content: continue
258
 
 
259
  box = block.get("box_2d", [0, 0, 100, 100])
260
  ymin, xmin, ymax, xmax = box
261
  left = Inches((xmin / 1000) * 16)
 
274
  p.font.bold = block.get("is_bold", False)
275
  try:
276
  hex_c = block.get("color", "#000000").replace("#", "")
 
277
  p.font.color.rgb = RGBColor.from_string(hex_c)
278
  except: pass
279