Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,6 +8,7 @@ import json
|
|
| 8 |
import re
|
| 9 |
import concurrent.futures
|
| 10 |
import time
|
|
|
|
| 11 |
from pdf2image import convert_from_path
|
| 12 |
from PIL import Image
|
| 13 |
from dotenv import load_dotenv
|
|
@@ -20,6 +21,7 @@ from pptx.dml.color import RGBColor
|
|
| 20 |
# 使用 Google 新版 SDK
|
| 21 |
from google import genai
|
| 22 |
from google.genai import types
|
|
|
|
| 23 |
|
| 24 |
load_dotenv()
|
| 25 |
|
|
@@ -48,12 +50,39 @@ class NotebookLMTool:
|
|
| 48 |
except:
|
| 49 |
return []
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def process_single_page(self, page_index, img, img_output_dir):
|
| 53 |
"""處理單一頁面的:去字(背景) + 文字分析(Layout)"""
|
| 54 |
print(f"🚀 [Page {page_index+1}] 開始處理...", flush=True)
|
| 55 |
|
| 56 |
-
# 結果容器
|
| 57 |
result = {
|
| 58 |
"index": page_index,
|
| 59 |
"bg_path": None,
|
|
@@ -78,9 +107,9 @@ class NotebookLMTool:
|
|
| 78 |
3. Output ONLY the image.
|
| 79 |
"""
|
| 80 |
|
| 81 |
-
#
|
| 82 |
-
resp_img = self.
|
| 83 |
-
|
| 84 |
contents=[clean_prompt, img],
|
| 85 |
config=types.GenerateContentConfig(response_modalities=["IMAGE"])
|
| 86 |
)
|
|
@@ -113,9 +142,9 @@ class NotebookLMTool:
|
|
| 113 |
# 失敗回退原圖
|
| 114 |
if not bg_success:
|
| 115 |
img.save(final_bg_path)
|
| 116 |
-
result["bg_path"] = final_bg_path
|
| 117 |
result["preview"] = (final_bg_path, f"Page {page_index+1} (Original)")
|
| 118 |
-
result["log"] += f"[P{page_index+1}] Warning: Background cleaning failed.\n"
|
| 119 |
|
| 120 |
# 2. 文字與佈局分析 (Layout Analysis)
|
| 121 |
try:
|
|
@@ -124,8 +153,9 @@ class NotebookLMTool:
|
|
| 124 |
Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
|
| 125 |
"""
|
| 126 |
|
| 127 |
-
|
| 128 |
-
|
|
|
|
| 129 |
contents=[layout_prompt, img],
|
| 130 |
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 131 |
)
|
|
@@ -137,10 +167,6 @@ class NotebookLMTool:
|
|
| 137 |
blocks = self._extract_json(resp_layout.text)
|
| 138 |
result["blocks"] = blocks
|
| 139 |
|
| 140 |
-
# 紀錄 Log
|
| 141 |
-
for b in blocks:
|
| 142 |
-
if b.get("text"): result["log"] += f"[P{page_index+1}] {b['text'][:20]}...\n"
|
| 143 |
-
|
| 144 |
except Exception as e:
|
| 145 |
print(f"❌ [Page {page_index+1}] Layout Error: {e}", flush=True)
|
| 146 |
result["log"] += f"[P{page_index+1}] Layout Analysis Failed.\n"
|
|
@@ -174,54 +200,49 @@ class NotebookLMTool:
|
|
| 174 |
# 2. PDF 轉圖片 (降低 DPI 加速)
|
| 175 |
progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
|
| 176 |
try:
|
| 177 |
-
# dpi=150 足夠螢幕檢視,且大幅減少上傳時間
|
| 178 |
images = convert_from_path(pdf_file, dpi=150)
|
| 179 |
except Exception as e:
|
| 180 |
raise ValueError(f"PDF 轉換失敗: {str(e)}")
|
| 181 |
|
| 182 |
-
# 3. 平行處理 (Parallel Execution)
|
| 183 |
-
#
|
| 184 |
-
max_workers =
|
| 185 |
-
results_map = {}
|
| 186 |
|
| 187 |
-
progress(0.2, desc="🚀 AI
|
| 188 |
|
| 189 |
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 190 |
-
#
|
| 191 |
-
future_to_page = {
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
| 195 |
|
| 196 |
# 等待完成
|
| 197 |
for future in concurrent.futures.as_completed(future_to_page):
|
| 198 |
try:
|
| 199 |
res = future.result()
|
| 200 |
results_map[res["index"]] = res
|
| 201 |
-
# 更新 Token
|
| 202 |
total_input_tokens += res["tokens_in"]
|
| 203 |
total_output_tokens += res["tokens_out"]
|
| 204 |
except Exception as exc:
|
| 205 |
print(f"Page processing generated an exception: {exc}")
|
| 206 |
|
| 207 |
-
# 4. 依序組裝 PPTX
|
| 208 |
progress(0.8, desc="正在組裝 PPTX...")
|
| 209 |
|
| 210 |
-
cleaned_images_paths = []
|
| 211 |
|
| 212 |
for i in range(len(images)):
|
| 213 |
if i not in results_map:
|
| 214 |
-
print(f"Missing result for page {i}")
|
| 215 |
continue
|
| 216 |
-
|
| 217 |
res = results_map[i]
|
| 218 |
|
| 219 |
-
# 更新 Log 與 Preview
|
| 220 |
full_text_log += res["log"]
|
| 221 |
if res["preview"]: gallery_preview.append(res["preview"])
|
| 222 |
if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
|
| 223 |
|
| 224 |
-
# 建立 Slide
|
| 225 |
slide = prs.slides.add_slide(prs.slide_layouts[6])
|
| 226 |
|
| 227 |
# A. 貼背景
|
|
@@ -235,7 +256,6 @@ class NotebookLMTool:
|
|
| 235 |
text_content = block.get("text", "")
|
| 236 |
if not text_content: continue
|
| 237 |
|
| 238 |
-
# 座標轉換
|
| 239 |
box = block.get("box_2d", [0, 0, 100, 100])
|
| 240 |
ymin, xmin, ymax, xmax = box
|
| 241 |
left = Inches((xmin / 1000) * 16)
|
|
@@ -254,7 +274,6 @@ class NotebookLMTool:
|
|
| 254 |
p.font.bold = block.get("is_bold", False)
|
| 255 |
try:
|
| 256 |
hex_c = block.get("color", "#000000").replace("#", "")
|
| 257 |
-
# 如果背景去字失敗,原圖背景可能很複雜,文字顏色可能需要調整 (這裡暫不處理,保持原色)
|
| 258 |
p.font.color.rgb = RGBColor.from_string(hex_c)
|
| 259 |
except: pass
|
| 260 |
|
|
|
|
| 8 |
import re
|
| 9 |
import concurrent.futures
|
| 10 |
import time
|
| 11 |
+
import random
|
| 12 |
from pdf2image import convert_from_path
|
| 13 |
from PIL import Image
|
| 14 |
from dotenv import load_dotenv
|
|
|
|
| 21 |
# 使用 Google 新版 SDK
|
| 22 |
from google import genai
|
| 23 |
from google.genai import types
|
| 24 |
+
from google.api_core import exceptions as google_exceptions
|
| 25 |
|
| 26 |
load_dotenv()
|
| 27 |
|
|
|
|
| 50 |
except:
|
| 51 |
return []
|
| 52 |
|
| 53 |
+
def _call_gemini_with_retry(self, model_name, contents, config=None, retries=5,
                            *, initial_delay=10, max_jitter=5):
    """Call ``self.client.models.generate_content`` with exponential backoff.

    An error is treated as retryable when its string form contains
    ``"429"``, ``"RESOURCE_EXHAUSTED"`` or ``"503"``. Any other error is
    re-raised immediately, unchanged.

    Args:
        model_name: Model identifier forwarded as ``model=``.
        contents: Request contents forwarded as ``contents=``.
        config: Optional generation config forwarded as ``config=``.
        retries: Maximum number of attempts (not additional retries).
        initial_delay: Seconds to wait after the first retryable failure;
            doubled after every subsequent one.
        max_jitter: Upper bound, in seconds, of the uniform random jitter
            added to each wait so that parallel workers do not retry in
            lock-step.

    Returns:
        Whatever ``generate_content`` returns on the first successful attempt.

    Raises:
        Exception: When every attempt failed with a retryable error. The
            last underlying error is attached as ``__cause__``.
    """
    delay = initial_delay
    last_error = None

    for attempt in range(retries):
        try:
            return self.client.models.generate_content(
                model=model_name,
                contents=contents,
                config=config,
            )
        except Exception as e:
            # Rate-limit / temporary-unavailability detection is string based.
            error_str = str(e)
            retryable = any(
                marker in error_str
                for marker in ("429", "RESOURCE_EXHAUSTED", "503")
            )
            if not retryable:
                # Bare raise keeps the original traceback intact.
                raise

            last_error = e
            if attempt == retries - 1:
                # No attempt follows this one, so sleeping here would only
                # delay the failure by the longest backoff interval.
                break

            wait_time = delay + random.uniform(0, max_jitter)
            print(f"⚠️ 觸發 API 限制,暫停 {wait_time:.1f} 秒後重試 ({attempt+1}/{retries})...", flush=True)
            time.sleep(wait_time)
            delay *= 2  # 10s -> 20s -> 40s ... with the default initial delay

    # Chain the last API error so callers and logs can see the real cause.
    raise Exception("API 重試次數過多,請稍後再試。") from last_error
|
| 80 |
+
|
| 81 |
+
# --- 單頁處理邏輯 ---
|
| 82 |
def process_single_page(self, page_index, img, img_output_dir):
|
| 83 |
"""處理單一頁面的:去字(背景) + 文字分析(Layout)"""
|
| 84 |
print(f"🚀 [Page {page_index+1}] 開始處理...", flush=True)
|
| 85 |
|
|
|
|
| 86 |
result = {
|
| 87 |
"index": page_index,
|
| 88 |
"bg_path": None,
|
|
|
|
| 107 |
3. Output ONLY the image.
|
| 108 |
"""
|
| 109 |
|
| 110 |
+
# 使用帶重試機制的呼叫
|
| 111 |
+
resp_img = self._call_gemini_with_retry(
|
| 112 |
+
model_name="gemini-2.0-flash-exp",
|
| 113 |
contents=[clean_prompt, img],
|
| 114 |
config=types.GenerateContentConfig(response_modalities=["IMAGE"])
|
| 115 |
)
|
|
|
|
| 142 |
# 失敗回退原圖
|
| 143 |
if not bg_success:
|
| 144 |
img.save(final_bg_path)
|
| 145 |
+
result["bg_path"] = final_bg_path
|
| 146 |
result["preview"] = (final_bg_path, f"Page {page_index+1} (Original)")
|
| 147 |
+
result["log"] += f"[P{page_index+1}] Warning: Background cleaning failed (Rate Limit or Error).\n"
|
| 148 |
|
| 149 |
# 2. 文字與佈局分析 (Layout Analysis)
|
| 150 |
try:
|
|
|
|
| 153 |
Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
|
| 154 |
"""
|
| 155 |
|
| 156 |
+
# 使用帶重試機制的呼叫
|
| 157 |
+
resp_layout = self._call_gemini_with_retry(
|
| 158 |
+
model_name="gemini-2.0-flash",
|
| 159 |
contents=[layout_prompt, img],
|
| 160 |
config=types.GenerateContentConfig(response_mime_type="application/json")
|
| 161 |
)
|
|
|
|
| 167 |
blocks = self._extract_json(resp_layout.text)
|
| 168 |
result["blocks"] = blocks
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
except Exception as e:
|
| 171 |
print(f"❌ [Page {page_index+1}] Layout Error: {e}", flush=True)
|
| 172 |
result["log"] += f"[P{page_index+1}] Layout Analysis Failed.\n"
|
|
|
|
| 200 |
# 2. PDF 轉圖片 (降低 DPI 加速)
|
| 201 |
progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
|
| 202 |
try:
|
|
|
|
| 203 |
images = convert_from_path(pdf_file, dpi=150)
|
| 204 |
except Exception as e:
|
| 205 |
raise ValueError(f"PDF 轉換失敗: {str(e)}")
|
| 206 |
|
| 207 |
+
# 3. 平行處理 (Parallel Execution with Safety)
|
| 208 |
+
# 降低併發數以適應免費版 API
|
| 209 |
+
max_workers = 2
|
| 210 |
+
results_map = {}
|
| 211 |
|
| 212 |
+
progress(0.2, desc="🚀 AI 處理中 (已啟用速率保護)...")
|
| 213 |
|
| 214 |
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 215 |
+
# 提交任務,但加入微小延遲避免瞬間併發過高
|
| 216 |
+
future_to_page = {}
|
| 217 |
+
for i, img in enumerate(images):
|
| 218 |
+
time.sleep(1) # 讓請求稍微錯開
|
| 219 |
+
future = executor.submit(self.process_single_page, i, img, img_output_dir)
|
| 220 |
+
future_to_page[future] = i
|
| 221 |
|
| 222 |
# 等待完成
|
| 223 |
for future in concurrent.futures.as_completed(future_to_page):
|
| 224 |
try:
|
| 225 |
res = future.result()
|
| 226 |
results_map[res["index"]] = res
|
|
|
|
| 227 |
total_input_tokens += res["tokens_in"]
|
| 228 |
total_output_tokens += res["tokens_out"]
|
| 229 |
except Exception as exc:
|
| 230 |
print(f"Page processing generated an exception: {exc}")
|
| 231 |
|
| 232 |
+
# 4. 依序組裝 PPTX
|
| 233 |
progress(0.8, desc="正在組裝 PPTX...")
|
| 234 |
|
| 235 |
+
cleaned_images_paths = []
|
| 236 |
|
| 237 |
for i in range(len(images)):
|
| 238 |
if i not in results_map:
|
|
|
|
| 239 |
continue
|
|
|
|
| 240 |
res = results_map[i]
|
| 241 |
|
|
|
|
| 242 |
full_text_log += res["log"]
|
| 243 |
if res["preview"]: gallery_preview.append(res["preview"])
|
| 244 |
if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
|
| 245 |
|
|
|
|
| 246 |
slide = prs.slides.add_slide(prs.slide_layouts[6])
|
| 247 |
|
| 248 |
# A. 貼背景
|
|
|
|
| 256 |
text_content = block.get("text", "")
|
| 257 |
if not text_content: continue
|
| 258 |
|
|
|
|
| 259 |
box = block.get("box_2d", [0, 0, 100, 100])
|
| 260 |
ymin, xmin, ymax, xmax = box
|
| 261 |
left = Inches((xmin / 1000) * 16)
|
|
|
|
| 274 |
p.font.bold = block.get("is_bold", False)
|
| 275 |
try:
|
| 276 |
hex_c = block.get("color", "#000000").replace("#", "")
|
|
|
|
| 277 |
p.font.color.rgb = RGBColor.from_string(hex_c)
|
| 278 |
except: pass
|
| 279 |
|