Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,6 +6,8 @@ import shutil
|
|
| 6 |
import base64
|
| 7 |
import json
|
| 8 |
import re
|
|
|
|
|
|
|
| 9 |
from pdf2image import convert_from_path
|
| 10 |
from PIL import Image
|
| 11 |
from dotenv import load_dotenv
|
|
@@ -46,6 +48,106 @@ class NotebookLMTool:
|
|
| 46 |
except:
|
| 47 |
return []
|
| 48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
def process_pdf(self, pdf_file, progress=gr.Progress()):
|
| 50 |
if not self.client:
|
| 51 |
raise ValueError("請先輸入 Google API Key!")
|
|
@@ -53,10 +155,12 @@ class NotebookLMTool:
|
|
| 53 |
if pdf_file is None:
|
| 54 |
return None, None, None, ""
|
| 55 |
|
| 56 |
-
#
|
| 57 |
total_input_tokens = 0
|
| 58 |
total_output_tokens = 0
|
| 59 |
-
|
|
|
|
|
|
|
| 60 |
# 1. 準備環境
|
| 61 |
temp_dir = tempfile.mkdtemp()
|
| 62 |
img_output_dir = os.path.join(temp_dir, "cleaned_images")
|
|
@@ -67,134 +171,94 @@ class NotebookLMTool:
|
|
| 67 |
prs.slide_width = Inches(16)
|
| 68 |
prs.slide_height = Inches(9)
|
| 69 |
|
| 70 |
-
# 2. PDF 轉圖片
|
| 71 |
-
progress(0.1, desc="正在將 PDF
|
| 72 |
try:
|
| 73 |
-
|
|
|
|
| 74 |
except Exception as e:
|
| 75 |
raise ValueError(f"PDF 轉換失敗: {str(e)}")
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
save_name = f"slide_{i+1:02d}.png"
|
| 90 |
-
final_bg_path = os.path.join(img_output_dir, save_name)
|
| 91 |
-
bg_success = False
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
if image_data:
|
| 125 |
-
if isinstance(image_data, str): image_data = base64.b64decode(image_data)
|
| 126 |
-
with open(final_bg_path, "wb") as f: f.write(image_data)
|
| 127 |
-
cleaned_images_paths.append(final_bg_path)
|
| 128 |
-
bg_success = True
|
| 129 |
-
else:
|
| 130 |
-
print(f"Page {i+1}: AI returned text instead of image.")
|
| 131 |
-
|
| 132 |
-
except Exception as e:
|
| 133 |
-
print(f"Page {i+1} Clean Error: {e}")
|
| 134 |
-
|
| 135 |
-
# 失敗處理:使用原圖,但不要貼入 PPT 避免疊字,僅存檔供參考
|
| 136 |
-
if bg_success:
|
| 137 |
try:
|
| 138 |
-
slide.shapes.add_picture(
|
| 139 |
-
gallery_preview.append((final_bg_path, f"Page {i+1} Cleaned"))
|
| 140 |
except: pass
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
# 步驟 B: 文字與佈局還原 (Layout Analysis)
|
| 147 |
-
# ==========================================
|
| 148 |
-
try:
|
| 149 |
-
layout_prompt = """
|
| 150 |
-
Analyze this slide. Return a JSON list of all text blocks.
|
| 151 |
-
Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
|
| 152 |
-
"""
|
| 153 |
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
)
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
if resp_layout.usage_metadata:
|
| 162 |
-
total_input_tokens += resp_layout.usage_metadata.prompt_token_count
|
| 163 |
-
total_output_tokens += resp_layout.usage_metadata.candidates_token_count
|
| 164 |
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
#
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
width = Inches(((xmax - xmin) / 1000) * 16)
|
| 178 |
-
height = Inches(((ymax - ymin) / 1000) * 9)
|
| 179 |
-
|
| 180 |
-
textbox = slide.shapes.add_textbox(left, top, width, height)
|
| 181 |
-
tf = textbox.text_frame
|
| 182 |
-
tf.word_wrap = True
|
| 183 |
-
p = tf.paragraphs[0]
|
| 184 |
-
p.text = text_content
|
| 185 |
-
try: p.font.size = Pt(int(block.get("font_size", 18)))
|
| 186 |
-
except: p.font.size = Pt(18)
|
| 187 |
-
p.font.bold = block.get("is_bold", False)
|
| 188 |
-
try:
|
| 189 |
-
hex_c = block.get("color", "#000000").replace("#", "")
|
| 190 |
-
if not bg_success and hex_c.upper() == "FFFFFF": hex_c = "000000"
|
| 191 |
-
p.font.color.rgb = RGBColor.from_string(hex_c)
|
| 192 |
-
except: pass
|
| 193 |
-
|
| 194 |
-
except Exception as e:
|
| 195 |
-
print(f"Layout Error Page {i+1}: {e}")
|
| 196 |
-
|
| 197 |
-
# 4. 打包與統計
|
| 198 |
progress(0.9, desc="正在打包檔案...")
|
| 199 |
pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
|
| 200 |
prs.save(pptx_path)
|
|
@@ -206,15 +270,13 @@ class NotebookLMTool:
|
|
| 206 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 207 |
zf.write(pptx_path, "restored_slides.pptx")
|
| 208 |
zf.write(txt_path, "content_log.txt")
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
|
| 212 |
|
| 213 |
-
# 格式化 Token 統計訊息
|
| 214 |
token_stats = f"""
|
| 215 |
### 📊 Token 用量統計
|
| 216 |
-
-
|
| 217 |
-
-
|
| 218 |
- **總計消耗:** {total_input_tokens + total_output_tokens:,}
|
| 219 |
"""
|
| 220 |
|
|
@@ -251,7 +313,7 @@ with gr.Blocks(title="NotebookLM Slide Restorer,PPT.404", theme=gr.themes.Soft
|
|
| 251 |
|
| 252 |
gr.Markdown("---")
|
| 253 |
pdf_input = gr.File(label="上傳 PDF")
|
| 254 |
-
btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
|
| 255 |
|
| 256 |
with gr.Column():
|
| 257 |
out_zip = gr.File(label="📦 下載完整包")
|
|
|
|
| 6 |
import base64
|
| 7 |
import json
|
| 8 |
import re
|
| 9 |
+
import concurrent.futures
|
| 10 |
+
import time
|
| 11 |
from pdf2image import convert_from_path
|
| 12 |
from PIL import Image
|
| 13 |
from dotenv import load_dotenv
|
|
|
|
| 48 |
except:
|
| 49 |
return []
|
| 50 |
|
| 51 |
+
# --- 單頁處理邏輯 (獨立出來以便平行運算) ---
|
| 52 |
+
def process_single_page(self, page_index, img, img_output_dir):
    """Process one slide page: build a text-free background and analyse its text layout.

    The method is self-contained per page so that the caller can run many
    pages concurrently and merge the returned dictionaries afterwards.

    Args:
        page_index: Zero-based page number; used for the output file name
            (``slide_NN.png``, one-based) and for log/preview labels.
        img: The page image. It is sent to the model as-is and must offer
            ``save(path)`` for the fallback (the caller passes the images
            produced by ``pdf2image.convert_from_path``).
        img_output_dir: Directory in which the background image is written.

    Returns:
        A dict with the keys ``index``, ``bg_path``, ``blocks`` (list of
        dicts describing text blocks), ``log`` (text), ``preview``
        (``(path, caption)`` tuple), ``tokens_in`` and ``tokens_out``.

    Failures of either model call are logged and reported through ``log``
    instead of being raised; only a failure of the fallback ``img.save``
    propagates to the caller.
    """
    page_no = page_index + 1
    print(f"🚀 [Page {page_no}] 開始處理...", flush=True)

    # Result container handed back to the caller.
    result = {
        "index": page_index,
        "bg_path": None,
        "blocks": [],
        "log": "",
        "preview": None,
        "tokens_in": 0,
        "tokens_out": 0,
    }

    def add_usage(response):
        # The usage record and each of its counters are optional in the SDK
        # response; treat a missing/None counter as 0 so that bookkeeping can
        # never abort an otherwise successful step.
        usage = getattr(response, "usage_metadata", None)
        if usage:
            result["tokens_in"] += getattr(usage, "prompt_token_count", None) or 0
            result["tokens_out"] += getattr(usage, "candidates_token_count", None) or 0

    save_name = f"slide_{page_no:02d}.png"
    final_bg_path = os.path.join(img_output_dir, save_name)
    bg_success = False

    # 1. Background cleaning: ask the image model to erase all text.
    try:
        clean_prompt = """
        Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
        CRITICAL INSTRUCTION:
        1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
        2. Do NOT add any new objects, decorations, or hallucinations.
        3. Output ONLY the image.
        """

        # Image generation is done with gemini-2.5-flash-image.
        resp_img = self.client.models.generate_content(
            model="gemini-2.5-flash-image",
            contents=[clean_prompt, img],
            config=types.GenerateContentConfig(response_modalities=["IMAGE"])
        )

        add_usage(resp_img)

        # Locate the image payload: first part carrying inline data, else the
        # response-level ``bytes`` attribute if the response has one.
        image_data = None
        for part in getattr(resp_img, "parts", None) or []:
            inline = getattr(part, "inline_data", None)
            if inline is not None and inline.data:
                image_data = inline.data
                break
        if image_data is None and getattr(resp_img, "bytes", None):
            image_data = resp_img.bytes

        if image_data:
            # A str payload is treated as base64 text and decoded first.
            if isinstance(image_data, str):
                image_data = base64.b64decode(image_data)
            with open(final_bg_path, "wb") as f:
                f.write(image_data)
            bg_success = True
            result["bg_path"] = final_bg_path
            result["preview"] = (final_bg_path, f"Page {page_no} Cleaned")
        else:
            print(f"⚠️ [Page {page_no}] 去字失敗: 未回傳圖片", flush=True)

    except Exception as e:
        print(f"❌ [Page {page_no}] Clean Error: {e}", flush=True)

    # Fallback: keep the original page image so the slide still gets a background.
    if not bg_success:
        img.save(final_bg_path)
        result["bg_path"] = final_bg_path  # the PPT assembly still needs a path
        result["preview"] = (final_bg_path, f"Page {page_no} (Original)")
        result["log"] += f"[P{page_no}] Warning: Background cleaning failed.\n"

    # 2. Text and layout analysis.
    try:
        layout_prompt = """
        Analyze this slide. Return a JSON list of all text blocks.
        Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
        """

        resp_layout = self.client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[layout_prompt, img],
            config=types.GenerateContentConfig(response_mime_type="application/json")
        )

        add_usage(resp_layout)

        # The model output is untrusted: keep only dict items of a list so the
        # caller can safely call ``block.get(...)`` on every entry.
        parsed = self._extract_json(resp_layout.text)
        if not isinstance(parsed, list):
            parsed = []
        blocks = [item for item in parsed if isinstance(item, dict)]
        result["blocks"] = blocks

        # Log the first 20 characters of every non-empty text block.
        result["log"] += "".join(
            f"[P{page_no}] {str(b['text'])[:20]}...\n"
            for b in blocks
            if b.get("text")
        )

    except Exception as e:
        print(f"❌ [Page {page_no}] Layout Error: {e}", flush=True)
        result["log"] += f"[P{page_no}] Layout Analysis Failed.\n"

    print(f"✅ [Page {page_no}] 完成!", flush=True)
    return result
|
| 150 |
+
|
| 151 |
def process_pdf(self, pdf_file, progress=gr.Progress()):
|
| 152 |
if not self.client:
|
| 153 |
raise ValueError("請先輸入 Google API Key!")
|
|
|
|
| 155 |
if pdf_file is None:
|
| 156 |
return None, None, None, ""
|
| 157 |
|
| 158 |
+
# 統計數據
|
| 159 |
total_input_tokens = 0
|
| 160 |
total_output_tokens = 0
|
| 161 |
+
full_text_log = ""
|
| 162 |
+
gallery_preview = []
|
| 163 |
+
|
| 164 |
# 1. 準備環境
|
| 165 |
temp_dir = tempfile.mkdtemp()
|
| 166 |
img_output_dir = os.path.join(temp_dir, "cleaned_images")
|
|
|
|
| 171 |
prs.slide_width = Inches(16)
|
| 172 |
prs.slide_height = Inches(9)
|
| 173 |
|
| 174 |
+
# 2. PDF 轉圖片 (降低 DPI 加速)
|
| 175 |
+
progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
|
| 176 |
try:
|
| 177 |
+
# dpi=150 足夠螢幕檢視,且大幅減少上傳時間
|
| 178 |
+
images = convert_from_path(pdf_file, dpi=150)
|
| 179 |
except Exception as e:
|
| 180 |
raise ValueError(f"PDF 轉換失敗: {str(e)}")
|
| 181 |
|
| 182 |
+
# 3. 平行處理 (Parallel Execution)
|
| 183 |
+
# 根據 CPU 核心數或 API 限制設定 workers,建議 3-5 避免 Rate Limit
|
| 184 |
+
max_workers = 4
|
| 185 |
+
results_map = {} # 用來存結果,確保順序正確
|
| 186 |
|
| 187 |
+
progress(0.2, desc="🚀 AI 多工處理中 (可能需要稍等)...")
|
| 188 |
+
|
| 189 |
+
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
| 190 |
+
# 提交所有任務
|
| 191 |
+
future_to_page = {
|
| 192 |
+
executor.submit(self.process_single_page, i, img, img_output_dir): i
|
| 193 |
+
for i, img in enumerate(images)
|
| 194 |
+
}
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
# 等待完成
|
| 197 |
+
for future in concurrent.futures.as_completed(future_to_page):
|
| 198 |
+
try:
|
| 199 |
+
res = future.result()
|
| 200 |
+
results_map[res["index"]] = res
|
| 201 |
+
# 更新 Token
|
| 202 |
+
total_input_tokens += res["tokens_in"]
|
| 203 |
+
total_output_tokens += res["tokens_out"]
|
| 204 |
+
except Exception as exc:
|
| 205 |
+
print(f"Page processing generated an exception: {exc}")
|
| 206 |
+
|
| 207 |
+
# 4. 依序組裝 PPTX (確保順序正確)
|
| 208 |
+
progress(0.8, desc="正在組裝 PPTX...")
|
| 209 |
+
|
| 210 |
+
cleaned_images_paths = [] # 用於 ZIP
|
| 211 |
+
|
| 212 |
+
for i in range(len(images)):
|
| 213 |
+
if i not in results_map:
|
| 214 |
+
print(f"Missing result for page {i}")
|
| 215 |
+
continue
|
| 216 |
|
| 217 |
+
res = results_map[i]
|
| 218 |
+
|
| 219 |
+
# 更新 Log 與 Preview
|
| 220 |
+
full_text_log += res["log"]
|
| 221 |
+
if res["preview"]: gallery_preview.append(res["preview"])
|
| 222 |
+
if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
|
| 223 |
+
|
| 224 |
+
# 建立 Slide
|
| 225 |
+
slide = prs.slides.add_slide(prs.slide_layouts[6])
|
| 226 |
+
|
| 227 |
+
# A. 貼背景
|
| 228 |
+
if res["bg_path"] and os.path.exists(res["bg_path"]):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
try:
|
| 230 |
+
slide.shapes.add_picture(res["bg_path"], 0, 0, width=prs.slide_width, height=prs.slide_height)
|
|
|
|
| 231 |
except: pass
|
| 232 |
+
|
| 233 |
+
# B. 貼文字
|
| 234 |
+
for block in res["blocks"]:
|
| 235 |
+
text_content = block.get("text", "")
|
| 236 |
+
if not text_content: continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
+
# 座標轉換
|
| 239 |
+
box = block.get("box_2d", [0, 0, 100, 100])
|
| 240 |
+
ymin, xmin, ymax, xmax = box
|
| 241 |
+
left = Inches((xmin / 1000) * 16)
|
| 242 |
+
top = Inches((ymin / 1000) * 9)
|
| 243 |
+
width = Inches(((xmax - xmin) / 1000) * 16)
|
| 244 |
+
height = Inches(((ymax - ymin) / 1000) * 9)
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
+
textbox = slide.shapes.add_textbox(left, top, width, height)
|
| 247 |
+
tf = textbox.text_frame
|
| 248 |
+
tf.word_wrap = True
|
| 249 |
+
p = tf.paragraphs[0]
|
| 250 |
+
p.text = text_content
|
| 251 |
|
| 252 |
+
try: p.font.size = Pt(int(block.get("font_size", 18)))
|
| 253 |
+
except: p.font.size = Pt(18)
|
| 254 |
+
p.font.bold = block.get("is_bold", False)
|
| 255 |
+
try:
|
| 256 |
+
hex_c = block.get("color", "#000000").replace("#", "")
|
| 257 |
+
# 如果背景去字失敗,原圖背景可能很複雜,文字顏色可能需要調整 (這裡暫不處理,保持原色)
|
| 258 |
+
p.font.color.rgb = RGBColor.from_string(hex_c)
|
| 259 |
+
except: pass
|
| 260 |
+
|
| 261 |
+
# 5. 打包
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 262 |
progress(0.9, desc="正在打包檔案...")
|
| 263 |
pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
|
| 264 |
prs.save(pptx_path)
|
|
|
|
| 270 |
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
|
| 271 |
zf.write(pptx_path, "restored_slides.pptx")
|
| 272 |
zf.write(txt_path, "content_log.txt")
|
| 273 |
+
for img_path in cleaned_images_paths:
|
| 274 |
+
zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
|
|
|
|
| 275 |
|
|
|
|
| 276 |
token_stats = f"""
|
| 277 |
### 📊 Token 用量統計
|
| 278 |
+
- **總輸入:** {total_input_tokens:,}
|
| 279 |
+
- **總輸出:** {total_output_tokens:,}
|
| 280 |
- **總計消耗:** {total_input_tokens + total_output_tokens:,}
|
| 281 |
"""
|
| 282 |
|
|
|
|
| 313 |
|
| 314 |
gr.Markdown("---")
|
| 315 |
pdf_input = gr.File(label="上傳 PDF")
|
| 316 |
+
btn_process = gr.Button("🚀 開始還原 PPTX (平行加速版)", variant="primary")
|
| 317 |
|
| 318 |
with gr.Column():
|
| 319 |
out_zip = gr.File(label="📦 下載完整包")
|