Spaces:

DeepLearning101
/

PPT.404

Running

App Files Files Community

DeepLearning101 committed on 28 days ago

Commit

087fd05

verified ·

1 Parent(s): c2c01d9

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -125

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ import shutil
 import base64
 import json
 import re
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
@@ -46,6 +48,106 @@ class NotebookLMTool:
         except:
             return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
         if not self.client:
             raise ValueError("請先輸入 Google API Key！")
@@ -53,10 +155,12 @@ class NotebookLMTool:
         if pdf_file is None:
             return None, None, None, ""
-        # 初始化 Token 計數器
         total_input_tokens = 0
         total_output_tokens = 0
         # 1. 準備環境
         temp_dir = tempfile.mkdtemp()
         img_output_dir = os.path.join(temp_dir, "cleaned_images")
@@ -67,134 +171,94 @@ class NotebookLMTool:
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
-        # 2. PDF 轉圖片
-        progress(0.1, desc="正在將 PDF 轉為圖片...")
         try:
-            images = convert_from_path(pdf_file)
         except Exception as e:
             raise ValueError(f"PDF 轉換失敗: {str(e)}")
-        full_text_log = ""
-        cleaned_images_paths = []
-        gallery_preview = []
-        # 3. 逐頁處理
-        for i, img in enumerate(images):
-            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
-            slide = prs.slides.add_slide(prs.slide_layouts[6])
-            # ==========================================
-            # 步驟 A: 背景處理 (去字 + 嚴格保留原圖)
-            # ==========================================
-            save_name = f"slide_{i+1:02d}.png"
-            final_bg_path = os.path.join(img_output_dir, save_name)
-            bg_success = False
-            try:
-                # Prompt 優化：強調 "Strictly preserve" 和 "Do not add"
-                clean_prompt = """
-                Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
-                CRITICAL INSTRUCTION:
-                1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
-                2. Do NOT add any new objects, decorations, or hallucinations.
-                3. Do NOT change the aspect ratio or style.
-                4. Simply fill the gaps left by removed text with the surrounding background texture naturally.
-                Output ONLY the image.
-                """
-                resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash-image",
-                    contents=[clean_prompt, img],
-                    config=types.GenerateContentConfig(response_modalities=["IMAGE"])
-                )
-                # --- Token 統計 (圖片生成) ---
-                if resp_img.usage_metadata:
-                    total_input_tokens += resp_img.usage_metadata.prompt_token_count
-                    total_output_tokens += resp_img.usage_metadata.candidates_token_count
-                # 處理圖片資料
-                image_data = None
-                if hasattr(resp_img, 'parts') and resp_img.parts:
-                    for part in resp_img.parts:
-                        if part.inline_data: image_data = part.inline_data.data; break
-                if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
-                    image_data = resp_img.bytes
-                if image_data:
-                    if isinstance(image_data, str): image_data = base64.b64decode(image_data)
-                    with open(final_bg_path, "wb") as f: f.write(image_data)
-                    cleaned_images_paths.append(final_bg_path)
-                    bg_success = True
-                else:
-                    print(f"Page {i+1}: AI returned text instead of image.")
-            except Exception as e:
-                print(f"Page {i+1} Clean Error: {e}")
-            # 失敗處理：使用原圖，但不要貼入 PPT 避免疊字，僅存檔供參考
-            if bg_success:
                 try:
-                    slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
-                    gallery_preview.append((final_bg_path, f"Page {i+1} Cleaned"))
                 except: pass
-            else:
-                img.save(final_bg_path)
-                gallery_preview.append((final_bg_path, f"Page {i+1} (Original - Clean Failed)"))
-            # ==========================================
-            # 步驟 B: 文字與佈局還原 (Layout Analysis)
-            # ==========================================
-            try:
-                layout_prompt = """
-                Analyze this slide. Return a JSON list of all text blocks.
-                Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
-                """
-                resp_layout = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
-                    contents=[layout_prompt, img],
-                    config=types.GenerateContentConfig(response_mime_type="application/json")
-                )
-                # --- Token 統計 (文字分析) ---
-                if resp_layout.usage_metadata:
-                    total_input_tokens += resp_layout.usage_metadata.prompt_token_count
-                    total_output_tokens += resp_layout.usage_metadata.candidates_token_count
-                blocks = self._extract_json(resp_layout.text)
-                for block in blocks:
-                    text_content = block.get("text", "")
-                    if not text_content: continue
-                    full_text_log += f"[P{i+1}] {text_content}\n"
-                    # 座標與樣式還原
-                    box = block.get("box_2d", [0, 0, 100, 100])
-                    ymin, xmin, ymax, xmax = box
-                    left = Inches((xmin / 1000) * 16)
-                    top = Inches((ymin / 1000) * 9)
-                    width = Inches(((xmax - xmin) / 1000) * 16)
-                    height = Inches(((ymax - ymin) / 1000) * 9)
-                    textbox = slide.shapes.add_textbox(left, top, width, height)
-                    tf = textbox.text_frame
-                    tf.word_wrap = True
-                    p = tf.paragraphs[0]
-                    p.text = text_content
-                    try: p.font.size = Pt(int(block.get("font_size", 18)))
-                    except: p.font.size = Pt(18)
-                    p.font.bold = block.get("is_bold", False)
-                    try:
-                        hex_c = block.get("color", "#000000").replace("#", "")
-                        if not bg_success and hex_c.upper() == "FFFFFF": hex_c = "000000"
-                        p.font.color.rgb = RGBColor.from_string(hex_c)
-                    except: pass
-            except Exception as e:
-                print(f"Layout Error Page {i+1}: {e}")
-        # 4. 打包與統計
         progress(0.9, desc="正在打包檔案...")
         pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
         prs.save(pptx_path)
@@ -206,15 +270,13 @@ class NotebookLMTool:
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
             zf.write(txt_path, "content_log.txt")
-            if os.path.exists(img_output_dir):
-                for img_name in os.listdir(img_output_dir):
-                    zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
-        # 格式化 Token 統計訊息
         token_stats = f"""
         ### 📊 Token 用量統計
-        - **總輸入 (Prompt Tokens):** {total_input_tokens:,}
-        - **總輸出 (Response Tokens):** {total_output_tokens:,}
         - **總計消耗:** {total_input_tokens + total_output_tokens:,}
         """
@@ -251,7 +313,7 @@ with gr.Blocks(title="NotebookLM Slide Restorer，PPT.404", theme=gr.themes.Soft
             gr.Markdown("---")
             pdf_input = gr.File(label="上傳 PDF")
-            btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
             out_zip = gr.File(label="📦 下載完整包")

 import base64
 import json
 import re
+import concurrent.futures
+import time
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
         except:
             return []
+    # --- 單頁處理邏輯 (獨立出來以便平行運算) ---
+    def process_single_page(self, page_index, img, img_output_dir):
+        """Clean one page's background and analyze its text layout.
+
+        Makes two model calls for the page image: one requesting a text-free
+        version of the slide (saved as a PNG under ``img_output_dir``) and one
+        requesting a JSON list of text blocks. Exceptions raised inside either
+        model step are caught, printed, and noted in the log rather than
+        propagated. Split out as its own method so pages can be processed in
+        parallel.
+
+        Args:
+            page_index: Zero-based page index; shown as ``page_index + 1`` in
+                file names and messages.
+            img: The page image; passed to both model calls and written with
+                ``img.save`` as the fallback background.
+            img_output_dir: Directory where ``slide_NN.png`` is written.
+
+        Returns:
+            A dict with the keys ``index``, ``bg_path``, ``blocks``, ``log``,
+            ``preview``, ``tokens_in`` and ``tokens_out``.
+        """
+        print(f"🚀 [Page {page_index+1}] 開始處理...", flush=True)
+        # Result container returned to the caller
+        result = {
+            "index": page_index,
+            "bg_path": None,
+            "blocks": [],
+            "log": "",
+            "preview": None,
+            "tokens_in": 0,
+            "tokens_out": 0
+        }
+        save_name = f"slide_{page_index+1:02d}.png"
+        final_bg_path = os.path.join(img_output_dir, save_name)
+        bg_success = False
+        # 1. Remove text from the background (image cleaning)
+        try:
+            clean_prompt = """
+            Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
+            CRITICAL INSTRUCTION:
+            1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
+            2. Do NOT add any new objects, decorations, or hallucinations.
+            3. Output ONLY the image.
+            """
+            # Image-generation call; the model used is the one named below
+            # (gemini-2.5-flash-image).
+            resp_img = self.client.models.generate_content(
+                model="gemini-2.5-flash-image",
+                contents=[clean_prompt, img],
+                config=types.GenerateContentConfig(response_modalities=["IMAGE"])
+            )
+            # Token accounting
+            # NOTE(review): this addition runs inside the try, before the image
+            # is saved; if either count were None the TypeError would be caught
+            # below and the page treated as a cleaning failure. Confirm the SDK
+            # always returns ints here.
+            if resp_img.usage_metadata:
+                result["tokens_in"] += resp_img.usage_metadata.prompt_token_count
+                result["tokens_out"] += resp_img.usage_metadata.candidates_token_count
+            # Extract the image payload from the response and save it
+            image_data = None
+            if hasattr(resp_img, 'parts') and resp_img.parts:
+                for part in resp_img.parts:
+                    if part.inline_data: image_data = part.inline_data.data; break
+            if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
+                image_data = resp_img.bytes
+            if image_data:
+                if isinstance(image_data, str): image_data = base64.b64decode(image_data)
+                with open(final_bg_path, "wb") as f: f.write(image_data)
+                bg_success = True
+                result["bg_path"] = final_bg_path
+                result["preview"] = (final_bg_path, f"Page {page_index+1} Cleaned")
+            else:
+                print(f"⚠️ [Page {page_index+1}] 去字失敗: 未回傳圖片", flush=True)
+        except Exception as e:
+            print(f"❌ [Page {page_index+1}] Clean Error: {e}", flush=True)
+        # On failure, fall back to the original page image
+        if not bg_success:
+            img.save(final_bg_path)
+            result["bg_path"] = final_bg_path # the path is still needed when building the PPT
+            result["preview"] = (final_bg_path, f"Page {page_index+1} (Original)")
+            result["log"] += f"[P{page_index+1}] Warning: Background cleaning failed.\n"
+        # 2. Text and layout analysis
+        try:
+            layout_prompt = """
+            Analyze this slide. Return a JSON list of all text blocks.
+            Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
+            """
+            resp_layout = self.client.models.generate_content(
+                model="gemini-2.5-flash",
+                contents=[layout_prompt, img],
+                config=types.GenerateContentConfig(response_mime_type="application/json")
+            )
+            if resp_layout.usage_metadata:
+                result["tokens_in"] += resp_layout.usage_metadata.prompt_token_count
+                result["tokens_out"] += resp_layout.usage_metadata.candidates_token_count
+            blocks = self._extract_json(resp_layout.text)
+            result["blocks"] = blocks
+            # Record one log line per non-empty block (text cut to 20 characters)
+            for b in blocks:
+                if b.get("text"): result["log"] += f"[P{page_index+1}] {b['text'][:20]}...\n"
+        except Exception as e:
+            print(f"❌ [Page {page_index+1}] Layout Error: {e}", flush=True)
+            result["log"] += f"[P{page_index+1}] Layout Analysis Failed.\n"
+        print(f"✅ [Page {page_index+1}] 完成！", flush=True)
+        return result
     def process_pdf(self, pdf_file, progress=gr.Progress()):
         if not self.client:
             raise ValueError("請先輸入 Google API Key！")
         if pdf_file is None:
             return None, None, None, ""
+        # 統計數據
         total_input_tokens = 0
         total_output_tokens = 0
+        full_text_log = ""
+        gallery_preview = []
         # 1. 準備環境
         temp_dir = tempfile.mkdtemp()
         img_output_dir = os.path.join(temp_dir, "cleaned_images")
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
+        # 2. PDF 轉圖片 (降低 DPI 加速)
+        progress(0.1, desc="正在將 PDF 轉為圖片 (DPI=150)...")
         try:
+            # dpi=150 足夠螢幕檢視，且大幅減少上傳時間
+            images = convert_from_path(pdf_file, dpi=150)
         except Exception as e:
             raise ValueError(f"PDF 轉換失敗: {str(e)}")
+        # 3. 平行處理 (Parallel Execution)
+        # 根據 CPU 核心數或 API 限制設定 workers，建議 3-5 避免 Rate Limit
+        max_workers = 4
+        results_map = {} # 用來存結果，確保順序正確
+        progress(0.2, desc="🚀 AI 多工處理中 (可能需要稍等)...")
+        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+            # 提交所有任務
+            future_to_page = {
+                executor.submit(self.process_single_page, i, img, img_output_dir): i
+                for i, img in enumerate(images)
+            }
+            # 等待完成
+            for future in concurrent.futures.as_completed(future_to_page):
+                try:
+                    res = future.result()
+                    results_map[res["index"]] = res
+                    # 更新 Token
+                    total_input_tokens += res["tokens_in"]
+                    total_output_tokens += res["tokens_out"]
+                except Exception as exc:
+                    print(f"Page processing generated an exception: {exc}")
+        # 4. 依序組裝 PPTX (確保順序正確)
+        progress(0.8, desc="正在組裝 PPTX...")
+        cleaned_images_paths = [] # 用於 ZIP
+        for i in range(len(images)):
+            if i not in results_map:
+                print(f"Missing result for page {i}")
+                continue
+            res = results_map[i]
+            # 更新 Log 與 Preview
+            full_text_log += res["log"]
+            if res["preview"]: gallery_preview.append(res["preview"])
+            if res["bg_path"]: cleaned_images_paths.append(res["bg_path"])
+            # 建立 Slide
+            slide = prs.slides.add_slide(prs.slide_layouts[6])
+            # A. 貼背景
+            if res["bg_path"] and os.path.exists(res["bg_path"]):
                 try:
+                    slide.shapes.add_picture(res["bg_path"], 0, 0, width=prs.slide_width, height=prs.slide_height)
                 except: pass
+            # B. 貼文字
+            for block in res["blocks"]:
+                text_content = block.get("text", "")
+                if not text_content: continue
+                # 座標轉換
+                box = block.get("box_2d", [0, 0, 100, 100])
+                ymin, xmin, ymax, xmax = box
+                left = Inches((xmin / 1000) * 16)
+                top = Inches((ymin / 1000) * 9)
+                width = Inches(((xmax - xmin) / 1000) * 16)
+                height = Inches(((ymax - ymin) / 1000) * 9)
+                textbox = slide.shapes.add_textbox(left, top, width, height)
+                tf = textbox.text_frame
+                tf.word_wrap = True
+                p = tf.paragraphs[0]
+                p.text = text_content
+                try: p.font.size = Pt(int(block.get("font_size", 18)))
+                except: p.font.size = Pt(18)
+                p.font.bold = block.get("is_bold", False)
+                try:
+                    hex_c = block.get("color", "#000000").replace("#", "")
+                    # 如果背景去字失敗，原圖背景可能很複雜，文字顏色可能需要調整 (這裡暫不處理，保持原色)
+                    p.font.color.rgb = RGBColor.from_string(hex_c)
+                except: pass
+        # 5. 打包
         progress(0.9, desc="正在打包檔案...")
         pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
         prs.save(pptx_path)
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
             zf.write(txt_path, "content_log.txt")
+            for img_path in cleaned_images_paths:
+                zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
         token_stats = f"""
         ### 📊 Token 用量統計
+        - **總輸入:** {total_input_tokens:,}
+        - **總輸出:** {total_output_tokens:,}
         - **總計消耗:** {total_input_tokens + total_output_tokens:,}
         """
             gr.Markdown("---")
             pdf_input = gr.File(label="上傳 PDF")
+            btn_process = gr.Button("🚀 開始還原 PPTX (平行加速版)", variant="primary")
         with gr.Column():
             out_zip = gr.File(label="📦 下載完整包")