Spaces:

DeepLearning101
/

PPT.404

Sleeping

App Files Community

DeepLearning101 committed 25 days ago

Commit

17d162a

verified ·

1 Parent(s): 05abb73

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -90

app.py CHANGED Viewed

@@ -36,22 +36,14 @@ class NotebookLMTool:
         return "⚠️ Key 無效"
     def _extract_json(self, text):
-        """強化版 JSON 提取，處理 Markdown 代碼塊"""
         try:
-            # 1. 嘗試抓取 ```json ... ```
             match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
-            if match:
-                return json.loads(match.group(1))
-            # 2. 嘗試抓取純 [...] 陣列結構
             match = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
-            if match:
-                return json.loads(match.group(0))
-            # 3. 嘗試直接 parse (假設整段就是 JSON)
             return json.loads(text)
         except:
-            print(f"JSON Parse Error. Raw text snippet: {text[:100]}...")
             return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
@@ -59,7 +51,11 @@ class NotebookLMTool:
             raise ValueError("請先輸入 Google API Key！")
         if pdf_file is None:
-            return None, None, None
         # 1. 準備環境
         temp_dir = tempfile.mkdtemp()
@@ -68,7 +64,6 @@ class NotebookLMTool:
         # 初始化 PPTX
         prs = Presentation()
-        # 設定為 16:9 比例
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
@@ -86,34 +81,43 @@ class NotebookLMTool:
         # 3. 逐頁處理
         for i, img in enumerate(images):
             progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
-            # 建立空白投影片
             slide = prs.slides.add_slide(prs.slide_layouts[6])
             # ==========================================
-            # 步驟 A: 背景處理 (去字) - 關鍵修正
             # ==========================================
             save_name = f"slide_{i+1:02d}.png"
             final_bg_path = os.path.join(img_output_dir, save_name)
             bg_success = False
             try:
                 resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash-image",
-                    contents=[
-                        "Remove all text, titles, bullet points, and diagrams containing text from this slide. Preserve only the pure background styling, colors, logos, and non-text decorative elements. Output ONLY the image.",
-                        img
-                    ],
                     config=types.GenerateContentConfig(response_modalities=["IMAGE"])
                 )
                 # 處理圖片資料
                 image_data = None
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
-                        if part.inline_data:
-                            image_data = part.inline_data.data; break
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
@@ -122,116 +126,82 @@ class NotebookLMTool:
                     with open(final_bg_path, "wb") as f: f.write(image_data)
                     cleaned_images_paths.append(final_bg_path)
                     bg_success = True
-                    print(f"Page {i+1}: Background cleaned successfully.")
                 else:
-                    print(f"Page {i+1}: AI returned text instead of image: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
             except Exception as e:
-                print(f"Page {i+1} Background Gen Error: {e}")
-            # 策略：如果去字成功，貼上乾淨背景。
-            # 如果失敗，不要貼原圖！否則字會重疊。寧可留白或貼一個提示圖。
             if bg_success:
                 try:
                     slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
-                    gallery_preview.append((final_bg_path, f"Page {i+1} Background (Cleaned)"))
-                except Exception as e:
-                    print(f"PPTX Image Insert Error: {e}")
             else:
-                # 失敗時，存原圖但標記失敗，這樣使用者在 ZIP 裡還是拿得到原圖，但 PPT 上不會亂
-                img.save(final_bg_path)
-                gallery_preview.append((final_bg_path, f"Page {i+1} (Cleaning Failed - Original Saved)"))
-                full_text_log += f"[P{i+1}] Warning: Background cleaning failed. Slide background left blank to avoid text duplication.\n"
             # ==========================================
             # 步驟 B: 文字與佈局還原 (Layout Analysis)
             # ==========================================
             try:
-                prompt = """
-                Analyze this slide image to reconstruct it in PowerPoint.
-                Identify all text blocks.
-                Return a JSON list strictly. Each item must have:
-                - "text": The exact content string.
-                - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
-                - "font_size": estimated font size (integer, e.g., 40 for big titles, 14 for body).
-                - "color": estimated hex color code (e.g., "#000000", "#FFFFFF").
-                - "is_bold": boolean.
-                Example format:
-                [{"text": "Introduction", "box_2d": [50, 50, 150, 400], "font_size": 32, "color": "#000000", "is_bold": true}]
                 """
-                # 使用 2.0 Flash 進行邏輯分析
                 resp_layout = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
-                    contents=[prompt, img],
                     config=types.GenerateContentConfig(response_mime_type="application/json")
                 )
                 blocks = self._extract_json(resp_layout.text)
-                # 將文字區塊寫入 PPTX
                 for block in blocks:
                     text_content = block.get("text", "")
                     if not text_content: continue
-                    full_text_log += f"[P{i+1}] Text: {text_content}\n"
-                    # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
                     box = block.get("box_2d", [0, 0, 100, 100])
                     ymin, xmin, ymax, xmax = box
-                    # 轉換為英吋 (基於 16:9)
                     left = Inches((xmin / 1000) * 16)
                     top = Inches((ymin / 1000) * 9)
                     width = Inches(((xmax - xmin) / 1000) * 16)
                     height = Inches(((ymax - ymin) / 1000) * 9)
-                    # 建立文字方塊
                     textbox = slide.shapes.add_textbox(left, top, width, height)
                     tf = textbox.text_frame
                     tf.word_wrap = True
                     p = tf.paragraphs[0]
                     p.text = text_content
-                    # 字體設定
-                    try:
-                        sz = int(block.get("font_size", 18))
-                        # 簡單的縮放修正，有時候 AI 估計的字偏小
-                        p.font.size = Pt(sz)
-                    except:
-                        p.font.size = Pt(18)
                     p.font.bold = block.get("is_bold", False)
-                    # 顏色處理
                     try:
-                        hex_color = block.get("color", "#000000").replace("#", "")
-                        # 避免白色背景配白色文字 (簡單防呆)
-                        if not bg_success and hex_color.upper() == "FFFFFF":
-                            hex_color = "000000"
-                        p.font.color.rgb = RGBColor.from_string(hex_color)
-                    except:
-                        pass
             except Exception as e:
-                print(f"Layout Analysis Error Page {i+1}: {e}")
-                full_text_log += f"[P{i+1}] Error parsing layout text.\n"
-        # 4. 打包結果
         progress(0.9, desc="正在打包檔案...")
-        # 儲存 PPTX
         pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
         prs.save(pptx_path)
-        # 儲存文字記錄
         txt_path = os.path.join(temp_dir, "content_log.txt")
-        with open(txt_path, "w", encoding="utf-8") as f:
-            f.write(full_text_log)
-        # 建立 ZIP
         zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
@@ -240,7 +210,15 @@ class NotebookLMTool:
                 for img_name in os.listdir(img_output_dir):
                     zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
-        return zip_path, pptx_path, gallery_preview
 # Init
 tool = NotebookLMTool()
@@ -276,8 +254,9 @@ with gr.Blocks(title="NotebookLM Slide Restorer，PPT.404", theme=gr.themes.Soft
             btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
-            out_zip = gr.File(label="📦 下載完整包 (含背景圖+Log)")
-            out_pptx = gr.File(label="📊 直接下載可編輯 PPTX")
     gr.Markdown("### 🖼️ 背景去字效果預覽")
     out_gallery = gr.Gallery(columns=4)
@@ -287,7 +266,7 @@ with gr.Blocks(title="NotebookLM Slide Restorer，PPT.404", theme=gr.themes.Soft
     btn_process.click(
         tool.process_pdf,
         inputs=[pdf_input],
-        outputs=[out_zip, out_pptx, out_gallery]
     )
 if __name__ == "__main__":

         return "⚠️ Key 無效"
     def _extract_json(self, text):
+        """強化版 JSON 提取"""
         try:
             match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
+            if match: return json.loads(match.group(1))
             match = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
+            if match: return json.loads(match.group(0))
             return json.loads(text)
         except:
             return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
             raise ValueError("請先輸入 Google API Key！")
         if pdf_file is None:
+            return None, None, None, ""
+        # 初始化 Token 計數器
+        total_input_tokens = 0
+        total_output_tokens = 0
         # 1. 準備環境
         temp_dir = tempfile.mkdtemp()
         # 初始化 PPTX
         prs = Presentation()
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
         # 3. 逐頁處理
         for i, img in enumerate(images):
             progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
             slide = prs.slides.add_slide(prs.slide_layouts[6])
             # ==========================================
+            # 步驟 A: 背景處理 (去字 + 嚴格保留原圖)
             # ==========================================
             save_name = f"slide_{i+1:02d}.png"
             final_bg_path = os.path.join(img_output_dir, save_name)
             bg_success = False
             try:
+                # Prompt 優化：強調 "Strictly preserve" 和 "Do not add"
+                clean_prompt = """
+                Strictly remove all text, titles, text-boxes, and bullet points from this slide image.
+                CRITICAL INSTRUCTION:
+                1. Preserve the original background pattern, colors, logos, and non-text graphics EXACTLY as they are.
+                2. Do NOT add any new objects, decorations, or hallucinations.
+                3. Do NOT change the aspect ratio or style.
+                4. Simply fill the gaps left by removed text with the surrounding background texture naturally.
+                Output ONLY the image.
+                """
                 resp_img = self.client.models.generate_content(
+                    model="gemini-2.0-flash-exp",
+                    contents=[clean_prompt, img],
                     config=types.GenerateContentConfig(response_modalities=["IMAGE"])
                 )
+                # --- Token 統計 (圖片生成) ---
+                if resp_img.usage_metadata:
+                    total_input_tokens += resp_img.usage_metadata.prompt_token_count
+                    total_output_tokens += resp_img.usage_metadata.candidates_token_count
                 # 處理圖片資料
                 image_data = None
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
+                        if part.inline_data: image_data = part.inline_data.data; break
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
                     with open(final_bg_path, "wb") as f: f.write(image_data)
                     cleaned_images_paths.append(final_bg_path)
                     bg_success = True
                 else:
+                    print(f"Page {i+1}: AI returned text instead of image.")
             except Exception as e:
+                print(f"Page {i+1} Clean Error: {e}")
+            # 失敗處理：使用原圖，但不要貼入 PPT 避免疊字，僅存檔供參考
             if bg_success:
                 try:
                     slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
+                    gallery_preview.append((final_bg_path, f"Page {i+1} Cleaned"))
+                except: pass
             else:
+                img.save(final_bg_path)
+                gallery_preview.append((final_bg_path, f"Page {i+1} (Original - Clean Failed)"))
             # ==========================================
             # 步驟 B: 文字與佈局還原 (Layout Analysis)
             # ==========================================
             try:
+                layout_prompt = """
+                Analyze this slide. Return a JSON list of all text blocks.
+                Each item: {"text": string, "box_2d": [ymin, xmin, ymax, xmax] (0-1000), "font_size": int, "color": hex, "is_bold": bool}
                 """
                 resp_layout = self.client.models.generate_content(
+                    model="gemini-2.0-flash",
+                    contents=[layout_prompt, img],
                     config=types.GenerateContentConfig(response_mime_type="application/json")
                 )
+                # --- Token 統計 (文字分析) ---
+                if resp_layout.usage_metadata:
+                    total_input_tokens += resp_layout.usage_metadata.prompt_token_count
+                    total_output_tokens += resp_layout.usage_metadata.candidates_token_count
                 blocks = self._extract_json(resp_layout.text)
                 for block in blocks:
                     text_content = block.get("text", "")
                     if not text_content: continue
+                    full_text_log += f"[P{i+1}] {text_content}\n"
+                    # 座標與樣式還原
                     box = block.get("box_2d", [0, 0, 100, 100])
                     ymin, xmin, ymax, xmax = box
                     left = Inches((xmin / 1000) * 16)
                     top = Inches((ymin / 1000) * 9)
                     width = Inches(((xmax - xmin) / 1000) * 16)
                     height = Inches(((ymax - ymin) / 1000) * 9)
                     textbox = slide.shapes.add_textbox(left, top, width, height)
                     tf = textbox.text_frame
                     tf.word_wrap = True
                     p = tf.paragraphs[0]
                     p.text = text_content
+                    try: p.font.size = Pt(int(block.get("font_size", 18)))
+                    except: p.font.size = Pt(18)
                     p.font.bold = block.get("is_bold", False)
                     try:
+                        hex_c = block.get("color", "#000000").replace("#", "")
+                        if not bg_success and hex_c.upper() == "FFFFFF": hex_c = "000000"
+                        p.font.color.rgb = RGBColor.from_string(hex_c)
+                    except: pass
             except Exception as e:
+                print(f"Layout Error Page {i+1}: {e}")
+        # 4. 打包與統計
         progress(0.9, desc="正在打包檔案...")
         pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
         prs.save(pptx_path)
         txt_path = os.path.join(temp_dir, "content_log.txt")
+        with open(txt_path, "w", encoding="utf-8") as f: f.write(full_text_log)
         zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
                 for img_name in os.listdir(img_output_dir):
                     zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
+        # 格式化 Token 統計訊息
+        token_stats = f"""
+        ### 📊 Token 用量統計
+        - **總輸入 (Prompt Tokens):** {total_input_tokens:,}
+        - **總輸出 (Response Tokens):** {total_output_tokens:,}
+        - **總計消耗:** {total_input_tokens + total_output_tokens:,}
+        """
+        return zip_path, pptx_path, gallery_preview, token_stats
 # Init
 tool = NotebookLMTool()
             btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
+            out_zip = gr.File(label="📦 下載完整包")
+            out_pptx = gr.File(label="📊 直接下載 PPTX")
+            out_tokens = gr.Markdown("### 📊 等待處理...")
     gr.Markdown("### 🖼️ 背景去字效果預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_process.click(
         tool.process_pdf,
         inputs=[pdf_input],
+        outputs=[out_zip, out_pptx, out_gallery, out_tokens]
     )
 if __name__ == "__main__":