Spaces:

DeepLearning101
/

PPT.404

Sleeping

App Files Files Community

DeepLearning101 committed 25 days ago

Commit

05abb73

verified ·

1 Parent(s): 6ad1309

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -40

app.py CHANGED Viewed

@@ -36,18 +36,22 @@ class NotebookLMTool:
         return "⚠️ Key 無效"
     def _extract_json(self, text):
-        """嘗試從回應中提取 JSON 字串"""
         try:
-            # 尋找 ```json ... ``` 區塊
             match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
             if match:
                 return json.loads(match.group(1))
-            # 或者是直接的 JSON 結構
-            match = re.search(r"\[.*\]", text, re.DOTALL)
             if match:
                 return json.loads(match.group(0))
-            return []
         except:
             return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
@@ -64,7 +68,7 @@ class NotebookLMTool:
         # 初始化 PPTX
         prs = Presentation()
-        # 設定為 16:9 (寬 16 英吋, 高 9 英吋) - 這是 NotebookLM 常見比例
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
@@ -81,23 +85,24 @@ class NotebookLMTool:
         # 3. 逐頁處理
         for i, img in enumerate(images):
-            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在重建第 {i+1}/{len(images)} 頁...")
-            # 建立空白投影片 (Layout 6 is usually blank)
             slide = prs.slides.add_slide(prs.slide_layouts[6])
-            # --- 步驟 A: 圖片去字 (Clean Background) ---
             save_name = f"slide_{i+1:02d}.png"
             final_bg_path = os.path.join(img_output_dir, save_name)
-            # 先儲存原圖備用
-            img.save(final_bg_path)
             try:
                 resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash-image", # 或是 gemini-2.0-flash-exp
                     contents=[
-                        "Remove all text, titles, and bullet points from this slide. Keep the background design, logos, and non-text graphics exactly as they are. Output ONLY the image.",
                         img
                     ],
                     config=types.GenerateContentConfig(response_modalities=["IMAGE"])
@@ -116,35 +121,49 @@ class NotebookLMTool:
                     if isinstance(image_data, str): image_data = base64.b64decode(image_data)
                     with open(final_bg_path, "wb") as f: f.write(image_data)
                     cleaned_images_paths.append(final_bg_path)
                 else:
-                    print(f"Page {i+1}: Background gen failed, using original.")
             except Exception as e:
-                print(f"Bg Gen Error Page {i+1}: {e}")
-            # 將背景圖貼到 PPTX (佔滿全螢幕)
-            try:
-                slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
-                gallery_preview.append((final_bg_path, f"Page {i+1} Background"))
-            except Exception as e:
-                print(f"PPTX Image Insert Error: {e}")
-            # --- 步驟 B: 佈局分析 (Layout Analysis to JSON) ---
             try:
                 prompt = """
-                Analyze this slide image. Identify all text blocks.
                 Return a JSON list strictly. Each item must have:
-                - "text": The content string.
                 - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
-                - "font_size": estimated font size (integer, e.g., 24 for titles, 12 for body).
-                - "color": estimated hex color code (e.g., "#000000").
                 - "is_bold": boolean.
-                Example: [{"text": "Title", "box_2d": [10, 10, 200, 500], "font_size": 40, "color": "#333333", "is_bold": true}]
                 """
                 resp_layout = self.client.models.generate_content(
-                    model="gemini-2.0-flash", # 使用 2.0 Flash 處理邏輯較強
                     contents=[prompt, img],
                     config=types.GenerateContentConfig(response_mime_type="application/json")
                 )
@@ -156,14 +175,13 @@ class NotebookLMTool:
                     text_content = block.get("text", "")
                     if not text_content: continue
-                    full_text_log += f"[P{i+1}] {text_content}\n"
                     # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
-                    # box_2d: [ymin, xmin, ymax, xmax]
                     box = block.get("box_2d", [0, 0, 100, 100])
                     ymin, xmin, ymax, xmax = box
-                    # 轉換為英吋
                     left = Inches((xmin / 1000) * 16)
                     top = Inches((ymin / 1000) * 9)
                     width = Inches(((xmax - xmin) / 1000) * 16)
@@ -176,19 +194,30 @@ class NotebookLMTool:
                     p = tf.paragraphs[0]
                     p.text = text_content
-                    p.font.size = Pt(block.get("font_size", 18))
                     p.font.bold = block.get("is_bold", False)
                     # 顏色處理
                     try:
                         hex_color = block.get("color", "#000000").replace("#", "")
                         p.font.color.rgb = RGBColor.from_string(hex_color)
                     except:
-                        pass # Fallback to black
             except Exception as e:
                 print(f"Layout Analysis Error Page {i+1}: {e}")
-                full_text_log += f"[P{i+1}] Error parsing layout.\n"
         # 4. 打包結果
         progress(0.9, desc="正在打包檔案...")
@@ -202,13 +231,14 @@ class NotebookLMTool:
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(full_text_log)
-        # 建立 ZIP (包含 PPTX, 文字檔, 與乾淨圖)
         zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
             zf.write(txt_path, "content_log.txt")
-            for img_path in cleaned_images_paths:
-                zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
         return zip_path, pptx_path, gallery_preview
@@ -246,10 +276,10 @@ with gr.Blocks(title="NotebookLM Slide Restorer，PPT.404", theme=gr.themes.Soft
             btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
-            out_zip = gr.File(label="📦 下載完整包 (含 PPTX, 圖, 文)")
-            out_pptx = gr.File(label="📊 直接下載 PPTX")
-    gr.Markdown("### 🖼️ 背景分離預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)

         return "⚠️ Key 無效"
     def _extract_json(self, text):
         """Best-effort extraction of a JSON value from a model response.

         Candidates are collected in priority order and parsed independently,
         so a malformed earlier candidate never prevents a later one from
         being tried:

         1. the contents of the first ```json ... ``` fenced block,
         2. the widest ``[{ ... }]`` span (an array of objects),
         3. the whole text.

         Args:
             text: Raw response text returned by the model.

         Returns:
             Whatever ``json.loads`` yields for the first candidate that
             parses, or ``[]`` when nothing parses or ``text`` is not a
             non-empty string.
         """
         # Guard non-string / blank input up front: slicing None in the error
         # message would otherwise raise instead of returning the fallback.
         if not isinstance(text, str) or not text.strip():
             print(f"JSON Parse Error. Raw text snippet: {str(text)[:100]}...")
             return []

         candidates = []

         # Non-greedy so that a response with several fenced blocks yields the
         # first block rather than one unparsable span covering all of them.
         fenced = re.search(r"```json\s*(.*?)\s*```", text, re.DOTALL)
         if fenced:
             candidates.append(fenced.group(1))

         # Greedy on purpose: the array must run to its final closing bracket.
         array = re.search(r"\[\s*\{.*\}\s*\]", text, re.DOTALL)
         if array:
             candidates.append(array.group(0))

         # Last resort: the response may already be bare JSON.
         candidates.append(text)

         for candidate in candidates:
             try:
                 return json.loads(candidate)
             except (ValueError, RecursionError):
                 # json.JSONDecodeError is a ValueError; move on to the next
                 # candidate instead of giving up on the whole response.
                 continue

         print(f"JSON Parse Error. Raw text snippet: {text[:100]}...")
         return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
         # 初始化 PPTX
         prs = Presentation()
+        # 設定為 16:9 比例
         prs.slide_width = Inches(16)
         prs.slide_height = Inches(9)
         # 3. 逐頁處理
         for i, img in enumerate(images):
+            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
+            # 建立空白投影片
             slide = prs.slides.add_slide(prs.slide_layouts[6])
+            # ==========================================
+            # 步驟 A: 背景處理 (去字) - 關鍵修正
+            # ==========================================
             save_name = f"slide_{i+1:02d}.png"
             final_bg_path = os.path.join(img_output_dir, save_name)
+            bg_success = False
             try:
                 resp_img = self.client.models.generate_content(
+                    model="gemini-2.5-flash-image",
                     contents=[
+                        "Remove all text, titles, bullet points, and diagrams containing text from this slide. Preserve only the pure background styling, colors, logos, and non-text decorative elements. Output ONLY the image.",
                         img
                     ],
                     config=types.GenerateContentConfig(response_modalities=["IMAGE"])
                     if isinstance(image_data, str): image_data = base64.b64decode(image_data)
                     with open(final_bg_path, "wb") as f: f.write(image_data)
                     cleaned_images_paths.append(final_bg_path)
+                    bg_success = True
+                    print(f"Page {i+1}: Background cleaned successfully.")
                 else:
+                    print(f"Page {i+1}: AI returned text instead of image: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
             except Exception as e:
+                print(f"Page {i+1} Background Gen Error: {e}")
+            # 策略：如果去字成功，貼上乾淨背景。
+            # 如果失敗，不要貼原圖！否則字會重疊。寧可留白或貼一個提示圖。
+            if bg_success:
+                try:
+                    slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
+                    gallery_preview.append((final_bg_path, f"Page {i+1} Background (Cleaned)"))
+                except Exception as e:
+                    print(f"PPTX Image Insert Error: {e}")
+            else:
+                # 失敗時，存原圖但標記失敗，這樣使用者在 ZIP 裡還是拿得到原圖，但 PPT 上不會亂
+                img.save(final_bg_path)
+                gallery_preview.append((final_bg_path, f"Page {i+1} (Cleaning Failed - Original Saved)"))
+                full_text_log += f"[P{i+1}] Warning: Background cleaning failed. Slide background left blank to avoid text duplication.\n"
+            # ==========================================
+            # 步驟 B: 文字與佈局還原 (Layout Analysis)
+            # ==========================================
             try:
                 prompt = """
+                Analyze this slide image to reconstruct it in PowerPoint.
+                Identify all text blocks.
                 Return a JSON list strictly. Each item must have:
+                - "text": The exact content string.
                 - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
+                - "font_size": estimated font size (integer, e.g., 40 for big titles, 14 for body).
+                - "color": estimated hex color code (e.g., "#000000", "#FFFFFF").
                 - "is_bold": boolean.
+                Example format:
+                [{"text": "Introduction", "box_2d": [50, 50, 150, 400], "font_size": 32, "color": "#000000", "is_bold": true}]
                 """
+                # 使用 2.0 Flash 進行邏輯分析
                 resp_layout = self.client.models.generate_content(
+                    model="gemini-2.5-flash",
                     contents=[prompt, img],
                     config=types.GenerateContentConfig(response_mime_type="application/json")
                 )
                     text_content = block.get("text", "")
                     if not text_content: continue
+                    full_text_log += f"[P{i+1}] Text: {text_content}\n"
                     # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
                     box = block.get("box_2d", [0, 0, 100, 100])
                     ymin, xmin, ymax, xmax = box
+                    # 轉換為英吋 (基於 16:9)
                     left = Inches((xmin / 1000) * 16)
                     top = Inches((ymin / 1000) * 9)
                     width = Inches(((xmax - xmin) / 1000) * 16)
                     p = tf.paragraphs[0]
                     p.text = text_content
+                    # 字體設定
+                    try:
+                        sz = int(block.get("font_size", 18))
+                        # 簡單的縮放修正，有時候 AI 估計的字偏小
+                        p.font.size = Pt(sz)
+                    except:
+                        p.font.size = Pt(18)
                     p.font.bold = block.get("is_bold", False)
                     # 顏色處理
                     try:
                         hex_color = block.get("color", "#000000").replace("#", "")
+                        # 避免白色背景配白色文字 (簡單防呆)
+                        if not bg_success and hex_color.upper() == "FFFFFF":
+                            hex_color = "000000"
                         p.font.color.rgb = RGBColor.from_string(hex_color)
                     except:
+                        pass
             except Exception as e:
                 print(f"Layout Analysis Error Page {i+1}: {e}")
+                full_text_log += f"[P{i+1}] Error parsing layout text.\n"
         # 4. 打包結果
         progress(0.9, desc="正在打包檔案...")
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(full_text_log)
+        # 建立 ZIP
         zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(pptx_path, "restored_slides.pptx")
             zf.write(txt_path, "content_log.txt")
+            if os.path.exists(img_output_dir):
+                for img_name in os.listdir(img_output_dir):
+                    zf.write(os.path.join(img_output_dir, img_name), os.path.join("cleaned_backgrounds", img_name))
         return zip_path, pptx_path, gallery_preview
             btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
+            out_zip = gr.File(label="📦 下載完整包 (含背景圖+Log)")
+            out_pptx = gr.File(label="📊 直接下載可編輯 PPTX")
+    gr.Markdown("### 🖼️ 背景去字效果預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)