Spaces:

DeepLearning101
/

PPT.404

Sleeping

App Files Community

DeepLearning101 committed 21 days ago

Commit

6ad1309

verified ·

1 Parent(s): b42c65e

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -62

app.py CHANGED Viewed

@@ -4,10 +4,17 @@ import tempfile
 import zipfile
 import shutil
 import base64
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
 # 使用 Google 新版 SDK
 from google import genai
 from google.genai import types
@@ -28,6 +35,21 @@ class NotebookLMTool:
             return "✅ API Key 已更新！"
         return "⚠️ Key 無效"
     def process_pdf(self, pdf_file, progress=gr.Progress()):
         if not self.client:
             raise ValueError("請先輸入 Google API Key！")
@@ -35,118 +57,173 @@ class NotebookLMTool:
         if pdf_file is None:
             return None, None, None
-        # 1. 準備暫存目錄
         temp_dir = tempfile.mkdtemp()
         img_output_dir = os.path.join(temp_dir, "cleaned_images")
         os.makedirs(img_output_dir, exist_ok=True)
         # 2. PDF 轉圖片
         progress(0.1, desc="正在將 PDF 轉為圖片...")
         try:
             images = convert_from_path(pdf_file)
         except Exception as e:
-            raise ValueError(f"PDF 轉換失敗 (請確認 packages.txt 有加入 poppler-utils): {str(e)}")
-        full_text = ""
         cleaned_images_paths = []
         gallery_preview = []
         # 3. 逐頁處理
         for i, img in enumerate(images):
-            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
-            # --- 步驟 A: 提取文字 (OCR) ---
-            # 使用標準 Flash 模型處理文字，速度最快
-            try:
-                resp_text = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
-                    contents=["Extract all text content from this slide strictly.", img]
-                )
-                page_content = resp_text.text if resp_text.text else "[No Text Found]"
-            except Exception as e:
-                page_content = f"[OCR Error: {e}]"
-            full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
-            # --- 步驟 B: 圖片去字 (Image Generation) ---
-            # 關鍵修改：必須使用 'gemini-2.0-flash-exp' 且該模型目前才支援 IMAGE 輸出
             save_name = f"slide_{i+1:02d}.png"
-            final_path = os.path.join(img_output_dir, save_name)
             try:
                 resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash-image",  # ✅ 修正：使用支援圖片輸出的實驗模型
                     contents=[
-                        "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
                         img
                     ],
-                    config=types.GenerateContentConfig(
-                        response_modalities=["IMAGE"] # ✅ 修正：明確告知需要圖片模態
-                    )
                 )
-                # 處理圖片回傳 (解析 SDK 回應)
                 image_data = None
-                # 檢查 inline_data (Base64)
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
                         if part.inline_data:
-                            image_data = part.inline_data.data
-                            break
-                # 部分 SDK 版本可能直接放在 bytes
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
                 if image_data:
-                    # 如果是 Base64 字串，需要解碼
-                    if isinstance(image_data, str):
-                        image_data = base64.b64decode(image_data)
-                    with open(final_path, "wb") as f:
-                        f.write(image_data)
-                    cleaned_images_paths.append(final_path)
-                    gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
-                    print(f"Page {i+1}: Image generated successfully.")
-                else:
-                    # 失敗回退：保留原圖並標記
-                    print(f"Page {i+1} Failed: No image data. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
-                    img.save(final_path)
-                    gallery_preview.append((final_path, f"Page {i+1} (Original - Gen Failed)"))
             except Exception as e:
-                print(f"Page {i+1} Error: {str(e)}")
-                img.save(final_path)
-                gallery_preview.append((final_path, f"Page {i+1} (Original - Error)"))
         # 4. 打包結果
-        progress(0.9, desc="正在打包 ZIP...")
-        txt_path = os.path.join(temp_dir, "extracted_text.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
-            f.write(full_text)
-        zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
-            zf.write(txt_path, "content.txt")
             for img_path in cleaned_images_paths:
-                zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
-        return zip_path, full_text, gallery_preview
 # Init
 tool = NotebookLMTool()
 # --- Gradio UI ---
-with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛠️ NotebookLM 投影片 PDF 拆解神器")
     gr.Markdown("""
     <div align="center">
-    # 🛠️ 上傳 NotebookLM 投影片 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**
     👉 歡迎 Star [GitHub](https://github.com/Deep-Learning-101/) ⭐ 覺得不錯 👈
     <h3>🧠 補腦專區：<a href="https://deep-learning-101.github.io/" target="_blank">Deep Learning 101</a></h3>
     | 🔥 技術傳送門 (Tech Stack) | 📚 必讀心法 (Must Read) |
@@ -166,13 +243,13 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
             gr.Markdown("---")
             pdf_input = gr.File(label="上傳 PDF")
-            btn_process = gr.Button("🚀 開始拆解", variant="primary")
         with gr.Column():
-            out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
-            out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
-    gr.Markdown("### 🖼️ 處理結果預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
@@ -180,7 +257,7 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
     btn_process.click(
         tool.process_pdf,
         inputs=[pdf_input],
-        outputs=[out_zip, out_text, out_gallery]
     )
 if __name__ == "__main__":

 import zipfile
 import shutil
 import base64
+import json
+import re
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
+# PPTX 處理套件
+from pptx import Presentation
+from pptx.util import Inches, Pt
+from pptx.dml.color import RGBColor
 # 使用 Google 新版 SDK
 from google import genai
 from google.genai import types
             return "✅ API Key 已更新！"
         return "⚠️ Key 無效"
+    def _extract_json(self, text):
+        """嘗試從回應中提取 JSON 字串"""
+        try:
+            # 尋找 ```json ... ``` 區塊
+            match = re.search(r"```json\s*(.*)\s*```", text, re.DOTALL)
+            if match:
+                return json.loads(match.group(1))
+            # 或者是直接的 JSON 結構
+            match = re.search(r"\[.*\]", text, re.DOTALL)
+            if match:
+                return json.loads(match.group(0))
+            return []
+        except:
+            return []
     def process_pdf(self, pdf_file, progress=gr.Progress()):
         if not self.client:
             raise ValueError("請先輸入 Google API Key！")
         if pdf_file is None:
             return None, None, None
+        # 1. 準備環境
         temp_dir = tempfile.mkdtemp()
         img_output_dir = os.path.join(temp_dir, "cleaned_images")
         os.makedirs(img_output_dir, exist_ok=True)
+        # 初始化 PPTX
+        prs = Presentation()
+        # 設定為 16:9 (寬 16 英吋, 高 9 英吋) - 這是 NotebookLM 常見比例
+        prs.slide_width = Inches(16)
+        prs.slide_height = Inches(9)
         # 2. PDF 轉圖片
         progress(0.1, desc="正在將 PDF 轉為圖片...")
         try:
             images = convert_from_path(pdf_file)
         except Exception as e:
+            raise ValueError(f"PDF 轉換失敗: {str(e)}")
+        full_text_log = ""
         cleaned_images_paths = []
         gallery_preview = []
         # 3. 逐頁處理
         for i, img in enumerate(images):
+            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在重建第 {i+1}/{len(images)} 頁...")
+            # 建立空白投影片 (Layout 6 is usually blank)
+            slide = prs.slides.add_slide(prs.slide_layouts[6])
+            # --- 步驟 A: 圖片去字 (Clean Background) ---
             save_name = f"slide_{i+1:02d}.png"
+            final_bg_path = os.path.join(img_output_dir, save_name)
+            # 先儲存原圖備用
+            img.save(final_bg_path)
             try:
                 resp_img = self.client.models.generate_content(
+                    model="gemini-2.5-flash-image", # 或是 gemini-2.0-flash-exp
                     contents=[
+                        "Remove all text, titles, and bullet points from this slide. Keep the background design, logos, and non-text graphics exactly as they are. Output ONLY the image.",
                         img
                     ],
+                    config=types.GenerateContentConfig(response_modalities=["IMAGE"])
                 )
+                # 處理圖片資料
                 image_data = None
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
                         if part.inline_data:
+                            image_data = part.inline_data.data; break
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
                 if image_data:
+                    if isinstance(image_data, str): image_data = base64.b64decode(image_data)
+                    with open(final_bg_path, "wb") as f: f.write(image_data)
+                    cleaned_images_paths.append(final_bg_path)
+                else:
+                    print(f"Page {i+1}: Background gen failed, using original.")
+            except Exception as e:
+                print(f"Bg Gen Error Page {i+1}: {e}")
+            # 將背景圖貼到 PPTX (佔滿全螢幕)
+            try:
+                slide.shapes.add_picture(final_bg_path, 0, 0, width=prs.slide_width, height=prs.slide_height)
+                gallery_preview.append((final_bg_path, f"Page {i+1} Background"))
+            except Exception as e:
+                print(f"PPTX Image Insert Error: {e}")
+            # --- 步驟 B: 佈局分析 (Layout Analysis to JSON) ---
+            try:
+                prompt = """
+                Analyze this slide image. Identify all text blocks.
+                Return a JSON list strictly. Each item must have:
+                - "text": The content string.
+                - "box_2d": [ymin, xmin, ymax, xmax] (coordinates normalized 0-1000).
+                - "font_size": estimated font size (integer, e.g., 24 for titles, 12 for body).
+                - "color": estimated hex color code (e.g., "#000000").
+                - "is_bold": boolean.
+                Example: [{"text": "Title", "box_2d": [10, 10, 200, 500], "font_size": 40, "color": "#333333", "is_bold": true}]
+                """
+                resp_layout = self.client.models.generate_content(
+                    model="gemini-2.0-flash", # 使用 2.0 Flash 處理邏輯較強
+                    contents=[prompt, img],
+                    config=types.GenerateContentConfig(response_mime_type="application/json")
+                )
+                blocks = self._extract_json(resp_layout.text)
+                # 將文字區塊寫入 PPTX
+                for block in blocks:
+                    text_content = block.get("text", "")
+                    if not text_content: continue
+                    full_text_log += f"[P{i+1}] {text_content}\n"
+                    # 座標轉換 (Gemini 0-1000 -> PPTX Inches)
+                    # box_2d: [ymin, xmin, ymax, xmax]
+                    box = block.get("box_2d", [0, 0, 100, 100])
+                    ymin, xmin, ymax, xmax = box
+                    # 轉換為英吋
+                    left = Inches((xmin / 1000) * 16)
+                    top = Inches((ymin / 1000) * 9)
+                    width = Inches(((xmax - xmin) / 1000) * 16)
+                    height = Inches(((ymax - ymin) / 1000) * 9)
+                    # 建立文字方塊
+                    textbox = slide.shapes.add_textbox(left, top, width, height)
+                    tf = textbox.text_frame
+                    tf.word_wrap = True
+                    p = tf.paragraphs[0]
+                    p.text = text_content
+                    p.font.size = Pt(block.get("font_size", 18))
+                    p.font.bold = block.get("is_bold", False)
+                    # 顏色處理
+                    try:
+                        hex_color = block.get("color", "#000000").replace("#", "")
+                        p.font.color.rgb = RGBColor.from_string(hex_color)
+                    except:
+                        pass # Fallback to black
             except Exception as e:
+                print(f"Layout Analysis Error Page {i+1}: {e}")
+                full_text_log += f"[P{i+1}] Error parsing layout.\n"
         # 4. 打包結果
+        progress(0.9, desc="正在打包檔案...")
+        # 儲存 PPTX
+        pptx_path = os.path.join(temp_dir, "restored_presentation.pptx")
+        prs.save(pptx_path)
+        # 儲存文字記錄
+        txt_path = os.path.join(temp_dir, "content_log.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
+            f.write(full_text_log)
+        # 建立 ZIP (包含 PPTX, 文字檔, 與乾淨圖)
+        zip_path = os.path.join(temp_dir, "notebooklm_restore_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            zf.write(pptx_path, "restored_slides.pptx")
+            zf.write(txt_path, "content_log.txt")
             for img_path in cleaned_images_paths:
+                zf.write(img_path, os.path.join("cleaned_backgrounds", os.path.basename(img_path)))
+        return zip_path, pptx_path, gallery_preview
 # Init
 tool = NotebookLMTool()
 # --- Gradio UI ---
+with gr.Blocks(title="NotebookLM Slide Restorer，PPT.404", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🛠️ NotebookLM 投影片 PDF 還原神器 (PPT.404)")
     gr.Markdown("""
     <div align="center">
+    # 🪄 上傳 PDF，AI 自動：**去字背景** + **版面分析** + **合成可編輯 PPTX**
     👉 歡迎 Star [GitHub](https://github.com/Deep-Learning-101/) ⭐ 覺得不錯 👈
     <h3>🧠 補腦專區：<a href="https://deep-learning-101.github.io/" target="_blank">Deep Learning 101</a></h3>
     | 🔥 技術傳送門 (Tech Stack) | 📚 必讀心法 (Must Read) |
             gr.Markdown("---")
             pdf_input = gr.File(label="上傳 PDF")
+            btn_process = gr.Button("🚀 開始還原 PPTX", variant="primary")
         with gr.Column():
+            out_zip = gr.File(label="📦 下載完整包 (含 PPTX, 圖, 文)")
+            out_pptx = gr.File(label="📊 直接下載 PPTX")
+    gr.Markdown("### 🖼️ 背景分離預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
     btn_process.click(
         tool.process_pdf,
         inputs=[pdf_input],
+        outputs=[out_zip, out_pptx, out_gallery]
     )
 if __name__ == "__main__":