Spaces:

DeepLearning101
/

PPT.404

Running

App Files Community

DeepLearning101 committed 28 days ago

Commit

df128f2

verified ·

1 Parent(s): 02a1875

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -21

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import tempfile
 import zipfile
 import shutil
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
@@ -56,10 +57,9 @@ class NotebookLMTool:
             # --- 步驟 A: 提取文字 (OCR) ---
             try:
-                # OCR 任務
                 resp_text = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
-                    contents=["Extract all text content from this slide strictly. Ignore layout description.", img]
                 )
                 page_content = resp_text.text if resp_text.text else "[No Text Found]"
             except Exception as e:
@@ -67,39 +67,56 @@ class NotebookLMTool:
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
-            # --- 步驟 B: 圖片去字 (重繪背景) ---
-            # 策略調整：要求模型「重繪」而非「編輯」
             save_name = f"slide_{i+1:02d}.png"
             final_path = os.path.join(img_output_dir, save_name)
             try:
-                # 使用 flash-exp 模型，支援度較高
                 resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
                     contents=[
-                        "Look at this image. Generate a NEW image that looks exactly like the background of this slide, but remove ALL text, letters, and words. Keep the layout, colors, and shapes identical. Output ONLY the image.",
                         img
                     ],
-                    config=types.GenerateContentConfig(response_mime_type="image/png")
                 )
-                # 檢查是否有二進位圖片回傳
-                if resp_img.bytes:
                     with open(final_path, "wb") as f:
-                        f.write(resp_img.bytes)
                     cleaned_images_paths.append(final_path)
                     gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
                     print(f"Page {i+1}: Image generated successfully.")
                 else:
-                    # 如果沒有 bytes，通常是因為模型回傳了拒絕的文字
-                    print(f"Page {i+1} Failed: Model returned text -> {resp_img.text}")
-                    # 失敗時：儲存原圖並標記 Failed
                     img.save(final_path)
                     gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
             except Exception as e:
                 print(f"Page {i+1} Error: {str(e)}")
-                # 發生錯誤時也保留原圖
                 img.save(final_path)
                 gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
@@ -113,9 +130,8 @@ class NotebookLMTool:
         zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(txt_path, "content.txt")
-            for root, dirs, files in os.walk(img_output_dir):
-                for file in files:
-                    zf.write(os.path.join(root, file), os.path.join("images", file))
         return zip_path, full_text, gallery_preview
@@ -124,7 +140,7 @@ tool = NotebookLMTool()
 # --- Gradio UI ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V2 修正版)")
     gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
     with gr.Row():
@@ -141,7 +157,7 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
             out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
             out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
-    gr.Markdown("### 🖼️ 處理結果預覽 (若去字失敗將顯示原圖)")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)

 import tempfile
 import zipfile
 import shutil
+import base64
 from pdf2image import convert_from_path
 from PIL import Image
 from dotenv import load_dotenv
             # --- 步驟 A: 提取文字 (OCR) ---
             try:
                 resp_text = self.client.models.generate_content(
+                    model="gemini-2.0-flash",
+                    contents=["Extract all text content from this slide strictly.", img]
                 )
                 page_content = resp_text.text if resp_text.text else "[No Text Found]"
             except Exception as e:
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
+            # --- 步驟 B: 圖片去字 (使用 response_modalities) ---
             save_name = f"slide_{i+1:02d}.png"
             final_path = os.path.join(img_output_dir, save_name)
             try:
+                # 參考你的 JS：使用 responseModalities = ["IMAGE"]
                 resp_img = self.client.models.generate_content(
+                    model="gemini-2.0-flash-exp",
                     contents=[
+                        "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
                         img
                     ],
+                    config=types.GenerateContentConfig(
+                        response_modalities=["IMAGE"]  # ✅ 修正點：對應 JS 的 responseModalities
+                    )
                 )
+                # 處理圖片回傳 (SDK 解析)
+                image_data = None
+                # 檢查是否有 inline_data (Base64)
+                if hasattr(resp_img, 'parts') and resp_img.parts:
+                    for part in resp_img.parts:
+                        if part.inline_data:
+                            image_data = part.inline_data.data
+                            break
+                # 如果 SDK 自動處理了 bytes (部分版本)
+                if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
+                    image_data = resp_img.bytes
+                if image_data:
+                    # 如果是 Base64 字串，需要解碼
+                    if isinstance(image_data, str):
+                        image_data = base64.b64decode(image_data)
                     with open(final_path, "wb") as f:
+                        f.write(image_data)
                     cleaned_images_paths.append(final_path)
                     gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
                     print(f"Page {i+1}: Image generated successfully.")
                 else:
+                    # 失敗回退：保留原圖
+                    print(f"Page {i+1} Failed: No image data returned. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
                     img.save(final_path)
                     gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
             except Exception as e:
                 print(f"Page {i+1} Error: {str(e)}")
                 img.save(final_path)
                 gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
         zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
             zf.write(txt_path, "content.txt")
+            for img_path in cleaned_images_paths:
+                zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
         return zip_path, full_text, gallery_preview
 # --- Gradio UI ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V3 修復版)")
     gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
     with gr.Row():
             out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
             out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
+    gr.Markdown("### 🖼️ 處理結果預覽")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)