Spaces:

DeepLearning101
/

PPT.404

Paused

App Files Files Community

DeepLearning101 committed on Jan 8

Commit

02a1875

verified ·

1 Parent(s): f0e6e15

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -36

app.py CHANGED Viewed

@@ -15,14 +15,12 @@ load_dotenv()
 class NotebookLMTool:
     def __init__(self):
-        # 嘗試從環境變數讀取 Key
         self.api_key = os.getenv("GEMINI_API_KEY")
         self.client = None
         if self.api_key:
             self.client = genai.Client(api_key=self.api_key)
     def set_key(self, user_key):
-        """讓使用者從介面設定 Key"""
         if user_key and user_key.strip():
             self.api_key = user_key.strip()
             self.client = genai.Client(api_key=self.api_key)
@@ -58,64 +56,76 @@ class NotebookLMTool:
             # --- 步驟 A: 提取文字 (OCR) ---
             try:
-                # 使用 Gemini 2.0 Flash 提取文字
-                response_text = self.client.models.generate_content(
-                    model="gemini-2.0-flash",
-                    contents=["Extract all text from this image directly. Do not describe the layout, just give me the text content.", img]
                 )
-                page_content = response_text.text if response_text.text else "[No Text Found]"
             except Exception as e:
                 page_content = f"[OCR Error: {e}]"
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
-            # --- 步驟 B: 圖片去字 (Clean) ---
-            # 注意：Gemini 2.0 直接回傳 Image 的支援度視 prompt 而定，
-            # 這裡我們使用 prompt 讓它嘗試還原背景。
             try:
-                response_clean = self.client.models.generate_content(
-                    model="gemini-2.0-flash",
-                    contents=["Remove all text from this image and fill in the background to make it look like a clean slide background. Return the image.", img],
-                    config=types.GenerateContentConfig(response_mime_type="image/png")
                 )
-                # 處理回傳的圖片 (Binary)
-                if response_clean.bytes:
-                    saved_path = os.path.join(img_output_dir, f"slide_{i+1:02d}.png")
-                    with open(saved_path, "wb") as f:
-                        f.write(response_clean.bytes)
-                    cleaned_images_paths.append(saved_path)
-                    gallery_preview.append((saved_path, f"Page {i+1}"))
                 else:
-                    # 如果 AI 拒絕生成圖片，我們保留原圖但標記失敗
-                    print(f"Page {i+1}: Model did not return an image.")
             except Exception as e:
-                print(f"Clean Error Page {i+1}: {e}")
         # 4. 打包結果
         progress(0.9, desc="正在打包 ZIP...")
-        # 寫入文字檔
         txt_path = os.path.join(temp_dir, "extracted_text.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(full_text)
-        # 壓縮
-        zip_path = os.path.join(temp_dir, "notebooklm_clean_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
-            zf.write(txt_path, "all_text.txt")
-            for img_path in cleaned_images_paths:
-                zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
         return zip_path, full_text, gallery_preview
-# 初始化工具
 tool = NotebookLMTool()
-# --- Gradio 介面 ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手")
-    gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 移除文字還原乾淨背景圖**")
     with gr.Row():
         with gr.Column():
@@ -131,10 +141,9 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
             out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
             out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
-    gr.Markdown("### 🖼️ 背景還原預覽")
     out_gallery = gr.Gallery(columns=4)
-    # 事件綁定
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
     btn_process.click(

 class NotebookLMTool:
     def __init__(self):
         self.api_key = os.getenv("GEMINI_API_KEY")
         self.client = None
         if self.api_key:
             self.client = genai.Client(api_key=self.api_key)
     def set_key(self, user_key):
         if user_key and user_key.strip():
             self.api_key = user_key.strip()
             self.client = genai.Client(api_key=self.api_key)
             # --- 步驟 A: 提取文字 (OCR) ---
             try:
+                # OCR 任務
+                resp_text = self.client.models.generate_content(
+                    model="gemini-2.5-flash",
+                    contents=["Extract all text content from this slide strictly. Ignore layout description.", img]
                 )
+                page_content = resp_text.text if resp_text.text else "[No Text Found]"
             except Exception as e:
                 page_content = f"[OCR Error: {e}]"
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
+            # --- 步驟 B: 圖片去字 (重繪背景) ---
+            # 策略調整：要求模型「重繪」而非「編輯」
+            save_name = f"slide_{i+1:02d}.png"
+            final_path = os.path.join(img_output_dir, save_name)
             try:
+                # 使用 gemini-2.5-flash 模型，支援度較高
+                resp_img = self.client.models.generate_content(
+                    model="gemini-2.5-flash",
+                    contents=[
+                        "Look at this image. Generate a NEW image that looks exactly like the background of this slide, but remove ALL text, letters, and words. Keep the layout, colors, and shapes identical. Output ONLY the image.",
+                        img
+                    ],
+                    config=types.GenerateContentConfig(response_mime_type="image/png")
                 )
+                # 檢查是否有二進位圖片回傳
+                if resp_img.bytes:
+                    with open(final_path, "wb") as f:
+                        f.write(resp_img.bytes)
+                    cleaned_images_paths.append(final_path)
+                    gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
+                    print(f"Page {i+1}: Image generated successfully.")
                 else:
+                    # 如果沒有 bytes，通常是因為模型回傳了拒絕的文字
+                    print(f"Page {i+1} Failed: Model returned text -> {resp_img.text}")
+                    # 失敗時：儲存原圖並標記 Failed
+                    img.save(final_path)
+                    gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
             except Exception as e:
+                print(f"Page {i+1} Error: {str(e)}")
+                # 發生錯誤時也保留原圖
+                img.save(final_path)
+                gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
         # 4. 打包結果
         progress(0.9, desc="正在打包 ZIP...")
         txt_path = os.path.join(temp_dir, "extracted_text.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(full_text)
+        zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            zf.write(txt_path, "content.txt")
+            for root, dirs, files in os.walk(img_output_dir):
+                for file in files:
+                    zf.write(os.path.join(root, file), os.path.join("images", file))
         return zip_path, full_text, gallery_preview
+# Init
 tool = NotebookLMTool()
+# --- Gradio UI ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V2 修正版)")
+    gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
     with gr.Row():
         with gr.Column():
             out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
             out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
+    gr.Markdown("### 🖼️ 處理結果預覽 (若去字失敗將顯示原圖)")
     out_gallery = gr.Gallery(columns=4)
     btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
     btn_process.click(