Spaces:

DeepLearning101
/

PPT.404

Paused

App Files Community

DeepLearning101 committed on Jan 8

Commit

0f96e28

verified ·

1 Parent(s): 30f214b

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -13

app.py CHANGED Viewed

@@ -56,9 +56,10 @@ class NotebookLMTool:
             progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
             # --- 步驟 A: 提取文字 (OCR) ---
             try:
                 resp_text = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
                     contents=["Extract all text content from this slide strictly.", img]
                 )
                 page_content = resp_text.text if resp_text.text else "[No Text Found]"
@@ -67,34 +68,34 @@ class NotebookLMTool:
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
-            # --- 步驟 B: 圖片去字 (使用 response_modalities) ---
             save_name = f"slide_{i+1:02d}.png"
             final_path = os.path.join(img_output_dir, save_name)
             try:
-                # 參考你的 JS：使用 responseModalities = ["IMAGE"]
                 resp_img = self.client.models.generate_content(
-                    model="gemini-2.5-flash",
                     contents=[
                         "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
                         img
                     ],
                     config=types.GenerateContentConfig(
-                        response_modalities=["IMAGE"]  # ✅ 修正點：對應 JS 的 responseModalities
                     )
                 )
-                # 處理圖片回傳 (SDK 解析)
                 image_data = None
-                # 檢查是否有 inline_data (Base64)
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
                         if part.inline_data:
                             image_data = part.inline_data.data
                             break
-                # 如果 SDK 自動處理了 bytes (部分版本)
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
@@ -110,15 +111,15 @@ class NotebookLMTool:
                     gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
                     print(f"Page {i+1}: Image generated successfully.")
                 else:
-                    # 失敗回退：保留原圖
-                    print(f"Page {i+1} Failed: No image data returned. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
                     img.save(final_path)
-                    gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
             except Exception as e:
                 print(f"Page {i+1} Error: {str(e)}")
                 img.save(final_path)
-                gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
         # 4. 打包結果
         progress(0.9, desc="正在打包 ZIP...")
@@ -140,7 +141,7 @@ tool = NotebookLMTool()
 # --- Gradio UI ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V3 修復版)")
     gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
     with gr.Row():

             progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
             # --- 步驟 A: 提取文字 (OCR) ---
+            # 使用標準 Flash 模型處理文字，速度最快
             try:
                 resp_text = self.client.models.generate_content(
+                    model="gemini-2.5-flash",
                     contents=["Extract all text content from this slide strictly.", img]
                 )
                 page_content = resp_text.text if resp_text.text else "[No Text Found]"
             full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
+            # --- 步驟 B: 圖片去字 (Image Generation) ---
+            # 關鍵修改：必須使用 'gemini-2.0-flash-exp' 且該模型目前才支援 IMAGE 輸出
             save_name = f"slide_{i+1:02d}.png"
             final_path = os.path.join(img_output_dir, save_name)
             try:
                 resp_img = self.client.models.generate_content(
+                    model="gemini-2.0-flash-exp",  # ✅ 修正：使用支援圖片輸出的實驗模型
                     contents=[
                         "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
                         img
                     ],
                     config=types.GenerateContentConfig(
+                        response_modalities=["IMAGE"] # ✅ 修正：明確告知需要圖片模態
                     )
                 )
+                # 處理圖片回傳 (解析 SDK 回應)
                 image_data = None
+                # 檢查 inline_data (Base64)
                 if hasattr(resp_img, 'parts') and resp_img.parts:
                     for part in resp_img.parts:
                         if part.inline_data:
                             image_data = part.inline_data.data
                             break
+                # 部分 SDK 版本可能直接放在 bytes
                 if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
                     image_data = resp_img.bytes
                     gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
                     print(f"Page {i+1}: Image generated successfully.")
                 else:
+                    # 失敗回退：保留原圖並標記
+                    print(f"Page {i+1} Failed: No image data. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
                     img.save(final_path)
+                    gallery_preview.append((final_path, f"Page {i+1} (Original - Gen Failed)"))
             except Exception as e:
                 print(f"Page {i+1} Error: {str(e)}")
                 img.save(final_path)
+                gallery_preview.append((final_path, f"Page {i+1} (Original - Error)"))
         # 4. 打包結果
         progress(0.9, desc="正在打包 ZIP...")
 # --- Gradio UI ---
 with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V4 最終修復版)")
     gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
     with gr.Row():