DeepLearning101 committed on
Commit
df128f2
·
verified ·
1 Parent(s): 02a1875

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -21
app.py CHANGED
@@ -3,6 +3,7 @@ import os
3
  import tempfile
4
  import zipfile
5
  import shutil
 
6
  from pdf2image import convert_from_path
7
  from PIL import Image
8
  from dotenv import load_dotenv
@@ -56,10 +57,9 @@ class NotebookLMTool:
56
 
57
  # --- 步驟 A: 提取文字 (OCR) ---
58
  try:
59
- # OCR 任務
60
  resp_text = self.client.models.generate_content(
61
- model="gemini-2.5-flash",
62
- contents=["Extract all text content from this slide strictly. Ignore layout description.", img]
63
  )
64
  page_content = resp_text.text if resp_text.text else "[No Text Found]"
65
  except Exception as e:
@@ -67,39 +67,56 @@ class NotebookLMTool:
67
 
68
  full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
69
 
70
- # --- 步驟 B: 圖片去字 (重繪背景) ---
71
- # 策略調整:要求模型「重繪」而非「編輯」
72
  save_name = f"slide_{i+1:02d}.png"
73
  final_path = os.path.join(img_output_dir, save_name)
74
 
75
  try:
76
- # 使用 flash-exp 模型,支援度較高
77
  resp_img = self.client.models.generate_content(
78
- model="gemini-2.5-flash",
79
  contents=[
80
- "Look at this image. Generate a NEW image that looks exactly like the background of this slide, but remove ALL text, letters, and words. Keep the layout, colors, and shapes identical. Output ONLY the image.",
81
  img
82
  ],
83
- config=types.GenerateContentConfig(response_mime_type="image/png")
 
 
84
  )
85
 
86
- # 檢查是否有二進位圖片回傳
87
- if resp_img.bytes:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  with open(final_path, "wb") as f:
89
- f.write(resp_img.bytes)
 
90
  cleaned_images_paths.append(final_path)
91
  gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
92
  print(f"Page {i+1}: Image generated successfully.")
93
  else:
94
- # 如果沒有 bytes,通常是因為模型回傳了拒絕的文字
95
- print(f"Page {i+1} Failed: Model returned text -> {resp_img.text}")
96
- # 失敗時:儲存原圖並標記 Failed
97
  img.save(final_path)
98
  gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
99
 
100
  except Exception as e:
101
  print(f"Page {i+1} Error: {str(e)}")
102
- # 發生錯誤時也保留原圖
103
  img.save(final_path)
104
  gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
105
 
@@ -113,9 +130,8 @@ class NotebookLMTool:
113
  zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
114
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
115
  zf.write(txt_path, "content.txt")
116
- for root, dirs, files in os.walk(img_output_dir):
117
- for file in files:
118
- zf.write(os.path.join(root, file), os.path.join("images", file))
119
 
120
  return zip_path, full_text, gallery_preview
121
 
@@ -124,7 +140,7 @@ tool = NotebookLMTool()
124
 
125
  # --- Gradio UI ---
126
  with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
127
- gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V2 修正版)")
128
  gr.Markdown("上傳 PDF,AI 自動幫你:**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
129
 
130
  with gr.Row():
@@ -141,7 +157,7 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
141
  out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
142
  out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
143
 
144
- gr.Markdown("### 🖼️ 處理結果預覽 (若去字失敗將顯示原圖)")
145
  out_gallery = gr.Gallery(columns=4)
146
 
147
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
 
3
  import tempfile
4
  import zipfile
5
  import shutil
6
+ import base64
7
  from pdf2image import convert_from_path
8
  from PIL import Image
9
  from dotenv import load_dotenv
 
57
 
58
  # --- 步驟 A: 提取文字 (OCR) ---
59
  try:
 
60
  resp_text = self.client.models.generate_content(
61
+ model="gemini-2.0-flash",
62
+ contents=["Extract all text content from this slide strictly.", img]
63
  )
64
  page_content = resp_text.text if resp_text.text else "[No Text Found]"
65
  except Exception as e:
 
67
 
68
  full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
69
 
70
+ # --- 步驟 B: 圖片去字 (使用 response_modalities) ---
 
71
  save_name = f"slide_{i+1:02d}.png"
72
  final_path = os.path.join(img_output_dir, save_name)
73
 
74
  try:
75
+ # 參考你的 JS:使用 responseModalities = ["IMAGE"]
76
  resp_img = self.client.models.generate_content(
77
+ model="gemini-2.0-flash-exp",
78
  contents=[
79
+ "Remove all text from this image. Fill the gaps using the surrounding background texture to make it look clean and natural. Output ONLY the image.",
80
  img
81
  ],
82
+ config=types.GenerateContentConfig(
83
+ response_modalities=["IMAGE"] # ✅ 修正點:對應 JS 的 responseModalities
84
+ )
85
  )
86
 
87
+ # 處理圖片回傳 (SDK 解析)
88
+ image_data = None
89
+
90
+ # 檢查是否有 inline_data (Base64)
91
+ if hasattr(resp_img, 'parts') and resp_img.parts:
92
+ for part in resp_img.parts:
93
+ if part.inline_data:
94
+ image_data = part.inline_data.data
95
+ break
96
+
97
+ # 如果 SDK 自動處理了 bytes (部分版本)
98
+ if image_data is None and hasattr(resp_img, 'bytes') and resp_img.bytes:
99
+ image_data = resp_img.bytes
100
+
101
+ if image_data:
102
+ # 如果是 Base64 字串,需要解碼
103
+ if isinstance(image_data, str):
104
+ image_data = base64.b64decode(image_data)
105
+
106
  with open(final_path, "wb") as f:
107
+ f.write(image_data)
108
+
109
  cleaned_images_paths.append(final_path)
110
  gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
111
  print(f"Page {i+1}: Image generated successfully.")
112
  else:
113
+ # 失敗回退:保留原圖
114
+ print(f"Page {i+1} Failed: No image data returned. Text: {resp_img.text if hasattr(resp_img, 'text') else 'Unknown'}")
 
115
  img.save(final_path)
116
  gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
117
 
118
  except Exception as e:
119
  print(f"Page {i+1} Error: {str(e)}")
 
120
  img.save(final_path)
121
  gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
122
 
 
130
  zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
131
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
132
  zf.write(txt_path, "content.txt")
133
+ for img_path in cleaned_images_paths:
134
+ zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
 
135
 
136
  return zip_path, full_text, gallery_preview
137
 
 
140
 
141
  # --- Gradio UI ---
142
  with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
143
+ gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V3 修復版)")
144
  gr.Markdown("上傳 PDF,AI 自動幫你:**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
145
 
146
  with gr.Row():
 
157
  out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
158
  out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
159
 
160
+ gr.Markdown("### 🖼️ 處理結果預覽")
161
  out_gallery = gr.Gallery(columns=4)
162
 
163
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)