DeepLearning101 committed on
Commit
02a1875
·
verified ·
1 Parent(s): f0e6e15

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -36
app.py CHANGED
@@ -15,14 +15,12 @@ load_dotenv()
15
 
16
  class NotebookLMTool:
17
  def __init__(self):
18
- # 嘗試從環境變數讀取 Key
19
  self.api_key = os.getenv("GEMINI_API_KEY")
20
  self.client = None
21
  if self.api_key:
22
  self.client = genai.Client(api_key=self.api_key)
23
 
24
  def set_key(self, user_key):
25
- """讓使用者從介面設定 Key"""
26
  if user_key and user_key.strip():
27
  self.api_key = user_key.strip()
28
  self.client = genai.Client(api_key=self.api_key)
@@ -58,64 +56,76 @@ class NotebookLMTool:
58
 
59
  # --- 步驟 A: 提取文字 (OCR) ---
60
  try:
61
- # 使用 Gemini 2.0 Flash 提取文字
62
- response_text = self.client.models.generate_content(
63
- model="gemini-2.0-flash",
64
- contents=["Extract all text from this image directly. Do not describe the layout, just give me the text content.", img]
65
  )
66
- page_content = response_text.text if response_text.text else "[No Text Found]"
67
  except Exception as e:
68
  page_content = f"[OCR Error: {e}]"
69
 
70
  full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
71
 
72
- # --- 步驟 B: 圖片去字 (Clean) ---
73
- # 注意:Gemini 2.0 直接回傳 Image 的支援度視 prompt 而定,
74
- # 這裡我們使用 prompt 讓它嘗試還原背景。
 
 
75
  try:
76
- response_clean = self.client.models.generate_content(
77
- model="gemini-2.0-flash",
78
- contents=["Remove all text from this image and fill in the background to make it look like a clean slide background. Return the image.", img],
79
- config=types.GenerateContentConfig(response_mime_type="image/png")
 
 
 
 
80
  )
81
 
82
- # 處理回傳的圖片 (Binary)
83
- if response_clean.bytes:
84
- saved_path = os.path.join(img_output_dir, f"slide_{i+1:02d}.png")
85
- with open(saved_path, "wb") as f:
86
- f.write(response_clean.bytes)
87
- cleaned_images_paths.append(saved_path)
88
- gallery_preview.append((saved_path, f"Page {i+1}"))
89
  else:
90
- # 如果 AI 拒絕生成圖片,我們保留原圖但標記失敗
91
- print(f"Page {i+1}: Model did not return an image.")
 
 
 
 
92
  except Exception as e:
93
- print(f"Clean Error Page {i+1}: {e}")
 
 
 
94
 
95
  # 4. 打包結果
96
  progress(0.9, desc="正在打包 ZIP...")
97
 
98
- # 寫入文字檔
99
  txt_path = os.path.join(temp_dir, "extracted_text.txt")
100
  with open(txt_path, "w", encoding="utf-8") as f:
101
  f.write(full_text)
102
 
103
- # 壓縮
104
- zip_path = os.path.join(temp_dir, "notebooklm_clean_pack.zip")
105
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
106
- zf.write(txt_path, "all_text.txt")
107
- for img_path in cleaned_images_paths:
108
- zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
 
109
 
110
  return zip_path, full_text, gallery_preview
111
 
112
- # 初始化工具
113
  tool = NotebookLMTool()
114
 
115
- # --- Gradio 介面 ---
116
  with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
117
- gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手")
118
- gr.Markdown("上傳 PDF,AI 自動幫你:**1. 抓出所有文字** | **2. 移除文字還原乾淨背景圖**")
119
 
120
  with gr.Row():
121
  with gr.Column():
@@ -131,10 +141,9 @@ with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as d
131
  out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
132
  out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
133
 
134
- gr.Markdown("### 🖼️ 背景還原預覽")
135
  out_gallery = gr.Gallery(columns=4)
136
 
137
- # 事件綁定
138
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
139
 
140
  btn_process.click(
 
15
 
16
  class NotebookLMTool:
17
  def __init__(self):
 
18
  self.api_key = os.getenv("GEMINI_API_KEY")
19
  self.client = None
20
  if self.api_key:
21
  self.client = genai.Client(api_key=self.api_key)
22
 
23
  def set_key(self, user_key):
 
24
  if user_key and user_key.strip():
25
  self.api_key = user_key.strip()
26
  self.client = genai.Client(api_key=self.api_key)
 
56
 
57
  # --- 步驟 A: 提取文字 (OCR) ---
58
  try:
59
+ # OCR 任務
60
+ resp_text = self.client.models.generate_content(
61
+ model="gemini-2.5-flash",
62
+ contents=["Extract all text content from this slide strictly. Ignore layout description.", img]
63
  )
64
+ page_content = resp_text.text if resp_text.text else "[No Text Found]"
65
  except Exception as e:
66
  page_content = f"[OCR Error: {e}]"
67
 
68
  full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
69
 
70
+ # --- 步驟 B: 圖片去字 (重繪背景) ---
71
+ # 策略調整:要求模型「重繪」而非「編輯」
72
+ save_name = f"slide_{i+1:02d}.png"
73
+ final_path = os.path.join(img_output_dir, save_name)
74
+
75
  try:
76
+ # Use the gemini-2.5-flash model for the background redraw request
77
+ resp_img = self.client.models.generate_content(
78
+ model="gemini-2.5-flash",
79
+ contents=[
80
+ "Look at this image. Generate a NEW image that looks exactly like the background of this slide, but remove ALL text, letters, and words. Keep the layout, colors, and shapes identical. Output ONLY the image.",
81
+ img
82
+ ],
83
+ config=types.GenerateContentConfig(response_mime_type="image/png")
84
  )
85
 
86
+ # 檢查是否有二進位圖片回傳
87
+ if resp_img.bytes:
88
+ with open(final_path, "wb") as f:
89
+ f.write(resp_img.bytes)
90
+ cleaned_images_paths.append(final_path)
91
+ gallery_preview.append((final_path, f"Page {i+1} (Cleaned)"))
92
+ print(f"Page {i+1}: Image generated successfully.")
93
  else:
94
+ # 如果沒有 bytes,通常是因為模型回傳了拒絕的文字
95
+ print(f"Page {i+1} Failed: Model returned text -> {resp_img.text}")
96
+ # 失敗時:儲存原圖並標記 Failed
97
+ img.save(final_path)
98
+ gallery_preview.append((final_path, f"Page {i+1} (Failed - Original)"))
99
+
100
  except Exception as e:
101
+ print(f"Page {i+1} Error: {str(e)}")
102
+ # 發生錯誤時也保留原圖
103
+ img.save(final_path)
104
+ gallery_preview.append((final_path, f"Page {i+1} (Error - Original)"))
105
 
106
  # 4. 打包結果
107
  progress(0.9, desc="正在打包 ZIP...")
108
 
 
109
  txt_path = os.path.join(temp_dir, "extracted_text.txt")
110
  with open(txt_path, "w", encoding="utf-8") as f:
111
  f.write(full_text)
112
 
113
+ zip_path = os.path.join(temp_dir, "notebooklm_pack.zip")
 
114
  with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
115
+ zf.write(txt_path, "content.txt")
116
+ for root, dirs, files in os.walk(img_output_dir):
117
+ for file in files:
118
+ zf.write(os.path.join(root, file), os.path.join("images", file))
119
 
120
  return zip_path, full_text, gallery_preview
121
 
122
+ # Init
123
  tool = NotebookLMTool()
124
 
125
+ # --- Gradio UI ---
126
  with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
127
+ gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手 (V2 修正版)")
128
+ gr.Markdown("上傳 PDF,AI 自動幫你:**1. 抓出所有文字** | **2. 重繪乾淨背景圖**")
129
 
130
  with gr.Row():
131
  with gr.Column():
 
141
  out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
142
  out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
143
 
144
+ gr.Markdown("### 🖼️ 處理結果預覽 (若去字失敗將顯示原圖)")
145
  out_gallery = gr.Gallery(columns=4)
146
 
 
147
  btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
148
 
149
  btn_process.click(