Spaces:

DeepLearning101
/

PPT.404

Paused

App Files Files Community

DeepLearning101 committed on Jan 8

Commit

f0e6e15

verified ·

1 Parent(s): 7da7a76

Update app.py

Browse files

Files changed (1) hide show

app.py +106 -580

app.py CHANGED Viewed

@@ -1,621 +1,147 @@
 import gradio as gr
-import json
 import os
-import pandas as pd
 import tempfile
 import zipfile
 import shutil
-from dotenv import load_dotenv
-from huggingface_hub import HfApi, hf_hub_download
 from pdf2image import convert_from_path
-import google.generativeai as genai
-from google.genai import types # 確保相容性
 from PIL import Image
-# Load Env
 load_dotenv()
-PROF_SAVE_FILE = "saved_professors.json"
-COMP_SAVE_FILE = "saved_companies.json"
-HF_TOKEN = os.getenv("HF_TOKEN")
-DATASET_REPO_ID = os.getenv("DATASET_REPO_ID")
-# ==========================================
-# 🧠 Unified AI Service (整合後端邏輯)
-# ==========================================
-class UnifiedService:
     def __init__(self):
-        self.api_key = self._get_api_key()
         if self.api_key:
-            genai.configure(api_key=self.api_key)
-            self.model_id = "gemini-2.0-flash-exp" # 使用較新的模型
-        else:
-            print("⚠️ Warning: No API Key found.")
-    def _get_api_key(self):
-        # 優先讀取環境變數 (Secrets)
-        return os.getenv("GEMINI_API_KEY")
-    def set_user_key(self, key):
-        """允許使用者在介面上暫時替換 Key"""
-        if key and key.strip():
-            self.api_key = key.strip()
-            genai.configure(api_key=self.api_key)
-    def _check_client(self):
-        if not self.api_key:
-            raise ValueError("API Key 未設定，請檢查 .env, Secrets 或在介面上輸入")
-    # --- 🛠️ New Feature: PDF 智能拆解 (NotebookLM 專用) ---
-    def decompose_pdf(self, pdf_file, progress=gr.Progress()):
-        self._check_client()
-        if not pdf_file: return None, None, "請上傳 PDF"
-        # 1. PDF 轉圖片
         progress(0.1, desc="正在將 PDF 轉為圖片...")
         try:
             images = convert_from_path(pdf_file)
         except Exception as e:
-            return None, None, f"PDF 轉換失敗 (請確認系統已安裝 poppler): {e}"
-        # 準備暫存資料夾
-        tmp_dir = tempfile.mkdtemp()
-        clean_img_dir = os.path.join(tmp_dir, "cleaned_images")
-        os.makedirs(clean_img_dir, exist_ok=True)
-        full_text_content = ""
-        processed_images = []
-        model = genai.GenerativeModel(self.model_id)
-        # 2. 逐頁處理
         for i, img in enumerate(images):
-            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在拆解第 {i+1}/{len(images)} 頁...")
-            # Action A: 提取文字 (OCR)
             try:
-                prompt_ocr = "Extract all text content from this image strictly. Do not describe the layout."
-                ocr_resp = model.generate_content([prompt_ocr, img])
-                page_text = ocr_resp.text
-            except:
-                page_text = "[Text Extraction Failed]"
-            full_text_content += f"--- Page {i+1} ---\n{page_text}\n\n"
-            # Action B: 移除文字 (In-painting)
             try:
-                prompt_clean = "Remove all text from this image and fill in the background naturally. Return only the image."
-                clean_resp = model.generate_content([prompt_clean, img])
-                # 嘗試取得圖片 (處理 V1/V2 SDK 差異)
-                try:
-                    clean_img = clean_resp.parts[0].image
-                except:
-                    # Fallback 若 SDK 版本不同或回傳格式不同
-                    clean_img = img # 若失敗則保留原圖
-                # 存檔
-                img_filename = f"page_{i+1:03d}_clean.png"
-                img_path = os.path.join(clean_img_dir, img_filename)
-                clean_img.save(img_path)
-                processed_images.append(clean_img)
             except Exception as e:
-                print(f"Clean Error on page {i}: {e}")
-                processed_images.append(img)
-        # 3. 打包結果
-        progress(0.9, desc="正在打包檔案...")
-        # 儲存文字檔
-        txt_path = os.path.join(tmp_dir, "extracted_text.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
-            f.write(full_text_content)
-        # 建立 ZIP
-        zip_path = os.path.join(tmp_dir, "notebooklm_result.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
-            zf.write(txt_path, "content.txt")
-            for root, dirs, files in os.walk(clean_img_dir):
-                for file in files:
-                    zf.write(os.path.join(root, file), os.path.join("images", file))
-        return zip_path, full_text_content, processed_images
-    # --- 🎓 Professor Search Logic (Copied from original) ---
-    def search_professors(self, query, exclude_names=[]):
-        self._check_client()
-        exclusion = f"IMPORTANT: Do not include: {', '.join(exclude_names)}." if exclude_names else ""
-        # Phase 1: Search
-        tools = [{"google_search": {}}]
-        model_tools = genai.GenerativeModel(self.model_id, tools=tools)
-        prompt = f"""
-        Using Google Search, find 10 prominent professors in universities across Taiwan who are experts in "{query}".
-        FACT CHECK: Must be current faculty. {exclusion}
-        List them (Name - University - Department) in Traditional Chinese.
-        """
-        resp1 = model_tools.generate_content(prompt)
-        # Phase 2: Extract JSON
-        model_pure = genai.GenerativeModel(self.model_id)
-        extract_prompt = f"""
-        Extract professor names, universities, and departments from the text below.
-        Return ONLY a JSON array: [{{"name": "...", "university": "...", "department": "...", "tags": ["tag1"]}}]
-        Text: {resp1.text}
-        """
-        resp2 = model_pure.generate_content(extract_prompt, generation_config={"response_mime_type": "application/json"})
-        try: return json.loads(resp2.text)
-        except: return []
-    def get_professor_details(self, professor):
-        self._check_client()
-        tools = [{"google_search": {}}]
-        model = genai.GenerativeModel(self.model_id, tools=tools)
-        prompt = f"Act as academic consultant. Investigate Professor {professor.get('name')} from {professor.get('university')}. Find key publications and industry projects. Report in Traditional Chinese Markdown."
-        resp = model.generate_content(prompt)
-        return self._format_response_with_sources(resp)
-    # --- 🏢 Company Search Logic (Copied from original) ---
-    def search_companies(self, query, exclude_names=[]):
-        self._check_client()
-        exclusion = f"IMPORTANT: Do not include: {', '.join(exclude_names)}." if exclude_names else ""
-        tools = [{"google_search": {}}]
-        model = genai.GenerativeModel(self.model_id, tools=tools)
-        prompt = f"""
-        Using Google Search, find 5-10 Taiwanese companies related to: "{query}".
-        {exclusion}
-        List them (Name - Industry) in Traditional Chinese.
-        """
-        resp1 = model.generate_content(prompt)
-        model_pure = genai.GenerativeModel(self.model_id)
-        extract_prompt = f"""
-        Extract company names and industry from text.
-        Return ONLY JSON array: [{{"name": "...", "industry": "...", "tags": ["tag1"]}}]
-        Text: {resp1.text}
-        """
-        resp2 = model_pure.generate_content(extract_prompt, generation_config={"response_mime_type": "application/json"})
-        try: return json.loads(resp2.text)
-        except: return []
-    def get_company_details(self, company):
-        self._check_client()
-        tools = [{"google_search": {}}]
-        model = genai.GenerativeModel(self.model_id, tools=tools)
-        prompt = f"Act as Business Analyst. Investigate company: '{company.get('name')}'. Focus on products, culture, and disputes. Report in Traditional Chinese Markdown."
-        resp = model.generate_content(prompt)
-        return self._format_response_with_sources(resp)
-    # --- Shared Helpers ---
-    def chat_with_ai(self, history, msg, context, role):
-        self._check_client()
-        model = genai.GenerativeModel(self.model_id)
-        sys_prompt = f"{role}:\nContext: {context}"
-        # Convert history for Gemini
-        chat_hist = []
-        for h in history:
-            chat_hist.append({"role": "user", "parts": [h[0]]})
-            if len(h) > 1: chat_hist.append({"role": "model", "parts": [h[1]]})
-        chat = model.start_chat(history=chat_hist)
-        resp = chat.send_message(f"{sys_prompt}\nUser: {msg}")
-        return resp.text
-    def _format_response_with_sources(self, response):
-        sources = []
-        if hasattr(response.candidates[0], 'grounding_metadata'):
-            gm = response.candidates[0].grounding_metadata
-            if hasattr(gm, 'grounding_chunks'):
-                for chunk in gm.grounding_chunks:
-                    if hasattr(chunk, 'web'):
-                        sources.append({"title": chunk.web.title, "uri": chunk.web.uri})
-        # Deduplicate
-        unique_sources = list({v['uri']: v for v in sources}.values())
-        return {"text": response.text, "sources": unique_sources}
-# Init Service
-gemini_service = UnifiedService()
-# --- Helper Functions (Preserved from your code) ---
-def load_data(filename):
-    data = []
-    if HF_TOKEN and DATASET_REPO_ID:
-        try: hf_hub_download(repo_id=DATASET_REPO_ID, filename=filename, repo_type="dataset", token=HF_TOKEN, local_dir=".")
-        except: pass
-    if os.path.exists(filename):
-        try:
-            with open(filename, 'r', encoding='utf-8') as f: data = json.load(f)
-        except: data = []
-    return data
-def save_data(data, filename):
-    try:
-        with open(filename, 'w', encoding='utf-8') as f: json.dump(data, f, ensure_ascii=False, indent=2)
-    except: return
-    if HF_TOKEN and DATASET_REPO_ID:
-        try:
-            api = HfApi(token=HF_TOKEN)
-            api.upload_file(path_or_fileobj=filename, path_in_repo=filename, repo_id=DATASET_REPO_ID, repo_type="dataset", commit_message=f"Sync {filename}")
-        except: pass
-def get_tags_text(item):
-    if not item or not item.get('tags'): return "目前標籤: (無)"
-    return "🏷️ " + ", ".join([f"`{t}`" for t in item['tags']])
-def get_tags_choices(item): return item.get('tags', []) if item else []
-def prof_get_key(p): return f"{p['name']}-{p['university']}"
-def comp_get_key(c): return f"{c['name']}"
-def prof_format_df(source_list, saved_list):
-    if not source_list: return pd.DataFrame(columns=["狀態", "姓名", "大學", "系所", "標籤"])
-    if saved_list is None: saved_list = []
-    saved_map = {prof_get_key(p): p for p in saved_list}
-    data = []
-    for p in source_list:
-        dp = saved_map.get(prof_get_key(p), p)
-        icon = {'match':'✅','mismatch':'❌','pending':'❓'}.get(dp.get('status'), '')
-        detail = "📄" if dp.get('details') else ""
-        data.append([f"{icon} {detail}", dp['name'], dp['university'], dp['department'], ", ".join(dp.get('tags', []))])
-    return pd.DataFrame(data, columns=["狀態", "姓名", "大學", "系所", "標籤"])
-def comp_format_df(source_list, saved_list):
-    if not source_list: return pd.DataFrame(columns=["狀態", "公司名稱", "產業類別", "標籤"])
-    if saved_list is None: saved_list = []
-    saved_map = {comp_get_key(c): c for c in saved_list}
-    data = []
-    for c in source_list:
-        dc = saved_map.get(comp_get_key(c), c)
-        icon = {'good':'✅','risk':'⚠️','pending':'❓'}.get(dc.get('status'), '')
-        detail = "📄" if dc.get('details') else ""
-        data.append([f"{icon} {detail}", dc['name'], dc.get('industry','未知'), ", ".join(dc.get('tags', []))])
-    return pd.DataFrame(data, columns=["狀態", "公司名稱", "產業類別", "標籤"])
-# --- Wrappers for Prof Logic ---
-def prof_search(query, current_saved):
-    if not query: return gr.update(), current_saved, gr.update()
-    try:
-        res = gemini_service.search_professors(query)
-        return prof_format_df(res, current_saved), res, gr.update(visible=True)
-    except Exception as e: raise gr.Error(f"搜尋失敗: {e}")
-def prof_load_more(query, cur_res, cur_saved):
-    if not query: return gr.update(), cur_res
-    try:
-        new_res = gemini_service.search_professors(query, exclude_names=[p['name'] for p in cur_res])
-        exist_keys = set(prof_get_key(p) for p in cur_res)
-        for p in new_res:
-            if prof_get_key(p) not in exist_keys: cur_res.append(p)
-        return prof_format_df(cur_res, cur_saved), cur_res
-    except Exception as e: raise gr.Error(f"載入失敗: {e}")
-def prof_select(evt: gr.SelectData, search_res, saved_data, view_mode):
-    if not evt: return [gr.update()]*8
-    idx = evt.index[0]
-    target = saved_data if view_mode == "追蹤清單" else search_res
-    if not target or idx >= len(target): return [gr.update()]*8
-    p = target[idx]
-    key = prof_get_key(p)
-    saved_p = next((x for x in saved_data if prof_get_key(x) == key), None)
-    curr = saved_p if saved_p else p
-    md = ""
-    if curr.get('details') and len(curr.get('details')) > 10:
-        md = curr['details']
-        if not saved_p: saved_data.insert(0, curr); save_data(saved_data, PROF_SAVE_FILE)
-    else:
-        gr.Info(f"正在調查 {curr['name']}...")
-        try:
-            res = gemini_service.get_professor_details(curr)
-            curr['details'] = res['text']; curr['sources'] = res['sources']
-            md = res['text']
-            if saved_p: saved_p.update(curr)
-            else: saved_data.insert(0, curr)
-            save_data(saved_data, PROF_SAVE_FILE)
-        except Exception as e: raise gr.Error(f"調查失敗: {e}")
-    if curr.get('sources'): md += "\n\n### 📚 參考來源\n" + "\n".join([f"- [{s['title']}]({s['uri']})" for s in curr['sources']])
-    return gr.update(visible=True), md, [], curr, saved_data, get_tags_text(curr), gr.update(choices=get_tags_choices(curr), value=None), gr.update(visible=True)
-def prof_chat(hist, msg, curr):
-    if not curr: return hist, ""
-    try:
-        reply = gemini_service.chat_with_ai(hist, msg, curr.get('details', ''), "你是學術顧問，請根據這份教授資料回答")
-        hist.append((msg, reply))
-    except Exception as e: hist.append((msg, f"Error: {e}"))
-    return hist, ""
-def prof_add_tag(tag, curr, saved, mode, res):
-    if not curr or not tag: return gr.update(), gr.update(), gr.update(), saved, gr.update()
-    if 'tags' not in curr: curr['tags'] = []
-    if tag not in curr['tags']:
-        curr['tags'].append(tag)
-        key = prof_get_key(curr)
-        found = False
-        for i, p in enumerate(saved):
-            if prof_get_key(p) == key: saved[i] = curr; found=True; break
-        if not found: saved.insert(0, curr)
-        save_data(saved, PROF_SAVE_FILE)
-    return gr.update(value=""), get_tags_text(curr), gr.update(choices=curr['tags']), saved, prof_format_df(saved if mode=="追蹤清單" else res, saved)
-def prof_remove_tag(tag, curr, saved, mode, res):
-    if not curr or not tag: return gr.update(), gr.update(), saved, gr.update()
-    if 'tags' in curr and tag in curr['tags']:
-        curr['tags'].remove(tag)
-        key = prof_get_key(curr)
-        for i, p in enumerate(saved):
-            if prof_get_key(p) == key: saved[i] = curr; break
-        save_data(saved, PROF_SAVE_FILE)
-    return get_tags_text(curr), gr.update(choices=curr['tags'], value=None), saved, prof_format_df(saved if mode=="追蹤清單" else res, saved)
-def prof_update_status(stat, curr, saved, mode, res):
-    if not curr: return gr.update(), saved
-    curr['status'] = stat if curr.get('status') != stat else None
-    key = prof_get_key(curr)
-    for i, p in enumerate(saved):
-        if prof_get_key(p) == key: saved[i] = curr; break
-    save_data(saved, PROF_SAVE_FILE)
-    return prof_format_df(saved if mode=="追蹤清單" else res, saved), saved
-def prof_remove(curr, saved, mode, res):
-    if not curr: return gr.update(), gr.update(value=None), saved, gr.update(visible=False)
-    key = prof_get_key(curr)
-    new_saved = [p for p in saved if prof_get_key(p) != key]
-    save_data(new_saved, PROF_SAVE_FILE)
-    return gr.Info("已移除"), prof_format_df(new_saved if mode=="追蹤清單" else res, new_saved), new_saved, gr.update(visible=False)
-def prof_toggle(mode, res, saved):
-    return prof_format_df(res if mode=="搜尋結果" else saved, saved), gr.update(visible=mode=="搜尋結果")
-# --- Wrappers for Company Logic ---
-def comp_search(query, current_saved):
-    if not query: return gr.update(), current_saved, gr.update()
-    try:
-        res = gemini_service.search_companies(query)
-        return comp_format_df(res, current_saved), res, gr.update(visible=True)
-    except Exception as e: raise gr.Error(f"搜尋失敗: {e}")
-def comp_load_more(query, cur_res, cur_saved):
-    if not query: return gr.update(), cur_res
-    try:
-        new_res = gemini_service.search_companies(query, exclude_names=[c['name'] for c in cur_res])
-        exist_keys = set(comp_get_key(c) for c in cur_res)
-        for c in new_res:
-            if comp_get_key(c) not in exist_keys: cur_res.append(c)
-        return comp_format_df(cur_res, cur_saved), cur_res
-    except Exception as e: raise gr.Error(f"載入失敗: {e}")
-def comp_select(evt: gr.SelectData, search_res, saved_data, view_mode):
-    if not evt: return [gr.update()]*8
-    idx = evt.index[0]
-    target = saved_data if view_mode == "追蹤清單" else search_res
-    if not target or idx >= len(target): return [gr.update()]*8
-    c = target[idx]
-    key = comp_get_key(c)
-    saved_c = next((x for x in saved_data if comp_get_key(x) == key), None)
-    curr = saved_c if saved_c else c
-    md = ""
-    if curr.get('details') and len(curr.get('details')) > 10:
-        md = curr['details']
-        if not saved_c: saved_data.insert(0, curr); save_data(saved_data, COMP_SAVE_FILE)
-    else:
-        gr.Info(f"正在調查 {curr['name']}...")
-        try:
-            res = gemini_service.get_company_details(curr)
-            curr['details'] = res['text']; curr['sources'] = res['sources']
-            md = res['text']
-            if saved_c: saved_c.update(curr)
-            else: saved_data.insert(0, curr)
-            save_data(saved_data, COMP_SAVE_FILE)
-        except Exception as e: raise gr.Error(f"調查失敗: {e}")
-    if curr.get('sources'): md += "\n\n### 📚 資料來源\n" + "\n".join([f"- [{s['title']}]({s['uri']})" for s in curr['sources']])
-    return gr.update(visible=True), md, [], curr, saved_data, get_tags_text(curr), gr.update(choices=get_tags_choices(curr), value=None), gr.update(visible=True)
-def comp_chat(hist, msg, curr):
-    if not curr: return hist, ""
-    try:
-        reply = gemini_service.chat_with_ai(hist, msg, curr.get('details', ''), "你是商業顧問，請根據這份公司調查報告回答")
-        hist.append((msg, reply))
-    except Exception as e: hist.append((msg, f"Error: {e}"))
-    return hist, ""
-def comp_add_tag(tag, curr, saved, mode, res):
-    if not curr or not tag: return gr.update(), gr.update(), gr.update(), saved, gr.update()
-    if 'tags' not in curr: curr['tags'] = []
-    if tag not in curr['tags']:
-        curr['tags'].append(tag)
-        key = comp_get_key(curr)
-        found = False
-        for i, c in enumerate(saved):
-            if comp_get_key(c) == key: saved[i] = curr; found=True; break
-        if not found: saved.insert(0, curr)
-        save_data(saved, COMP_SAVE_FILE)
-    return gr.update(value=""), get_tags_text(curr), gr.update(choices=curr['tags']), saved, comp_format_df(saved if mode=="追蹤清單" else res, saved)
-def comp_remove_tag(tag, curr, saved, mode, res):
-    if not curr or not tag: return gr.update(), gr.update(), saved, gr.update()
-    if 'tags' in curr and tag in curr['tags']:
-        curr['tags'].remove(tag)
-        key = comp_get_key(curr)
-        for i, c in enumerate(saved):
-            if comp_get_key(c) == key: saved[i] = curr; break
-        save_data(saved, COMP_SAVE_FILE)
-    return get_tags_text(curr), gr.update(choices=curr['tags'], value=None), saved, comp_format_df(saved if mode=="追蹤清單" else res, saved)
-def comp_update_status(stat, curr, saved, mode, res):
-    if not curr: return gr.update(), saved
-    curr['status'] = stat if curr.get('status') != stat else None
-    key = comp_get_key(curr)
-    for i, c in enumerate(saved):
-        if comp_get_key(c) == key: saved[i] = curr; break
-    save_data(saved, COMP_SAVE_FILE)
-    return comp_format_df(saved if mode=="追蹤清單" else res, saved), saved
-def comp_remove(curr, saved, mode, res):
-    if not curr: return gr.update(), gr.update(value=None), saved, gr.update(visible=False)
-    key = comp_get_key(curr)
-    new_saved = [c for c in saved if comp_get_key(c) != key]
-    save_data(new_saved, COMP_SAVE_FILE)
-    return gr.Info("已移除"), comp_format_df(new_saved if mode=="追蹤清單" else res, new_saved), new_saved, gr.update(visible=False)
-def comp_toggle(mode, res, saved):
-    return comp_format_df(res if mode=="搜尋結果" else saved, saved), gr.update(visible=mode=="搜尋結果")
-# Init
-def prof_init(): d = load_data(PROF_SAVE_FILE); return d, prof_format_df(d, d)
-def comp_init(): d = load_data(COMP_SAVE_FILE); return d, comp_format_df(d, d)
-# ==========================
-# 🖥️ UI Layout (Modified)
-# ==========================
-with gr.Blocks(title="Prof.404.Com 產學導航系統", theme=gr.themes.Soft()) as demo:
-    gr.Markdown("""
-    <div align="center">
-    # 🚀 Prof.404.Com 產學導航系統 (含 NotebookLM 擴充工具)
-    **學術研究啟程、產業導航、以及您的文件處理瑞士刀**
-    </div>
-    """)
-    with gr.Accordion("🔑 API Key 設定", open=False):
-        api_input = gr.Textbox(label="Gemini API Key", placeholder="若未設定環境變數，請在此輸入", type="password")
-        api_btn = gr.Button("設定 Key")
-        api_btn.click(lambda k: gemini_service.set_user_key(k), inputs=api_input)
-    with gr.Tabs():
-        # ==========================
-        # Tab 1: 🛠️ 工具箱 (PDF 智能拆解)
-        # ==========================
-        with gr.Tab("🛠️ NotebookLM 拆解工具"):
-            gr.Markdown("### 📄 PDF 智能拆解 (文字/圖片分離)")
-            gr.Markdown("上傳 NotebookLM 生成的 PDF，AI 將自動為您：**1. 提取全文文字** | **2. 移除圖片中的文字(還原背景)**")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    pdf_input = gr.File(label="上傳 PDF (來自 NotebookLM 或其他)")
-                    process_btn = gr.Button("🚀 開始一鍵拆解", variant="primary")
-                with gr.Column(scale=2):
-                    zip_output = gr.File(label="📦 下載結果 (含 clean images 與 text)")
-                    text_preview = gr.Textbox(label="📝 文字內容預覽", lines=10, max_lines=20)
-            gr.Markdown("#### 🖼️ 去字後圖片預覽 (Cleaned Images)")
-            gallery_output = gr.Gallery(label="背景還原預覽", columns=4)
-            process_btn.click(
-                gemini_service.decompose_pdf,
-                inputs=[pdf_input],
-                outputs=[zip_output, text_preview, gallery_output]
-            )
-        # ==========================
-        # Tab 2: 🎓 教授去哪兒？ (保留原功能)
-        # ==========================
-        with gr.Tab("🎓 找教授 (Prof.404)"):
-            prof_saved = gr.State([])
-            prof_res = gr.State([])
-            prof_sel = gr.State(None)
-            with gr.Row():
-                p_in = gr.Textbox(label="搜尋教授", placeholder="輸入研究領域...", scale=4)
-                p_btn = gr.Button("🔍 搜尋", variant="primary", scale=1)
-            p_view = gr.Radio(["搜尋結果", "追蹤清單"], label="顯示模式", value="追蹤清單")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    p_df = gr.Dataframe(headers=["狀態","姓名","大學","系所","標籤"], datatype=["str","str","str","str","str"], interactive=False)
-                    p_load = gr.Button("載入更多", visible=False)
-                with gr.Column(scale=2, visible=False) as p_col:
-                    p_md = gr.Markdown("...")
-                    with gr.Column():
-                        gr.Markdown("### 🤖 學術顧問")
-                        p_chat = gr.Chatbot(height=250)
-                        with gr.Row():
-                            p_msg = gr.Textbox(label="提問", scale=4)
-                            p_send = gr.Button("送出", scale=1)
-                    gr.Markdown("---")
-                    with gr.Column(visible=False) as p_tag_row:
-                        p_tag_disp = gr.Markdown("標籤: (無)")
-                        with gr.Row():
-                            p_tag_in = gr.Textbox(label="新增標籤", scale=3)
-                            p_tag_add = gr.Button("➕", scale=1)
-                        with gr.Accordion("刪除標籤", open=False):
-                            with gr.Row():
-                                p_tag_drop = gr.Dropdown(label="選擇標籤", choices=[], scale=3)
-                                p_tag_del = gr.Button("🗑️", scale=1, variant="secondary")
-                    with gr.Row():
-                        p_good = gr.Button("✅ 符合")
-                        p_bad = gr.Button("❌ 不符")
-                        p_pend = gr.Button("❓ 待觀察")
-                        p_rem = gr.Button("🗑️ 移除", variant="stop")
-            demo.load(prof_init, None, [prof_saved, p_df])
-            p_btn.click(prof_search, [p_in, prof_saved], [p_df, prof_res, p_load]).then(lambda: gr.update(value="搜尋結果"), outputs=[p_view])
-            p_load.click(prof_load_more, [p_in, prof_res, prof_saved], [p_df, prof_res])
-            p_view.change(prof_toggle, [p_view, prof_res, prof_saved], [p_df, p_load])
-            p_df.select(prof_select, [prof_res, prof_saved, p_view], [p_col, p_md, p_chat, prof_sel, prof_saved, p_tag_disp, p_tag_drop, p_tag_row])
-            p_send.click(prof_chat, [p_chat, p_msg, prof_sel], [p_chat, p_msg]); p_msg.submit(prof_chat, [p_chat, p_msg, prof_sel], [p_chat, p_msg])
-            p_tag_add.click(prof_add_tag, [p_tag_in, prof_sel, prof_saved, p_view, prof_res], [p_tag_in, p_tag_disp, p_tag_drop, prof_saved, p_df])
-            p_tag_del.click(prof_remove_tag, [p_tag_drop, prof_sel, prof_saved, p_view, prof_res], [p_tag_disp, p_tag_drop, prof_saved, p_df])
-            for btn, s in [(p_good,'match'),(p_bad,'mismatch'),(p_pend,'pending')]: btn.click(prof_update_status, [gr.State(s), prof_sel, prof_saved, p_view, prof_res], [p_df, prof_saved])
-            p_rem.click(prof_remove, [prof_sel, prof_saved, p_view, prof_res], [gr.State(None), p_df, prof_saved, p_col])
-        # ==========================
-        # Tab 3: 🏢 公司去那兒？ (保留原功能)
-        # ==========================
-        with gr.Tab("🏢 找公司 (Com.404)"):
-            comp_saved = gr.State([])
-            comp_res = gr.State([])
-            comp_sel = gr.State(None)
-            with gr.Row():
-                c_in = gr.Textbox(label="搜尋公司/領域", placeholder="輸入產業或公司...", scale=4)
-                c_btn = gr.Button("🔍 搜尋", variant="primary", scale=1)
-            c_view = gr.Radio(["搜尋結果", "追蹤清單"], label="顯示模式", value="追蹤清單")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    c_df = gr.Dataframe(headers=["狀態","公司名稱","產業類別","標籤"], datatype=["str","str","str","str"], interactive=False)
-                    c_load = gr.Button("載入更多", visible=False)
-                with gr.Column(scale=2, visible=False) as c_col:
-                    c_md = gr.Markdown("...")
-                    with gr.Column():
-                        gr.Markdown("### 🤖 商業顧問")
-                        c_chat = gr.Chatbot(height=250)
-                        with gr.Row():
-                            c_msg = gr.Textbox(label="提問", scale=4)
-                            c_send = gr.Button("送出", scale=1)
-                    gr.Markdown("---")
-                    with gr.Column(visible=False) as c_tag_row:
-                        c_tag_disp = gr.Markdown("標籤: (無)")
-                        with gr.Row():
-                            c_tag_in = gr.Textbox(label="新增標籤", scale=3)
-                            c_tag_add = gr.Button("➕", scale=1)
-                        with gr.Accordion("刪除標籤", open=False):
-                            with gr.Row():
-                                c_tag_drop = gr.Dropdown(label="選擇標籤", choices=[], scale=3)
-                                c_tag_del = gr.Button("🗑️", scale=1, variant="secondary")
-                    with gr.Row():
-                        c_good = gr.Button("✅ 優質")
-                        c_risk = gr.Button("⚠️ 風險")
-                        c_pend = gr.Button("❓ 未定")
-                        c_rem = gr.Button("🗑️ 移除", variant="stop")
-            demo.load(comp_init, None, [comp_saved, c_df])
-            c_btn.click(comp_search, [c_in, comp_saved], [c_df, comp_res, c_load]).then(lambda: gr.update(value="搜尋結果"), outputs=[c_view])
-            c_load.click(comp_load_more, [c_in, comp_res, comp_saved], [c_df, comp_res])
-            c_view.change(comp_toggle, [c_view, comp_res, comp_saved], [c_df, c_load])
-            c_df.select(comp_select, [comp_res, comp_saved, c_view], [c_col, c_md, c_chat, comp_sel, comp_saved, c_tag_disp, c_tag_drop, c_tag_row])
-            c_send.click(comp_chat, [c_chat, c_msg, comp_sel], [c_chat, c_msg]); c_msg.submit(comp_chat, [c_chat, c_msg, comp_sel], [c_chat, c_msg])
-            c_tag_add.click(comp_add_tag, [c_tag_in, comp_sel, comp_saved, c_view, comp_res], [c_tag_in, c_tag_disp, c_tag_drop, comp_saved, c_df])
-            c_tag_del.click(comp_remove_tag, [c_tag_drop, comp_sel, comp_saved, c_view, comp_res], [c_tag_disp, c_tag_drop, comp_saved, c_df])
-            for btn, s in [(c_good,'good'),(c_risk,'risk'),(c_pend,'pending')]: btn.click(comp_update_status, [gr.State(s), comp_sel, comp_saved, c_view, comp_res], [c_df, comp_saved])
-            c_rem.click(comp_remove, [comp_sel, comp_saved, c_view, comp_res], [gr.State(None), c_df, comp_saved, c_col])
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
 import os
 import tempfile
 import zipfile
 import shutil
 from pdf2image import convert_from_path
 from PIL import Image
+from dotenv import load_dotenv
+# 使用 Google 新版 SDK
+from google import genai
+from google.genai import types
 load_dotenv()
+class NotebookLMTool:
     def __init__(self):
+        # 嘗試從環境變數讀取 Key
+        self.api_key = os.getenv("GEMINI_API_KEY")
+        self.client = None
         if self.api_key:
+            self.client = genai.Client(api_key=self.api_key)
+    def set_key(self, user_key):
+        """讓使用者從介面設定 Key"""
+        if user_key and user_key.strip():
+            self.api_key = user_key.strip()
+            self.client = genai.Client(api_key=self.api_key)
+            return "✅ API Key 已更新！"
+        return "⚠️ Key 無效"
+    def process_pdf(self, pdf_file, progress=gr.Progress()):
+        if not self.client:
+            raise ValueError("請先輸入 Google API Key！")
+        if pdf_file is None:
+            return None, None, None
+        # 1. 準備暫存目錄
+        temp_dir = tempfile.mkdtemp()
+        img_output_dir = os.path.join(temp_dir, "cleaned_images")
+        os.makedirs(img_output_dir, exist_ok=True)
+        # 2. PDF 轉圖片
         progress(0.1, desc="正在將 PDF 轉為圖片...")
         try:
             images = convert_from_path(pdf_file)
         except Exception as e:
+            raise ValueError(f"PDF 轉換失敗 (請確認 packages.txt 有加入 poppler-utils): {str(e)}")
+        full_text = ""
+        cleaned_images_paths = []
+        gallery_preview = []
+        # 3. 逐頁處理
         for i, img in enumerate(images):
+            progress(0.1 + (0.8 * (i / len(images))), desc=f"AI 正在處理第 {i+1}/{len(images)} 頁...")
+            # --- 步驟 A: 提取文字 (OCR) ---
             try:
+                # 使用 Gemini 2.0 Flash 提取文字
+                response_text = self.client.models.generate_content(
+                    model="gemini-2.0-flash",
+                    contents=["Extract all text from this image directly. Do not describe the layout, just give me the text content.", img]
+                )
+                page_content = response_text.text if response_text.text else "[No Text Found]"
+            except Exception as e:
+                page_content = f"[OCR Error: {e}]"
+            full_text += f"=== Page {i+1} ===\n{page_content}\n\n"
+            # --- 步驟 B: 圖片去字 (Clean) ---
+            # 注意：Gemini 2.0 直接回傳 Image 的支援度視 prompt 而定，
+            # 這裡我們使用 prompt 讓它嘗試還原背景。
             try:
+                response_clean = self.client.models.generate_content(
+                    model="gemini-2.0-flash",
+                    contents=["Remove all text from this image and fill in the background to make it look like a clean slide background. Return the image.", img],
+                    config=types.GenerateContentConfig(response_mime_type="image/png")
+                )
+                # 處理回傳的圖片 (Binary)
+                if response_clean.bytes:
+                    saved_path = os.path.join(img_output_dir, f"slide_{i+1:02d}.png")
+                    with open(saved_path, "wb") as f:
+                        f.write(response_clean.bytes)
+                    cleaned_images_paths.append(saved_path)
+                    gallery_preview.append((saved_path, f"Page {i+1}"))
+                else:
+                    # 如果 AI 拒絕生成圖片，我們保留原圖但標記失敗
+                    print(f"Page {i+1}: Model did not return an image.")
             except Exception as e:
+                print(f"Clean Error Page {i+1}: {e}")
+        # 4. 打包結果
+        progress(0.9, desc="正在打包 ZIP...")
+        # 寫入文字檔
+        txt_path = os.path.join(temp_dir, "extracted_text.txt")
         with open(txt_path, "w", encoding="utf-8") as f:
+            f.write(full_text)
+        # 壓縮
+        zip_path = os.path.join(temp_dir, "notebooklm_clean_pack.zip")
         with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
+            zf.write(txt_path, "all_text.txt")
+            for img_path in cleaned_images_paths:
+                zf.write(img_path, os.path.join("cleaned_slides", os.path.basename(img_path)))
+        return zip_path, full_text, gallery_preview
+# 初始化工具
+tool = NotebookLMTool()
+# --- Gradio 介面 ---
+with gr.Blocks(title="NotebookLM Slide Decomposer", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🛠️ NotebookLM 投影片拆解助手")
+    gr.Markdown("上傳 PDF，AI 自動幫你：**1. 抓出所有文字** | **2. 移除文字還原乾淨背景圖**")
+    with gr.Row():
+        with gr.Column():
+            api_input = gr.Textbox(label="Google API Key", type="password", placeholder="貼上你的 Gemini API Key")
+            btn_set_key = gr.Button("設定 Key")
+            status_msg = gr.Markdown("")
+            gr.Markdown("---")
+            pdf_input = gr.File(label="上傳 PDF")
+            btn_process = gr.Button("🚀 開始拆解", variant="primary")
+        with gr.Column():
+            out_zip = gr.File(label="📦 下載懶人包 (ZIP)")
+            out_text = gr.Textbox(label="📝 文字內容預覽", lines=8)
+    gr.Markdown("### 🖼️ 背景還原預覽")
+    out_gallery = gr.Gallery(columns=4)
+    # 事件綁定
+    btn_set_key.click(tool.set_key, inputs=api_input, outputs=status_msg)
+    btn_process.click(
+        tool.process_pdf,
+        inputs=[pdf_input],
+        outputs=[out_zip, out_text, out_gallery]
+    )
 if __name__ == "__main__":
     demo.launch()