Gemini_longchain_RAG

Runtime error

App Files Community

mikao007 committed on Oct 2, 2025

Commit

73d91a6

verified ·

1 Parent(s): a8d1bed

Update app.py

Browse files

Files changed (1) hide show

app.py +601 -591

app.py CHANGED Viewed

@@ -1,592 +1,602 @@
-from dotenv import load_dotenv
-import os
-import gradio as gr
-from PyPDF2 import PdfReader
-from langchain.text_splitter import CharacterTextSplitter
-from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
-from langchain_community.vectorstores import FAISS
-from langchain.chains.question_answering import load_qa_chain
-from langchain.prompts import PromptTemplate
-import shutil
-import tempfile
-from docx import Document
-from docx.shared import Inches
-from datetime import datetime
-# Load environment variables
-load_dotenv()
-# Set Gemini API key
-gemini_api_key = "AIzaSyA8zqhqNb-bNYU6KVb0Zj0XIKi3aZfvXE0"
-os.environ["GOOGLE_API_KEY"] = gemini_api_key
-class PDFChatBot:
-    def __init__(self):
-        self.vector_store = None
-        self.embeddings = GoogleGenerativeAIEmbeddings(
-            model="models/text-embedding-004",
-            google_api_key=gemini_api_key
-        )
-        self.processed_files = []
-        self.chat_history = []  # 儲存聊天歷史
-    def get_pdf_text(self, pdf_files):
-        """從多個PDF文件中提取文字"""
-        raw_text = ""
-        processed_count = 0
-        if not pdf_files:
-            return raw_text, processed_count
-        # 處理單個文件和多個文件
-        if not isinstance(pdf_files, list):
-            pdf_files = [pdf_files]
-        for pdf_file in pdf_files:
-            try:
-                # 如果是上傳的文件對象，使用其name屬性
-                pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
-                pdf_reader = PdfReader(pdf_path)
-                file_text = ""
-                for page in pdf_reader.pages:
-                    text = page.extract_text()
-                    if text:
-                        file_text += text + "\n"
-                if file_text.strip():
-                    raw_text += file_text
-                    processed_count += 1
-                    self.processed_files.append(os.path.basename(pdf_path))
-            except Exception as e:
-                print(f"讀取PDF時發生錯誤：{str(e)}")
-                continue
-        return raw_text, processed_count
-    def get_text_chunks(self, text):
-        """將文字分割成區塊進行處理"""
-        text_splitter = CharacterTextSplitter(
-            separator="\n",
-            chunk_size=10000,
-            chunk_overlap=1000,
-            length_function=len
-        )
-        chunks = text_splitter.split_text(text)
-        return chunks
-    def create_vector_store(self, chunks):
-        """從文字區塊創建FAISS向量存儲"""
-        try:
-            self.vector_store = FAISS.from_texts(chunks, self.embeddings)
-            self.vector_store.save_local("faiss_index")
-            return True
-        except Exception as e:
-            print(f"創建向量存儲時發生錯誤：{str(e)}")
-            return False
-    def load_vector_store(self):
-        """載入已存在的向量存儲"""
-        try:
-            if os.path.exists("faiss_index"):
-                self.vector_store = FAISS.load_local(
-                    "faiss_index",
-                    embeddings=self.embeddings,
-                    allow_dangerous_deserialization=True
-                )
-                return True
-            else:
-                return False
-        except Exception as e:
-            print(f"載入向量存儲時發生錯誤：{str(e)}")
-            return False
-    def get_conversational_chain(self, temperature=0.3, max_tokens=4096):
-        """創建對話鏈"""
-        prompt_template = """
-        根據提供的內容盡可能詳細地回答問題。確保提供所有細節。
-        如果你需要更多細節來完美回答問題，那麼請詢問你認為需要了解的更多細節。
-        如果答案不在提供的內容中，只需說"在您提供的內容中找不到答案"。不要提供錯誤的答案。
-        內容:\n {context}\n
-        問題: \n{question}\n
-        回答:
-        """
-        # Using Flash 2.0 model
-        model = ChatGoogleGenerativeAI(
-            model="gemini-2.0-flash-exp",
-            google_api_key=gemini_api_key,
-            temperature=temperature,
-            max_tokens=max_tokens,
-            top_p=0.8,
-            top_k=40
-        )
-        prompt = PromptTemplate(
-            template=prompt_template,
-            input_variables=['context', 'question']
-        )
-        chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
-        return chain
-    def answer_question(self, question, temperature=0.3, max_tokens=4096, search_k=6):
-        """回答用戶問題"""
-        if not self.vector_store:
-            return "請先上傳並處理PDF文件！"
-        if not question.strip():
-            return "請輸入您的問題。"
-        try:
-            # 搜索相關文檔
-            docs = self.vector_store.similarity_search(question, k=search_k)
-            if not docs:
-                return "在上傳的文檔中找不到相關信息。"
-            # 生成回答
-            chain = self.get_conversational_chain(temperature, max_tokens)
-            response = chain(
-                {
-                    "input_documents": docs,
-                    "question": question,
-                },
-                return_only_outputs=True
-            )
-            return response["output_text"]
-        except Exception as e:
-            return f"處理問題時發生錯誤：{str(e)}"
-    def process_pdfs(self, pdf_files, progress=gr.Progress()):
-        """處理PDF文件"""
-        if not pdf_files:
-            return "請上傳至少一個PDF文件。", ""
-        self.processed_files = []
-        progress(0, desc="開始處理PDF文件...")
-        # 提取文字
-        progress(0.2, desc="提取PDF文字內容...")
-        raw_text, processed_count = self.get_pdf_text(pdf_files)
-        if not raw_text.strip():
-            return "無法從PDF文件中提取到文字。", ""
-        progress(0.4, desc="分割文字內容...")
-        # 分割文字
-        text_chunks = self.get_text_chunks(raw_text)
-        progress(0.6, desc="創建向量存儲...")
-        # 創建向量存儲
-        success = self.create_vector_store(text_chunks)
-        progress(1.0, desc="處理完成!")
-        if success:
-            file_list = "已處理的文件:\n" + "\n".join([f"• {file}" for file in self.processed_files])
-            return f"✅ 成功處理 {processed_count} 個PDF文件！\n總共 {len(text_chunks)} 個文字區塊\n現在您可以開始提問。", file_list
-        else:
-            return "❌ PDF處理失敗，請重試。", ""
-    def clear_data(self):
-        """清除處理過的資料"""
-        try:
-            if os.path.exists("faiss_index"):
-                shutil.rmtree("faiss_index")
-            self.vector_store = None
-            self.processed_files = []
-            self.chat_history = []
-            return "✅ 已清除所有處理過的資料！", ""
-        except Exception as e:
-            return f"❌ 清除資料時發生錯誤：{str(e)}", ""
-    def create_docx_report(self, chat_history):
-        """創建包含聊天記錄的docx報告"""
-        try:
-            # 創建新的文檔
-            doc = Document()
-            # 添加標題
-            title = doc.add_heading('PDF聊天機器人 - 問答記錄', 0)
-            title.alignment = 1  # 置中對齊
-            # 添加生成時間
-            doc.add_paragraph(f'生成時間：{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}')
-            # 添加處理的文件列表
-            if self.processed_files:
-                doc.add_heading('已處理的PDF文件：', level=2)
-                for i, file in enumerate(self.processed_files, 1):
-                    doc.add_paragraph(f'{i}. {file}', style='List Number')
-            doc.add_paragraph('')  # 空行
-            # 添加問答記錄
-            doc.add_heading('問答記錄：', level=2)
-            if not chat_history:
-                doc.add_paragraph('目前沒有問答記錄。')
-            else:
-                for i in range(0, len(chat_history), 2):
-                    if i + 1 < len(chat_history):
-                        question = chat_history[i]['content']
-                        answer = chat_history[i + 1]['content']
-                        # 問題
-                        q_paragraph = doc.add_paragraph()
-                        q_run = q_paragraph.add_run(f'問題 {(i//2)+1}：')
-                        q_run.bold = True
-                        q_run.font.size = Inches(0.14)
-                        q_paragraph.add_run(question)
-                        # 回答
-                        a_paragraph = doc.add_paragraph()
-                        a_run = a_paragraph.add_run('回答：')
-                        a_run.bold = True
-                        a_run.font.size = Inches(0.14)
-                        a_paragraph.add_run(answer)
-                        # 分隔線
-                        doc.add_paragraph('─' * 50)
-            # 保存到臨時文件
-            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.docx')
-            doc.save(temp_file.name)
-            temp_file.close()
-            return temp_file.name
-        except Exception as e:
-            print(f"創建docx文件時發生錯誤：{str(e)}")
-            return None
-# 初始化聊天機器人
-bot = PDFChatBot()
-# Gradio 接口函數
-def upload_and_process(files, progress=gr.Progress()):
-    return bot.process_pdfs(files, progress)
-def ask_question(question, history, temperature, max_tokens, search_k):
-    if not question.strip():
-        return history, ""
-    response = bot.answer_question(question, temperature, max_tokens, search_k)
-    # 使用新的消息格式
-    user_msg = {"role": "user", "content": question}
-    assistant_msg = {"role": "assistant", "content": response}
-    history.append(user_msg)
-    history.append(assistant_msg)
-    # 同步更新聊天歷史到bot實例
-    bot.chat_history = history.copy()
-    return history, ""
-def download_chat_history():
-    """下載聊天記錄為docx文件"""
-    if not bot.chat_history:
-        return None
-    docx_path = bot.create_docx_report(bot.chat_history)
-    return docx_path
-def export_to_word():
-    """匯出問答記錄為Word文件"""
-    if not bot.chat_history:
-        return None
-    docx_path = bot.create_docx_report(bot.chat_history)
-    return docx_path
-def clear_chat():
-    """清除聊天記錄"""
-    bot.chat_history = []
-    return [], ""
-def clear_all_data():
-    return bot.clear_data()
-def load_existing_data():
-    if bot.load_vector_store():
-        return "✅ 成功載入已處理的資料！", ""
-    else:
-        return "❌ 沒有找到已處理的資料。", ""
-# 創建自定義主題
-custom_theme = gr.themes.Soft(
-    primary_hue="blue",
-    secondary_hue="gray",
-    neutral_hue="slate",
-    font=gr.themes.GoogleFont("Noto Sans TC"),
-    font_mono=gr.themes.GoogleFont("JetBrains Mono")
-)
-# 創建 Gradio 介面
-with gr.Blocks(
-    title="PDF智能問答系統",
-    theme=custom_theme,
-    css="""
-    .gradio-container {
-        max-width: 1200px !important;
-        margin: auto !important;
-    }
-    .main-header {
-        text-align: center;
-        padding: 20px;
-        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-        color: white;
-        border-radius: 10px;
-        margin-bottom: 20px;
-    }
-    .status-box {
-        background-color: #f8f9fa;
-        border-left: 4px solid #007bff;
-        padding: 15px;
-        border-radius: 5px;
-    }
-    .file-info {
-        background-color: #e8f5e8;
-        border-left: 4px solid #28a745;
-        padding: 10px;
-        border-radius: 5px;
-    }
-    """
-) as demo:
-    # 主標題區域
-    with gr.Row():
-        gr.HTML("""
-        <div class="main-header">
-            <h1>🤖 PDF智能問答系統</h1>
-            <p>基於 Gemini 2.0 Flash 的 RAG 技術 | 支持多語言問答</p>
-        </div>
-        """)
-    # 主要功能區域
-    with gr.Tab("📁 文件管理", id="file_tab"):
-        with gr.Row():
-            with gr.Column(scale=3):
-                # 文件上傳區域
-                with gr.Group():
-                    gr.Markdown("### 📤 上傳PDF文件")
-                    file_upload = gr.File(
-                        file_count="multiple",
-                        file_types=[".pdf"],
-                        label="選擇PDF文件",
-                        height=150
-                    )
-                    # 處理選項
-                    with gr.Row():
-                        process_btn = gr.Button(
-                            "🚀 開始處理",
-                            variant="primary",
-                            size="lg",
-                            scale=2
-                        )
-                        load_btn = gr.Button(
-                            "📂 載入已處理資料",
-                            variant="secondary",
-                            scale=1
-                        )
-                        clear_btn = gr.Button(
-                            "🗑️ 清除所有資料",
-                            variant="stop",
-                            scale=1
-                        )
-            with gr.Column(scale=2):
-                # 狀態顯示區域
-                with gr.Group():
-                    gr.Markdown("### 📊 處理狀態")
-                    status_text = gr.Textbox(
-                        label="處理進度",
-                        lines=6,
-                        interactive=False,
-                        elem_classes=["status-box"]
-                    )
-                    # 文件列表
-                    gr.Markdown("### 📋 已處理文件")
-                    file_list = gr.Textbox(
-                        label="文件清單",
-                        lines=8,
-                        interactive=False,
-                        elem_classes=["file-info"]
-                    )
-    with gr.Tab("💬 智能問答", id="chat_tab"):
-        with gr.Row():
-            with gr.Column(scale=4):
-                # 聊天區域
-                chatbot = gr.Chatbot(
-                    label="💬 對話記錄",
-                    height=600,
-                    show_copy_button=True,
-                    type="messages",
-                    avatar_images=["👤", "🤖"],
-                    bubble_full_width=False
-                )
-            with gr.Column(scale=1):
-                # 側邊欄功能
-                with gr.Group():
-                    gr.Markdown("### ⚙️ 問答設定")
-                    # 模型參數調整
-                    temperature = gr.Slider(
-                        minimum=0.1,
-                        maximum=1.0,
-                        value=0.3,
-                        step=0.1,
-                        label="創意度 (Temperature)",
-                        info="數值越高回答越有創意"
-                    )
-                    max_tokens = gr.Slider(
-                        minimum=512,
-                        maximum=8192,
-                        value=4096,
-                        step=512,
-                        label="最大回答長度",
-                        info="控制回答的詳細程度"
-                    )
-                    search_k = gr.Slider(
-                        minimum=2,
-                        maximum=10,
-                        value=6,
-                        step=1,
-                        label="檢索文檔數量",
-                        info="搜索相關文檔的數量"
-                    )
-        # 輸入區域
-        with gr.Row():
-            question_input = gr.Textbox(
-                placeholder="請輸入您的問題... (支援中文、英文等多語言)",
-                label="💭 問題輸入",
-                lines=3,
-                scale=4,
-                max_lines=5
-            )
-            ask_btn = gr.Button(
-                "📤 發送問題",
-                variant="primary",
-                scale=1,
-                size="lg"
-            )
-        # 快捷操作
-        with gr.Row():
-            clear_chat_btn = gr.Button(
-                "🧹 清除對話",
-                variant="secondary",
-                scale=1
-            )
-            download_btn = gr.Button(
-                "📥 下載問答記錄",
-                variant="primary",
-                scale=1
-            )
-            export_btn = gr.Button(
-                "📄 匯出為Word",
-                variant="secondary",
-                scale=1
-            )
-        # 問題範例
-        with gr.Group():
-            gr.Markdown("### 💡 問題範例")
-            gr.Examples(
-                examples=[
-                    "這份文檔的主要內容是什麼？",
-                    "請總結文檔的重點和關鍵概念",
-                    "文檔中提到了哪些重要數據或統計？",
-                    "能否詳細解釋某個特定主題或概念？",
-                    "文檔的結論是什麼？",
-                    "有哪些重要的建議或建議？",
-                    "文檔中提到了哪些風險或挑戰？",
-                    "請比較文檔中提到的不同觀點"
-                ],
-                inputs=question_input,
-                label="點擊範例快速填入"
-            )
-    # 隱藏的文件下載組件
-    download_file = gr.File(visible=False)
-    # 下載功能處理函數
-    def handle_download():
-        file_path = download_chat_history()
-        if file_path:
-            return gr.update(value=file_path, visible=True)
-        else:
-            gr.Warning("沒有聊天記錄可以下載！")
-            return gr.update(visible=False)
-    # 事件處理
-    process_btn.click(
-        fn=upload_and_process,
-        inputs=[file_upload],
-        outputs=[status_text, file_list],
-        show_progress=True
-    )
-    load_btn.click(
-        fn=load_existing_data,
-        outputs=[status_text, file_list]
-    )
-    clear_btn.click(
-        fn=clear_all_data,
-        outputs=[status_text, file_list]
-    )
-    ask_btn.click(
-        fn=ask_question,
-        inputs=[question_input, chatbot, temperature, max_tokens, search_k],
-        outputs=[chatbot, question_input]
-    )
-    question_input.submit(
-        fn=ask_question,
-        inputs=[question_input, chatbot, temperature, max_tokens, search_k],
-        outputs=[chatbot, question_input]
-    )
-    clear_chat_btn.click(
-        fn=clear_chat,
-        outputs=[chatbot, question_input]
-    )
-    download_btn.click(
-        fn=handle_download,
-        outputs=download_file
-    )
-    export_btn.click(
-        fn=export_to_word,
-        outputs=download_file
-    )
-if __name__ == "__main__":
-    # 嘗試載入現有的向量存儲
-    bot.load_vector_store()
-    # 啟動應用
-    demo.launch(
-        share=False,  # 設為 True 可獲得公共連結
-        server_name="127.0.0.1",  # 本地訪問
-        server_port=None,  # 自動選擇可用端口
-        show_error=True,
-        inbrowser=True  # 自動打開瀏覽器
     )

+from dotenv import load_dotenv
+import os
+import gradio as gr
+from PyPDF2 import PdfReader
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
+from langchain_community.vectorstores import FAISS
+from langchain.chains.question_answering import load_qa_chain
+from langchain.prompts import PromptTemplate
+import shutil
+import tempfile
+from docx import Document
+from docx.shared import Inches
+from datetime import datetime
+# Load environment variables
+load_dotenv()
+# Read Gemini API key from environment (.env or platform secrets)
+gemini_api_key = os.getenv("GOOGLE_API_KEY", "").strip()
+if gemini_api_key:
+    os.environ["GOOGLE_API_KEY"] = gemini_api_key
+else:
+    print("警告：未找到 GOOGLE_API_KEY，請在環境變數或 .env 設定。")
+class PDFChatBot:
+    def __init__(self):
+        self.vector_store = None
+        self.embeddings = GoogleGenerativeAIEmbeddings(
+            model="models/text-embedding-004",
+            google_api_key=gemini_api_key
+        )
+        self.processed_files = []
+        self.chat_history = []  # 儲存聊天歷史
+    def get_pdf_text(self, pdf_files):
+        """從多個PDF文件中提取文字"""
+        raw_text = ""
+        processed_count = 0
+        if not pdf_files:
+            return raw_text, processed_count
+        # 處理單個文件和多個文件
+        if not isinstance(pdf_files, list):
+            pdf_files = [pdf_files]
+        for pdf_file in pdf_files:
+            try:
+                # 如果是上傳的文件對象，使用其name屬性
+                pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
+                pdf_reader = PdfReader(pdf_path)
+                file_text = ""
+                for page in pdf_reader.pages:
+                    text = page.extract_text()
+                    if text:
+                        file_text += text + "\n"
+                if file_text.strip():
+                    raw_text += file_text
+                    processed_count += 1
+                    self.processed_files.append(os.path.basename(pdf_path))
+            except Exception as e:
+                print(f"讀取PDF時發生錯誤：{str(e)}")
+                continue
+        return raw_text, processed_count
+    def get_text_chunks(self, text):
+        """將文字分割成區塊進行處理"""
+        text_splitter = CharacterTextSplitter(
+            separator="\n",
+            chunk_size=10000,
+            chunk_overlap=1000,
+            length_function=len
+        )
+        chunks = text_splitter.split_text(text)
+        return chunks
+    def create_vector_store(self, chunks):
+        """從文字區塊創建FAISS向量存儲"""
+        try:
+            self.vector_store = FAISS.from_texts(chunks, self.embeddings)
+            self.vector_store.save_local("faiss_index")
+            return True
+        except Exception as e:
+            print(f"創建向量存儲時發生錯誤：{str(e)}")
+            return False
+    def load_vector_store(self):
+        """載入已存在的向量存儲"""
+        try:
+            if os.path.exists("faiss_index"):
+                self.vector_store = FAISS.load_local(
+                    "faiss_index",
+                    embeddings=self.embeddings,
+                    allow_dangerous_deserialization=True
+                )
+                return True
+            else:
+                return False
+        except Exception as e:
+            print(f"載入向量存儲時發生錯誤：{str(e)}")
+            return False
+    def get_conversational_chain(self, temperature=0.3, max_tokens=4096):
+        """創建對話鏈"""
+        prompt_template = """
+        根據提供的內容盡可能詳細地回答問題。確保提供所有細節。
+        如果你需要更多細節來完美回答問題，那麼請詢問你認為需要了解的更多細節。
+        如果答案不在提供的內容中，只需說"在您提供的內容中找不到答案"。不要提供錯誤的答案。
+        內容:\n {context}\n
+        問題: \n{question}\n
+        回答:
+        """
+        # Using Flash 2.0 model
+        model = ChatGoogleGenerativeAI(
+            model="gemini-2.0-flash-exp",
+            google_api_key=gemini_api_key,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            top_p=0.8,
+            top_k=40
+        )
+        prompt = PromptTemplate(
+            template=prompt_template,
+            input_variables=['context', 'question']
+        )
+        chain = load_qa_chain(model, chain_type="stuff", prompt=prompt)
+        return chain
+    def answer_question(self, question, temperature=0.3, max_tokens=4096, search_k=6):
+        """回答用戶問題"""
+        if not self.vector_store:
+            return "請先上傳並處理PDF文件！"
+        if not question.strip():
+            return "請輸入您的問題。"
+        try:
+            # 搜索相關文檔
+            docs = self.vector_store.similarity_search(question, k=search_k)
+            if not docs:
+                return "在上傳的文檔中找不到相關信息。"
+            # 生成回答
+            chain = self.get_conversational_chain(temperature, max_tokens)
+            response = chain(
+                {
+                    "input_documents": docs,
+                    "question": question,
+                },
+                return_only_outputs=True
+            )
+            return response["output_text"]
+        except Exception as e:
+            return f"處理問題時發生錯誤：{str(e)}"
+    def process_pdfs(self, pdf_files, progress=gr.Progress()):
+        """處理PDF文件"""
+        if not pdf_files:
+            return "請上傳至少一個PDF文件。", ""
+        self.processed_files = []
+        progress(0, desc="開始處理PDF文件...")
+        # 提取文字
+        progress(0.2, desc="提取PDF文字內容...")
+        raw_text, processed_count = self.get_pdf_text(pdf_files)
+        if not raw_text.strip():
+            return "無法從PDF文件中提取到文字。", ""
+        progress(0.4, desc="分割文字內容...")
+        # 分割文字
+        text_chunks = self.get_text_chunks(raw_text)
+        progress(0.6, desc="創建向量存儲...")
+        # 創建向量存儲
+        success = self.create_vector_store(text_chunks)
+        progress(1.0, desc="處理完成!")
+        if success:
+            file_list = "已處理的文件:\n" + "\n".join([f"• {file}" for file in self.processed_files])
+            return f"✅ 成功處理 {processed_count} 個PDF文件！\n總共 {len(text_chunks)} 個文字區塊\n現在您可以開始提問。", file_list
+        else:
+            return "❌ PDF處理失敗，請重試。", ""
+    def clear_data(self):
+        """清除處理過的資料"""
+        try:
+            if os.path.exists("faiss_index"):
+                shutil.rmtree("faiss_index")
+            self.vector_store = None
+            self.processed_files = []
+            self.chat_history = []
+            return "✅ 已清除所有處理過的資料！", ""
+        except Exception as e:
+            return f"❌ 清除資料時發生錯誤：{str(e)}", ""
+    def create_docx_report(self, chat_history):
+        """創建包含聊天記錄的docx報告"""
+        try:
+            # 創建新的文檔
+            doc = Document()
+            # 添加標題
+            title = doc.add_heading('PDF聊天機器人 - 問答記錄', 0)
+            title.alignment = 1  # 置中對齊
+            # 添加生成時間
+            doc.add_paragraph(f'生成時間：{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}')
+            # 添加處理的文件列表
+            if self.processed_files:
+                doc.add_heading('已處理的PDF文件：', level=2)
+                for i, file in enumerate(self.processed_files, 1):
+                    doc.add_paragraph(f'{i}. {file}', style='List Number')
+            doc.add_paragraph('')  # 空行
+            # 添加問答記錄
+            doc.add_heading('問答記錄：', level=2)
+            if not chat_history:
+                doc.add_paragraph('目前沒有問答記錄。')
+            else:
+                for i in range(0, len(chat_history), 2):
+                    if i + 1 < len(chat_history):
+                        question = chat_history[i]['content']
+                        answer = chat_history[i + 1]['content']
+                        # 問題
+                        q_paragraph = doc.add_paragraph()
+                        q_run = q_paragraph.add_run(f'問題 {(i//2)+1}：')
+                        q_run.bold = True
+                        q_run.font.size = Inches(0.14)
+                        q_paragraph.add_run(question)
+                        # 回答
+                        a_paragraph = doc.add_paragraph()
+                        a_run = a_paragraph.add_run('回答：')
+                        a_run.bold = True
+                        a_run.font.size = Inches(0.14)
+                        a_paragraph.add_run(answer)
+                        # 分隔線
+                        doc.add_paragraph('─' * 50)
+            # 保存到臨時文件
+            temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.docx')
+            doc.save(temp_file.name)
+            temp_file.close()
+            return temp_file.name
+        except Exception as e:
+            print(f"創建docx文件時發生錯誤：{str(e)}")
+            return None
+# 初始化聊天機器人
+bot = PDFChatBot()
+# Gradio 接口函數
+def upload_and_process(files, progress=gr.Progress()):
+    return bot.process_pdfs(files, progress)
+def ask_question(question, history, temperature, max_tokens, search_k):
+    if not question.strip():
+        return history, ""
+    response = bot.answer_question(question, temperature, max_tokens, search_k)
+    # 使用新的消息格式
+    user_msg = {"role": "user", "content": question}
+    assistant_msg = {"role": "assistant", "content": response}
+    history.append(user_msg)
+    history.append(assistant_msg)
+    # 同步更新聊天歷史到bot實例
+    bot.chat_history = history.copy()
+    return history, ""
+def download_chat_history():
+    """下載聊天記錄為docx文件"""
+    if not bot.chat_history:
+        return None
+    docx_path = bot.create_docx_report(bot.chat_history)
+    return docx_path
+def export_to_word():
+    """匯出問答記錄為Word文件"""
+    if not bot.chat_history:
+        return None
+    docx_path = bot.create_docx_report(bot.chat_history)
+    return docx_path
+def clear_chat():
+    """清除聊天記錄"""
+    bot.chat_history = []
+    return [], ""
+def clear_all_data():
+    return bot.clear_data()
+def load_existing_data():
+    if bot.load_vector_store():
+        return "✅ 成功載入已處理的資料！", ""
+    else:
+        return "❌ 沒有找到已處理的資料。", ""
+# 創建自定義主題
+custom_theme = gr.themes.Soft(
+    primary_hue="blue",
+    secondary_hue="gray",
+    neutral_hue="slate",
+    font=gr.themes.GoogleFont("Noto Sans TC"),
+    font_mono=gr.themes.GoogleFont("JetBrains Mono")
+)
+# 創建 Gradio 介面
+with gr.Blocks(
+    title="PDF智能問答系統",
+    theme=custom_theme,
+    css="""
+    .gradio-container {
+        max-width: 1200px !important;
+        margin: auto !important;
+    }
+    .main-header {
+        text-align: center;
+        padding: 20px;
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        border-radius: 10px;
+        margin-bottom: 20px;
+    }
+    .status-box {
+        background-color: #f8f9fa;
+        border-left: 4px solid #007bff;
+        padding: 15px;
+        border-radius: 5px;
+    }
+    .file-info {
+        background-color: #e8f5e8;
+        border-left: 4px solid #28a745;
+        padding: 10px;
+        border-radius: 5px;
+    }
+    """
+) as demo:
+    # 主標題區域
+    with gr.Row():
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🤖 PDF智能問答系統</h1>
+            <p>基於 Gemini 2.0 Flash 的 RAG 技術 | 支持多語言問答</p>
+        </div>
+        """)
+    # 主要功能區域
+    with gr.Tab("📁 文件管理", id="file_tab"):
+        with gr.Row():
+            with gr.Column(scale=3):
+                # 文件上傳區域
+                with gr.Group():
+                    gr.Markdown("### 📤 上傳PDF文件")
+                    file_upload = gr.File(
+                        file_count="multiple",
+                        file_types=[".pdf"],
+                        label="選擇PDF文件",
+                        height=150
+                    )
+                    # 處理選項
+                    with gr.Row():
+                        process_btn = gr.Button(
+                            "🚀 開始處理",
+                            variant="primary",
+                            size="lg",
+                            scale=2
+                        )
+                        load_btn = gr.Button(
+                            "📂 載入已處理資料",
+                            variant="secondary",
+                            scale=1
+                        )
+                        clear_btn = gr.Button(
+                            "🗑️ 清除所有資料",
+                            variant="stop",
+                            scale=1
+                        )
+            with gr.Column(scale=2):
+                # 狀態顯示區域
+                with gr.Group():
+                    gr.Markdown("### 📊 處理狀態")
+                    status_text = gr.Textbox(
+                        label="處理進度",
+                        lines=6,
+                        interactive=False,
+                        elem_classes=["status-box"]
+                    )
+                    # 文件列表
+                    gr.Markdown("### 📋 已處理文件")
+                    file_list = gr.Textbox(
+                        label="文件清單",
+                        lines=8,
+                        interactive=False,
+                        elem_classes=["file-info"]
+                    )
+    with gr.Tab("💬 智能問答", id="chat_tab"):
+        with gr.Row():
+            with gr.Column(scale=4):
+                # 聊天區域
+                chatbot = gr.Chatbot(
+                    label="💬 對話記錄",
+                    height=600,
+                    show_copy_button=True,
+                    type="messages",
+                    avatar_images=["👤", "🤖"]
+                )
+            with gr.Column(scale=1):
+                # 側邊欄功能
+                with gr.Group():
+                    gr.Markdown("### ⚙️ 問答設定")
+                    # 模型參數調整
+                    temperature = gr.Slider(
+                        minimum=0.1,
+                        maximum=1.0,
+                        value=0.3,
+                        step=0.1,
+                        label="創意度 (Temperature)",
+                        info="數值越高回答越有創意"
+                    )
+                    max_tokens = gr.Slider(
+                        minimum=512,
+                        maximum=8192,
+                        value=4096,
+                        step=512,
+                        label="最大回答長度",
+                        info="控制回答的詳細程度"
+                    )
+                    search_k = gr.Slider(
+                        minimum=2,
+                        maximum=10,
+                        value=6,
+                        step=1,
+                        label="檢索文檔數量",
+                        info="搜索相關文檔的數量"
+                    )
+        # 輸入區域
+        with gr.Row():
+            question_input = gr.Textbox(
+                placeholder="請輸入您的問題... (支援中文、英文等多語言)",
+                label="💭 問題輸入",
+                lines=3,
+                scale=4,
+                max_lines=5
+            )
+            ask_btn = gr.Button(
+                "📤 發送問題",
+                variant="primary",
+                scale=1,
+                size="lg"
+            )
+        # 快捷操作
+        with gr.Row():
+            clear_chat_btn = gr.Button(
+                "🧹 清除對話",
+                variant="secondary",
+                scale=1
+            )
+            download_btn = gr.Button(
+                "📥 下載問答記錄",
+                variant="primary",
+                scale=1
+            )
+            export_btn = gr.Button(
+                "📄 匯出為Word",
+                variant="secondary",
+                scale=1
+            )
+        # 問題範例
+        with gr.Group():
+            gr.Markdown("### 💡 問題範例")
+            gr.Examples(
+                examples=[
+                    "這份文檔的主要內容是什麼？",
+                    "請總結文檔的重點和關鍵概念",
+                    "文檔中提到了哪些重要數據或統計？",
+                    "能否詳細解釋某個特定主題或概念？",
+                    "文檔的結論是什麼？",
+                    "有哪些重要的建議或建議？",
+                    "文檔中提到了哪些風險或挑戰？",
+                    "請比較文檔中提到的不同觀點"
+                ],
+                inputs=question_input,
+                label="點擊範例快速填入"
+            )
+    # 隱藏的文件下載組件
+    download_file = gr.File(visible=False)
+    # 下載功能處理函數
+    def handle_download():
+        file_path = download_chat_history()
+        if file_path:
+            return gr.update(value=file_path, visible=True)
+        else:
+            gr.Warning("沒有聊天記錄可以下載！")
+            return gr.update(visible=False)
+    # 事件處理
+    process_btn.click(
+        fn=upload_and_process,
+        inputs=[file_upload],
+        outputs=[status_text, file_list],
+        show_progress=True
+    )
+    load_btn.click(
+        fn=load_existing_data,
+        outputs=[status_text, file_list]
+    )
+    clear_btn.click(
+        fn=clear_all_data,
+        outputs=[status_text, file_list]
+    )
+    ask_btn.click(
+        fn=ask_question,
+        inputs=[question_input, chatbot, temperature, max_tokens, search_k],
+        outputs=[chatbot, question_input]
+    )
+    question_input.submit(
+        fn=ask_question,
+        inputs=[question_input, chatbot, temperature, max_tokens, search_k],
+        outputs=[chatbot, question_input]
+    )
+    clear_chat_btn.click(
+        fn=clear_chat,
+        outputs=[chatbot, question_input]
+    )
+    download_btn.click(
+        fn=handle_download,
+        outputs=download_file
+    )
+    export_btn.click(
+        fn=export_to_word,
+        outputs=download_file
+    )
+if __name__ == "__main__":
+    # 嘗試載入現有的向量存儲
+    bot.load_vector_store()
+    # 讀取部署相關配置
+    server_name = os.getenv("HOST", os.getenv("SERVER_NAME", "0.0.0.0"))
+    # 常見平台會傳入 PORT；若無則使用 7860（Gradio 預設）
+    server_port_env = os.getenv("PORT", os.getenv("SERVER_PORT"))
+    server_port = int(server_port_env) if server_port_env and server_port_env.isdigit() else 7860
+    inbrowser = os.getenv("INBROWSER", "false").lower() == "true"
+    share = os.getenv("GRADIO_SHARE", "false").lower() == "true"
+    # 啟動應用（綁定 0.0.0.0 以支援容器/雲端）
+    demo.launch(
+        share=share,
+        server_name=server_name,
+        server_port=server_port,
+        show_error=True,
+        inbrowser=inbrowser
     )