# 第二步:匯入必要的庫並初始化 import os import gradio as gr from PyPDF2 import PdfReader from langchain.text_splitter import CharacterTextSplitter from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI from langchain_community.vectorstores import FAISS from langchain.chains.question_answering import load_qa_chain from langchain.prompts import PromptTemplate import shutil import tempfile from docx import Document from docx.shared import Inches from datetime import datetime import resend print("📦 所有庫匯入成功!") # 第三步:設置API密鑰 print("🔑 設置API密鑰...") # Gemini API key (請替換為您自己的API密鑰) gemini_api_key = "AIzaSyBbufVdrxdZkBxXLzXxfdtGArHUMfos5Z0" os.environ["GOOGLE_API_KEY"] = gemini_api_key # Resend API key (請替換為您自己的API密鑰) resend.api_key = "re_TPd7f23i_E3gvJYJF8xibuymWSPXxKPrY" print("✅ API密鑰設置完成!") # 第四步:定義PDF聊天機器人類 class PDFChatBot: def __init__(self): self.vector_store = None self.embeddings = GoogleGenerativeAIEmbeddings( model="models/text-embedding-004", google_api_key=gemini_api_key ) self.processed_files = [] self.chat_history = [] def get_pdf_text(self, pdf_files): """從多個PDF檔案中提取文字""" raw_text = "" processed_count = 0 if not pdf_files: return raw_text, processed_count if not isinstance(pdf_files, list): pdf_files = [pdf_files] for pdf_file in pdf_files: try: pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file pdf_reader = PdfReader(pdf_path) file_text = "" for page in pdf_reader.pages: text = page.extract_text() if text: file_text += text + "\n" if file_text.strip(): raw_text += file_text processed_count += 1 self.processed_files.append(os.path.basename(pdf_path)) except Exception as e: print(f"讀取PDF時發生錯誤:{str(e)}") continue return raw_text, processed_count def get_text_chunks(self, text): """將文字分割成區塊進行處理""" text_splitter = CharacterTextSplitter( separator="\n", chunk_size=10000, chunk_overlap=1000, length_function=len ) chunks = text_splitter.split_text(text) return chunks def create_vector_store(self, chunks): """從文字區塊建立FAISS向量儲存""" try: self.vector_store = FAISS.from_texts(chunks, self.embeddings) self.vector_store.save_local("faiss_index") return True except Exception as e: print(f"建立向量儲存時發生錯誤:{str(e)}") return False def load_vector_store(self): """載入已存在的向量儲存""" try: if os.path.exists("faiss_index"): self.vector_store = FAISS.load_local( "faiss_index", embeddings=self.embeddings, allow_dangerous_deserialization=True ) return True else: return False except Exception as e: print(f"載入向量儲存時發生錯誤:{str(e)}") return False def get_conversational_chain(self): """建立對話鏈""" prompt_template = """ 根據提供的內容盡可能詳細地回答問題。確保提供所有細節。 如果你需要更多細節來完美回答問題,那麼請詢問你認為需要了解的更多細節。 如果答案不在提供的內容中,只需說"在您提供的內容中找不到答案"。不要提供錯誤的答案。 內容:\n {context}\n 問題: \n{question}\n 回答: """ model = ChatGoogleGenerativeAI( model="gemini-2.0-flash-exp", google_api_key=gemini_api_key, temperature=0.3, max_tokens=8192, top_p=0.8, top_k=40 ) prompt = PromptTemplate( template=prompt_template, input_variables=['context', 'question'] ) chain = load_qa_chain(model, chain_type="stuff", prompt=prompt) return chain def answer_question(self, question): """回答使用者問題""" if not self.vector_store: return "請先上傳並處理PDF檔案!" if not question.strip(): return "請輸入您的問題。" try: docs = self.vector_store.similarity_search(question, k=6) if not docs: return "在上傳的文件中找不到相關資訊。" chain = self.get_conversational_chain() response = chain( { "input_documents": docs, "question": question, }, return_only_outputs=True ) return response["output_text"] except Exception as e: return f"處理問題時發生錯誤:{str(e)}" def process_pdfs(self, pdf_files, progress=gr.Progress()): """處理PDF檔案""" if not pdf_files: return "請上傳至少一個PDF檔案。", "" self.processed_files = [] progress(0, desc="開始處理PDF檔案...") progress(0.2, desc="提取PDF文字內容...") raw_text, processed_count = self.get_pdf_text(pdf_files) if not raw_text.strip(): return "無法從PDF檔案中提取到文字。", "" progress(0.4, desc="分割文字內容...") text_chunks = self.get_text_chunks(raw_text) progress(0.6, desc="建立向量儲存...") success = self.create_vector_store(text_chunks) progress(1.0, desc="處理完成!") if success: file_list = "已處理的檔案:\n" + "\n".join([f"• {file}" for file in self.processed_files]) return f"✅ 成功處理 {processed_count} 個PDF檔案!\n總共 {len(text_chunks)} 個文字區塊\n現在您可以開始提問。", file_list else: return "❌ PDF處理失敗,請重試。", "" def clear_data(self): """清除處理過的資料""" try: if os.path.exists("faiss_index"): shutil.rmtree("faiss_index") self.vector_store = None self.processed_files = [] self.chat_history = [] return "✅ 已清除所有處理過的資料!", "" except Exception as e: return f"❌ 清除資料時發生錯誤:{str(e)}", "" def create_docx_report(self, chat_history): """建立包含聊天記錄的docx報告""" try: doc = Document() title = doc.add_heading('PDF聊天機器人 - 問答記錄', 0) title.alignment = 1 doc.add_paragraph(f'產生時間:{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}') if self.processed_files: doc.add_heading('已處理的PDF檔案:', level=2) for i, file in enumerate(self.processed_files, 1): doc.add_paragraph(f'{i}. {file}', style='List Number') doc.add_paragraph('') doc.add_heading('問答記錄:', level=2) if not chat_history: doc.add_paragraph('目前沒有問答記錄。') else: for i in range(0, len(chat_history), 2): if i + 1 < len(chat_history): question = chat_history[i]['content'] answer = chat_history[i + 1]['content'] q_paragraph = doc.add_paragraph() q_run = q_paragraph.add_run(f'問題 {(i//2)+1}:') q_run.bold = True q_run.font.size = Inches(0.14) q_paragraph.add_run(question) a_paragraph = doc.add_paragraph() a_run = a_paragraph.add_run('回答:') a_run.bold = True a_run.font.size = Inches(0.14) a_paragraph.add_run(answer) doc.add_paragraph('─' * 50) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.docx') doc.save(temp_file.name) temp_file.close() return temp_file.name except Exception as e: print(f"建立docx檔案時發生錯誤:{str(e)}") return None def create_email_html_content(self, chat_history): """建立郵件的HTML內容""" if not chat_history: return "

目前沒有問答記錄。

" html_content = f"""

🤖 PDF聊天機器人 - 問答記錄

產生時間:{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}

""" if self.processed_files: html_content += """

📁 已處理的PDF檔案:

" html_content += "

💬 問答記錄:

" for i in range(0, len(chat_history), 2): if i + 1 < len(chat_history): question = chat_history[i]['content'] answer = chat_history[i + 1]['content'] question_html = question.replace('\n', '
') answer_html = answer.replace('\n', '
') html_content += f"""
問題 {(i//2)+1}:
{question_html}
回答:
{answer_html}

""" html_content += """
""" return html_content def send_chat_history_email(self, recipient_email): """發送聊天記錄到指定信箱""" if not self.chat_history: return "❌ 沒有聊天記錄可以發送!" if not recipient_email or "@" not in recipient_email: return "❌ 請輸入有效的信箱地址!" try: html_content = self.create_email_html_content(self.chat_history) r = resend.Emails.send({ "from": "onboarding@resend.dev", "to": recipient_email, "subject": f"PDF聊天機器人問答記錄 - {datetime.now().strftime('%Y-%m-%d %H:%M')}", "html": html_content }) return f"✅ 郵件已成功發送到 {recipient_email}!\n郵件ID: {r.get('id', 'Unknown')}" except Exception as e: return f"❌ 發送郵件時發生錯誤:{str(e)}" # 第五步:初始化聊天機器人 print("🤖 初始化PDF聊天機器人...") bot = PDFChatBot() # 第六步:定義Gradio介面函數 def upload_and_process(files, progress=gr.Progress()): return bot.process_pdfs(files, progress) def ask_question(question, history): if not question.strip(): return history, "" response = bot.answer_question(question) user_msg = {"role": "user", "content": question} assistant_msg = {"role": "assistant", "content": response} history.append(user_msg) history.append(assistant_msg) bot.chat_history = history.copy() return history, "" def download_chat_history(): if not bot.chat_history: return None docx_path = bot.create_docx_report(bot.chat_history) return docx_path def send_email(email_address): return bot.send_chat_history_email(email_address) def clear_chat(): bot.chat_history = [] return [], "" def clear_all_data(): return bot.clear_data() def load_existing_data(): if bot.load_vector_store(): return "✅ 成功載入已處理的資料!", "" else: return "❌ 沒有找到已處理的資料。", "" # 第七步:建立Gradio介面 print("🎨 建立使用者介面...") with gr.Blocks(title="PDF聊天機器人", theme=gr.themes.Soft()) as demo: gr.Markdown( """ # 🤖 PDF聊天機器人 (Flash 2.0 + 郵件發送) 上傳您的PDF檔案,然後就可以向文件提問!支援多語言問答並可將記錄發送到信箱。 **🔥 在Hugging Face中執行** """ ) with gr.Tab("📁 檔案處理"): with gr.Row(): with gr.Column(scale=2): file_upload = gr.File( file_count="multiple", file_types=[".pdf"], label="上傳PDF檔案", height=200 ) with gr.Row(): process_btn = gr.Button("🚀 處理PDF檔案", variant="primary", size="lg") load_btn = gr.Button("📂 載入已處理資料", variant="secondary") clear_btn = gr.Button("🗑️ 清除資料", variant="stop") with gr.Column(scale=1): status_text = gr.Textbox( label="處理狀態", lines=8, interactive=False ) file_list = gr.Textbox( label="已處理檔案", lines=6, interactive=False ) with gr.Tab("💬 問答聊天"): chatbot = gr.Chatbot( label="聊天記錄", height=500, show_copy_button=True, type="messages" ) with gr.Row(): question_input = gr.Textbox( placeholder="請輸入您的問題...", label="問題", lines=2, scale=4 ) ask_btn = gr.Button("📤 提問", variant="primary", scale=1) with gr.Row(): clear_chat_btn = gr.Button("🧹 清除聊天記錄", variant="secondary", scale=1) download_btn = gr.Button("📥 下載問答記錄", variant="primary", scale=1) download_file = gr.File(visible=False) gr.Examples( examples=[ "這份文件的主要內容是什麼?", "請總結文件的重點。", "文件中提到了哪些重要概念?", "能否詳細解釋某個特定主題?" ], inputs=question_input, label="問題範例" ) with gr.Tab("📧 郵件發送"): gr.Markdown( """ ### 📮 發送聊天記錄到信箱 將您的問答記錄以精美的HTML格式發送到指定信箱,方便保存和分享。 """ ) with gr.Row(): with gr.Column(scale=2): email_input = gr.Textbox( label="收件人信箱", placeholder="請輸入有效的信箱地址...", value="grace.chenyiwen@gmail.com" ) send_email_btn = gr.Button("📧 發送聊天記錄", variant="primary", size="lg") with gr.Column(scale=1): email_status = gr.Textbox( label="發送狀態", lines=6, interactive=False ) gr.Markdown( """ **注意事項:** - 請確保您已經有一些問答記錄 - 郵件將包含所有處理過的PDF檔案清單和完整的問答記錄 - 郵件格式為HTML,在大多數郵件用戶端中都能正常顯示 """ ) # 事件處理 def handle_download(): file_path = download_chat_history() if file_path: return gr.update(value=file_path, visible=True) else: gr.Warning("沒有聊天記錄可以下載!") return gr.update(visible=False) # 綁定事件 process_btn.click( fn=upload_and_process, inputs=[file_upload], outputs=[status_text, file_list], show_progress=True ) load_btn.click( fn=load_existing_data, outputs=[status_text, file_list] ) clear_btn.click( fn=clear_all_data, outputs=[status_text, file_list] ) ask_btn.click( fn=ask_question, inputs=[question_input, chatbot], outputs=[chatbot, question_input] ) question_input.submit( fn=ask_question, inputs=[question_input, chatbot], outputs=[chatbot, question_input] ) clear_chat_btn.click( fn=clear_chat, outputs=[chatbot, question_input] ) download_btn.click( fn=handle_download, outputs=download_file ) send_email_btn.click( fn=send_email, inputs=[email_input], outputs=[email_status] ) # 第八步:啟動應用程式 print("🚀 啟動應用程式中...") # 嘗試載入現有的向量儲存 bot.load_vector_store() # 在Hugging Face中啟動應用程式 demo.launch( share=True, # 在Hugging Face中設為True獲得公共連結 server_name="0.0.0.0", # 允許外部存取 server_port=None, show_error=True, debug=True ) print("✅ PDF聊天機器人已成功啟動!") print("📍 請點選上方顯示的連結來存取應用程式")