# ResendLangChain / app.py
# (page residue) cjian2025's picture
# (page residue) Commit message: Update app.py
# (page residue) Commit: b04747b verified
# 第二步:匯入必要的庫並初始化
import os
import gradio as gr
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_community.vectorstores import FAISS
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
import shutil
import tempfile
from docx import Document
from docx.shared import Inches
from datetime import datetime
import resend
print("📦 所有庫匯入成功!")
# Step 3: configure API keys
print("🔑 設置API密鑰...")
# Credentials are read from the environment so that no secret is stored in
# the source file.
# NOTE(review): any key that was ever committed in this file should be treated
# as compromised and rotated.
gemini_api_key = os.environ.get("GOOGLE_API_KEY", "")
if not gemini_api_key:
    print("⚠️ 未設定環境變數 GOOGLE_API_KEY,Gemini 功能將無法使用。")
# Resend API key, used for sending the chat history by e-mail.
resend.api_key = os.environ.get("RESEND_API_KEY", "")
if not resend.api_key:
    print("⚠️ 未設定環境變數 RESEND_API_KEY,郵件發送功能將無法使用。")
print("✅ API密鑰設置完成!")
# Step 4: define the PDF chatbot class
class PDFChatBot:
    """Answer questions about uploaded PDFs using a FAISS index and Gemini.

    Each instance keeps its own FAISS vector store, the names of the PDFs that
    yielded text, and the chat history that the docx / e-mail exporters read.
    """

    # Directory the FAISS index is saved to and loaded from.
    INDEX_DIR = "faiss_index"

    def __init__(self):
        """Create an empty bot with a Gemini embedding client."""
        self.vector_store = None
        self.embeddings = GoogleGenerativeAIEmbeddings(
            model="models/text-embedding-004",
            google_api_key=gemini_api_key
        )
        self.processed_files = []
        self.chat_history = []

    def get_pdf_text(self, pdf_files):
        """Extract the text of one or more PDF files.

        Args:
            pdf_files: A single file or a list of files. Each item is either a
                path or an object exposing its path as ``.name``.

        Returns:
            A ``(raw_text, processed_count)`` tuple: the concatenated text of
            every PDF that yielded text, and how many PDFs that was. The base
            name of each such PDF is appended to ``self.processed_files``.
        """
        if not pdf_files:
            return "", 0
        if not isinstance(pdf_files, list):
            pdf_files = [pdf_files]

        file_texts = []
        for pdf_file in pdf_files:
            try:
                pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
                page_texts = (page.extract_text() for page in PdfReader(pdf_path).pages)
                file_text = "".join(f"{text}\n" for text in page_texts if text)
                if file_text.strip():
                    file_name = os.path.basename(pdf_path)
                    file_texts.append(file_text)
                    self.processed_files.append(file_name)
            except Exception as e:
                # Best effort: an unreadable PDF is reported and skipped so the
                # remaining files are still processed.
                print(f"讀取PDF時發生錯誤:{str(e)}")
                continue
        return "".join(file_texts), len(file_texts)

    def get_text_chunks(self, text):
        """Split ``text`` on newlines into overlapping chunks for embedding.

        Returns:
            The list of chunk strings produced by ``CharacterTextSplitter``.
        """
        text_splitter = CharacterTextSplitter(
            separator="\n",
            chunk_size=10000,
            chunk_overlap=1000,
            length_function=len
        )
        return text_splitter.split_text(text)

    def create_vector_store(self, chunks):
        """Build a FAISS vector store from ``chunks`` and save it to disk.

        Returns:
            ``True`` when the index was built and saved, ``False`` otherwise
            (including when ``chunks`` is empty).
        """
        if not chunks:
            # Nothing to embed; avoid a pointless embedding request.
            print("建立向量儲存時發生錯誤:沒有可用的文字區塊")
            return False
        try:
            self.vector_store = FAISS.from_texts(chunks, self.embeddings)
            self.vector_store.save_local(self.INDEX_DIR)
            return True
        except Exception as e:
            print(f"建立向量儲存時發生錯誤:{str(e)}")
            return False

    def load_vector_store(self):
        """Load a previously saved FAISS index from ``INDEX_DIR``.

        Returns:
            ``True`` when an index was found and loaded, ``False`` otherwise.
        """
        try:
            if not os.path.exists(self.INDEX_DIR):
                return False
            # SECURITY NOTE: loading a FAISS index deserializes pickled data.
            # Only load index directories written by this application.
            self.vector_store = FAISS.load_local(
                self.INDEX_DIR,
                embeddings=self.embeddings,
                allow_dangerous_deserialization=True
            )
            return True
        except Exception as e:
            print(f"載入向量儲存時發生錯誤:{str(e)}")
            return False

    def get_conversational_chain(self):
        """Build the "stuff" question-answering chain backed by Gemini.

        Returns:
            The chain returned by ``load_qa_chain``; it expects
            ``input_documents`` and ``question`` as inputs.
        """
        prompt_template = """
根據提供的內容盡可能詳細地回答問題。確保提供所有細節。
如果你需要更多細節來完美回答問題,那麼請詢問你認為需要了解的更多細節。
如果答案不在提供的內容中,只需說"在您提供的內容中找不到答案"。不要提供錯誤的答案。
內容:\n {context}\n
問題: \n{question}\n
回答:
"""
        model = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash-exp",
            google_api_key=gemini_api_key,
            temperature=0.3,
            max_tokens=8192,
            top_p=0.8,
            top_k=40
        )
        prompt = PromptTemplate(
            template=prompt_template,
            input_variables=['context', 'question']
        )
        return load_qa_chain(model, chain_type="stuff", prompt=prompt)

    def answer_question(self, question):
        """Answer ``question`` from the indexed PDF content.

        Args:
            question: The user's question; ``None`` or blank text is rejected.

        Returns:
            The model's answer, or a user-facing message when no index is
            loaded, the question is empty, nothing relevant is found, or an
            error occurs.
        """
        if not self.vector_store:
            return "請先上傳並處理PDF檔案!"
        if not question or not question.strip():
            return "請輸入您的問題。"
        try:
            docs = self.vector_store.similarity_search(question, k=6)
            if not docs:
                return "在上傳的文件中找不到相關資訊。"
            chain = self.get_conversational_chain()
            response = chain(
                {
                    "input_documents": docs,
                    "question": question,
                },
                return_only_outputs=True
            )
            return response["output_text"]
        except Exception as e:
            return f"處理問題時發生錯誤:{str(e)}"

    def process_pdfs(self, pdf_files, progress=gr.Progress()):
        """Extract, chunk and index the uploaded PDFs.

        Args:
            pdf_files: The uploaded file(s), as accepted by ``get_pdf_text``.
            progress: Callable used to report progress as ``(fraction, desc=...)``.

        Returns:
            A ``(status_message, file_list_text)`` tuple; the file list is empty
            unless processing succeeded.
        """
        if not pdf_files:
            return "請上傳至少一個PDF檔案。", ""
        self.processed_files = []
        progress(0, desc="開始處理PDF檔案...")
        progress(0.2, desc="提取PDF文字內容...")
        raw_text, processed_count = self.get_pdf_text(pdf_files)
        if not raw_text.strip():
            return "無法從PDF檔案中提取到文字。", ""
        progress(0.4, desc="分割文字內容...")
        text_chunks = self.get_text_chunks(raw_text)
        progress(0.6, desc="建立向量儲存...")
        if not self.create_vector_store(text_chunks):
            return "❌ PDF處理失敗,請重試。", ""
        # Report completion only once the index has actually been built.
        progress(1.0, desc="處理完成!")
        file_list = "已處理的檔案:\n" + "\n".join(f"• {file}" for file in self.processed_files)
        return f"✅ 成功處理 {processed_count} 個PDF檔案!\n總共 {len(text_chunks)} 個文字區塊\n現在您可以開始提問。", file_list

    def clear_data(self):
        """Delete the saved index and reset all in-memory state.

        Returns:
            A ``(status_message, "")`` tuple.
        """
        try:
            if os.path.exists(self.INDEX_DIR):
                shutil.rmtree(self.INDEX_DIR)
            self.vector_store = None
            self.processed_files = []
            self.chat_history = []
            return "✅ 已清除所有處理過的資料!", ""
        except Exception as e:
            return f"❌ 清除資料時發生錯誤:{str(e)}", ""

    @staticmethod
    def _qa_pairs(chat_history):
        """Return ``(question, answer)`` content pairs from alternating messages.

        Even-indexed entries are taken as questions and the entry after each
        one as its answer; a trailing unpaired entry is dropped.
        """
        return [
            (asked['content'], replied['content'])
            for asked, replied in zip(chat_history[0::2], chat_history[1::2])
        ]

    @staticmethod
    def _add_labelled_paragraph(doc, label, text):
        """Append a paragraph made of a bold ``label`` run followed by ``text``."""
        paragraph = doc.add_paragraph()
        label_run = paragraph.add_run(label)
        label_run.bold = True
        # Font size is given as a length; 0.14 inch is roughly 10 pt.
        label_run.font.size = Inches(0.14)
        paragraph.add_run(text)

    def create_docx_report(self, chat_history):
        """Write the chat history to a temporary .docx file.

        Args:
            chat_history: Alternating question/answer message dicts, each with
                a ``content`` key.

        Returns:
            The path of the generated file, or ``None`` if generation failed.
        """
        try:
            doc = Document()
            title = doc.add_heading('PDF聊天機器人 - 問答記錄', 0)
            title.alignment = 1  # presumably WD_ALIGN_PARAGRAPH.CENTER — confirm
            doc.add_paragraph(f'產生時間:{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}')
            if self.processed_files:
                doc.add_heading('已處理的PDF檔案:', level=2)
                for file_name in self.processed_files:
                    # 'List Number' numbers the items itself; a manual "1."
                    # prefix would show every number twice.
                    doc.add_paragraph(file_name, style='List Number')
                doc.add_paragraph('')
            doc.add_heading('問答記錄:', level=2)
            if not chat_history:
                doc.add_paragraph('目前沒有問答記錄。')
            else:
                for number, (question, answer) in enumerate(self._qa_pairs(chat_history), 1):
                    self._add_labelled_paragraph(doc, f'問題 {number}:', question)
                    self._add_labelled_paragraph(doc, '回答:', answer)
                    doc.add_paragraph('─' * 50)
            # Reserve a unique path and release the handle before saving, so the
            # handle is closed even when saving fails.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
                report_path = temp_file.name
            doc.save(report_path)
            return report_path
        except Exception as e:
            print(f"建立docx檔案時發生錯誤:{str(e)}")
            return None

    def create_email_html_content(self, chat_history):
        """Render the chat history as an HTML e-mail body.

        File names, questions and answers are HTML-escaped before being
        inserted, because they come from uploads, user input and model output.

        Args:
            chat_history: Alternating question/answer message dicts, each with
                a ``content`` key.

        Returns:
            The HTML document as a string, or a short placeholder paragraph
            when ``chat_history`` is empty.
        """
        # Local import: keeps this method's extra dependency self-contained.
        from html import escape

        if not chat_history:
            return "<p>目前沒有問答記錄。</p>"

        parts = [f"""
<html>
<head>
<style>
body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
.header {{ background-color: #f4f4f4; padding: 20px; text-align: center; }}
.content {{ padding: 20px; }}
.question {{ background-color: #e8f4f8; padding: 10px; margin: 10px 0; border-left: 4px solid #2196F3; }}
.answer {{ background-color: #f0f8e8; padding: 10px; margin: 10px 0; border-left: 4px solid #4CAF50; }}
.file-list {{ background-color: #fff3cd; padding: 10px; margin: 10px 0; border: 1px solid #ffeeba; }}
hr {{ border: none; border-top: 1px solid #ddd; margin: 20px 0; }}
</style>
</head>
<body>
<div class="header">
<h1>🤖 PDF聊天機器人 - 問答記錄</h1>
<p>產生時間:{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}</p>
</div>
<div class="content">
"""]
        if self.processed_files:
            parts.append("""
<div class="file-list">
<h3>📁 已處理的PDF檔案:</h3>
<ul>
""")
            parts.extend(f"<li>{escape(str(file))}</li>" for file in self.processed_files)
            parts.append("</ul></div>")
        parts.append("<h3>💬 問答記錄:</h3>")
        for number, (question, answer) in enumerate(self._qa_pairs(chat_history), 1):
            # Escape first, then turn newlines into <br> so only our own markup
            # reaches the mail client.
            question_html = escape(str(question)).replace('\n', '<br>')
            answer_html = escape(str(answer)).replace('\n', '<br>')
            parts.append(f"""
<div class="question">
<strong>問題 {number}:</strong><br>
{question_html}
</div>
<div class="answer">
<strong>回答:</strong><br>
{answer_html}
</div>
<hr>
""")
        parts.append("""
</div>
</body>
</html>
""")
        return "".join(parts)

    def send_chat_history_email(self, recipient_email):
        """Send the stored chat history to ``recipient_email`` through Resend.

        Args:
            recipient_email: Destination address; surrounding whitespace is
                ignored and the value must contain ``@``.

        Returns:
            A user-facing status message describing success or the failure.
        """
        if not self.chat_history:
            return "❌ 沒有聊天記錄可以發送!"
        recipient_email = (recipient_email or "").strip()
        if "@" not in recipient_email:
            return "❌ 請輸入有效的信箱地址!"
        try:
            html_content = self.create_email_html_content(self.chat_history)
            r = resend.Emails.send({
                "from": "onboarding@resend.dev",
                "to": recipient_email,
                "subject": f"PDF聊天機器人問答記錄 - {datetime.now().strftime('%Y-%m-%d %H:%M')}",
                "html": html_content
            })
            return f"✅ 郵件已成功發送到 {recipient_email}!\n郵件ID: {r.get('id', 'Unknown')}"
        except Exception as e:
            return f"❌ 發送郵件時發生錯誤:{str(e)}"
# Step 5: initialise the chatbot
print("🤖 初始化PDF聊天機器人...")
# Single module-level instance; the Gradio callback functions in this file
# read and update its state.
bot = PDFChatBot()
# Step 6: define the Gradio callback functions
def upload_and_process(files, progress=gr.Progress()):
    """Index the uploaded PDFs and return the status text and file listing."""
    status_message, file_listing = bot.process_pdfs(files, progress)
    return status_message, file_listing
def ask_question(question, history):
    """Answer ``question`` and append the exchange to ``history``.

    Args:
        question: Text typed by the user; ``None`` or blank input is ignored.
        history: Chat messages as ``{"role": ..., "content": ...}`` dicts;
            ``None`` is treated as an empty conversation.

    Returns:
        ``(history, "")`` — the updated message list and an empty string for
        the question box.
    """
    if history is None:
        history = []
    if not question or not question.strip():
        return history, ""
    response = bot.answer_question(question)
    history.append({"role": "user", "content": question})
    history.append({"role": "assistant", "content": response})
    # The bot keeps its own copy so later changes to the UI list do not alter
    # what the export functions read.
    bot.chat_history = history.copy()
    return history, ""
def download_chat_history():
    """Return the path of a docx report of the chat, or ``None`` without history."""
    recorded = bot.chat_history
    return bot.create_docx_report(recorded) if recorded else None
def send_email(email_address):
    """Send the stored chat history to ``email_address`` and return the status text."""
    delivery_status = bot.send_chat_history_email(email_address)
    return delivery_status
def clear_chat():
    """Forget the stored chat history and return empty chat and question values."""
    bot.chat_history = list()
    empty_messages, empty_question = [], ""
    return empty_messages, empty_question
def clear_all_data():
    """Wipe the bot's index and state; return the status text and an empty file list."""
    status_message, file_listing = bot.clear_data()
    return status_message, file_listing
def load_existing_data():
    """Try to load a saved index; return the status text and an empty file list."""
    loaded = bot.load_vector_store()
    status_message = "✅ 成功載入已處理的資料!" if loaded else "❌ 沒有找到已處理的資料。"
    return status_message, ""
# Step 7: build the Gradio interface
# NOTE(review): the nesting of the layout blocks below was reconstructed from a
# flattened source — confirm it matches the intended layout.
print("🎨 建立使用者介面...")
with gr.Blocks(title="PDF聊天機器人", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
# 🤖 PDF聊天機器人 (Flash 2.0 + 郵件發送)
上傳您的PDF檔案,然後就可以向文件提問!支援多語言問答並可將記錄發送到信箱。
**🔥 在Hugging Face中執行**
"""
    )
    # Tab 1: upload PDFs, build / load / clear the index.
    with gr.Tab("📁 檔案處理"):
        with gr.Row():
            with gr.Column(scale=2):
                file_upload = gr.File(
                    file_count="multiple",
                    file_types=[".pdf"],
                    label="上傳PDF檔案",
                    height=200
                )
                with gr.Row():
                    process_btn = gr.Button("🚀 處理PDF檔案", variant="primary", size="lg")
                    load_btn = gr.Button("📂 載入已處理資料", variant="secondary")
                    clear_btn = gr.Button("🗑️ 清除資料", variant="stop")
            with gr.Column(scale=1):
                status_text = gr.Textbox(
                    label="處理狀態",
                    lines=8,
                    interactive=False
                )
                file_list = gr.Textbox(
                    label="已處理檔案",
                    lines=6,
                    interactive=False
                )
    # Tab 2: chat with the indexed documents.
    with gr.Tab("💬 問答聊天"):
        chatbot = gr.Chatbot(
            label="聊天記錄",
            height=500,
            show_copy_button=True,
            type="messages"
        )
        with gr.Row():
            question_input = gr.Textbox(
                placeholder="請輸入您的問題...",
                label="問題",
                lines=2,
                scale=4
            )
            ask_btn = gr.Button("📤 提問", variant="primary", scale=1)
        with gr.Row():
            clear_chat_btn = gr.Button("🧹 清除聊天記錄", variant="secondary", scale=1)
            download_btn = gr.Button("📥 下載問答記錄", variant="primary", scale=1)
        # Hidden until handle_download() supplies a generated report.
        download_file = gr.File(visible=False)
        gr.Examples(
            examples=[
                "這份文件的主要內容是什麼?",
                "請總結文件的重點。",
                "文件中提到了哪些重要概念?",
                "能否詳細解釋某個特定主題?"
            ],
            inputs=question_input,
            label="問題範例"
        )
    # Tab 3: e-mail the chat history.
    with gr.Tab("📧 郵件發送"):
        gr.Markdown(
            """
### 📮 發送聊天記錄到信箱
將您的問答記錄以精美的HTML格式發送到指定信箱,方便保存和分享。
"""
        )
        with gr.Row():
            with gr.Column(scale=2):
                # NOTE(review): a personal address is hard-coded as the default
                # recipient — confirm this is intended before publishing.
                email_input = gr.Textbox(
                    label="收件人信箱",
                    placeholder="請輸入有效的信箱地址...",
                    value="grace.chenyiwen@gmail.com"
                )
                send_email_btn = gr.Button("📧 發送聊天記錄", variant="primary", size="lg")
            with gr.Column(scale=1):
                email_status = gr.Textbox(
                    label="發送狀態",
                    lines=6,
                    interactive=False
                )
        gr.Markdown(
            """
**注意事項:**
- 請確保您已經有一些問答記錄
- 郵件將包含所有處理過的PDF檔案清單和完整的問答記錄
- 郵件格式為HTML,在大多數郵件用戶端中都能正常顯示
"""
        )

    # Event handlers
    def handle_download():
        """Generate the docx report and reveal the download component.

        Returns a Gradio update that shows the file when a report was created;
        otherwise raises a UI warning and keeps the component hidden.
        """
        file_path = download_chat_history()
        if file_path:
            return gr.update(value=file_path, visible=True)
        else:
            gr.Warning("沒有聊天記錄可以下載!")
            return gr.update(visible=False)

    # Event bindings
    process_btn.click(
        fn=upload_and_process,
        inputs=[file_upload],
        outputs=[status_text, file_list],
        show_progress=True
    )
    load_btn.click(
        fn=load_existing_data,
        outputs=[status_text, file_list]
    )
    clear_btn.click(
        fn=clear_all_data,
        outputs=[status_text, file_list]
    )
    # Clicking the button and pressing Enter in the textbox both submit the question.
    ask_btn.click(
        fn=ask_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot, question_input]
    )
    question_input.submit(
        fn=ask_question,
        inputs=[question_input, chatbot],
        outputs=[chatbot, question_input]
    )
    clear_chat_btn.click(
        fn=clear_chat,
        outputs=[chatbot, question_input]
    )
    download_btn.click(
        fn=handle_download,
        outputs=download_file
    )
    send_email_btn.click(
        fn=send_email,
        inputs=[email_input],
        outputs=[email_status]
    )
# Step 8: launch the application
print("🚀 啟動應用程式中...")
# Try to load an existing vector store so earlier uploads can be queried
# without re-processing; the boolean result is ignored here.
bot.load_vector_store()
# Launch the app on Hugging Face
# NOTE(review): presumably share=True is unnecessary when hosted on Hugging
# Face Spaces — confirm against the Gradio documentation.
demo.launch(
    share=True,  # set to True on Hugging Face to get a public link
    server_name="0.0.0.0",  # allow external access
    server_port=None,
    show_error=True,
    debug=True
)
# NOTE(review): these messages run only after launch() returns; with debug=True
# launch() presumably blocks until the server stops — confirm, and move them
# before launch() if they are meant as start-up messages.
print("✅ PDF聊天機器人已成功啟動!")
print("📍 請點選上方顯示的連結來存取應用程式")