Spaces:

cjian2025
/

ResendLangChain

Sleeping

App Files Files Community

ResendLangChain / app.py

cjian2025

Update app.py

b04747b verified 6 months ago

raw

history blame contribute delete

19 kB

	# 第二步：匯入必要的庫並初始化
	import os
	import gradio as gr
	from PyPDF2 import PdfReader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
	from langchain_community.vectorstores import FAISS
	from langchain.chains.question_answering import load_qa_chain
	from langchain.prompts import PromptTemplate
	import shutil
	import tempfile
	from docx import Document
	from docx.shared import Inches
	from datetime import datetime
	import resend

	print("📦 所有庫匯入成功！")

	# Step 3: configure API keys.
	print("🔑 設置API密鑰...")

	# Secrets are read from the environment (for example Hugging Face Space
	# secrets) instead of being committed to the repository. The keys that were
	# previously hard-coded in this file are public and must be revoked/rotated.
	# GEMINI_API_KEY is accepted as a fallback name for the Gemini key.
	gemini_api_key = os.environ.get("GOOGLE_API_KEY") or os.environ.get("GEMINI_API_KEY")
	if gemini_api_key:
	    # Preserve the original side effect of exporting the key as GOOGLE_API_KEY.
	    os.environ["GOOGLE_API_KEY"] = gemini_api_key
	else:
	    print("⚠️ 未設定環境變數 GOOGLE_API_KEY，Gemini 功能將無法使用。")

	# Resend API key, used for sending the chat history by email.
	resend.api_key = os.environ.get("RESEND_API_KEY")
	if not resend.api_key:
	    print("⚠️ 未設定環境變數 RESEND_API_KEY，郵件發送功能將無法使用。")

	print("✅ API密鑰設置完成！")

	# Step 4: define the PDF chatbot class.
	class PDFChatBot:
	    """Question-answering helper over uploaded PDF files.

	    Extracts text from PDFs, indexes it in a FAISS vector store built with
	    Google Generative AI embeddings, answers questions through a Gemini chat
	    model, and exports the conversation as a .docx report or an HTML email.

	    Relies on the module-level ``gemini_api_key`` and on ``resend.api_key``
	    having been configured before use.
	    """

	    # Directory where the FAISS index is persisted between runs.
	    _INDEX_DIR = "faiss_index"

	    def __init__(self):
	        """Create the embedding client and start with empty state."""
	        # FAISS index; stays None until PDFs are processed or an index is loaded.
	        self.vector_store = None
	        self.embeddings = GoogleGenerativeAIEmbeddings(
	            model="models/text-embedding-004",
	            google_api_key=gemini_api_key
	        )
	        # Base names of the PDFs that yielded text in the last processing run.
	        self.processed_files = []
	        # Chat messages as dicts with "role" / "content" keys.
	        self.chat_history = []

	    @staticmethod
	    def _qa_pairs(chat_history):
	        """Yield ``(number, question, answer)`` for consecutive message pairs.

	        Messages are paired positionally (0-1, 2-3, ...); a trailing unpaired
	        message is ignored. Numbering starts at 1.
	        """
	        pairs = zip(chat_history[::2], chat_history[1::2])
	        for number, (asked, replied) in enumerate(pairs, 1):
	            yield number, asked['content'], replied['content']

	    def get_pdf_text(self, pdf_files):
	        """Extract the text of one or more PDF files.

	        Args:
	            pdf_files: A path or an object with a ``name`` attribute holding
	                the path, or a list of them. Falsy input yields no text.

	        Returns:
	            Tuple ``(raw_text, processed_count)``: the concatenated text of all
	            readable files and the number of files that yielded any text.

	        Side effects:
	            Appends the base name of every file that yielded text to
	            ``self.processed_files``. Files that fail to parse are reported
	            with ``print`` and skipped (best effort).
	        """
	        raw_text = ""
	        processed_count = 0

	        if not pdf_files:
	            return raw_text, processed_count

	        if not isinstance(pdf_files, list):
	            pdf_files = [pdf_files]

	        collected = []
	        for pdf_file in pdf_files:
	            try:
	                pdf_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
	                pdf_reader = PdfReader(pdf_path)
	                page_texts = (page.extract_text() for page in pdf_reader.pages)
	                # Pages without extractable text (None / "") are skipped.
	                file_text = "".join(text + "\n" for text in page_texts if text)

	                if file_text.strip():
	                    collected.append(file_text)
	                    processed_count += 1
	                    self.processed_files.append(os.path.basename(pdf_path))

	            except Exception as e:
	                # Deliberate best effort: one broken PDF must not abort the batch.
	                print(f"讀取PDF時發生錯誤：{str(e)}")

	        raw_text = "".join(collected)
	        return raw_text, processed_count

	    def get_text_chunks(self, text):
	        """Split ``text`` on newlines into overlapping chunks for embedding.

	        Uses a chunk size of 10000 characters with 1000 characters of overlap.

	        Returns:
	            The list of chunk strings produced by ``CharacterTextSplitter``.
	        """
	        text_splitter = CharacterTextSplitter(
	            separator="\n",
	            chunk_size=10000,
	            chunk_overlap=1000,
	            length_function=len
	        )
	        return text_splitter.split_text(text)

	    def create_vector_store(self, chunks):
	        """Build a FAISS index from ``chunks`` and persist it to disk.

	        Returns:
	            ``True`` on success, ``False`` if building or saving failed (the
	            error is printed).
	        """
	        try:
	            self.vector_store = FAISS.from_texts(chunks, self.embeddings)
	            self.vector_store.save_local(self._INDEX_DIR)
	            return True
	        except Exception as e:
	            print(f"建立向量儲存時發生錯誤：{str(e)}")
	            return False

	    def load_vector_store(self):
	        """Load a previously persisted FAISS index, if one exists.

	        Returns:
	            ``True`` if an index was loaded, ``False`` if none exists or loading
	            failed (the error is printed).
	        """
	        try:
	            if not os.path.exists(self._INDEX_DIR):
	                return False
	            # SECURITY: allow_dangerous_deserialization enables pickle loading.
	            # That is only acceptable because this directory is written by
	            # create_vector_store(); never point it at untrusted files.
	            self.vector_store = FAISS.load_local(
	                self._INDEX_DIR,
	                embeddings=self.embeddings,
	                allow_dangerous_deserialization=True
	            )
	            return True
	        except Exception as e:
	            print(f"載入向量儲存時發生錯誤：{str(e)}")
	            return False

	    def get_conversational_chain(self):
	        """Build the "stuff" question-answering chain around the Gemini model.

	        The prompt instructs the model to answer only from the supplied
	        context and to say so when the answer is not present.

	        Returns:
	            The chain created by ``load_qa_chain``; it expects the inputs
	            ``input_documents`` and ``question``.
	        """
	        prompt_template = """
	根據提供的內容盡可能詳細地回答問題。確保提供所有細節。
	如果你需要更多細節來完美回答問題，那麼請詢問你認為需要了解的更多細節。
	如果答案不在提供的內容中，只需說"在您提供的內容中找不到答案"。不要提供錯誤的答案。

	內容:\n {context}\n
	問題: \n{question}\n

	回答:
	"""

	        model = ChatGoogleGenerativeAI(
	            model="gemini-2.0-flash-exp",
	            google_api_key=gemini_api_key,
	            temperature=0.3,
	            max_tokens=8192,
	            top_p=0.8,
	            top_k=40
	        )

	        prompt = PromptTemplate(
	            template=prompt_template,
	            input_variables=['context', 'question']
	        )

	        return load_qa_chain(model, chain_type="stuff", prompt=prompt)

	    def answer_question(self, question):
	        """Answer ``question`` from the indexed PDF content.

	        Args:
	            question: The user's question; ``None`` or blank text is rejected.

	        Returns:
	            The model's answer, or a user-facing message when no index is
	            loaded, the question is empty, nothing relevant was found, or an
	            error occurred. Never raises.
	        """
	        if not self.vector_store:
	            return "請先上傳並處理PDF檔案！"

	        # Guard against None as well as whitespace-only input.
	        if not question or not question.strip():
	            return "請輸入您的問題。"

	        try:
	            docs = self.vector_store.similarity_search(question, k=6)
	            if not docs:
	                return "在上傳的文件中找不到相關資訊。"

	            chain = self.get_conversational_chain()
	            # NOTE(review): calling a chain directly is reported as deprecated
	            # in newer LangChain releases in favour of .invoke() — confirm
	            # against the pinned version before changing.
	            response = chain(
	                {
	                    "input_documents": docs,
	                    "question": question,
	                },
	                return_only_outputs=True
	            )

	            return response["output_text"]

	        except Exception as e:
	            return f"處理問題時發生錯誤：{str(e)}"

	    def process_pdfs(self, pdf_files, progress=gr.Progress()):
	        """Extract, chunk and index the uploaded PDFs.

	        Args:
	            pdf_files: Uploaded files as accepted by ``get_pdf_text``.
	            progress: Gradio progress tracker used to report each stage.

	        Returns:
	            Tuple ``(status_message, file_list_text)``; the second element is
	            empty unless processing succeeded.
	        """
	        if not pdf_files:
	            return "請上傳至少一個PDF檔案。", ""

	        # Each run replaces the previous list of processed files.
	        self.processed_files = []
	        progress(0, desc="開始處理PDF檔案...")

	        progress(0.2, desc="提取PDF文字內容...")
	        raw_text, processed_count = self.get_pdf_text(pdf_files)

	        if not raw_text.strip():
	            return "無法從PDF檔案中提取到文字。", ""

	        progress(0.4, desc="分割文字內容...")
	        text_chunks = self.get_text_chunks(raw_text)

	        progress(0.6, desc="建立向量儲存...")
	        success = self.create_vector_store(text_chunks)

	        progress(1.0, desc="處理完成!")

	        if not success:
	            return "❌ PDF處理失敗，請重試。", ""

	        file_list = "已處理的檔案:\n" + "\n".join(f"• {name}" for name in self.processed_files)
	        return f"✅ 成功處理 {processed_count} 個PDF檔案！\n總共 {len(text_chunks)} 個文字區塊\n現在您可以開始提問。", file_list

	    def clear_data(self):
	        """Delete the persisted index and reset all in-memory state.

	        Returns:
	            Tuple ``(status_message, "")``.
	        """
	        try:
	            if os.path.exists(self._INDEX_DIR):
	                shutil.rmtree(self._INDEX_DIR)
	            self.vector_store = None
	            self.processed_files = []
	            self.chat_history = []
	            return "✅ 已清除所有處理過的資料！", ""
	        except Exception as e:
	            return f"❌ 清除資料時發生錯誤：{str(e)}", ""

	    def create_docx_report(self, chat_history):
	        """Write the conversation to a temporary .docx file.

	        Args:
	            chat_history: List of message dicts with a ``content`` key,
	                alternating question / answer.

	        Returns:
	            Path of the generated file, or ``None`` if generation failed (the
	            error is printed). The caller owns the file; it is not deleted
	            automatically.
	        """
	        try:
	            doc = Document()
	            title = doc.add_heading('PDF聊天機器人 - 問答記錄', 0)
	            title.alignment = 1  # 1 == WD_ALIGN_PARAGRAPH.CENTER

	            doc.add_paragraph(f'產生時間：{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}')

	            if self.processed_files:
	                doc.add_heading('已處理的PDF檔案：', level=2)
	                for position, file_name in enumerate(self.processed_files, 1):
	                    doc.add_paragraph(f'{position}. {file_name}', style='List Number')

	            doc.add_paragraph('')
	            doc.add_heading('問答記錄：', level=2)

	            if not chat_history:
	                doc.add_paragraph('目前沒有問答記錄。')
	            else:
	                for number, question, answer in self._qa_pairs(chat_history):
	                    q_paragraph = doc.add_paragraph()
	                    q_run = q_paragraph.add_run(f'問題 {number}：')
	                    q_run.bold = True
	                    # font.size takes a length; 0.14 in is roughly 10 pt.
	                    q_run.font.size = Inches(0.14)
	                    q_paragraph.add_run(question)

	                    a_paragraph = doc.add_paragraph()
	                    a_run = a_paragraph.add_run('回答：')
	                    a_run.bold = True
	                    a_run.font.size = Inches(0.14)
	                    a_paragraph.add_run(answer)

	                    doc.add_paragraph('─' * 50)

	            # Reserve a unique path and close the handle BEFORE python-docx
	            # writes to it: saving into a still-open temp file fails on
	            # Windows and leaked the handle if save() raised.
	            with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
	                report_path = temp_file.name
	            doc.save(report_path)

	            return report_path

	        except Exception as e:
	            print(f"建立docx檔案時發生錯誤：{str(e)}")
	            return None

	    def create_email_html_content(self, chat_history):
	        """Render the conversation as an HTML document for email.

	        File names, questions and answers are HTML-escaped before being
	        embedded so that user- or model-supplied markup cannot alter the
	        message; newlines become ``<br>``.

	        Args:
	            chat_history: List of message dicts with a ``content`` key,
	                alternating question / answer.

	        Returns:
	            The HTML string, or a short placeholder paragraph when there is no
	            history.
	        """
	        from html import escape

	        if not chat_history:
	            return "<p>目前沒有問答記錄。</p>"

	        def as_html(text):
	            # Escape first, then turn line breaks into <br>.
	            return escape(str(text)).replace('\n', '<br>')

	        header = f"""
	<html>
	<head>
	<style>
	body {{ font-family: Arial, sans-serif; line-height: 1.6; color: #333; }}
	.header {{ background-color: #f4f4f4; padding: 20px; text-align: center; }}
	.content {{ padding: 20px; }}
	.question {{ background-color: #e8f4f8; padding: 10px; margin: 10px 0; border-left: 4px solid #2196F3; }}
	.answer {{ background-color: #f0f8e8; padding: 10px; margin: 10px 0; border-left: 4px solid #4CAF50; }}
	.file-list {{ background-color: #fff3cd; padding: 10px; margin: 10px 0; border: 1px solid #ffeeba; }}
	hr {{ border: none; border-top: 1px solid #ddd; margin: 20px 0; }}
	</style>
	</head>
	<body>
	<div class="header">
	<h1>🤖 PDF聊天機器人 - 問答記錄</h1>
	<p>產生時間：{datetime.now().strftime("%Y年%m月%d日 %H:%M:%S")}</p>
	</div>
	<div class="content">
	"""
	        parts = [header]

	        if self.processed_files:
	            parts.append("""
	<div class="file-list">
	<h3>📁 已處理的PDF檔案：</h3>
	<ul>
	""")
	            parts.extend(f"<li>{escape(str(name))}</li>" for name in self.processed_files)
	            parts.append("</ul></div>")

	        parts.append("<h3>💬 問答記錄：</h3>")

	        for number, question, answer in self._qa_pairs(chat_history):
	            question_html = as_html(question)
	            answer_html = as_html(answer)

	            parts.append(f"""
	<div class="question">
	<strong>問題 {number}：</strong><br>
	{question_html}
	</div>
	<div class="answer">
	<strong>回答：</strong><br>
	{answer_html}
	</div>
	<hr>
	""")

	        parts.append("""
	</div>
	</body>
	</html>
	""")

	        return "".join(parts)

	    def send_chat_history_email(self, recipient_email):
	        """Email the stored chat history as HTML through Resend.

	        Args:
	            recipient_email: Destination address; surrounding whitespace is
	                ignored and the value must contain ``@``.

	        Returns:
	            A user-facing status message (success with the mail id, or the
	            reason for failure). Never raises.
	        """
	        if not self.chat_history:
	            return "❌ 沒有聊天記錄可以發送！"

	        # Tolerate None and stray whitespace pasted into the text box.
	        recipient_email = (recipient_email or "").strip()
	        if "@" not in recipient_email:
	            return "❌ 請輸入有效的信箱地址！"

	        try:
	            html_content = self.create_email_html_content(self.chat_history)

	            r = resend.Emails.send({
	                "from": "onboarding@resend.dev",
	                "to": recipient_email,
	                "subject": f"PDF聊天機器人問答記錄 - {datetime.now().strftime('%Y-%m-%d %H:%M')}",
	                "html": html_content
	            })

	            return f"✅ 郵件已成功發送到 {recipient_email}！\n郵件ID: {r.get('id', 'Unknown')}"

	        except Exception as e:
	            return f"❌ 發送郵件時發生錯誤：{str(e)}"

	# Step 5: create the single chatbot instance used by the UI callbacks below.
	print("🤖 初始化PDF聊天機器人...")
	bot = PDFChatBot()

	# Step 6: define the Gradio callback functions.
	def upload_and_process(files, progress=gr.Progress()):
	    """Gradio callback: process the uploaded PDFs with the shared bot.

	    Returns the ``(status_message, file_list_text)`` tuple produced by
	    ``bot.process_pdfs``.
	    """
	    outcome = bot.process_pdfs(files, progress)
	    return outcome

	def ask_question(question, history):
	    """Gradio callback: answer ``question`` and append the exchange to the chat.

	    Args:
	        question: Text from the input box; ``None`` or blank text is ignored.
	        history: Current chatbot messages (list of role/content dicts), or
	            ``None`` when the chat is empty.

	    Returns:
	        Tuple ``(history, "")``: the updated message list and an empty string
	        that clears the input box. ``history`` is extended in place and a copy
	        is stored on ``bot.chat_history`` for export.
	    """
	    # An empty chat may arrive as None; normalise so appending cannot fail.
	    if history is None:
	        history = []

	    if not question or not question.strip():
	        return history, ""

	    response = bot.answer_question(question)
	    history.extend([
	        {"role": "user", "content": question},
	        {"role": "assistant", "content": response},
	    ])
	    bot.chat_history = history.copy()

	    return history, ""

	def download_chat_history():
	    """Build a .docx report of the stored chat history.

	    Returns the report path from ``bot.create_docx_report``, or ``None`` when
	    there is no history to export.
	    """
	    messages = bot.chat_history
	    return bot.create_docx_report(messages) if messages else None

	def send_email(email_address):
	    """Gradio callback: email the stored chat history to ``email_address``.

	    Returns the status message produced by ``bot.send_chat_history_email``.
	    """
	    status = bot.send_chat_history_email(email_address)
	    return status

	def clear_chat():
	    """Gradio callback: forget the stored chat history.

	    Returns an empty message list for the chatbot and an empty string for the
	    question box.
	    """
	    bot.chat_history = []
	    emptied_messages, emptied_input = [], ""
	    return emptied_messages, emptied_input

	def clear_all_data():
	    """Gradio callback: wipe the index and all bot state via ``bot.clear_data``.

	    Returns the ``(status_message, "")`` tuple from ``bot.clear_data``.
	    """
	    outcome = bot.clear_data()
	    return outcome

	def load_existing_data():
	    """Gradio callback: try to load a previously saved vector store.

	    Returns ``(status_message, "")``; the message reports whether
	    ``bot.load_vector_store`` succeeded.
	    """
	    loaded = bot.load_vector_store()
	    status = "✅ 成功載入已處理的資料！" if loaded else "❌ 沒有找到已處理的資料。"
	    return status, ""

	# Step 7: build the Gradio interface.
	print("🎨 建立使用者介面...")

	with gr.Blocks(title="PDF聊天機器人", theme=gr.themes.Soft()) as demo:
	    # Page header.
	    gr.Markdown(
	        """
	# 🤖 PDF聊天機器人 (Flash 2.0 + 郵件發送)

	上傳您的PDF檔案，然後就可以向文件提問！支援多語言問答並可將記錄發送到信箱。

	🔥 在Hugging Face中執行
	"""
	    )

	    # Tab 1: upload PDFs and manage the index.
	    with gr.Tab("📁 檔案處理"):
	        with gr.Row():
	            with gr.Column(scale=2):
	                file_upload = gr.File(
	                    file_count="multiple",
	                    file_types=[".pdf"],
	                    label="上傳PDF檔案",
	                    height=200
	                )

	                with gr.Row():
	                    process_btn = gr.Button("🚀 處理PDF檔案", variant="primary", size="lg")
	                    load_btn = gr.Button("📂 載入已處理資料", variant="secondary")
	                    clear_btn = gr.Button("🗑️ 清除資料", variant="stop")

	            with gr.Column(scale=1):
	                status_text = gr.Textbox(
	                    label="處理狀態",
	                    lines=8,
	                    interactive=False
	                )

	                file_list = gr.Textbox(
	                    label="已處理檔案",
	                    lines=6,
	                    interactive=False
	                )

	    # Tab 2: chat with the indexed documents.
	    with gr.Tab("💬 問答聊天"):
	        chatbot = gr.Chatbot(
	            label="聊天記錄",
	            height=500,
	            show_copy_button=True,
	            type="messages"
	        )

	        with gr.Row():
	            question_input = gr.Textbox(
	                placeholder="請輸入您的問題...",
	                label="問題",
	                lines=2,
	                scale=4
	            )
	            ask_btn = gr.Button("📤 提問", variant="primary", scale=1)

	        with gr.Row():
	            clear_chat_btn = gr.Button("🧹 清除聊天記錄", variant="secondary", scale=1)
	            download_btn = gr.Button("📥 下載問答記錄", variant="primary", scale=1)

	        # Hidden until a report has been generated by the download handler.
	        download_file = gr.File(visible=False)

	        sample_questions = [
	            "這份文件的主要內容是什麼？",
	            "請總結文件的重點。",
	            "文件中提到了哪些重要概念？",
	            "能否詳細解釋某個特定主題？"
	        ]
	        gr.Examples(
	            examples=sample_questions,
	            inputs=question_input,
	            label="問題範例"
	        )

	    # Tab 3: send the chat history by email.
	    with gr.Tab("📧 郵件發送"):
	        gr.Markdown(
	            """
	### 📮 發送聊天記錄到信箱

	將您的問答記錄以精美的HTML格式發送到指定信箱，方便保存和分享。
	"""
	        )

	        with gr.Row():
	            with gr.Column(scale=2):
	                # NOTE(review): the pre-filled recipient is a hard-coded
	                # personal address — confirm it should ship as the default.
	                email_input = gr.Textbox(
	                    label="收件人信箱",
	                    placeholder="請輸入有效的信箱地址...",
	                    value="grace.chenyiwen@gmail.com"
	                )

	                send_email_btn = gr.Button("📧 發送聊天記錄", variant="primary", size="lg")

	            with gr.Column(scale=1):
	                email_status = gr.Textbox(
	                    label="發送狀態",
	                    lines=6,
	                    interactive=False
	                )

	        gr.Markdown(
	            """
	注意事項：
	- 請確保您已經有一些問答記錄
	- 郵件將包含所有處理過的PDF檔案清單和完整的問答記錄
	- 郵件格式為HTML，在大多數郵件用戶端中都能正常顯示
	"""
	        )

	    # Event handlers.
	    def handle_download():
	        """Generate the report and reveal the file component, or warn if empty."""
	        file_path = download_chat_history()
	        if not file_path:
	            gr.Warning("沒有聊天記錄可以下載！")
	            return gr.update(visible=False)
	        return gr.update(value=file_path, visible=True)

	    # Event bindings.
	    index_outputs = [status_text, file_list]

	    process_btn.click(
	        fn=upload_and_process,
	        inputs=[file_upload],
	        outputs=index_outputs,
	        show_progress=True
	    )
	    load_btn.click(fn=load_existing_data, outputs=index_outputs)
	    clear_btn.click(fn=clear_all_data, outputs=index_outputs)

	    # The button and the Enter key in the text box trigger the same handler.
	    ask_wiring = dict(
	        fn=ask_question,
	        inputs=[question_input, chatbot],
	        outputs=[chatbot, question_input]
	    )
	    ask_btn.click(**ask_wiring)
	    question_input.submit(**ask_wiring)

	    clear_chat_btn.click(fn=clear_chat, outputs=[chatbot, question_input])

	    download_btn.click(fn=handle_download, outputs=download_file)

	    send_email_btn.click(
	        fn=send_email,
	        inputs=[email_input],
	        outputs=[email_status]
	    )

	# Step 8: launch the application.
	print("🚀 啟動應用程式中...")

	# Try to load an existing vector store so a previously built index is reused.
	bot.load_vector_store()

	# Launch the app (written with Hugging Face hosting in mind).
	# NOTE(review): launch() presumably blocks when run as a script, in which case
	# the two prints after it only run once the server stops — confirm.
	demo.launch(
	share=True, # original intent: set to True on Hugging Face to get a public link; NOTE(review): confirm share links work on Spaces
	server_name="0.0.0.0", # allow external access
	server_port=None,
	show_error=True,
	debug=True
	)

	print("✅ PDF聊天機器人已成功啟動！")
	print("📍 請點選上方顯示的連結來存取應用程式")