# zsk / app.py
# ikun520's picture
# Update app.py
# 8b1e918 verified
import gradio as gr
import json
import time
from model_utils import matter, embeddings_model
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
# def create_knowledge_base(file_name, file_path):
# """
# 创建知识库
# :param file_name: 知识库的名称
# :param file_path: 上传的PDF文件路径
# :return: None
# """
# try:
# if file_name is None or file_path is None:
# raise gr.Error("创建知识库失败,文件名或文件路径为空!")
# # 加载 PDF 文件并分割成文档页
# loader = PyPDFLoader(file_path)
# pages = loader.load_and_split()
# # 使用 FAISS 创建向量数据库,传入文档和 HuggingFaceEmbeddings 模型
# vector_db = FAISS.from_documents(pages, embeddings_model)
# vector_db.save_local(file_name)
# gr.Info("知识库创建成功!")
# except Exception as e:
# raise gr.Error(f"创建知识库失败,错误信息:{e}")
import fitz # PyMuPDF
def create_knowledge_base(file_name, file_path):
    """
    Build a FAISS vector store from an uploaded PDF and save it locally.

    :param file_name: name of the knowledge base; passed to ``save_local``
        as the destination.
    :param file_path: the uploaded PDF, either a filesystem path or an
        upload object that exposes its path as ``.name``.
    :return: None
    :raises gr.Error: if an argument is missing/blank, if no text can be
        extracted from the PDF, or if reading, embedding or saving fails.
    """
    # An empty Textbox delivers "" rather than None, so test for blank values
    # too. The check lives outside the try block so its message is not
    # wrapped a second time by the generic handler below.
    if not file_name or not str(file_name).strip() or not file_path:
        raise gr.Error("创建知识库失败,文件名或文件路径为空!")

    # Plain paths are used as-is; anything else is assumed to be an upload
    # wrapper carrying the temp-file path in ``.name``.
    if isinstance(file_path, str) or hasattr(file_path, "__fspath__"):
        pdf_path = file_path
    else:
        pdf_path = getattr(file_path, "name", file_path)

    try:
        texts = []
        metadatas = []
        # The context manager closes the PDF even if extraction fails.
        with fitz.open(pdf_path) as doc:
            for page_number, page in enumerate(doc):
                text = page.get_text("text")
                # Skip pages without extractable text (e.g. scanned images).
                if text.strip():
                    texts.append(text)
                    metadatas.append({"page": page_number})

        if not texts:
            raise ValueError("PDF 中未提取到任何文本")

        # The pages are raw strings, not Document objects, so from_texts is
        # the matching constructor (from_documents reads .page_content).
        vector_db = FAISS.from_texts(texts, embeddings_model, metadatas=metadatas)
        vector_db.save_local(file_name)
    except Exception as e:
        raise gr.Error(f"创建知识库失败,错误信息:{e}") from e

    gr.Info("知识库创建成功!")
def call_model(file_name, temperature, model, prompt, chatbot):
    """
    Ask the model to answer the latest chat turn and store the reply in it.

    :param file_name: knowledge base name (accepted for the UI wiring, not
        used by this function).
    :param temperature: sampling temperature (accepted, not used here).
    :param model: selected model name (accepted, not used here).
    :param prompt: system prompt text (accepted, not used here).
    :param chatbot: chat history as a list of ``[user_message, reply]``
        pairs; the last pair is updated in place.
    :return: the same ``chatbot`` object.
    """
    if not chatbot:
        return chatbot

    last_turn = chatbot[-1]
    # When the user submits an empty message the history is left untouched,
    # so the last turn already holds its answer; do not query the model again.
    if last_turn[1]:
        return chatbot

    last_turn[1] = matter(last_turn[0])
    return chatbot
def user_input(user_message, chat_history):
    """
    Queue the user's message as a new, unanswered turn in the chat history.

    :param user_message: text typed by the user.
    :param chat_history: existing list of ``[user_message, reply]`` pairs,
        or None when there is no history yet.
    :return: a tuple of an empty string and the history; with a non-empty
        message the history is a new list ending in ``[user_message, None]``,
        otherwise it is ``chat_history`` unchanged.
    """
    if user_message:
        history = chat_history if chat_history is not None else []
        pending_turn = [user_message, None]
        return "", history + [pending_turn]
    return "", chat_history
def respond(chat_history):
    """
    Replay the last reply piece by piece to give a typing effect.

    The reply of the last turn is cleared and then rebuilt one element at a
    time; after each element the function sleeps 0.05 s and yields the
    (mutated) history.

    :param chat_history: list of ``[user_message, reply]`` pairs.
    :return: generator yielding ``chat_history`` after every appended piece;
        if there is no history or the last reply is empty/None, the history
        is yielded once, unchanged.
    """
    # Nothing to stream: an empty history would fail on [-1], and a None
    # reply cannot be iterated.
    if not chat_history or not chat_history[-1][1]:
        yield chat_history
        return

    content = chat_history[-1][1]
    chat_history[-1][1] = ""
    for chat in content:
        chat_history[-1][1] += chat
        time.sleep(0.05)
        yield chat_history
# Build the Gradio interface: orchestration settings in the narrow column,
# chat preview in the wide column.
with gr.Blocks(title="PdfReader") as demo:
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 编排")
            prompt = gr.Textbox(
                label="提示词",
                lines=5,
                value="我希望您能够充当文档评审专家,用下面的内容作为你的知识库,回答用户提出的问题。",
            )
            temperature = gr.Slider(
                label="temperature",
                minimum=0.0,
                maximum=1.0,
                step=0.1,
                value=0.2,
            )
            pdf = gr.File(label="Upload a PDF")
            file_name = gr.Textbox(label="知识库名称")
            create_btn = gr.Button("创建本地知识库")
        with gr.Column(scale=2):
            gr.Markdown("### 调试与预览")
            model = gr.Dropdown(
                choices=["spark-gpt", "spark-gpt-pro"],
                label="模型",
                value="spark-gpt",
            )
            chatbot = gr.Chatbot()
            query = gr.Textbox()
            with gr.Row():
                submit = gr.Button("发送")
                clear_btn = gr.Button("清空")

    # Wire the buttons to their handlers.
    create_btn.click(
        create_knowledge_base,
        inputs=[file_name, pdf],
        outputs=None,
    )

    # Sending runs three steps in order: queue the message, ask the model,
    # then stream the reply into the chat window.
    _queued = submit.click(
        user_input,
        inputs=[query, chatbot],
        outputs=[query, chatbot],
    )
    _answered = _queued.then(
        call_model,
        [file_name, temperature, model, prompt, chatbot],
        [chatbot],
    )
    _answered.then(respond, [chatbot], [chatbot])

    # Clearing empties the input box and resets the chat history.
    clear_btn.click(fn=lambda: ("", None), outputs=[query, chatbot])

# Start the Gradio app.
demo.launch()