import os
from pathlib import Path
from typing import List, Dict, Any

import gradio as gr
from langchain_community.document_loaders import TextLoader, PyPDFLoader, Docx2txtLoader
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain_text_splitters import RecursiveCharacterTextSplitter


# Chat model used to generate answers. Endpoint, key and model name are read
# from the environment so no credentials live in the source; only the model
# name has a fallback value.
# NOTE(review): the LITELLM_BASE_URL name suggests a LiteLLM proxy — confirm.
llm = ChatOpenAI(
    base_url=os.getenv("LITELLM_BASE_URL"),
    api_key=os.getenv("OPENAI_API_KEY"),
    model=os.getenv("LLM_MODEL", "azure-gpt-4.1"),
    temperature=0.3,
)


# Embedding model handed to FAISS.from_documents when a knowledge base is built.
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")


def format_docs(docs):
    """Join the ``page_content`` of each document into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The contents separated by a blank line; ``""`` when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Answering prompt: instructs the model to rely on the retrieved context and to
# say so explicitly instead of inventing an answer. The template exposes two
# variables, {context} and {question}.
prompt = PromptTemplate.from_template(
    """你是一個有幫助且誠實的助手,請根據以下提供的上下文來回答問題。
如果上下文不足以回答,請直接說「根據提供的文件,我無法回答這個問題。」,不要編造答案。

上下文:
{context}

問題:{question}
回答:"""
)


def _loader_for(file_path: str):
    """Return the document loader matching the extension of ``file_path``."""
    lowered = file_path.lower()
    if lowered.endswith(".pdf"):
        return PyPDFLoader(file_path)
    if lowered.endswith(".docx"):
        return Docx2txtLoader(file_path)
    # Every other extension is read as UTF-8 plain text.
    return TextLoader(file_path, encoding="utf-8")


def upload_and_build_db(files: List[Any], vectorstore_state: "FAISS | None"):
    """Load the uploaded files, chunk them and build a new FAISS index.

    Args:
        files: Uploaded files, either path strings or objects whose path is
            stored in a ``name`` attribute.
        vectorstore_state: The vector store currently held in session state;
            returned unchanged whenever building fails.

    Returns:
        A ``(status message, None, vector store)`` tuple matching the
        ``[status, file_input, vectorstore_state]`` outputs. The vector store
        is the newly built index on success and the previous state otherwise.
    """
    if not files:
        return "請上傳至少一個文件。", None, vectorstore_state

    docs = []
    for file in files:
        # Accept both plain path strings and file-like objects exposing
        # `.name`; which one arrives depends on the Gradio version — TODO
        # confirm against the pinned release.
        file_path = str(getattr(file, "name", file))
        try:
            docs.extend(_loader_for(file_path).load())
        except Exception as e:
            # One unreadable file aborts the whole build and is reported by name.
            return f"載入檔案失敗:{os.path.basename(file_path)},錯誤:{str(e)}", None, vectorstore_state

    if not docs:
        return "沒有成功載入任何文件內容。", None, vectorstore_state

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    if not splits:
        # Documents without extractable text produce zero chunks; report that
        # instead of passing an empty list to FAISS.from_documents.
        return "沒有成功載入任何文件內容。", None, vectorstore_state

    new_vectorstore = FAISS.from_documents(splits, embeddings)

    # Count uploaded files rather than loader Documents: loaders may return
    # several Documents per file (e.g. one per PDF page).
    success_msg = f"成功載入 {len(files)} 個文件,共 {len(splits)} 個區塊,已建立專屬知識庫!現在可以開始提問。"
    return success_msg, None, new_vectorstore


def rag_answer(question: str, history: List[Dict], vectorstore_state: "FAISS | None"):
    """Answer ``question`` from the knowledge base and extend the chat history.

    Args:
        question: Text submitted from the question box.
        history: Existing conversation as ``{"role", "content"}`` dicts; it is
            never mutated. ``None`` is treated as an empty conversation.
        vectorstore_state: The FAISS index built from the uploaded files, or
            ``None`` when no knowledge base exists yet.

    Returns:
        ``("", new_history)`` matching the ``[msg, chatbot]`` outputs: the
        empty string resets the question box.
    """
    history = history or []

    # A blank submission has nothing to answer; skip retrieval and the LLM call.
    if not question or not question.strip():
        return "", history

    user_message = {"role": "user", "content": question}

    if vectorstore_state is None:
        # Keep the user's question in the transcript, as the normal path does.
        notice = {"role": "assistant", "content": "請先上傳文件並點擊「建立知識庫」。"}
        return "", history + [user_message, notice]

    retriever = vectorstore_state.as_retriever(search_kwargs={"k": 4})

    # The question goes to the retriever (whose hits are flattened by
    # format_docs) and, unchanged, into the prompt's {question} slot.
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    try:
        response = chain.invoke(question)
    except Exception as e:
        # Surface the failure in the chat instead of raising out of the handler.
        response = f"回答時發生錯誤:{str(e)}"

    return "", history + [user_message, {"role": "assistant", "content": response}]


def clear_chat():
    """Reset the question box and the conversation.

    Returns:
        ``("", [])``: empty text for the question box and an empty history,
        matching the ``[msg, chatbot]`` outputs of the clear button.
    """
    return "", []


# UI definition. Components render in creation order, so the statement order
# below is significant.
# NOTE(review): the original indentation was lost; the `with` nesting here is
# reconstructed from the blank-line grouping and the `scale=` arguments —
# confirm the layout.
with gr.Blocks(title="個人 RAG 問答系統", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 📚 個人 RAG 問答系統\n上傳你的 TXT、PDF、DOCX 文件,建立專屬知識庫,然後向它提問!")

    # Session-scoped holder for the FAISS index; None until one is built.
    vectorstore_state = gr.State(None)

    with gr.Row():
        file_input = gr.File(
            label="上傳文件(支援 .txt、.pdf、.docx,可多檔)",
            file_count="multiple",
            type="filepath"
        )

    with gr.Row():
        build_btn = gr.Button("建立知識庫", variant="primary", scale=1)
        clear_btn = gr.Button("清除對話", variant="secondary", scale=1)

    status = gr.Textbox(label="狀態訊息", interactive=False)

    # NOTE(review): rag_answer returns {"role", "content"} dicts. Depending on
    # the installed Gradio version the Chatbot may need type="messages" to
    # accept them — confirm against the pinned release.
    chatbot = gr.Chatbot(
        height=500,
        value=[],
        label="對話紀錄",
        avatar_images=("https://em-content.zobj.net/source/apple/391/man-technologist_1f468-200d-1f4bb.png",
                       "https://em-content.zobj.net/source/apple/391/robot_1f916.png")
    )

    msg = gr.Textbox(
        label="你的問題",
        placeholder="在這裡輸入問題,按 Enter 送出...",
        scale=7
    )

    # Build button: (files, current store) -> (status text, file input, store).
    build_btn.click(
        fn=upload_and_build_db,
        inputs=[file_input, vectorstore_state],
        outputs=[status, file_input, vectorstore_state]
    )

    # Enter in the question box: answer and update the chat.
    msg.submit(
        fn=rag_answer,
        inputs=[msg, chatbot, vectorstore_state],
        outputs=[msg, chatbot]
    )

    # Clear button: empty the question box and the chat history.
    clear_btn.click(
        fn=clear_chat,
        inputs=None,
        outputs=[msg, chatbot]
    )

demo.launch()