"""RAG question-answering bot over a PDF whitepaper, served via Gradio.

Pipeline: load PDF -> split into chunks -> embed into an in-memory Chroma
store -> answer queries with a ConversationalRetrievalChain backed by
gpt-4o-mini. Requires OPENAI_API_KEY in the environment.
"""

from langchain_community.vectorstores import Chroma
# PyPDFLoader moved out of langchain core; import from langchain_community.
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI
import gradio as gr

# Load the source document.
file_path = "mozilla.pdf"
loader = PyPDFLoader(file_path)

# Split the text into chunks; no overlap keeps chunks disjoint.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = loader.load_and_split(splitter)

# Build the local vector store from the chunk embeddings.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(texts, embeddings)

# Build the RAG chain. temperature=0 for deterministic, grounded answers.
QA_chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(model="gpt-4o-mini", temperature=0),
    vectorstore.as_retriever(),
)


def generate_response(query):
    """Answer *query* against the indexed PDF.

    Each call is stateless: an empty chat_history is passed, so no
    conversational context carries over between Gradio submissions.
    """
    # Chain.__call__ is deprecated in LangChain >= 0.1; use .invoke().
    result = QA_chain.invoke({"question": query, "chat_history": []})
    return result["answer"]


iface = gr.Interface(
    fn=generate_response,
    inputs="text",
    outputs="text",
    title="數位身份白皮書查找機器人"
)

# Guard the launch so importing this module does not start a web server.
if __name__ == "__main__":
    iface.launch()