File size: 1,121 Bytes
b871c25
 
 
 
 
f34d217
b871c25
 
 
 
 
 
 
 
 
dfdc1d6
b871c25
 
 
7d9f32e
b871c25
 
dfdc1d6
b871c25
c714f48
b871c25
dfdc1d6
b871c25
dfdc1d6
f34d217
b871c25
 
 
 
f34d217
b2ac5a9
f34d217
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from langchain_community.vectorstores import Chroma
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain_openai import ChatOpenAI
import gradio as gr

# Load the source PDF.
file_path = "mozilla.pdf"
loader = PyPDFLoader(file_path)

# Pick a splitter and cut the document text into chunks
# (500 characters each, no overlap between neighbouring chunks).
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=0,
)
texts = loader.load_and_split(splitter)

# Build the local vector DB from the embedded chunks.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=texts, embedding=embeddings)

# Build the RAG chain: a chat model answering over the vector store retriever.
QA_chain = ConversationalRetrievalChain.from_llm(
    llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
    retriever=vectorstore.as_retriever(),
)

def generate_response(query: str) -> str:
    """Answer one question with the module-level retrieval chain.

    The chain is always invoked with an empty ``chat_history``, so every
    question is answered independently of earlier ones.

    Args:
        query: The user's question as typed into the UI.

    Returns:
        The chain's ``"answer"`` field as a string, or an empty string when
        ``query`` is ``None``, empty, or only whitespace.
    """
    # A blank submission carries no question; skip the chain call entirely
    # instead of spending an embedding + LLM request on it.
    if not query or not query.strip():
        return ""

    # Use the Runnable-style ``invoke`` entry point rather than calling the
    # chain object directly (direct ``__call__`` is deprecated in LangChain).
    result = QA_chain.invoke({"question": query, "chat_history": []})

    return str(result["answer"])

# Gradio UI: a single text input whose value is passed to generate_response,
# with the returned string shown in a single text output.
iface = gr.Interface(
    generate_response,
    "text",
    "text",
    title="數位身份白皮書查找機器人",
)

# Start serving the interface.
iface.launch()