File size: 2,575 Bytes
d5275a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import TextLoader, PyPDFLoader
from langchain import PromptTemplate
from PyPDF2 import PdfFileMerger
import gradio as gr
from dotenv import load_dotenv
import openai
import glob
import os

# Pull variables from a local .env file (if one exists) into os.environ.
load_dotenv()
# Fail fast with an actionable message when the key is absent. The
# OpenAI-backed embeddings and chat model created further down are expected
# to read OPENAI_API_KEY from the environment. KeyError is kept as the
# exception type so the failure mode is the same as a plain lookup.
if "OPENAI_API_KEY" not in os.environ:
    raise KeyError(
        "OPENAI_API_KEY is not set; export it or add it to a .env file"
    )

# Combine every PDF in the corpus directory into a single file so that one
# loader call can read the whole corpus. The merged file doubles as a cache:
# it is only rebuilt when it does not exist yet.
merge_file = 'src/retrieval_qa/pdf/merge.pdf'
if not os.path.isfile(merge_file):
    pdf_file_merger = PdfFileMerger()
    try:
        # glob.glob() returns paths in arbitrary order; sort them so the page
        # order of the merged document is reproducible across machines.
        for file_name in sorted(glob.glob('src/retrieval_qa/pdf/*.pdf')):
            pdf_file_merger.append(file_name)
        pdf_file_merger.write(merge_file)
    finally:
        # Always release the merger, even if a PDF fails to parse or the
        # write fails part-way.
        pdf_file_merger.close()

# Read the merged PDF into LangChain documents.
loader = PyPDFLoader(merge_file)
documents = loader.load()

# Cut the documents into chunks of at most 1000 units with no overlap
# (presumably characters, the splitter's default length measure -- confirm),
# embed the chunks with OpenAI embeddings and index them in a Chroma store.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_documents(texts, embeddings)

# Retrieval-augmented QA chain: chunks fetched from the vector store are
# handed to gpt-3.5-turbo through the "stuff" chain type.
chat_model = ChatOpenAI(model_name="gpt-3.5-turbo")
retriever = vectordb.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
)

# Prompt definition.
# The (Japanese) template casts the model as an AI assistant trained on
# regenerative and aesthetic medicine, asks for a specific, medically grounded
# answer, and leaves a single {question} slot for the user's message.
template = """
あなたは再生医療・美容医学について学習したAIアシスタントです。下記の質問に具体的で医学的な回答をしてください。
質問:{question}
回答:
"""

prompt = PromptTemplate(template=template, input_variables=["question"])


def add_text(history, text):
    """Queue the user's message as a new, not-yet-answered chat turn.

    Args:
        history: Current chat transcript, a list of (user, bot) pairs.
        text: The message the user just submitted.

    Returns:
        A 2-tuple: a new transcript list ending in ``(text, None)`` (the
        input list is not modified), and an empty string, which the caller
        uses to clear the input box.
    """
    pending_turn = (text, None)
    return history + [pending_turn], ""


def bot(history):
    """Answer the most recent user message and store the reply in *history*.

    The last entry of ``history`` is expected to be a ``(user_message, None)``
    pair such as the one ``add_text`` appends. The user message is wrapped in
    the module-level ``prompt`` template and run through the ``qa`` retrieval
    chain; the chain's answer becomes the second element of that entry.

    Args:
        history: Chat transcript as a non-empty list of ``[user, bot]`` pairs.
            Each pair may be a list or a tuple.

    Returns:
        The same ``history`` list, with its last entry replaced by
        ``[user_message, answer]``.

    Raises:
        IndexError: If ``history`` is empty.
    """
    user_message = history[-1][0]
    query = prompt.format(question=user_message)
    answer = qa.run(query)
    # Source attribution is deliberately not fetched here: the reply shows
    # only the answer, so a second retrieval through the chain's private
    # ``_get_docs`` would cost an extra embedding request and could raise on
    # an empty result for a value nobody sees. If sources should be displayed,
    # build the chain with ``return_source_documents=True`` instead.
    #
    # Replace the whole pair rather than assigning to index 1, so this also
    # works when the entry is an immutable tuple.
    history[-1] = [user_message, answer]
    return history


# Chat UI: a transcript pane above a single-line input box.
with gr.Blocks() as demo:
    # Transcript pane, initially empty.
    chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)

    with gr.Row(), gr.Column(scale=0.6):
        txt = gr.Textbox(
            placeholder="Enter text and press enter",
            show_label=False,
        ).style(container=False)

    # On Enter: add_text appends the message to the transcript and clears the
    # box; once that has finished, bot fills in the answer for the new turn.
    txt.submit(
        fn=add_text, inputs=[chatbot, txt], outputs=[chatbot, txt]
    ).then(fn=bot, inputs=chatbot, outputs=chatbot)

# Start the Gradio server as soon as the module is executed.
demo.launch()