# Hugging Face Space upload artifact — author: yoon2566, commit: "Create app.py" (a6703be, verified)
import os
from openai import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import AIMessage, HumanMessage
import gradio as gr
# 1. Load the source PDF and split it into overlapping chunks.
pdf_filepath = '3.pdf'
documents = PyPDFLoader(pdf_filepath).load()
# 4000-char chunks with 400-char overlap so context isn't cut mid-sentence.
chunker = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=400)
chunks = chunker.split_documents(documents)
# 2. Embed each chunk and index it in an in-memory Chroma vector store.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=OpenAIEmbeddings(),
)
# 3. Set up the individual pieces of the RAG chain.
# Prompt: restrict the model to answering from the retrieved context only.
template = '''Answer the question based only on the following context:
{context}
Question: {question}
'''

# Retriever over the Chroma index built above (default similarity search).
retriever = vectorstore.as_retriever()

# Deterministic chat model (temperature=0) plus the prompt template.
model = ChatOpenAI(model='gpt-4o-mini', temperature=0)
prompt = ChatPromptTemplate.from_template(template)
def format_docs(docs):
    """Concatenate the page contents of *docs*, separated by blank lines."""
    parts = [document.page_content for document in docs]
    return '\n\n'.join(parts)
# Wire the chain: retrieve -> format into text -> prompt -> model -> plain string.
# The dict on the left is coerced into a RunnableParallel by the pipe operator.
_chain_inputs = {
    'context': retriever | format_docs,
    'question': RunnablePassthrough(),
}
rag_chain = _chain_inputs | prompt | model | StrOutputParser()
# 4. Gradio callback: answer each user message through the RAG chain.
def predict(message, history):
    """Return the RAG chain's answer for *message*.

    *history* (prior chat turns) is required by the Gradio ChatInterface
    signature but is intentionally unused — the chain is stateless.
    """
    answer = rag_chain.invoke(message)
    return answer
# 5. Build and launch the Gradio chat UI.
demo = gr.ChatInterface(
    predict,
    title="영어 학습 챗봇 (Powered by RAG & LangChain)",
    # Fix: the description previously said "1.pdf", but the app actually
    # loads '3.pdf' (see pdf_filepath above) — keep the two in sync.
    description="영어 교재(3.pdf) 내용을 기반으로 질문에 답변합니다."
)
demo.launch()