"""RAG chatbot over a PDF textbook, served through a Gradio chat UI.

Pipeline: load PDF -> chunk -> embed into Chroma -> retrieve top chunks
per question -> answer with gpt-4o-mini constrained to that context.
Requires OPENAI_API_KEY in the environment.
"""

import os
from openai import OpenAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from langchain.schema import AIMessage, HumanMessage
import gradio as gr

# 1. Load and pre-process the PDF.
pdf_filepath = '3.pdf'
loader = PyPDFLoader(pdf_filepath)
pages = loader.load()

# Overlap (400 chars) preserves sentence continuity across chunk boundaries.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=400)
splits = text_splitter.split_documents(pages)

# 2. Build the vector store and embed the chunks (in-memory Chroma).
vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())

# 3. Configure the RAG chain.
template = '''Answer the question based only on the following context:
{context}

Question: {question}
'''
prompt = ChatPromptTemplate.from_template(template)

# temperature=0 for deterministic, context-grounded answers.
model = ChatOpenAI(model='gpt-4o-mini', temperature=0)

retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Join retrieved Documents into one context string for the prompt."""
    return '\n\n'.join(doc.page_content for doc in docs)


rag_chain = (
    {'context': retriever | format_docs, 'question': RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)


# 4. Gradio predict function.
def predict(message, history):
    """Answer one chat turn via the RAG chain.

    NOTE: `history` is intentionally unused — each question is answered
    statelessly against the retrieved PDF context, not the conversation.
    """
    response = rag_chain.invoke(message)
    return response


# 5. Set up and launch the Gradio chat interface.
# Bug fix: the description previously hard-coded "1.pdf" while the code
# loads '3.pdf'; derive it from pdf_filepath so they cannot drift apart.
demo = gr.ChatInterface(
    predict,
    title="영어 학습 챗봇 (Powered by RAG & LangChain)",
    description=f"영어 교재({pdf_filepath}) 내용을 기반으로 질문에 답변합니다.",
)

if __name__ == "__main__":
    demo.launch()