# Hugging Face Spaces page residue (Space status: Sleeping) — kept as a comment
| import os | |
| from openai import OpenAI | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_openai import ChatOpenAI | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain.schema import AIMessage, HumanMessage | |
| import gradio as gr | |
# --- 1. Load the source PDF and split it into overlapping chunks ---
pdf_filepath = '3.pdf'
loader = PyPDFLoader(pdf_filepath)
pages = loader.load()

# 4000-char chunks with 400-char overlap so context isn't cut mid-sentence.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=400)
splits = text_splitter.split_documents(pages)

# --- 2. Embed the chunks and index them in a Chroma vector store ---
# NOTE(review): OpenAIEmbeddings reads OPENAI_API_KEY from the environment.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
# --- 3. RAG chain components ---

# Retriever over the Chroma index built above.
retriever = vectorstore.as_retriever()

# Deterministic chat model (temperature=0 for reproducible answers).
model = ChatOpenAI(model='gpt-4o-mini', temperature=0)

# Prompt that restricts the model to the retrieved context only.
template = '''Answer the question based only on the following context:
{context}
Question: {question}
'''
prompt = ChatPromptTemplate.from_template(template)
def format_docs(docs):
    """Join the page_content of each document into one string, blank-line separated."""
    contents = [doc.page_content for doc in docs]
    return '\n\n'.join(contents)
# RAG pipeline: {question} -> retrieve + format context -> prompt -> LLM -> plain text.
# The dict is coerced by LangChain's `|` operator into a RunnableParallel.
_chain_inputs = {
    'context': retriever | format_docs,
    'question': RunnablePassthrough(),
}
rag_chain = _chain_inputs | prompt | model | StrOutputParser()
| # 4. Gradio ์ธํฐํ์ด์ค predict ํจ์ ์์ | |
def predict(message, history):
    """Gradio ChatInterface callback: answer `message` via the RAG chain.

    `history` is required by the gr.ChatInterface signature but unused —
    each question is answered statelessly from the PDF context.
    """
    return rag_chain.invoke(message)
# --- 5. Gradio chat UI ---
# Fix: the description hardcoded "1.pdf" while the code actually loads '3.pdf';
# interpolate pdf_filepath so the UI always names the real source file.
# NOTE(review): title/description text appears mojibake (mis-decoded Korean);
# preserved byte-for-byte since the original encoding cannot be recovered here.
demo = gr.ChatInterface(
    predict,
    title="์์ด ํ์ต ์ฑ๋ด (Powered by RAG & LangChain)",
    description=f"์์ด ๊ต์ฌ({pdf_filepath}) ๋ด์ฉ์ ๊ธฐ๋ฐ์ผ๋ก ์ง๋ฌธ์ ๋ต๋ณํฉ๋๋ค."
)
demo.launch()