import os
import uuid
import gradio as gr
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# 1. Initialise the chat model.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# 2. Load the paper and build the vector store.
loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
# Use load() rather than load_and_split(): the latter already chunks the
# pages with LangChain's default splitter, so the documents used to be split
# twice. The splitter configured below is now the only one applied.
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
# NOTE(review): no persist_directory is passed, so the collection appears to be
# rebuilt (and the PDF re-embedded) on every start -- confirm this is intended.
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()
# Helper that merges the retrieved documents into a single string.
def format_docs(docs):
    """Join the ``page_content`` of every document, separated by a blank line.

    Args:
        docs: Iterable of objects that expose a ``page_content`` string.

    Returns:
        One string with the contents in iteration order; ``""`` when ``docs``
        is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)
# 3. Prompt definition.
# The system message is Korean for: "(You are) an expert paper reviewer. Answer
# in Korean based on the provided document. For anything that is not in the
# document, answer that you do not know." The text was restored from a
# mis-decoded (mojibake) copy. {context} receives the retrieved passages and
# "chat_history" receives the earlier turns of the conversation.
qa_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "논문 리뷰 전문가입니다. 제공된 문서를 바탕으로 한국어로 답변하세요.\n"
        "문서에 없는 내용은 모른다고 답하세요.\n"
        "{context}",
    ),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])
# 4. RAG chain written with LCEL (pipe) syntax instead of the `chains` module,
#    which was raising errors.
def _retrieve_context(inputs):
    """Look up documents for ``inputs["input"]`` and flatten them to text."""
    matched_docs = retriever.invoke(inputs["input"])
    return format_docs(matched_docs)


# Adds a "context" key to the incoming dict while passing the other keys through.
_with_context = RunnablePassthrough.assign(context=_retrieve_context)
rag_chain = _with_context | qa_prompt | llm | StrOutputParser()
# 5. Conversation memory (chat history) wiring.
# Maps session_id -> that session's message history. It is a plain module-level
# dict, so entries are never evicted and do not survive a restart.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation.

    Returns:
        The history object stored for that session; repeated calls with the
        same id return the same object.
    """
    if session_id not in store:
        store[session_id] = InMemoryChatMessageHistory()
    return store[session_id]
# Names of the dict keys that carry the new user message and the past messages.
_message_keys = {
    "input_messages_key": "input",
    "history_messages_key": "chat_history",
}
# Wrap the RAG chain with per-session message history.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain, get_session_history, **_message_keys
)
# 6. Gradio callback.
def chat_response(message, history, session_id):
    """Answer one chat message through the conversational RAG chain.

    Args:
        message: Text the user just submitted.
        history: Transcript passed in by the chat UI. Unused here: the chain is
            given only ``message`` and ``session_id``.
        session_id: Identifier passed to the chain as the ``session_id``
            configurable.

    Returns:
        Whatever the chain's ``invoke`` returns for this message.
    """
    # The LCEL chain returns a string directly rather than a dict, so there is
    # no ["answer"] field to extract.
    response = conversational_rag_chain.invoke(
        {"input": message},
        config={"configurable": {"session_id": session_id}},
    )
    return response
# 7. Build the UI for a multi-user environment.
# title is Korean for "Muscle hypertrophy maximisation paper Q&A bot";
# description for "Ask anything you are curious about in the 'Maximizing Muscle
# Hypertrophy' paper!". Both were restored from a mis-decoded (mojibake) copy
# that had split the string literals across lines.
with gr.Blocks() as demo:
    # A callable initial value is evaluated on each app load, so every visitor
    # gets a separate UUID to use as their session id.
    session_state = gr.State(lambda: str(uuid.uuid4()))
    gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[session_state],
        title="💪 근비대 극대화 논문 Q&A 봇",
        description="'Maximizing Muscle Hypertrophy' 논문에 대해 궁금한 점을 물어보세요!",
    )
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()