AD-Styles committed on
Commit
030cb07
·
verified ·
1 Parent(s): 0923dda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -74
app.py CHANGED
@@ -1,75 +1,84 @@
1
- import os
2
- import uuid # 고유 세션 ID 생성을 위한 라이브러리 추가
3
- import gradio as gr
4
- from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
5
- from langchain_community.document_loaders import PyPDFLoader
6
- from langchain_text_splitters import RecursiveCharacterTextSplitter
7
- from langchain_chroma import Chroma
8
- from langchain.chains.combine_documents import create_stuff_documents_chain
9
- from langchain.chains import create_retrieval_chain
10
- from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11
- from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
12
- from langchain_core.runnables.history import RunnableWithMessageHistory
13
-
14
- # 1. LLM 초기화
15
- llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
16
-
17
- # 2. 문서 로드 및 벡터 DB 구축 (서버 구동 시 1회 실행)
18
- loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
19
- pages = loader.load_and_split()
20
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
21
- splits = text_splitter.split_documents(pages)
22
-
23
- embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
24
- vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
25
- retriever = vectorstore.as_retriever()
26
-
27
- # 3. RAG 체인 및 메모리 설정
28
- store = {}
29
- def get_session_history(session_id: str) -> BaseChatMessageHistory:
30
- if session_id not in store:
31
- store[session_id] = InMemoryChatMessageHistory()
32
- return store[session_id]
33
-
34
- qa_prompt = ChatPromptTemplate.from_messages([
35
- ("system", """논문 리뷰 전문가입니다. 제공된 문서를 바탕으로 한국어로 답변하세요.
36
- 문서에 없는 내용은 모른다고 답하세요.
37
-
38
- {context}"""),
39
- MessagesPlaceholder("chat_history"),
40
- ("human", "{input}"),
41
- ])
42
-
43
- question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
44
- rag_chain = create_retrieval_chain(retriever, question_answer_chain)
45
-
46
- conversational_rag_chain = RunnableWithMessageHistory(
47
- rag_chain,
48
- get_session_history,
49
- input_messages_key="input",
50
- history_messages_key="chat_history",
51
- output_messages_key="answer",
52
- )
53
-
54
- # 4. Gradio ์—ฐ๋™ ํ•จ์ˆ˜ (session_id๋ฅผ ๋™์ ์œผ๋กœ ๋ฐ›๋„๋ก ์ˆ˜์ •)
55
- def chat_response(message, history, session_id):
56
- response = conversational_rag_chain.invoke(
57
- {"input": message},
58
- config={"configurable": {"session_id": session_id}}
59
- )
60
- return response["answer"]
61
-
62
- # 5. 다중 사용자 환경을 고려한 Gradio UI 실행 설정
63
- with gr.Blocks() as demo:
64
- # ์ ‘์†ํ•˜๋Š” ์‚ฌ์šฉ์ž(๋ธŒ๋ผ์šฐ์ €)๋งˆ๋‹ค ๊ณ ์œ ํ•œ UUID๋ฅผ ์ƒ์„ฑํ•˜์—ฌ ์ƒํƒœ๊ฐ’์œผ๋กœ ์€๋‹‰
65
- session_state = gr.State(lambda: str(uuid.uuid4()))
66
-
67
- gr.ChatInterface(
68
- fn=chat_response,
69
- additional_inputs=[session_state], # 백엔드 함수에 고유 세션 ID 전달
70
- title="💪 근비대 극대화 논문 Q&A 봇",
71
- description="'Maximizing Muscle Hypertrophy' 논문에 대해 궁금한 점을 물어보세요!"
72
- )
73
-
74
- if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
75
  demo.launch()
 
1
+ import os
2
+ import uuid
3
+ import gradio as gr
4
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain_chroma import Chroma
8
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
9
+ from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
10
+ from langchain_core.runnables.history import RunnableWithMessageHistory
11
+ from langchain_core.runnables import RunnablePassthrough
12
+ from langchain_core.output_parsers import StrOutputParser
13
+
14
+ # 1. LLM 초기화
15
+ llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)
16
+
17
+ # 2. 문서 로드 및 벡터 DB 구축
18
+ loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
19
+ pages = loader.load_and_split()
20
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
21
+ splits = text_splitter.split_documents(pages)
22
+
23
+ embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
24
+ vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
25
+ retriever = vectorstore.as_retriever()
26
+
27
+ # 검색된 문서를 하나의 문자열로 결합하는 헬퍼 함수
28
+ def format_docs(docs):
29
+ return "\n\n".join(doc.page_content for doc in docs)
30
+
31
+ # 3. 프롬프트 정의
32
+ qa_prompt = ChatPromptTemplate.from_messages([
33
+ ("system", """논문 리뷰 전문가입니다. 제공된 문서를 바탕으로 한국어로 답변하세요.
34
+ 문서에 없는 내용은 모른다고 답하세요.
35
+
36
+ {context}"""),
37
+ MessagesPlaceholder("chat_history"),
38
+ ("human", "{input}"),
39
+ ])
40
+
41
+ # 4. 에러가 나던 chains 모듈을 버리고 LCEL(파이프라인) 문법으로 RAG 체인 구축
42
+ rag_chain = (
43
+ RunnablePassthrough.assign(context=(lambda x: format_docs(retriever.invoke(x["input"]))))
44
+ | qa_prompt
45
+ | llm
46
+ | StrOutputParser()
47
+ )
48
+
49
+ # 5. 메모리(대화 기록) 연동
50
+ store = {}
51
+ def get_session_history(session_id: str) -> BaseChatMessageHistory:
52
+ if session_id not in store:
53
+ store[session_id] = InMemoryChatMessageHistory()
54
+ return store[session_id]
55
+
56
+ conversational_rag_chain = RunnableWithMessageHistory(
57
+ rag_chain,
58
+ get_session_history,
59
+ input_messages_key="input",
60
+ history_messages_key="chat_history",
61
+ )
62
+
63
+ # 6. Gradio 연동 함수
64
+ def chat_response(message, history, session_id):
65
+ # LCEL 체인은 딕셔너리가 아닌 문자열을 바로 반환하므로 ["answer"] 추출이 필요 없음
66
+ response = conversational_rag_chain.invoke(
67
+ {"input": message},
68
+ config={"configurable": {"session_id": session_id}}
69
+ )
70
+ return response
71
+
72
+ # 7. 다중 사용자 환경 UI 실행
73
+ with gr.Blocks() as demo:
74
+ session_state = gr.State(lambda: str(uuid.uuid4()))
75
+
76
+ gr.ChatInterface(
77
+ fn=chat_response,
78
+ additional_inputs=[session_state],
79
+ title="💪 근비대 극대화 논문 Q&A 봇",
80
+ description="'Maximizing Muscle Hypertrophy' 논문에 대해 궁금한 점을 물어보세요!"
81
+ )
82
+
83
+ if __name__ == "__main__":
84
  demo.launch()