AD-Styles committed on
Commit
40f60ce
·
verified ·
1 Parent(s): e0ba3f8

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +75 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid # added to generate a unique session ID
3
+ import gradio as gr
4
+ from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain_chroma import Chroma
8
+ from langchain.chains.combine_documents import create_stuff_documents_chain
9
+ from langchain.chains import create_retrieval_chain
10
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
11
+ from langchain_core.chat_history import BaseChatMessageHistory, InMemoryChatMessageHistory
12
+ from langchain_core.runnables.history import RunnableWithMessageHistory
13
+
14
# 1. LLM initialisation.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0)

# 2. Load the document and build the vector DB (runs once, at server start-up).
loader = PyPDFLoader("Maximizing Muscle Hypertrophy.pdf")
# Use load(), not load_and_split(): the latter already chunks the pages with a
# default splitter, so passing its output through text_splitter below would
# split the text twice and let the default chunking leak into the index.
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(pages)

# Embed every chunk and expose the resulting store through a retriever.
embeddings = GoogleGenerativeAIEmbeddings(model="gemini-embedding-001")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()
26
+
27
# 3. RAG chain and conversation memory.
# Chat histories live in this process-local dict, keyed by session id.
store = {}


def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history for ``session_id``, creating it on first use.

    Args:
        session_id: Key identifying one conversation.

    Returns:
        The history object stored in ``store`` under ``session_id``. The same
        object is returned on every later call with the same key.
    """
    try:
        return store[session_id]
    except KeyError:
        # First request for this session: register an empty in-memory history.
        fresh_history = store[session_id] = InMemoryChatMessageHistory()
        return fresh_history
33
+
34
# Answering prompt. The system text (Korean) tells the model to act as a
# paper-review expert, answer in Korean from the supplied document, and say it
# does not know when the document lacks the answer. The literal is restored
# from a mis-encoded (mojibake) form so the model receives readable
# instructions. {context} is the slot for the retrieved passages, and
# "chat_history" the slot for earlier turns of the conversation.
qa_prompt = ChatPromptTemplate.from_messages([
    ("system", """논문 리뷰 전문가입니다. 제공된 문서를 바탕으로 한국어로 답변하세요.
문서에 없는 내용은 모른다고 답하세요.

{context}"""),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
])

# Retrieval step (retriever) followed by the answering step (llm + qa_prompt).
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)

# Wrap the chain so that each call reads and appends the history returned by
# get_session_history for the session id passed in the call's config.
conversational_rag_chain = RunnableWithMessageHistory(
    rag_chain,
    get_session_history,
    input_messages_key="input",
    history_messages_key="chat_history",
    output_messages_key="answer",
)
53
+
54
# 4. Gradio callback (receives session_id dynamically on every call).
def chat_response(message, history, session_id):
    """Answer one chat message through the conversational RAG chain.

    Args:
        message: The user's latest message; sent to the chain as ``input``.
        history: Chat history supplied by the Gradio chat UI. Unused here,
            because the chain keeps its own history per session id.
        session_id: Identifier selecting which stored history the chain uses.

    Returns:
        The value stored under ``"answer"`` in the chain's result.
    """
    run_config = {"configurable": {"session_id": session_id}}
    result = conversational_rag_chain.invoke({"input": message}, config=run_config)
    return result["answer"]
61
+
62
# 5. Gradio UI set-up, written with multiple concurrent users in mind.
with gr.Blocks() as demo:
    # Intent (from the original comment): every connecting user/browser gets
    # its own UUID, kept out of sight as a state value. The state is given a
    # callable rather than a fixed string so the id is not computed just once
    # at import time.
    session_state = gr.State(lambda: str(uuid.uuid4()))

    # title/description are restored from a mis-encoded (mojibake) form so the
    # page shows readable Korean text and the intended emoji.
    gr.ChatInterface(
        fn=chat_response,
        additional_inputs=[session_state],  # hands the session id to chat_response
        title="💪 근비대 극대화 논문 Q&A 봇",
        description="'Maximizing Muscle Hypertrophy' 논문에 대해 궁금한 점을 물어보세요!",
    )

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+ langchain-google-genai
3
+ langchain-community
4
+ langchain-chroma
5
+ langchain-text-splitters
6
+ pypdf
7
+ chromadb