Spaces:
Sleeping
Sleeping
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| import streamlit as st | |
| import time | |
| import base64 | |
| import uuid | |
| import tempfile | |
| from langchain_upstage import UpstageEmbeddings, ChatUpstage | |
| from langchain_chroma import Chroma | |
| from langchain_core.messages import HumanMessage, SystemMessage | |
# First run for this browser session: assign it a unique id and start with
# an empty file cache. Later reruns of the script skip this branch.
if "id" not in st.session_state:
    st.session_state["id"] = uuid.uuid4()
    st.session_state["file_cache"] = {}

# Placeholder; nothing in the visible part of the script assigns a real client.
client = None
session_id = st.session_state["id"]
def reset_chat():
    """Empty the stored conversation and clear the stored context."""
    state = st.session_state
    state["messages"] = []
    state["context"] = None
from langchain.retrievers.multi_query import MultiQueryRetriever

# Directory holding the persisted Chroma vector store.
# Original note (translated): the vector store DB is larger than 1 GB, so it
# could not be uploaded together with the app.
DB_PATH = '/content/drive/MyDrive/db/db'

# SECURITY: the Upstage API key was previously hard-coded in this file.
# Secrets must never live in source control; the key is now read from the
# environment (load_dotenv() at the top of the file also picks it up from a
# local .env file). The key that was committed should be treated as leaked
# and revoked.
_upstage_api_key = os.environ.get("UPSTAGE_API_KEY")
if not _upstage_api_key:
    raise RuntimeError(
        "UPSTAGE_API_KEY is not set. Define it in the environment or in a "
        ".env file before starting the app."
    )

# Embeddings are created without an explicit key, as in the original code.
# NOTE(review): presumably UpstageEmbeddings also reads UPSTAGE_API_KEY from
# the environment - confirm against the langchain-upstage documentation.
vectorstore = Chroma(
    persist_directory=DB_PATH,
    embedding_function=UpstageEmbeddings(model="solar-embedding-1-large"),
)

# Chat model shared by the multi-query retriever and the chains built later.
chat = ChatUpstage(upstage_api_key=_upstage_api_key)

# Retriever built from the vector store's default retriever and the chat model.
retriever = MultiQueryRetriever.from_llm(
    retriever=vectorstore.as_retriever(),
    llm=chat,
)
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Step 1 - give the chatbot "memory".
# The earlier messages and the newest user question are handed to the LLM,
# which is asked to rewrite the question so that it can be understood on its
# own, without access to the rest of the conversation.
contextualize_q_system_prompt = (
    "When there are older conversations and more recent user questions, "
    "these questions may be related to previous conversations. "
    "In this case, change the question to a question that can be understood "
    "independently without needing to know the content of the conversation. "
    "You don't have to answer the question, just reformulate it if necessary "
    "or leave it as is."
)

# The prompt is made of three parts: the system instruction above, the prior
# messages (injected through the "chat_history" input key by
# MessagesPlaceholder), and the user's current question.
_contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(_contextualize_messages)

# Retriever that takes the message history into account when searching.
history_aware_retriever = create_history_aware_retriever(
    llm=chat,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Step 2 - build the chain that retrieves documents and answers from them.
# NOTE(review): the non-ASCII characters in the string literals of this block
# look mis-encoded (probably UTF-8 Korean decoded with the wrong code page);
# they are kept exactly as found - restore them from the original source.
qa_system_prompt = """
You are an intelligent assistant helping the members of the Korean National Assembly with questions related to law and policy. You must answer politely. Read the given questions carefully and give the answer in Korean ONLY using the following pieces of the context. AGAIN, WRITE YOUR ANSWER ONLY BASED ON THE CONTEXT FROM THE DATABASE AND DON'T SEARCH ON THE INTERNET.
DO NOT TRY TO MAKE UP AN ANSWER:
ย - If the answer to the question cannot be determined from the context alone, say "I cannot determine the answer to that.".
ย - If the context is empty, just say "I do not know the answer to that.".
Context: {context} """

# The human turn is the user's question followed by a fixed instruction suffix.
_qa_human_template = "{input} ๋ต๋ณ์ ๊ตฌ์ฒด์ ์ผ๋ก ์ต์ ์ ๋ณด๋ถํฐ ์๊ฐ์ ํ๋ฆ์ ๋ฐ๋ผ ์์ฑํด์ค. ๊ทธ๋ฆฌ๊ณ ๋ต๋ณํ ๋ metadata์ ์๋ source๋ฅผ ํจ๊ป ์ ๊ณตํด์ค."

_qa_messages = [
    ("system", qa_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", _qa_human_template),
]
qa_prompt = ChatPromptTemplate.from_messages(_qa_messages)

# Chain that places the retrieved documents into {context} and asks the LLM.
question_answer_chain = create_stuff_documents_chain(llm=chat, prompt=qa_prompt)

# Full RAG pipeline. Per the original note, its result contains the keys
# input, chat_history, context and answer.
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)
# TODO (translated from the original note): try again with a plain retriever
# instead of history_aware_retriever.

# NOTE(review): the non-ASCII text in the two string literals below looks
# mis-encoded; it is kept exactly as found - restore it from the original
# source.

# Page title.
st.title("๊ตญํ ํ์๋ก ๊ธฐ๋ฐ ์ฑ๋ด ์๋น์ค :orange[NaraRAG] ๐โ๏ธ")

# Streamlit re-executes the script on every interaction, so the conversation
# has to live in session_state or it would be lost. Seed it with a greeting
# the first time through.
if "messages" not in st.session_state:
    _greeting = {
        "role": "assistant",
        "content": "์๋ ํ์ธ์! ๊ตญํ ํ์๋ก์ ๊ดํด ๊ถ๊ธํ ๊ฒ์ด ์์ผ๋ฉด ์ธ์ ๋ ๋ฌผ์ด๋ด์ฃผ์ธ์ ๐",
    }
    st.session_state["messages"] = [_greeting]

# Replay the stored conversation so earlier turns stay visible after a rerun.
for past_message in st.session_state.messages:
    st.chat_message(past_message["role"]).markdown(past_message["content"])
# Upper bound on stored messages, to keep the prompt (and its cost) small.
MAX_MESSAGES_BEFORE_DELETION = 4

# Handle a newly submitted question: show it, run the RAG chain, then stream
# the answer into the page word by word.
if prompt := st.chat_input("Ask a question!"):
    messages = st.session_state.messages

    # Once the cap is reached, drop the two oldest stored messages.
    if len(messages) >= MAX_MESSAGES_BEFORE_DELETION:
        del messages[:2]

    # Snapshot the history BEFORE recording the new question. The chain gets
    # the question separately as "input"; if it were also part of
    # "chat_history" the model would see it twice.
    chat_history = list(messages)

    # Record and display the user's question.
    messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Run the chain and render the assistant's reply.
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        result = rag_chain.invoke({"input": prompt, "chat_history": chat_history})

        # Show the retrieved documents the answer was based on.
        with st.expander("Evidence context"):
            st.write(result['context'])

        # Simulated streaming: reveal the answer one word at a time, followed
        # by a cursor glyph. The glyph was mis-encoded in the original
        # literal; "▌" is assumed to be the intended character.
        for chunk in result["answer"].split(" "):
            full_response += chunk + " "
            time.sleep(0.2)
            message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)

    # Store the assistant's reply so it is replayed on the next rerun.
    messages.append({"role": "assistant", "content": full_response})

    # Debug output of the stored conversation.
    # NOTE(review): indentation was lost in the source; these prints are
    # assumed to belong to the `if prompt` body - confirm.
    print("_______________________")
    print(st.session_state.messages)