"""Streamlit RAG chatbot: semantically-chunked PDF retrieval answered by Gemini.

Flow: the sidebar "Process" button loads every PDF under DATA_PATH, chunks it
with a SemanticChunker (Google embeddings), and indexes the chunks in Chroma.
Questions typed in the chat box are then answered by a
ConversationalRetrievalChain over that index.
"""

import os

import streamlit as st
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_experimental.text_splitter import SemanticChunker
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

load_dotenv()

# Prompt forces grounded answers: respond only from retrieved context,
# admit ignorance rather than fabricate.
custom_prompt_template = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer: (always use english language)
"""

# Directory scanned for *.pdf source documents.
DATA_PATH = 'content/'

st.set_page_config(
    page_title="Chatbot Hadis",
    page_icon="🦙",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=None,
)
st.title("RAG Gemini Using semantic chunking💬")

# Seed the chat history on the very first run of the session.
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "Klik button to processing file"}
    ]


def _process_documents():
    """Load PDFs from DATA_PATH, chunk them semantically, and store the
    retriever / conversation memory / QA prompt in ``st.session_state``."""
    loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    text_splitter = SemanticChunker(
        embeddings,
        breakpoint_threshold_type="percentile",
    )
    # BUG FIX: create_documents() expects a *list* of texts. The original
    # passed a bare string (so it was split into single characters) and
    # overwrote `docs` each loop iteration, keeping only the last PDF's
    # chunks. Chunk every loaded page in one call instead.
    docs = text_splitter.create_documents(
        [document.page_content for document in documents]
    )
    docsearch = Chroma.from_documents(docs, embeddings)
    # Assign unconditionally so re-processing refreshes the index.
    st.session_state.retriever = docsearch.as_retriever()
    if 'memory' not in st.session_state:
        st.session_state.memory = ConversationBufferMemory(
            memory_key="chat_history",
            output_key="answer",
            chat_memory=ChatMessageHistory(),
            return_messages=True,
        )
    if 'prompt' not in st.session_state:
        st.session_state.prompt = PromptTemplate(
            template=custom_prompt_template,
            input_variables=["context", "question"],
        )


with st.sidebar:
    st.title('Sidebar')
    if st.button("Process"):
        st.text("Processing ...")
        _process_documents()
        st.session_state.messages = [
            {"role": "assistant", "content": "Processing done.!"}
        ]

# Record the user's question (walrus: only truthy input is appended).
if user_question := st.chat_input("Your question"):
    st.session_state.messages.append({"role": "user", "content": user_question})

# Re-render the full conversation on every rerun.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])


def clear_chat_history():
    """Reset the conversation to the initial assistant greeting."""
    st.session_state.messages = [
        {"role": "assistant", "content": "Klik button to processing file"}
    ]


st.sidebar.button('Clear Chat History', on_click=clear_chat_history)

# A trailing user message means we still owe the user an answer.
if st.session_state.messages[-1]["role"] != "assistant":
    with st.chat_message("assistant"):
        if 'retriever' not in st.session_state:
            # ROBUSTNESS FIX: the original raised a KeyError when a question
            # arrived before the PDFs were processed.
            answer = "Please click the Process button first."
            st.markdown(answer)
        else:
            with st.spinner("Thinking..."):
                llm = ChatGoogleGenerativeAI(
                    model="gemini-pro",
                    temperature=0,
                    max_output_tokens=2048,
                )
                chain = ConversationalRetrievalChain.from_llm(
                    llm=llm,
                    chain_type="stuff",
                    retriever=st.session_state.retriever,
                    verbose=True,
                    max_tokens_limit=3000,
                    combine_docs_chain_kwargs={"prompt": st.session_state.prompt},
                    memory=st.session_state.memory,
                    return_source_documents=True,
                )
                res = chain(user_question)
                answer = res["answer"]
                st.empty().markdown(answer)
        st.session_state.messages.append({"role": "assistant", "content": answer})