File size: 3,632 Bytes
63453da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Import necessary libraries
import streamlit as st
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader, MongodbLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chains import create_retrieval_chain, create_history_aware_retriever
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.prompts import MessagesPlaceholder

# Load environment variables (from a local .env file, if one exists)
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')
# NOTE: hf_token is read here but not referenced anywhere else in this file;
# it is kept so the module-level name stays available.
hf_token = os.getenv('HF_TOKEN')

# Fail fast with a readable message instead of letting a missing key surface
# later as an opaque client/validation error.
if not groq_api_key:
    st.error("GROQ_API_KEY is not set. Add it to your environment or .env file.")
    st.stop()

# Initialize the ChatGroq model. The model can be switched without a code
# change by setting GROQ_MODEL; the default is the original model name.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name=os.getenv('GROQ_MODEL', "llama3-8b-8192"),
)

# Initialize embeddings
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Document fields requested from the MongoDB loader.
MOVIE_FIELD_NAMES = [
    "_id", "plot", "genres", "runtime", "cast", "poster", "title",
    "fullplot", "languages", "released", "directors", "rated", "awards",
    "lastupdated", "year", "imdb", "countries", "type", "tomatoes",
    "num_mflix_comments",
]


@st.cache_resource
def _load_embeddings():
    """Create the sentence-embedding model once and reuse it across reruns."""
    return HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')


@st.cache_resource
def _build_vectorstore(connection_string):
    """Load the movies collection, split it, and index it in Chroma.

    Streamlit re-executes the whole script on every interaction, so without
    caching every question would reload MongoDB and re-embed all documents.
    NOTE(review): repeated ``Chroma.from_documents`` calls in one process also
    appear to add to the same default in-memory collection, duplicating
    entries -- caching avoids that as well; confirm against the Chroma docs.

    Args:
        connection_string: MongoDB connection URI (also acts as the cache key).

    Returns:
        The populated Chroma vector store.
    """
    loader = MongodbLoader(
        connection_string=connection_string,
        db_name="sample_mflix",
        collection_name="movies",
        field_names=MOVIE_FIELD_NAMES,
    )
    docs = loader.load()

    # Split documents and initialize Chroma vector store
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    splits = text_splitter.split_documents(docs)
    return Chroma.from_documents(documents=splits, embedding=_load_embeddings())


# MongoDB data loading setup.
# SECURITY: the connection string (which embeds a username and password) must
# come from the environment / .env file, never from source control. The
# credentials that were previously committed here should be rotated.
mongodb_uri = os.getenv('MONGODB_URI')
if not mongodb_uri:
    st.error("MONGODB_URI is not set. Add it to your environment or .env file.")
    st.stop()

embeddings = _load_embeddings()
vectorstore = _build_vectorstore(mongodb_uri)
retriever = vectorstore.as_retriever()

# Define prompt templates

# Prompt used to rewrite a follow-up question (e.g. "who directed it?") into a
# standalone question before retrieval, so the vector search sees the entities
# mentioned earlier in the conversation rather than a bare pronoun.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, "
    "just reformulate it if needed and otherwise return it as is."
)

contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Prompt used to answer the question from the retrieved documents, which the
# stuff-documents chain injects as {context}.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Use the following pieces of retrieved context to answer "
    "the question. If you don't know the answer, say that you "
    "don't know. Use three sentences maximum and keep the "
    "answer concise.\n\n{context}"
)

qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Initialize the retrieval chain.
# The plain retriever only ever saw the raw question, so the chat history had
# no influence on which documents were fetched. Wrapping it makes retrieval
# history-aware; the chain still takes {"input", "chat_history"} and still
# exposes the result under "answer".
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

# Streamlit App
st.title("LLM-Powered Question Answering with Memory")

# Initialize session state for chat history (survives script reruns within a
# browser session)
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Sidebar for question input
st.sidebar.title("Ask a Question")
question = st.sidebar.text_input("Enter your question:")

# Retrieve and display the answer
if question:
    # Pass only the *previous* turns as history. The prompts already add the
    # current question through {input}; appending it to the history first
    # would send the same question to the model twice.
    prior_turns = list(st.session_state.chat_history)
    response = rag_chain.invoke({"input": question, "chat_history": prior_turns})
    answer = response['answer']

    # Display the answer
    st.write("**Answer:**")
    st.write(answer)

    # Record the completed turn only after the chain succeeded, so a failed
    # call does not leave a question without an answer in the history.
    st.session_state.chat_history.extend(
        [HumanMessage(content=question), AIMessage(content=answer)]
    )

# Display chat history in the main app
st.write("## Chat History")
for message in st.session_state.chat_history:
    if isinstance(message, HumanMessage):
        st.write(f"**You:** {message.content}")
    elif isinstance(message, AIMessage):
        st.write(f"**Bot:** {message.content}")