# Streamlit RAG chatbot: answers SEO questions from a local FAISS index
# using a Hugging Face hosted LLM (Mistral-7B-Instruct).
import os
import streamlit as st
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from dotenv import load_dotenv, find_dotenv
# Load environment variables (e.g. HF_TOKEN) from a local .env file, if present.
load_dotenv(find_dotenv())

# Path to the pre-built FAISS index on disk.
DB_FAISS_PATH = "vectorstore/db_faiss"
@st.cache_resource
def get_vectorstore():
    """Load the on-disk FAISS index, embedding queries with MiniLM.

    Returns:
        The loaded FAISS store, or None (after surfacing a UI error)
        when the index cannot be loaded.
    """
    try:
        # Same embedding model that built the index must be used for queries.
        embedder = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        # The index was produced locally, so deserialization is trusted here.
        return FAISS.load_local(
            DB_FAISS_PATH,
            embedder,
            allow_dangerous_deserialization=True,
        )
    except Exception as exc:
        st.error(f"β οΈ Error loading vector store: {str(exc)}")
        return None
@st.cache_resource
def load_llm():
    """Load the Mistral-7B-Instruct model via the Hugging Face Inference API.

    Reads the API token from the HF_TOKEN environment variable.

    Returns:
        A configured HuggingFaceEndpoint, or None (after surfacing a UI
        error) when the token is missing or endpoint construction fails.
    """
    HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
    HF_TOKEN = os.getenv("HF_TOKEN")
    if not HF_TOKEN:
        st.error("β οΈ Hugging Face API token is missing. Please check your environment variables.")
        return None
    try:
        # Pass the token and generation limit as explicit endpoint arguments.
        # Recent langchain_huggingface releases reject credentials and known
        # parameters smuggled through model_kwargs (raises
        # "Parameters {'token'} should be specified explicitly"), and
        # "max_length" is not a text-generation endpoint parameter —
        # max_new_tokens is the supported way to cap the output length.
        return HuggingFaceEndpoint(
            repo_id=HUGGINGFACE_REPO_ID,
            task="text-generation",
            temperature=0.3,
            max_new_tokens=256,
            huggingfacehub_api_token=HF_TOKEN,
        )
    except Exception as e:
        st.error(f"β οΈ Error loading LLM: {str(e)}")
        return None
def set_custom_prompt():
    """Build the prompt template that restricts answers to retrieved context.

    Returns:
        A PromptTemplate expecting 'context' and 'question' variables.
    """
    template_text = """
You are an SEO chatbot with advanced knowledge. Answer based **strictly** on the provided documents.
If the answer is in the context, provide a **clear, professional, and concise** response with sources.
If the question is **outside the given context**, politely decline:
**"I'm sorry, but I can only provide answers based on the available documents."**
**Context:** {context}
**Question:** {question}
**Answer:**
"""
    return PromptTemplate(
        template=template_text,
        input_variables=["context", "question"],
    )
def generate_response(prompt, vectorstore, llm):
    """Answer a user question with retrieval-augmented generation.

    Args:
        prompt: The user's question.
        vectorstore: FAISS store used to retrieve supporting documents.
        llm: Text-generation model that produces the answer.

    Returns:
        The model's answer followed by a formatted source list, or a
        user-facing decline/error message.
    """
    if not (vectorstore and llm):
        return "β Unable to process your request due to initialization issues."
    try:
        # "stuff" chain: concatenate the top-3 retrieved chunks into the prompt.
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={'k': 3}),
            return_source_documents=True,
            chain_type_kwargs={'prompt': set_custom_prompt()},
        )
        output = chain.invoke({'query': prompt})
        answer = output.get("result", "")
        sources = output.get("source_documents", [])
        # No answer or no supporting documents -> decline, per the prompt contract.
        if not (answer and sources):
            return "β Sorry, but I can only provide answers based on the available documents."
        citation_lines = "".join(
            f"\n- {doc.metadata.get('source', 'Unknown')} (Page: {doc.metadata.get('page', 'N/A')})"
            for doc in sources
        )
        return f"{answer}\n\nπ **Sources:**{citation_lines}"
    except Exception as exc:
        return f"β οΈ **Error:** {str(exc)}"
def main():
    """Render the Streamlit chat UI and run the question/answer loop."""
    st.title("π§ Brainmines SEO Chatbot - Your AI Assistant for SEO Queries π")

    # Initialise the retrieval and generation backends (cached by Streamlit).
    vectorstore = get_vectorstore()
    llm = load_llm()
    if not vectorstore or not llm:
        st.error("β οΈ Failed to initialize vector store or LLM. Please check configurations.")
        return

    # Seed the conversation with a greeting on first load.
    if "messages" not in st.session_state:
        st.session_state.messages = [
            {"role": "assistant", "content": "Hello! π I'm here to assist you with SEO-related queries. π"},
        ]

    # Replay the transcript so history survives Streamlit reruns.
    for entry in st.session_state.messages:
        st.chat_message(entry["role"]).markdown(entry["content"])

    user_input = st.chat_input("π¬ Enter your SEO question here")
    if user_input:
        st.chat_message("user").markdown(user_input)
        st.session_state.messages.append({"role": "user", "content": user_input})
        # Show a spinner while retrieval + generation run.
        with st.spinner("Thinking... π€"):
            answer = generate_response(user_input, vectorstore, llm)
        st.chat_message("assistant").markdown(answer)
        st.session_state.messages.append({"role": "assistant", "content": answer})


if __name__ == "__main__":
    main()