Spaces:

Batrdj
/

Bot

Sleeping

App Files Files Community

Bot / final.py

Batrdj

Update final.py

47f235c verified 11 months ago

raw

history blame contribute delete

4.78 kB

	import os
	import streamlit as st
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.chains import RetrievalQA
	from langchain_community.vectorstores import FAISS
	from langchain_core.prompts import PromptTemplate
	from langchain_huggingface import HuggingFaceEndpoint
	from dotenv import load_dotenv, find_dotenv


	# ✅ Load environment variables from a .env file located by find_dotenv(), if any
	# (load_llm later reads HF_TOKEN from the environment)
	load_dotenv(find_dotenv())

	# ✅ FAISS Database Path (relative path handed to FAISS.load_local in get_vectorstore)
	DB_FAISS_PATH = "vectorstore/db_faiss"

	@st.cache_resource
	def get_vectorstore():
	    """Create the sentence-transformer embeddings and open the FAISS index.

	    The index is read from DB_FAISS_PATH using the
	    'sentence-transformers/all-MiniLM-L6-v2' embedding model.

	    Returns:
	        The loaded FAISS vector store, or None if building the embeddings or
	        reading the index raises; in that case the error text is shown in the
	        Streamlit UI.

	    NOTE(review): the function is wrapped in st.cache_resource, so a None
	    result is presumably cached as well -- confirm whether a failed load
	    should be retried on the next rerun.
	    """
	    try:
	        embeddings = HuggingFaceEmbeddings(
	            model_name='sentence-transformers/all-MiniLM-L6-v2'
	        )
	        # NOTE(review): allow_dangerous_deserialization=True opts in to loading
	        # a serialized index; this should only be done with trusted index files.
	        store = FAISS.load_local(
	            DB_FAISS_PATH,
	            embeddings,
	            allow_dangerous_deserialization=True,
	        )
	    except Exception as exc:
	        st.error(f"⚠️ Error loading vector store: {exc}")
	        return None
	    return store

	@st.cache_resource
	def load_llm():
	    """Create the Hugging Face inference endpoint used for text generation.

	    The API token is read from the HF_TOKEN environment variable.

	    Returns:
	        A configured HuggingFaceEndpoint for
	        "mistralai/Mistral-7B-Instruct-v0.3", or None when the token is
	        missing or the endpoint cannot be constructed. In both failure cases
	        an error message is shown in the Streamlit UI.
	    """
	    HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
	    HF_TOKEN = os.getenv("HF_TOKEN")

	    if not HF_TOKEN:
	        st.error("⚠️ Hugging Face API token is missing. Please check your environment variables.")
	        return None

	    try:
	        # Pass the token and the output-length limit through the endpoint's own
	        # parameters. Entries placed in model_kwargs are forwarded as generation
	        # parameters, where "token" is not used for authentication and
	        # "token"/"max_length" are not accepted by the text-generation call.
	        return HuggingFaceEndpoint(
	            repo_id=HUGGINGFACE_REPO_ID,
	            task="text-generation",
	            temperature=0.3,
	            max_new_tokens=256,
	            huggingfacehub_api_token=HF_TOKEN,
	        )
	    except Exception as e:
	        st.error(f"⚠️ Error loading LLM: {e}")
	        return None

	def set_custom_prompt():
	    """Build the prompt that ties the assistant's answers to the retrieved documents.

	    Returns:
	        A PromptTemplate with the two placeholders ``context`` and ``question``.
	    """
	    # The template text is kept verbatim; only its placement in the code changed.
	    seo_template = """
	You are an SEO chatbot with advanced knowledge. Answer based strictly on the provided documents.

	If the answer is in the context, provide a clear, professional, and concise response with sources.
	If the question is outside the given context, politely decline:

	"I'm sorry, but I can only provide answers based on the available documents."

	Context: {context}
	Question: {question}

	Answer:
	"""
	    return PromptTemplate(
	        input_variables=["context", "question"],
	        template=seo_template,
	    )

	def generate_response(prompt, vectorstore, llm):
	    """Answer a question by retrieving documents and querying the LLM.

	    Args:
	        prompt: The user's question text.
	        vectorstore: Vector store used to build the retriever (top 3 matches).
	        llm: Language model handed to the RetrievalQA "stuff" chain.

	    Returns:
	        The answer followed by a "Sources" list (source and page taken from
	        each document's metadata), or a fallback/error message string when a
	        component is missing, the chain yields no answer or no source
	        documents, or an exception is raised.
	    """
	    # Guard clause: both components are required before any work is done.
	    if not (vectorstore and llm):
	        return "❌ Unable to process your request due to initialization issues."

	    try:
	        retriever = vectorstore.as_retriever(search_kwargs={'k': 3})
	        chain = RetrievalQA.from_chain_type(
	            llm=llm,
	            chain_type="stuff",
	            retriever=retriever,
	            return_source_documents=True,
	            chain_type_kwargs={'prompt': set_custom_prompt()},
	        )

	        output = chain.invoke({'query': prompt})
	        answer = output.get("result", "")
	        docs = output.get("source_documents", [])

	        # An empty answer or an empty document list is treated as "not covered".
	        if not (answer and docs):
	            return "❌ Sorry, but I can only provide answers based on the available documents."

	        # One bullet per retrieved document, with defaults for missing metadata.
	        source_lines = []
	        for doc in docs:
	            origin = doc.metadata.get('source', 'Unknown')
	            page = doc.metadata.get('page', 'N/A')
	            source_lines.append(f"\n- {origin} (Page: {page})")
	        sources_block = "\n\n📚 Sources:" + "".join(source_lines)
	        return f"{answer}{sources_block}"

	    except Exception as exc:
	        return f"⚠️ Error: {exc}"

	def main():
	    """Render the chat page and handle at most one user question per run.

	    Shows the title, loads the vector store and LLM (stopping with an error
	    if either is unavailable), replays the stored conversation, and, when the
	    user submits a question, displays and stores both the question and the
	    generated answer.
	    """
	    st.title("🧠 Brainmines SEO Chatbot - Your AI Assistant for SEO Queries 🚀")

	    # ✅ Both backends are required before the chat is shown
	    vectorstore = get_vectorstore()
	    llm = load_llm()
	    if not (vectorstore and llm):
	        st.error("⚠️ Failed to initialize vector store or LLM. Please check configurations.")
	        return

	    # ✅ Seed the conversation with a greeting the first time through
	    if "messages" not in st.session_state:
	        greeting = {"role": "assistant", "content": "Hello! 👋 I'm here to assist you with SEO-related queries. 🚀"}
	        st.session_state.messages = [greeting]

	    # ✅ Replay the stored conversation
	    for entry in st.session_state.messages:
	        st.chat_message(entry["role"]).markdown(entry["content"])

	    prompt = st.chat_input("💬 Enter your SEO question here")
	    if not prompt:
	        # Nothing was submitted on this run.
	        return

	    # ✅ Show and record the user's question
	    st.chat_message("user").markdown(prompt)
	    st.session_state.messages.append({"role": "user", "content": prompt})

	    with st.spinner("Thinking... 🤔"):
	        response = generate_response(prompt, vectorstore, llm)

	    # ✅ Show and record the assistant's reply
	    st.chat_message("assistant").markdown(response)
	    st.session_state.messages.append({"role": "assistant", "content": response})

	# Entry-point guard: call main() only when this file is executed as the main
	# script, not when it is imported as a module.
	if __name__ == "__main__":
	main()