Batrdj committed on
Commit
47f235c
Β·
verified Β·
1 Parent(s): f05b61d

Update final.py

Browse files
Files changed (1) hide show
  1. final.py +131 -130
final.py CHANGED
@@ -1,130 +1,131 @@
1
- import os
2
- import streamlit as st
3
- from langchain.embeddings import HuggingFaceEmbeddings
4
- from langchain.chains import RetrievalQA
5
- from langchain_community.vectorstores import FAISS
6
- from langchain_core.prompts import PromptTemplate
7
- from langchain_huggingface import HuggingFaceEndpoint
8
- from dotenv import load_dotenv, find_dotenv
9
-
10
- # βœ… Load environment variables
11
- load_dotenv(find_dotenv())
12
-
13
- # βœ… FAISS Database Path
14
- DB_FAISS_PATH = "vectorstore/db_faiss"
15
-
16
- @st.cache_resource
17
- def get_vectorstore():
18
- """Loads the FAISS vector store with embeddings."""
19
- try:
20
- embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
21
- return FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
22
- except Exception as e:
23
- st.error(f"⚠️ Error loading vector store: {str(e)}")
24
- return None
25
-
26
- @st.cache_resource
27
- def load_llm():
28
- """Loads the Hugging Face LLM model for text generation."""
29
- HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
30
- HF_TOKEN = os.getenv("HF_TOKEN")
31
-
32
- if not HF_TOKEN:
33
- st.error("⚠️ Hugging Face API token is missing. Please check your environment variables.")
34
- return None
35
-
36
- try:
37
- return HuggingFaceEndpoint(
38
- repo_id=HUGGINGFACE_REPO_ID,
39
- task="text-generation",
40
- temperature=0.3,
41
- model_kwargs={"token": HF_TOKEN, "max_length": 256}
42
- )
43
- except Exception as e:
44
- st.error(f"⚠️ Error loading LLM: {str(e)}")
45
- return None
46
-
47
- def set_custom_prompt():
48
- """Defines the chatbot's behavior with a custom prompt template."""
49
- return PromptTemplate(
50
- template="""
51
- You are an SEO chatbot with advanced knowledge. Answer based **strictly** on the provided documents.
52
-
53
- If the answer is in the context, provide a **clear, professional, and concise** response with sources.
54
- If the question is **outside the given context**, politely decline:
55
-
56
- **"I'm sorry, but I can only provide answers based on the available documents."**
57
-
58
- **Context:** {context}
59
- **Question:** {question}
60
-
61
- **Answer:**
62
- """,
63
- input_variables=["context", "question"]
64
- )
65
-
66
- def generate_response(prompt, vectorstore, llm):
67
- """Retrieves relevant documents and generates a response from the LLM."""
68
- if not vectorstore or not llm:
69
- return "❌ Unable to process your request due to initialization issues."
70
-
71
- try:
72
- qa_chain = RetrievalQA.from_chain_type(
73
- llm=llm,
74
- chain_type="stuff",
75
- retriever=vectorstore.as_retriever(search_kwargs={'k': 3}),
76
- return_source_documents=True,
77
- chain_type_kwargs={'prompt': set_custom_prompt()}
78
- )
79
-
80
- response_data = qa_chain.invoke({'query': prompt})
81
- result = response_data.get("result", "")
82
- source_documents = response_data.get("source_documents", [])
83
-
84
- if not result or not source_documents:
85
- return "❌ Sorry, but I can only provide answers based on the available documents."
86
-
87
- formatted_sources = "\n\nπŸ“š **Sources:**" + "".join(
88
- [f"\n- {doc.metadata.get('source', 'Unknown')} (Page: {doc.metadata.get('page', 'N/A')})" for doc in source_documents]
89
- )
90
- return f"{result}{formatted_sources}"
91
-
92
- except Exception as e:
93
- return f"⚠️ **Error:** {str(e)}"
94
-
95
- def main():
96
- """Runs the Streamlit chatbot application."""
97
- st.title("🧠 Brainmines SEO Chatbot - Your AI Assistant for SEO Queries πŸš€")
98
-
99
- # βœ… Load vector store and LLM
100
- vectorstore = get_vectorstore()
101
- llm = load_llm()
102
-
103
- if not vectorstore or not llm:
104
- st.error("⚠️ Failed to initialize vector store or LLM. Please check configurations.")
105
- return
106
-
107
- # βœ… Initialize session state
108
- if "messages" not in st.session_state:
109
- st.session_state.messages = [
110
- {"role": "assistant", "content": "Hello! πŸ‘‹ I'm here to assist you with SEO-related queries. πŸš€"},
111
- ]
112
-
113
- # βœ… Display chat history
114
- for message in st.session_state.messages:
115
- st.chat_message(message["role"]).markdown(message["content"])
116
-
117
- prompt = st.chat_input("πŸ’¬ Enter your SEO question here")
118
-
119
- if prompt:
120
- st.chat_message("user").markdown(prompt)
121
- st.session_state.messages.append({"role": "user", "content": prompt})
122
-
123
- with st.spinner("Thinking... πŸ€”"):
124
- response = generate_response(prompt, vectorstore, llm)
125
-
126
- st.chat_message("assistant").markdown(response)
127
- st.session_state.messages.append({"role": "assistant", "content": response})
128
-
129
- if __name__ == "__main__":
130
- main()
 
 
1
+ import os
2
+ import streamlit as st
3
+ from langchain.embeddings import HuggingFaceEmbeddings
4
+ from langchain.chains import RetrievalQA
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_core.prompts import PromptTemplate
7
+ from langchain_huggingface import HuggingFaceEndpoint
8
+ from dotenv import load_dotenv, find_dotenv
9
+
10
+
11
# Load environment variables (e.g. HF_TOKEN) from a .env file, if one exists.
load_dotenv(find_dotenv())

# Path to the locally persisted FAISS index directory.
DB_FAISS_PATH = "vectorstore/db_faiss"
16
+
17
@st.cache_resource
def get_vectorstore():
    """Load (and cache for the session) the FAISS vector store.

    Returns:
        The FAISS store loaded from ``DB_FAISS_PATH`` with MiniLM embeddings,
        or ``None`` on failure (the error is shown in the Streamlit UI).
    """
    try:
        embeddings = HuggingFaceEmbeddings(
            model_name='sentence-transformers/all-MiniLM-L6-v2'
        )
        # allow_dangerous_deserialization is required to unpickle a local,
        # self-created index; only safe because the path is our own artifact.
        store = FAISS.load_local(
            DB_FAISS_PATH,
            embeddings,
            allow_dangerous_deserialization=True,
        )
    except Exception as e:
        st.error(f"⚠️ Error loading vector store: {str(e)}")
        return None
    return store
26
+
27
@st.cache_resource
def load_llm():
    """Load (and cache) the Hugging Face Inference Endpoint LLM.

    Returns:
        A configured ``HuggingFaceEndpoint`` for Mistral-7B-Instruct, or
        ``None`` when the HF_TOKEN environment variable is missing or the
        endpoint cannot be created (errors surface in the Streamlit UI).
    """
    HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
    HF_TOKEN = os.getenv("HF_TOKEN")

    if not HF_TOKEN:
        st.error("⚠️ Hugging Face API token is missing. Please check your environment variables.")
        return None

    try:
        # FIX: pass the token and output length as explicit endpoint
        # parameters. The previous model_kwargs={"token": ..., "max_length": ...}
        # form is rejected/ignored by langchain_huggingface: the API token
        # belongs in `huggingfacehub_api_token`, and text-generation length is
        # controlled by `max_new_tokens`, not "max_length".
        return HuggingFaceEndpoint(
            repo_id=HUGGINGFACE_REPO_ID,
            task="text-generation",
            temperature=0.3,
            max_new_tokens=256,
            huggingfacehub_api_token=HF_TOKEN,
        )
    except Exception as e:
        st.error(f"⚠️ Error loading LLM: {str(e)}")
        return None
47
+
48
def set_custom_prompt():
    """Build the grounded-answer prompt for the RetrievalQA chain.

    The template instructs the model to answer strictly from the retrieved
    documents and to decline out-of-context questions. It expects two input
    variables: ``context`` (retrieved chunks) and ``question`` (user query).
    """
    return PromptTemplate(
        template="""
        You are an SEO chatbot with advanced knowledge. Answer based **strictly** on the provided documents.

        If the answer is in the context, provide a **clear, professional, and concise** response with sources.
        If the question is **outside the given context**, politely decline:

        **"I'm sorry, but I can only provide answers based on the available documents."**

        **Context:** {context}
        **Question:** {question}

        **Answer:**
        """,
        input_variables=["context", "question"]
    )
66
+
67
def generate_response(prompt, vectorstore, llm):
    """Answer *prompt* via RAG: retrieve top-3 chunks, then query the LLM.

    Returns a markdown string: the model's answer followed by a sources
    section, or a user-facing decline/error message.
    """
    if not vectorstore or not llm:
        return "❌ Unable to process your request due to initialization issues."

    try:
        retriever = vectorstore.as_retriever(search_kwargs={'k': 3})
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
            return_source_documents=True,
            chain_type_kwargs={'prompt': set_custom_prompt()},
        )

        output = chain.invoke({'query': prompt})
        answer = output.get("result", "")
        docs = output.get("source_documents", [])

        # Decline whenever generation or retrieval produced nothing usable.
        if not answer or not docs:
            return "❌ Sorry, but I can only provide answers based on the available documents."

        # Build one bullet per retrieved chunk: "- <source> (Page: <page>)".
        citation_lines = []
        for doc in docs:
            src = doc.metadata.get('source', 'Unknown')
            page = doc.metadata.get('page', 'N/A')
            citation_lines.append(f"\n- {src} (Page: {page})")
        citations = "\n\nπŸ“š **Sources:**" + "".join(citation_lines)

        return f"{answer}{citations}"

    except Exception as e:
        return f"⚠️ **Error:** {str(e)}"
95
+
96
def main():
    """Streamlit entry point: render the chat UI and run the RAG loop."""
    st.title("🧠 Brainmines SEO Chatbot - Your AI Assistant for SEO Queries πŸš€")

    # Both loaders are @st.cache_resource-backed, so reruns are cheap.
    vectorstore = get_vectorstore()
    llm = load_llm()

    if not vectorstore or not llm:
        st.error("⚠️ Failed to initialize vector store or LLM. Please check configurations.")
        return

    # Seed the conversation with a greeting on first page load.
    if "messages" not in st.session_state:
        greeting = {"role": "assistant", "content": "Hello! πŸ‘‹ I'm here to assist you with SEO-related queries. πŸš€"}
        st.session_state.messages = [greeting]

    # Replay the conversation so far (Streamlit reruns the script per event).
    for entry in st.session_state.messages:
        st.chat_message(entry["role"]).markdown(entry["content"])

    user_text = st.chat_input("πŸ’¬ Enter your SEO question here")
    if not user_text:
        return

    st.chat_message("user").markdown(user_text)
    st.session_state.messages.append({"role": "user", "content": user_text})

    with st.spinner("Thinking... πŸ€”"):
        reply = generate_response(user_text, vectorstore, llm)

    st.chat_message("assistant").markdown(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})


if __name__ == "__main__":
    main()