Update app.py
Browse files
app.py
CHANGED
|
@@ -25,7 +25,7 @@ Your primary knowledge source is an internal 350-entry complaint and resolution
|
|
| 25 |
|
| 26 |
You have access to the following information:
|
| 27 |
1. Short-term chat history between you and the user.
|
| 28 |
-
2. Retrieved context chunks from the internal complaint database.
|
| 29 |
|
| 30 |
You must:
|
| 31 |
- Use the chat history to maintain context across turns.
|
|
@@ -74,6 +74,31 @@ def load_store():
|
|
| 74 |
)
|
| 75 |
return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
|
| 76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
def format_history(history, max_turns: int = 5) -> str:
|
| 78 |
if not history:
|
| 79 |
return "[No prior conversation]"
|
|
@@ -84,9 +109,12 @@ def format_history(history, max_turns: int = 5) -> str:
|
|
| 84 |
lines.append(f"Assistant: {turn['assistant']}")
|
| 85 |
return "\n".join(lines)
|
| 86 |
|
| 87 |
-
def answer_query(query, history):
|
| 88 |
-
|
| 89 |
-
docs =
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
context = "\n\n---\n\n".join([d.page_content for d in docs]) if docs else "[No matching context]"
|
| 92 |
history_text = format_history(history)
|
|
@@ -113,12 +141,27 @@ st.title("π EV Service Expert β RAG Chatbot")
|
|
| 113 |
|
| 114 |
if "chat_history" not in st.session_state:
|
| 115 |
st.session_state.chat_history = []
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
st.markdown("### 💬 Conversation")
|
| 124 |
for turn in st.session_state.chat_history:
|
|
@@ -134,7 +177,7 @@ if user_input:
|
|
| 134 |
st.write(user_input)
|
| 135 |
with st.chat_message("assistant"):
|
| 136 |
with st.spinner("Searching knowledge base..."):
|
| 137 |
-
answer = answer_query(user_input, st.session_state.chat_history)
|
| 138 |
st.write(answer)
|
| 139 |
st.session_state.chat_history.append(
|
| 140 |
{"user": user_input, "assistant": answer}
|
|
|
|
| 25 |
|
| 26 |
You have access to the following information:
|
| 27 |
1. Short-term chat history between you and the user.
|
| 28 |
+
2. Retrieved context chunks from the internal complaint database and any uploaded datasets.
|
| 29 |
|
| 30 |
You must:
|
| 31 |
- Use the chat history to maintain context across turns.
|
|
|
|
| 74 |
)
|
| 75 |
return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
|
| 76 |
|
| 77 |
+
def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS vector store from an uploaded PDF.

    The upload is written to a uniquely named temporary file under
    ``uploads/``, parsed with ``PyPDFLoader``, split into overlapping
    chunks, embedded with the Google ``text-embedding-004`` model and
    indexed with FAISS.

    Args:
        uploaded_file: Object exposing ``getbuffer()`` that returns the raw
            PDF bytes (presumably a Streamlit ``UploadedFile`` -- confirm
            against the caller).

    Returns:
        The FAISS vector store built from the uploaded document's chunks.

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    import os
    import tempfile

    uploads_dir = Path("uploads")
    uploads_dir.mkdir(exist_ok=True)

    # A unique file name per call keeps two concurrent uploads from
    # overwriting each other between the write and the load below.
    fd, temp_name = tempfile.mkstemp(
        suffix=".pdf", prefix="user_dataset_", dir=uploads_dir
    )
    temp_path = Path(temp_name)
    try:
        with os.fdopen(fd, "wb") as f:
            f.write(uploaded_file.getbuffer())

        loader = PyPDFLoader(str(temp_path))
        docs = loader.load()
    finally:
        # The on-disk copy is only needed while the loader parses it.
        temp_path.unlink(missing_ok=True)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_documents(docs)

    # Fail early with a clear message instead of handing an empty list to
    # the embedding / FAISS layer (e.g. for scanned, image-only PDFs).
    if not chunks:
        raise ValueError("No extractable text found in the uploaded PDF.")

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )

    return FAISS.from_documents(chunks, embeddings)
|
| 101 |
+
|
| 102 |
def format_history(history, max_turns: int = 5) -> str:
|
| 103 |
if not history:
|
| 104 |
return "[No prior conversation]"
|
|
|
|
| 109 |
lines.append(f"Assistant: {turn['assistant']}")
|
| 110 |
return "\n".join(lines)
|
| 111 |
|
| 112 |
+
def answer_query(query, history, user_vectorstore=None):
|
| 113 |
+
base_store = load_store()
|
| 114 |
+
docs = base_store.similarity_search(query, k=5)
|
| 115 |
+
if user_vectorstore is not None:
|
| 116 |
+
user_docs = user_vectorstore.similarity_search(query, k=5)
|
| 117 |
+
docs = user_docs + docs
|
| 118 |
|
| 119 |
context = "\n\n---\n\n".join([d.page_content for d in docs]) if docs else "[No matching context]"
|
| 120 |
history_text = format_history(history)
|
|
|
|
| 141 |
|
| 142 |
if "chat_history" not in st.session_state:
|
| 143 |
st.session_state.chat_history = []
|
| 144 |
+
if "user_vectorstore" not in st.session_state:
|
| 145 |
+
st.session_state.user_vectorstore = None
|
| 146 |
+
|
| 147 |
+
col1, col2 = st.columns(2)
|
| 148 |
+
|
| 149 |
+
with col1:
|
| 150 |
+
if not DB_DIR.exists():
|
| 151 |
+
st.warning("Vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
|
| 152 |
+
if st.button("Build Default Vector Store"):
|
| 153 |
+
with st.spinner("Building vector store from internal dataset..."):
|
| 154 |
+
build_store()
|
| 155 |
+
else:
|
| 156 |
+
st.success("✅ Default EV knowledge base loaded.")
|
| 157 |
+
|
| 158 |
+
with col2:
|
| 159 |
+
uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
|
| 160 |
+
if uploaded_file is not None:
|
| 161 |
+
if st.button("Build Vector Store From Upload"):
|
| 162 |
+
with st.spinner("Building vector store from uploaded dataset..."):
|
| 163 |
+
st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
|
| 164 |
+
st.success("✅ Uploaded dataset vector store ready and will be used in answers.")
|
| 165 |
|
| 166 |
st.markdown("### 💬 Conversation")
|
| 167 |
for turn in st.session_state.chat_history:
|
|
|
|
| 177 |
st.write(user_input)
|
| 178 |
with st.chat_message("assistant"):
|
| 179 |
with st.spinner("Searching knowledge base..."):
|
| 180 |
+
answer = answer_query(user_input, st.session_state.chat_history, st.session_state.user_vectorstore)
|
| 181 |
st.write(answer)
|
| 182 |
st.session_state.chat_history.append(
|
| 183 |
{"user": user_input, "assistant": answer}
|