Nikhithapotnuru commited on
Commit
eb93663
·
verified ·
1 Parent(s): b3027e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -11
app.py CHANGED
@@ -25,7 +25,7 @@ Your primary knowledge source is an internal 350-entry complaint and resolution
25
 
26
  You have access to the following information:
27
  1. Short-term chat history between you and the user.
28
- 2. Retrieved context chunks from the internal complaint database.
29
 
30
  You must:
31
  - Use the chat history to maintain context across turns.
@@ -74,6 +74,31 @@ def load_store():
74
  )
75
  return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  def format_history(history, max_turns: int = 5) -> str:
78
  if not history:
79
  return "[No prior conversation]"
@@ -84,9 +109,12 @@ def format_history(history, max_turns: int = 5) -> str:
84
  lines.append(f"Assistant: {turn['assistant']}")
85
  return "\n".join(lines)
86
 
87
- def answer_query(query, history):
88
- vectorstore = load_store()
89
- docs = vectorstore.similarity_search(query, k=5)
 
 
 
90
 
91
  context = "\n\n---\n\n".join([d.page_content for d in docs]) if docs else "[No matching context]"
92
  history_text = format_history(history)
@@ -113,12 +141,27 @@ st.title("🔋 EV Service Expert — RAG Chatbot")
113
 
114
  if "chat_history" not in st.session_state:
115
  st.session_state.chat_history = []
116
-
117
- if not DB_DIR.exists():
118
- st.warning("Vector store missing. Click the button below to build it.")
119
- if st.button("Build Vector Store"):
120
- with st.spinner("Building vector store..."):
121
- build_store()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
  st.markdown("### 💬 Conversation")
124
  for turn in st.session_state.chat_history:
@@ -134,7 +177,7 @@ if user_input:
134
  st.write(user_input)
135
  with st.chat_message("assistant"):
136
  with st.spinner("Searching knowledge base..."):
137
- answer = answer_query(user_input, st.session_state.chat_history)
138
  st.write(answer)
139
  st.session_state.chat_history.append(
140
  {"user": user_input, "assistant": answer}
 
25
 
26
  You have access to the following information:
27
  1. Short-term chat history between you and the user.
28
+ 2. Retrieved context chunks from the internal complaint database and any uploaded datasets.
29
 
30
  You must:
31
  - Use the chat history to maintain context across turns.
 
74
  )
75
  return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
76
 
77
def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS vector store from a user-uploaded PDF.

    The upload is first persisted under ``uploads/`` (PyPDFLoader reads
    from a file path), then loaded page by page, split into overlapping
    text chunks, and embedded with Google's ``text-embedding-004`` model.

    Args:
        uploaded_file: Streamlit ``UploadedFile`` containing PDF bytes.

    Returns:
        A FAISS vector store indexing the chunked document content.
    """
    # Persist the upload to disk so the PDF loader can open it by path.
    upload_root = Path("uploads")
    upload_root.mkdir(exist_ok=True)
    pdf_path = upload_root / "user_dataset.pdf"
    pdf_path.write_bytes(uploaded_file.getbuffer())

    page_docs = PyPDFLoader(str(pdf_path)).load()

    # Same chunking parameters as the default knowledge base build.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""]
    )
    pieces = text_splitter.split_documents(page_docs)

    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )

    return FAISS.from_documents(pieces, embedder)
102
  def format_history(history, max_turns: int = 5) -> str:
103
  if not history:
104
  return "[No prior conversation]"
 
109
  lines.append(f"Assistant: {turn['assistant']}")
110
  return "\n".join(lines)
111
 
112
+ def answer_query(query, history, user_vectorstore=None):
113
+ base_store = load_store()
114
+ docs = base_store.similarity_search(query, k=5)
115
+ if user_vectorstore is not None:
116
+ user_docs = user_vectorstore.similarity_search(query, k=5)
117
+ docs = user_docs + docs
118
 
119
  context = "\n\n---\n\n".join([d.page_content for d in docs]) if docs else "[No matching context]"
120
  history_text = format_history(history)
 
141
 
142
  if "chat_history" not in st.session_state:
143
  st.session_state.chat_history = []
144
+ if "user_vectorstore" not in st.session_state:
145
+ st.session_state.user_vectorstore = None
146
+
147
+ col1, col2 = st.columns(2)
148
+
149
+ with col1:
150
+ if not DB_DIR.exists():
151
+ st.warning("Vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
152
+ if st.button("Build Default Vector Store"):
153
+ with st.spinner("Building vector store from internal dataset..."):
154
+ build_store()
155
+ else:
156
+ st.success("✅ Default EV knowledge base loaded.")
157
+
158
+ with col2:
159
+ uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
160
+ if uploaded_file is not None:
161
+ if st.button("Build Vector Store From Upload"):
162
+ with st.spinner("Building vector store from uploaded dataset..."):
163
+ st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
164
+ st.success("✅ Uploaded dataset vector store ready and will be used in answers.")
165
 
166
  st.markdown("### 💬 Conversation")
167
  for turn in st.session_state.chat_history:
 
177
  st.write(user_input)
178
  with st.chat_message("assistant"):
179
  with st.spinner("Searching knowledge base..."):
180
+ answer = answer_query(user_input, st.session_state.chat_history, st.session_state.user_vectorstore)
181
  st.write(answer)
182
  st.session_state.chat_history.append(
183
  {"user": user_input, "assistant": answer}