# Nikhithapotnuru — Update app.py (c0dfc2e verified)
import os
import tempfile
from pathlib import Path

import google.generativeai as genai
import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
# --- Page setup ---
# set_page_config must be the first Streamlit call in the script; the CSS
# widens chat messages and trims the default side padding.
st.set_page_config(layout="wide")
st.markdown("""
<style>
.stChatMessage { width: 100% !important; }
.block-container { padding-left: 1rem; padding-right: 1rem; }
</style>
""", unsafe_allow_html=True)

# --- API key ---
# Read the Gemini key from the environment (set as a Space secret) and halt
# the whole script early if it is missing, before any model/embedding calls.
GOOGLE_API = os.getenv("GOOGLE_API")
if not GOOGLE_API:
    st.error("❌ GOOGLE_API key missing. Add it in Space β†’ Settings β†’ Secrets")
    st.stop()
genai.configure(api_key=GOOGLE_API)

# --- Constants ---
# DATA_FILE: bundled knowledge-base PDF; DB_DIR: on-disk FAISS index location.
DATA_FILE = Path("350_QA_dataset.pdf")
DB_DIR = Path("vectorstore")

# System prompt prepended to every Gemini request (see answer_query): defines
# the assistant persona, the grounding rules, and the required answer format.
SYSTEM_PROMPT = """
You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".
You have access to the following information:
1. Short-term chat history between you and the user.
2. Retrieved context chunks from the internal complaint database.
You must:
- Use the chat history to maintain context across turns.
- Use ONLY the retrieved context as the factual source when giving technical or EV-related answers.
Respond using this structure:
1. Issue summary
2. Likely cause / explanation
3. Recommended solution / actions
4. When to visit the service center
If no matching context exists, say:
"This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
"""
def build_store():
    """Build and persist the default FAISS index from the bundled QA PDF.

    Loads ``DATA_FILE``, splits it into overlapping chunks, embeds the
    chunks with Gemini embeddings, and saves the resulting index under
    ``DB_DIR``. Reports progress/errors through Streamlit; returns None.
    """
    if not DATA_FILE.exists():
        st.error("❌ PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
        return

    # Load the PDF pages and cut them into overlapping chunks so each
    # complaint/resolution entry survives the split mostly intact.
    pages = PyPDFLoader(str(DATA_FILE)).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    pieces = text_splitter.split_documents(pages)

    # Embed and persist the index for later load_store() calls.
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    index = FAISS.from_documents(pieces, embedder)
    DB_DIR.mkdir(exist_ok=True)
    index.save_local(str(DB_DIR))
    st.success("βœ… Vector store built successfully!")
def load_store():
    """Load the persisted FAISS index from ``DB_DIR``.

    Returns the FAISS vector store, or None when the index has not been
    built yet (no ``index.faiss`` file on disk).
    """
    faiss_file = DB_DIR / "index.faiss"
    if not DB_DIR.exists() or not faiss_file.exists():
        return None
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    # Deserialization flag is required by FAISS.load_local for pickled
    # metadata; safe here because we only load indexes this app wrote.
    return FAISS.load_local(
        str(DB_DIR), embedder, allow_dangerous_deserialization=True
    )
def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS store from a user-uploaded PDF.

    Parameters
    ----------
    uploaded_file : streamlit.runtime.uploaded_file_manager.UploadedFile
        The PDF chosen in the uploader; its bytes are read via ``getbuffer()``.

    Returns
    -------
    FAISS
        A vector store over the uploaded document's chunks (never persisted).

    Fix: the previous version wrote every upload to the same
    ``uploads/user_dataset.pdf`` — concurrent users clobbered each other and
    the file was never removed. Each call now uses a unique temporary file
    (PyPDFLoader needs a real filesystem path) that is always cleaned up.
    """
    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = tmp.name
    try:
        docs = PyPDFLoader(temp_path).load()
    finally:
        os.unlink(temp_path)  # remove even if PDF parsing fails

    # Same chunking parameters as the default store so retrieval behaves
    # consistently across both indexes.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_documents(docs)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    return FAISS.from_documents(chunks, embeddings)
def format_history(history, max_turns: int = 5) -> str:
    """Render the last *max_turns* chat turns as "User:/Assistant:" lines.

    Each turn is a dict with ``user`` and ``assistant`` keys. Returns a
    placeholder string when there is no prior conversation.
    """
    if not history:
        return "[No prior conversation]"
    recent = history[-max_turns:]
    return "\n".join(
        f"User: {turn['user']}\nAssistant: {turn['assistant']}"
        for turn in recent
    )
def answer_query(query, history, user_vectorstore=None):
    """Answer *query* with Gemini, grounded in retrieved context.

    Parameters
    ----------
    query : str
        The user's question.
    history : list[dict]
        Prior turns as ``{"user": ..., "assistant": ...}`` dicts.
    user_vectorstore : FAISS | None
        Optional extra store built from an uploaded PDF; searched alongside
        the default store when present.

    Returns
    -------
    str
        The model's answer, or a fallback message if generation was blocked.

    Fix: the base FAISS index was re-read from disk on every question; a
    successfully loaded store is now cached in session state. Also guards
    ``response.text``, which raises ValueError when Gemini returns no
    candidates (e.g. safety-blocked).
    """
    docs = []

    # Cache the loaded base store per session; only cache on success so a
    # not-yet-built index is retried on the next question.
    base_store = st.session_state.get("_base_store")
    if base_store is None:
        base_store = load_store()
        if base_store is not None:
            st.session_state["_base_store"] = base_store

    if base_store is not None:
        docs.extend(base_store.similarity_search(query, k=5))
    if user_vectorstore is not None:
        docs.extend(user_vectorstore.similarity_search(query, k=5))

    if docs:
        context = "\n\n---\n\n".join(d.page_content for d in docs)
    else:
        context = "[No matching context]"

    history_text = format_history(history)
    model = genai.GenerativeModel("gemini-2.5-flash")
    prompt = f"""
{SYSTEM_PROMPT}
Chat history:
{history_text}
Retrieved context:
{context}
User question:
{query}
"""
    response = model.generate_content(prompt)
    try:
        return response.text
    except ValueError:
        # .text raises when the response has no candidates (blocked/filtered).
        return "The model did not return a response (it may have been blocked). Please rephrase your question."
# --- UI: title and per-session state ---
# Streamlit re-runs this whole script on every interaction, so the chat
# transcript and the optional user-built store live in session_state.
st.title("πŸ”‹ EV Service Expert β€” RAG Chatbot")
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "user_vectorstore" not in st.session_state:
    st.session_state.user_vectorstore = None

col1, col2 = st.columns(2)
with col1:
    # Left column: status of the default (bundled) knowledge base, with a
    # one-click build button when the on-disk index is missing.
    index_exists = DB_DIR.exists() and (DB_DIR / "index.faiss").exists()
    if not index_exists:
        st.warning("Default vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
        if st.button("Build Default Vector Store"):
            with st.spinner("Building vector store from internal dataset..."):
                build_store()
    else:
        st.success("βœ… Default EV knowledge base loaded.")
with col2:
    # Right column: optional extra PDF from the user, indexed in memory only
    # (kept in session_state, not persisted to DB_DIR).
    uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
    if uploaded_file is not None:
        if st.button("Build Vector Store From Upload"):
            with st.spinner("Building vector store from uploaded dataset..."):
                st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
            st.success("βœ… Uploaded dataset vector store ready and will be used in answers.")

# Replay the saved transcript so the conversation survives script reruns.
st.markdown("### πŸ’¬ Conversation")
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["user"])
    with st.chat_message("assistant"):
        st.write(turn["assistant"])

# New question: echo it, answer via RAG, then append the turn to history
# (history passed to answer_query is the state BEFORE this turn).
user_input = st.chat_input("Ask a question about EV issues:")
if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    with st.chat_message("assistant"):
        with st.spinner("Searching knowledge base..."):
            answer = answer_query(user_input, st.session_state.chat_history, st.session_state.user_vectorstore)
        st.write(answer)
    st.session_state.chat_history.append(
        {"user": user_input, "assistant": answer}
    )