"""EV Service Expert — a Streamlit RAG chatbot.

Answers EV customer-support questions by retrieving chunks from a FAISS
vector store built over an internal complaint/resolution PDF (plus an
optional user-uploaded PDF) and feeding them to Gemini.
"""

import os
from pathlib import Path

import streamlit as st
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS
import google.generativeai as genai

st.set_page_config(layout="wide")
st.markdown(""" """, unsafe_allow_html=True)

# The Gemini API key must come from the environment (Space secret); the app
# cannot run without it, so stop rendering immediately if it is absent.
GOOGLE_API = os.getenv("GOOGLE_API")
if not GOOGLE_API:
    st.error("❌ GOOGLE_API key missing. Add it in Space → Settings → Secrets")
    st.stop()

genai.configure(api_key=GOOGLE_API)

DATA_FILE = Path("350_QA_dataset.pdf")  # bundled internal knowledge base
DB_DIR = Path("vectorstore")            # on-disk location of the default FAISS index

SYSTEM_PROMPT = """
You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".

You have access to the following information:
1. Short-term chat history between you and the user.
2. Retrieved context chunks from the internal complaint database.

You must:
- Use the chat history to maintain context across turns.
- Use ONLY the retrieved context as the factual source when giving technical or EV-related answers.

Respond using this structure:
1. Issue summary
2. Likely cause / explanation
3. Recommended solution / actions
4. When to visit the service center

If no matching context exists, say:
"This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
"""


def _split_pdf(pdf_path: Path):
    """Load *pdf_path* and split it into overlapping chunks for embedding.

    Shared by the default-dataset and uploaded-dataset build paths so the
    chunking parameters stay in one place.
    """
    docs = PyPDFLoader(str(pdf_path)).load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    return splitter.split_documents(docs)


@st.cache_resource(show_spinner=False)
def _get_embeddings():
    """Return a single shared Gemini embeddings client (cached per process)."""
    return GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )


def build_store():
    """Build the default FAISS index from DATA_FILE and persist it to DB_DIR.

    Shows a Streamlit error and returns early if the bundled PDF is missing.
    """
    if not DATA_FILE.exists():
        st.error("❌ PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
        return
    chunks = _split_pdf(DATA_FILE)
    vectorstore = FAISS.from_documents(chunks, _get_embeddings())
    DB_DIR.mkdir(exist_ok=True)
    vectorstore.save_local(str(DB_DIR))
    # Invalidate the cached load_store() result (which may be a stale index
    # or a cached None from before the index existed).
    load_store.clear()
    st.success("✅ Vector store built successfully!")


@st.cache_resource(show_spinner=False)
def load_store():
    """Load the persisted default FAISS index, or return None if not built.

    Cached with st.cache_resource so the index is deserialized once per
    process instead of on every chat message; build_store() clears the
    cache after rebuilding.
    """
    index_path = DB_DIR / "index.faiss"
    if not (DB_DIR.exists() and index_path.exists()):
        return None
    # NOTE: load_local uses pickle under the hood; this is safe only because
    # DB_DIR is written by this app itself, never by untrusted users.
    return FAISS.load_local(
        str(DB_DIR),
        _get_embeddings(),
        allow_dangerous_deserialization=True,
    )


def build_store_from_upload(uploaded_file):
    """Build and return an in-memory FAISS index from a user-uploaded PDF.

    The upload is spooled to uploads/user_dataset.pdf because PyPDFLoader
    needs a file path. The resulting store is not persisted to disk.
    """
    uploads_dir = Path("uploads")
    uploads_dir.mkdir(exist_ok=True)
    temp_path = uploads_dir / "user_dataset.pdf"
    temp_path.write_bytes(uploaded_file.getbuffer())
    chunks = _split_pdf(temp_path)
    return FAISS.from_documents(chunks, _get_embeddings())


def format_history(history, max_turns: int = 5) -> str:
    """Render the last *max_turns* chat turns as "User:/Assistant:" lines.

    Returns a placeholder string when there is no prior conversation so the
    prompt template always has something to interpolate.
    """
    if not history:
        return "[No prior conversation]"
    lines = []
    for turn in history[-max_turns:]:
        lines.append(f"User: {turn['user']}")
        lines.append(f"Assistant: {turn['assistant']}")
    return "\n".join(lines)


def answer_query(query, history, user_vectorstore=None):
    """Answer *query* with Gemini, grounded in retrieved context.

    Retrieves up to 5 chunks each from the default store and (if present)
    the user-uploaded store, then prompts the model with the system prompt,
    recent chat history, and the retrieved context.
    """
    docs = []
    base_store = load_store()
    if base_store is not None:
        docs.extend(base_store.similarity_search(query, k=5))
    if user_vectorstore is not None:
        docs.extend(user_vectorstore.similarity_search(query, k=5))

    if not docs:
        context = "[No matching context]"
    else:
        context = "\n\n---\n\n".join([d.page_content for d in docs])

    history_text = format_history(history)
    model = genai.GenerativeModel("gemini-2.5-flash")
    prompt = f"""
{SYSTEM_PROMPT}

Chat history:
{history_text}

Retrieved context:
{context}

User question:
{query}
"""
    response = model.generate_content(prompt)
    return response.text


st.title("🔋 EV Service Expert — RAG Chatbot")

# Per-session state: the running conversation and an optional extra store
# built from a user upload.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "user_vectorstore" not in st.session_state:
    st.session_state.user_vectorstore = None

col1, col2 = st.columns(2)

with col1:
    # Offer a one-click build when the default index is missing.
    index_exists = DB_DIR.exists() and (DB_DIR / "index.faiss").exists()
    if not index_exists:
        st.warning("Default vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
        if st.button("Build Default Vector Store"):
            with st.spinner("Building vector store from internal dataset..."):
                build_store()
    else:
        st.success("✅ Default EV knowledge base loaded.")

with col2:
    uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
    if uploaded_file is not None:
        if st.button("Build Vector Store From Upload"):
            with st.spinner("Building vector store from uploaded dataset..."):
                st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
                st.success("✅ Uploaded dataset vector store ready and will be used in answers.")

st.markdown("### 💬 Conversation")

# Replay the conversation so far (Streamlit reruns the script on each input).
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["user"])
    with st.chat_message("assistant"):
        st.write(turn["assistant"])

user_input = st.chat_input("Ask a question about EV issues:")

if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    with st.chat_message("assistant"):
        with st.spinner("Searching knowledge base..."):
            answer = answer_query(
                user_input,
                st.session_state.chat_history,
                st.session_state.user_vectorstore,
            )
            st.write(answer)
    # Record the turn only after a successful answer so a failed call does
    # not leave a half-empty entry in the history.
    st.session_state.chat_history.append(
        {"user": user_input, "assistant": answer}
    )