| import os | |
| from pathlib import Path | |
| import streamlit as st | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
| from langchain_community.vectorstores import FAISS | |
| import google.generativeai as genai | |
# --- Streamlit page setup ----------------------------------------------------
st.set_page_config(layout="wide")

# Widen chat messages and trim horizontal padding so the chat fills the page.
st.markdown("""
<style>
.stChatMessage { width: 100% !important; }
.block-container { padding-left: 1rem; padding-right: 1rem; }
</style>
""", unsafe_allow_html=True)

# --- Gemini API configuration ------------------------------------------------
# The key is expected as an environment secret; without it the app cannot run.
GOOGLE_API = os.getenv("GOOGLE_API")
if not GOOGLE_API:
    st.error("β GOOGLE_API key missing. Add it in Space β Settings β Secrets")
    st.stop()
genai.configure(api_key=GOOGLE_API)

# --- Paths -------------------------------------------------------------------
DATA_FILE = Path("350_QA_dataset.pdf")  # bundled internal knowledge-base PDF
DB_DIR = Path("vectorstore")            # on-disk FAISS index directory

# System prompt injected verbatim at the top of every model request.
SYSTEM_PROMPT = """
You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".
You have access to the following information:
1. Short-term chat history between you and the user.
2. Retrieved context chunks from the internal complaint database.
You must:
- Use the chat history to maintain context across turns.
- Use ONLY the retrieved context as the factual source when giving technical or EV-related answers.
Respond using this structure:
1. Issue summary
2. Likely cause / explanation
3. Recommended solution / actions
4. When to visit the service center
If no matching context exists, say:
"This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
"""
def build_store():
    """Build and persist the default FAISS vector store from the bundled PDF.

    Loads ``350_QA_dataset.pdf``, splits it into overlapping chunks, embeds
    the chunks with Gemini embeddings, and saves the FAISS index under
    ``vectorstore/``.

    Returns:
        The built FAISS vector store, or ``None`` when the source PDF is
        missing (an error is surfaced in the UI instead of raising). Returning
        the store lets callers use it immediately without re-loading it from
        disk; existing callers that ignore the return value are unaffected.
    """
    if not DATA_FILE.exists():
        st.error("β PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
        return None
    loader = PyPDFLoader(str(DATA_FILE))
    docs = loader.load()
    # 800-char chunks with 150 overlap keep a Q&A entry's question and its
    # resolution together across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    chunks = splitter.split_documents(docs)
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    vectorstore = FAISS.from_documents(chunks, embeddings)
    # parents=True makes this robust if DB_DIR is ever nested.
    DB_DIR.mkdir(parents=True, exist_ok=True)
    vectorstore.save_local(str(DB_DIR))
    st.success("β Vector store built successfully!")
    return vectorstore
def load_store():
    """Load the persisted default FAISS index, or return ``None`` if absent."""
    if not (DB_DIR.exists() and (DB_DIR / "index.faiss").exists()):
        return None
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    # NOTE(review): allow_dangerous_deserialization is required by FAISS for
    # pickle-backed indexes; the index here is built locally by this app, so
    # the input is trusted — confirm this still holds if DB_DIR ever becomes
    # user-supplied.
    return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS store from a user-uploaded PDF.

    The upload is first written to ``uploads/user_dataset.pdf`` because
    PyPDFLoader reads from a filesystem path. The resulting store is returned
    to the caller and is not persisted to disk.
    """
    uploads_dir = Path("uploads")
    uploads_dir.mkdir(exist_ok=True)
    pdf_path = uploads_dir / "user_dataset.pdf"
    pdf_path.write_bytes(uploaded_file.getbuffer())

    documents = PyPDFLoader(str(pdf_path)).load()
    # Same chunking parameters as the default store so retrieval quality is
    # comparable across both sources.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""],
    )
    pieces = text_splitter.split_documents(documents)
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API,
    )
    return FAISS.from_documents(pieces, embedder)
def format_history(history, max_turns: int = 5) -> str:
    """Render the last *max_turns* chat turns as a plain-text transcript.

    Each turn is a dict with ``user`` and ``assistant`` keys; the output is
    alternating ``User:`` / ``Assistant:`` lines. Returns a placeholder
    string when there is no history yet.
    """
    if not history:
        return "[No prior conversation]"
    recent = history[-max_turns:]
    return "\n".join(
        line
        for turn in recent
        for line in (f"User: {turn['user']}", f"Assistant: {turn['assistant']}")
    )
def answer_query(query, history, user_vectorstore=None):
    """Answer a user question via RAG over the default and uploaded stores.

    Retrieves up to 5 chunks from the persisted default store (if present)
    and up to 5 from the optional user-built store, then sends system prompt,
    recent chat history, retrieved context, and the question to Gemini.

    Returns the model's text response.
    """
    retrieved = []
    base_store = load_store()
    if base_store is not None:
        retrieved += base_store.similarity_search(query, k=5)
    if user_vectorstore is not None:
        retrieved += user_vectorstore.similarity_search(query, k=5)

    # Fall back to an explicit marker so the prompt's "no matching context"
    # instruction can trigger.
    context = (
        "\n\n---\n\n".join(doc.page_content for doc in retrieved)
        if retrieved
        else "[No matching context]"
    )
    history_text = format_history(history)

    model = genai.GenerativeModel("gemini-2.5-flash")
    prompt = f"""
{SYSTEM_PROMPT}
Chat history:
{history_text}
Retrieved context:
{context}
User question:
{query}
"""
    response = model.generate_content(prompt)
    return response.text
# --- UI ----------------------------------------------------------------------
st.title("π EV Service Expert β RAG Chatbot")

# Session state: accumulated chat turns plus the optional user-built store,
# both of which must survive Streamlit reruns.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "user_vectorstore" not in st.session_state:
    st.session_state.user_vectorstore = None

col1, col2 = st.columns(2)

with col1:
    # Left column: status of the default knowledge base, with a build button
    # when the on-disk index is missing.
    index_exists = DB_DIR.exists() and (DB_DIR / "index.faiss").exists()
    if index_exists:
        st.success("β Default EV knowledge base loaded.")
    else:
        st.warning("Default vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
        if st.button("Build Default Vector Store"):
            with st.spinner("Building vector store from internal dataset..."):
                build_store()

with col2:
    # Right column: optional user-supplied PDF; the button only renders once
    # a file has been uploaded.
    uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
    if uploaded_file is not None and st.button("Build Vector Store From Upload"):
        with st.spinner("Building vector store from uploaded dataset..."):
            st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
        st.success("β Uploaded dataset vector store ready and will be used in answers.")

st.markdown("### π¬ Conversation")
# Replay the stored conversation on every rerun.
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["user"])
    with st.chat_message("assistant"):
        st.write(turn["assistant"])

user_input = st.chat_input("Ask a question about EV issues:")
if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    with st.chat_message("assistant"):
        with st.spinner("Searching knowledge base..."):
            answer = answer_query(
                user_input,
                st.session_state.chat_history,
                st.session_state.user_vectorstore,
            )
        st.write(answer)
    # Record the turn only after the answer succeeds.
    st.session_state.chat_history.append({"user": user_input, "assistant": answer})