File size: 6,469 Bytes
541d841
 
 
e63c50f
541d841
 
ea06a9b
965bbc7
541d841
1d8d910
7d8e014
6694f05
 
d6180cf
 
 
 
 
 
6694f05
ea06a9b
 
 
e63c50f
 
ea06a9b
541d841
b3027e7
ea06a9b
541d841
ea06a9b
 
 
e63c50f
b3027e7
 
c0dfc2e
b3027e7
 
 
 
e63c50f
ea06a9b
 
 
 
 
541d841
ea06a9b
 
 
541d841
c0dfc2e
ea06a9b
541d841
ea06a9b
e63c50f
541d841
 
 
 
ea06a9b
 
 
 
 
541d841
 
e63c50f
ea06a9b
 
e63c50f
541d841
ea06a9b
 
 
541d841
ea06a9b
541d841
ea06a9b
d6180cf
 
 
e63c50f
ea06a9b
 
e63c50f
ea06a9b
e63c50f
eb93663
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3027e7
 
 
 
 
 
 
 
 
 
eb93663
d6180cf
 
eb93663
d6180cf
 
 
eb93663
 
d6180cf
 
 
 
 
 
541d841
b3027e7
541d841
ea06a9b
541d841
ea06a9b
 
541d841
b3027e7
 
 
ea06a9b
 
541d841
ea06a9b
 
 
e63c50f
ea06a9b
 
e63c50f
ea06a9b
541d841
b3027e7
 
eb93663
 
 
 
 
 
d6180cf
 
 
eb93663
 
 
 
 
 
 
 
 
 
 
 
 
541d841
b3027e7
 
 
 
 
 
 
 
 
 
 
 
 
 
eb93663
b3027e7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import os
from pathlib import Path
import streamlit as st

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

import google.generativeai as genai

st.set_page_config(layout="wide")

# Widen chat messages and trim page padding so answers use the full width.
st.markdown("""
<style>
.stChatMessage { width: 100% !important; }
.block-container { padding-left: 1rem; padding-right: 1rem; }
</style>
""", unsafe_allow_html=True)

# The Gemini API key must come from the environment (e.g. a Hugging Face
# Space secret); the app cannot run without it, so stop the script early.
GOOGLE_API = os.getenv("GOOGLE_API")
if not GOOGLE_API:
    st.error("❌ GOOGLE_API key missing. Add it in Space β†’ Settings β†’ Secrets")
    st.stop()

genai.configure(api_key=GOOGLE_API)

# Bundled complaint/resolution dataset and the on-disk FAISS index location.
DATA_FILE = Path("350_QA_dataset.pdf")
DB_DIR = Path("vectorstore")

# System prompt prepended to every Gemini request; it constrains the model to
# answer from retrieved context and imposes a fixed 4-part response layout.
SYSTEM_PROMPT = """
You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".

You have access to the following information:
1. Short-term chat history between you and the user.
2. Retrieved context chunks from the internal complaint database.

You must:
- Use the chat history to maintain context across turns.
- Use ONLY the retrieved context as the factual source when giving technical or EV-related answers.

Respond using this structure:
1. Issue summary
2. Likely cause / explanation
3. Recommended solution / actions
4. When to visit the service center

If no matching context exists, say:
"This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
"""


def build_store():
    """Build and persist the default FAISS vector store from the bundled PDF.

    Loads DATA_FILE, splits it into overlapping chunks, embeds the chunks
    with Google's text-embedding-004 model, and saves the resulting FAISS
    index under DB_DIR. Streamlit status messages report progress/errors.
    Returns None; the index is consumed later via load_store().
    """
    if not DATA_FILE.exists():
        st.error("❌ PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
        return

    pages = PyPDFLoader(str(DATA_FILE)).load()

    # Chunking tuned for Q&A entries: 800 chars with 150-char overlap so a
    # complaint/resolution pair is unlikely to be cut mid-thought.
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""]
    )
    pieces = chunker.split_documents(pages)

    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )

    store = FAISS.from_documents(pieces, embedder)
    DB_DIR.mkdir(exist_ok=True)
    store.save_local(str(DB_DIR))

    st.success("βœ… Vector store built successfully!")

def load_store():
    """Return the persisted default FAISS store, or None when no index exists."""
    # The index file can only exist inside DB_DIR, so one check suffices.
    if not (DB_DIR / "index.faiss").exists():
        return None
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )
    # allow_dangerous_deserialization is acceptable here: the pickle being
    # loaded was produced locally by build_store(), not by untrusted input.
    return FAISS.load_local(str(DB_DIR), embedder, allow_dangerous_deserialization=True)

def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS store from a user-uploaded PDF.

    The store is NOT persisted to DB_DIR; it lives only in the caller's
    session state. The upload is written to a unique temporary file
    (PyPDFLoader requires a real path on disk) and removed afterwards.

    Fix vs. original: the upload used to be written to the fixed path
    'uploads/user_dataset.pdf', so concurrent sessions overwrote each
    other's files and stale uploads accumulated on disk. A per-call
    NamedTemporaryFile removes both problems.

    Parameters
    ----------
    uploaded_file : Streamlit UploadedFile
        The PDF chosen in the file_uploader widget.

    Returns
    -------
    FAISS
        Vector store built from the uploaded document's chunks.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = Path(tmp.name)

    try:
        loader = PyPDFLoader(str(temp_path))
        docs = loader.load()
    finally:
        # Don't leave user uploads on disk once they are chunked.
        temp_path.unlink(missing_ok=True)

    # Same chunking parameters as build_store() so retrieval behaves
    # consistently across the default and user-provided stores.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = splitter.split_documents(docs)

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )

    return FAISS.from_documents(chunks, embeddings)

def format_history(history, max_turns: int = 5) -> str:
    """Render the last *max_turns* chat turns as plain text for the prompt.

    Each turn dict must carry 'user' and 'assistant' keys. Returns a
    placeholder string when there is no prior conversation.
    """
    if not history:
        return "[No prior conversation]"
    recent = history[-max_turns:]
    return "\n".join(
        f"User: {turn['user']}\nAssistant: {turn['assistant']}" for turn in recent
    )

def answer_query(query, history, user_vectorstore=None):
    """Answer *query* via RAG over the default and optional user stores.

    Retrieves up to 5 chunks from the persisted default store (if built)
    and up to 5 from the session's uploaded-PDF store (if any), folds the
    recent chat history and retrieved context into one prompt, and asks
    Gemini for the response text.
    """
    retrieved = []

    default_store = load_store()
    if default_store is not None:
        retrieved.extend(default_store.similarity_search(query, k=5))

    if user_vectorstore is not None:
        retrieved.extend(user_vectorstore.similarity_search(query, k=5))

    if retrieved:
        context = "\n\n---\n\n".join(doc.page_content for doc in retrieved)
    else:
        context = "[No matching context]"

    history_text = format_history(history)

    prompt = (
        f"\n{SYSTEM_PROMPT}\n\n"
        f"Chat history:\n{history_text}\n\n"
        f"Retrieved context:\n{context}\n\n"
        f"User question:\n{query}\n"
    )

    model = genai.GenerativeModel("gemini-2.5-flash")
    return model.generate_content(prompt).text

# ---- Streamlit UI: the script below re-runs top-to-bottom on every rerun ----
st.title("πŸ”‹ EV Service Expert β€” RAG Chatbot")

# Session state survives reruns: the chat transcript plus an optional vector
# store built from a user-uploaded PDF.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "user_vectorstore" not in st.session_state:
    st.session_state.user_vectorstore = None

col1, col2 = st.columns(2)

# Left column: build or confirm the default knowledge base shipped with the app.
with col1:
    index_exists = DB_DIR.exists() and (DB_DIR / "index.faiss").exists()
    if not index_exists:
        st.warning("Default vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
        if st.button("Build Default Vector Store"):
            with st.spinner("Building vector store from internal dataset..."):
                build_store()
    else:
        st.success("βœ… Default EV knowledge base loaded.")

# Right column: optionally index an extra user-supplied PDF for this session only.
with col2:
    uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
    if uploaded_file is not None:
        if st.button("Build Vector Store From Upload"):
            with st.spinner("Building vector store from uploaded dataset..."):
                st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
            st.success("βœ… Uploaded dataset vector store ready and will be used in answers.")

# Replay the conversation so far (needed because the whole script reruns on
# each interaction and chat messages are not persisted by Streamlit itself).
st.markdown("### πŸ’¬ Conversation")
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["user"])
    with st.chat_message("assistant"):
        st.write(turn["assistant"])

user_input = st.chat_input("Ask a question about EV issues:")

# New question: echo it, answer via RAG, then persist the turn. Note the turn
# is appended AFTER answering, so answer_query sees only prior history.
if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    with st.chat_message("assistant"):
        with st.spinner("Searching knowledge base..."):
            answer = answer_query(user_input, st.session_state.chat_history, st.session_state.user_vectorstore)
            st.write(answer)
    st.session_state.chat_history.append(
        {"user": user_input, "assistant": answer}
    )