File size: 6,469 Bytes
541d841
 
 
e63c50f
541d841
 
ea06a9b
965bbc7
541d841
1d8d910
7d8e014
6694f05
 
d6180cf
 
 
 
 
 
6694f05
ea06a9b
 
 
e63c50f
 
ea06a9b
541d841
b3027e7
ea06a9b
541d841
ea06a9b
 
 
e63c50f
b3027e7
 
c0dfc2e
b3027e7
 
 
 
e63c50f
ea06a9b
 
 
 
 
541d841
ea06a9b
 
 
541d841
c0dfc2e
ea06a9b
541d841
ea06a9b
e63c50f
541d841
 
 
 
ea06a9b
 
 
 
 
541d841
 
e63c50f
ea06a9b
 
e63c50f
541d841
ea06a9b
 
 
541d841
ea06a9b
541d841
ea06a9b
d6180cf
 
 
e63c50f
ea06a9b
 
e63c50f
ea06a9b
e63c50f
eb93663
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b3027e7
 
 
 
 
 
 
 
 
 
eb93663
d6180cf
 
eb93663
d6180cf
 
 
eb93663
 
d6180cf
 
 
 
 
 
541d841
b3027e7
541d841
ea06a9b
541d841
ea06a9b
 
541d841
b3027e7
 
 
ea06a9b
 
541d841
ea06a9b
 
 
e63c50f
ea06a9b
 
e63c50f
ea06a9b
541d841
b3027e7
 
eb93663
 
 
 
 
 
d6180cf
 
 
eb93663
 
 
 
 
 
 
 
 
 
 
 
 
541d841
b3027e7
 
 
 
 
 
 
 
 
 
 
 
 
 
eb93663
b3027e7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import os
from pathlib import Path
import streamlit as st

from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

import google.generativeai as genai

st.set_page_config(layout="wide")

# Widen chat messages and trim page padding so answers use the full width.
st.markdown("""
<style>
.stChatMessage { width: 100% !important; }
.block-container { padding-left: 1rem; padding-right: 1rem; }
</style>
""", unsafe_allow_html=True)

# The Gemini API key must come from the environment (e.g. a Hugging Face
# Space secret); the app cannot run without it, so stop the script early.
GOOGLE_API = os.getenv("GOOGLE_API")
if not GOOGLE_API:
    st.error("❌ GOOGLE_API key missing. Add it in Space β†’ Settings β†’ Secrets")
    st.stop()

genai.configure(api_key=GOOGLE_API)

# Bundled complaint/resolution dataset and the on-disk FAISS index location.
DATA_FILE = Path("350_QA_dataset.pdf")
DB_DIR = Path("vectorstore")

# System prompt prepended to every Gemini request; it constrains the model to
# answer from retrieved context and imposes a fixed 4-part response layout.
SYSTEM_PROMPT = """
You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".

You have access to the following information:
1. Short-term chat history between you and the user.
2. Retrieved context chunks from the internal complaint database.

You must:
- Use the chat history to maintain context across turns.
- Use ONLY the retrieved context as the factual source when giving technical or EV-related answers.

Respond using this structure:
1. Issue summary
2. Likely cause / explanation
3. Recommended solution / actions
4. When to visit the service center

If no matching context exists, say:
"This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
"""


def build_store():
    """Build and persist the default FAISS vector store from the bundled PDF.

    Loads DATA_FILE, splits it into overlapping chunks, embeds the chunks
    with Google's text-embedding-004 model, and saves the resulting FAISS
    index under DB_DIR. Streamlit status messages report progress/errors.
    Returns None; the index is consumed later via load_store().
    """
    if not DATA_FILE.exists():
        st.error("❌ PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
        return

    pages = PyPDFLoader(str(DATA_FILE)).load()

    # Chunking tuned for Q&A entries: 800 chars with 150-char overlap so a
    # complaint/resolution pair is unlikely to be cut mid-thought.
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""]
    )
    pieces = chunker.split_documents(pages)

    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )

    store = FAISS.from_documents(pieces, embedder)
    DB_DIR.mkdir(exist_ok=True)
    store.save_local(str(DB_DIR))

    st.success("βœ… Vector store built successfully!")

def load_store():
    """Return the persisted default FAISS store, or None when no index exists."""
    # The index file can only exist inside DB_DIR, so one check suffices.
    if not (DB_DIR / "index.faiss").exists():
        return None
    embedder = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )
    # allow_dangerous_deserialization is acceptable here: the pickle being
    # loaded was produced locally by build_store(), not by untrusted input.
    return FAISS.load_local(str(DB_DIR), embedder, allow_dangerous_deserialization=True)

def build_store_from_upload(uploaded_file):
    """Build an in-memory FAISS store from a user-uploaded PDF.

    The store is NOT persisted to DB_DIR; it lives only in the caller's
    session state. The upload is written to a unique temporary file
    (PyPDFLoader requires a real path on disk) and removed afterwards.

    Fix vs. original: the upload used to be written to the fixed path
    'uploads/user_dataset.pdf', so concurrent sessions overwrote each
    other's files and stale uploads accumulated on disk. A per-call
    NamedTemporaryFile removes both problems.

    Parameters
    ----------
    uploaded_file : Streamlit UploadedFile
        The PDF chosen in the file_uploader widget.

    Returns
    -------
    FAISS
        Vector store built from the uploaded document's chunks.
    """
    import tempfile

    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as tmp:
        tmp.write(uploaded_file.getbuffer())
        temp_path = Path(tmp.name)

    try:
        loader = PyPDFLoader(str(temp_path))
        docs = loader.load()
    finally:
        # Don't leave user uploads on disk once they are chunked.
        temp_path.unlink(missing_ok=True)

    # Same chunking parameters as build_store() so retrieval behaves
    # consistently across the default and user-provided stores.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=150,
        separators=["\n\n", "\n", " ", ""]
    )
    chunks = splitter.split_documents(docs)

    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/text-embedding-004",
        google_api_key=GOOGLE_API
    )

    return FAISS.from_documents(chunks, embeddings)

def format_history(history, max_turns: int = 5) -> str:
    """Render the last *max_turns* chat turns as plain text for the prompt.

    Each turn dict must carry 'user' and 'assistant' keys. Returns a
    placeholder string when there is no prior conversation.
    """
    if not history:
        return "[No prior conversation]"
    recent = history[-max_turns:]
    return "\n".join(
        f"User: {turn['user']}\nAssistant: {turn['assistant']}" for turn in recent
    )

def answer_query(query, history, user_vectorstore=None):
    """Answer *query* via RAG over the default and optional user stores.

    Retrieves up to 5 chunks from the persisted default store (if built)
    and up to 5 from the session's uploaded-PDF store (if any), folds the
    recent chat history and retrieved context into one prompt, and asks
    Gemini for the response text.
    """
    retrieved = []

    default_store = load_store()
    if default_store is not None:
        retrieved.extend(default_store.similarity_search(query, k=5))

    if user_vectorstore is not None:
        retrieved.extend(user_vectorstore.similarity_search(query, k=5))

    if retrieved:
        context = "\n\n---\n\n".join(doc.page_content for doc in retrieved)
    else:
        context = "[No matching context]"

    history_text = format_history(history)

    prompt = (
        f"\n{SYSTEM_PROMPT}\n\n"
        f"Chat history:\n{history_text}\n\n"
        f"Retrieved context:\n{context}\n\n"
        f"User question:\n{query}\n"
    )

    model = genai.GenerativeModel("gemini-2.5-flash")
    return model.generate_content(prompt).text

# ---- Streamlit UI: the script below re-runs top-to-bottom on every rerun ----
st.title("πŸ”‹ EV Service Expert β€” RAG Chatbot")

# Session state survives reruns: the chat transcript plus an optional vector
# store built from a user-uploaded PDF.
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []
if "user_vectorstore" not in st.session_state:
    st.session_state.user_vectorstore = None

col1, col2 = st.columns(2)

# Left column: build or confirm the default knowledge base shipped with the app.
with col1:
    index_exists = DB_DIR.exists() and (DB_DIR / "index.faiss").exists()
    if not index_exists:
        st.warning("Default vector store missing. Click the button below to build it from 350_QA_dataset.pdf.")
        if st.button("Build Default Vector Store"):
            with st.spinner("Building vector store from internal dataset..."):
                build_store()
    else:
        st.success("βœ… Default EV knowledge base loaded.")

# Right column: optionally index an extra user-supplied PDF for this session only.
with col2:
    uploaded_file = st.file_uploader("Upload additional EV PDF dataset", type=["pdf"])
    if uploaded_file is not None:
        if st.button("Build Vector Store From Upload"):
            with st.spinner("Building vector store from uploaded dataset..."):
                st.session_state.user_vectorstore = build_store_from_upload(uploaded_file)
            st.success("βœ… Uploaded dataset vector store ready and will be used in answers.")

# Replay the conversation so far (needed because the whole script reruns on
# each interaction and chat messages are not persisted by Streamlit itself).
st.markdown("### πŸ’¬ Conversation")
for turn in st.session_state.chat_history:
    with st.chat_message("user"):
        st.write(turn["user"])
    with st.chat_message("assistant"):
        st.write(turn["assistant"])

user_input = st.chat_input("Ask a question about EV issues:")

# New question: echo it, answer via RAG, then persist the turn. Note the turn
# is appended AFTER answering, so answer_query sees only prior history.
if user_input:
    with st.chat_message("user"):
        st.write(user_input)
    with st.chat_message("assistant"):
        with st.spinner("Searching knowledge base..."):
            answer = answer_query(user_input, st.session_state.chat_history, st.session_state.user_vectorstore)
            st.write(answer)
    st.session_state.chat_history.append(
        {"user": user_input, "assistant": answer}
    )