Nikhithapotnuru committed on
Commit
ea06a9b
Β·
verified Β·
1 Parent(s): df8ae4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -123
app.py CHANGED
@@ -1,171 +1,134 @@
1
  import os
2
  from pathlib import Path
3
- from dotenv import load_dotenv
4
  import streamlit as st
 
5
 
6
- # LangChain modules
7
  from langchain_community.document_loaders import PyPDFLoader
8
  from langchain_text_splitters import RecursiveCharacterTextSplitter
9
- from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
10
- from langchain_community.vectorstores import Chroma
11
 
12
- # Google Generative AI
13
  import google.generativeai as genai
14
 
 
 
 
15
  load_dotenv()
16
-
17
- # -------------------------
18
- # Config
19
- # -------------------------
20
- GOOGLE_API_KEY = os.getenv("GOOGLE_API")
21
- if not GOOGLE_API_KEY:
22
- st.error("❌ GOOGLE_API key missing. Add it under: Space β†’ Settings β†’ Secrets.")
23
  st.stop()
24
 
25
- genai.configure(api_key=GOOGLE_API_KEY)
26
-
27
- WORKDIR = Path(".")
28
- DATA_FILE = WORKDIR / "350_QA_dataset.pdf"
29
- CHROMA_DIR = WORKDIR / "chroma_db"
30
 
 
 
 
 
 
31
 
32
- # -------------------------
33
- # Detect Google Models
34
- # -------------------------
35
- def pick_models():
36
- models = genai.list_models()
37
-
38
- embed = None
39
- chat = None
40
-
41
- for m in models:
42
- caps = getattr(m, "supported_generation_methods", [])
43
- if "embedText" in caps and embed is None:
44
- embed = m.name
45
- if ("generateContent" in caps or "generateText" in caps) and chat is None:
46
- chat = m.name
47
-
48
- if embed is None:
49
- embed = "models/text-embedding-004"
50
 
51
- if chat is None:
52
- chat = "models/gemini-1.5-flash" # common universal model
53
 
54
- return embed, chat
 
 
 
 
55
 
 
 
 
56
 
57
- # -------------------------
58
- # Build vectorstore
59
- # -------------------------
60
- def build_vectorstore(embed_model):
61
  if not DATA_FILE.exists():
62
- st.error("❌ PDF file missing. Upload 350_QA_dataset.pdf to Space root.")
63
  return
64
 
65
- st.info("πŸ“„ Loading PDF...")
66
  loader = PyPDFLoader(str(DATA_FILE))
67
  docs = loader.load()
68
 
69
- splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=150)
 
 
 
 
70
  chunks = splitter.split_documents(docs)
71
 
72
- st.info(f"πŸ”’ Creating embeddings using: {embed_model}")
73
  embeddings = GoogleGenerativeAIEmbeddings(
74
- model=embed_model,
75
- google_api_key=GOOGLE_API_KEY
76
  )
77
 
78
- st.info("πŸ“¦ Building Chroma vector DB...")
79
- if CHROMA_DIR.exists():
80
- import shutil
81
- shutil.rmtree(CHROMA_DIR)
82
-
83
- db = Chroma.from_documents(chunks, embedding=embeddings, persist_directory=str(CHROMA_DIR))
84
- db.persist()
85
 
86
- st.success("βœ… Vector store created successfully!")
87
 
88
 
89
- # -------------------------
90
- # System Prompt
91
- # -------------------------
92
- SYSTEM_PROMPT = """
93
- You are an EV Service Expert Assistant for customer support.
94
-
95
- Use ONLY retrieved context.
96
-
97
- Respond using this structure:
98
- 1. Issue summary
99
- 2. Likely cause
100
- 3. Recommended solution
101
- 4. When to visit service center
102
- """
103
-
104
-
105
- # -------------------------
106
- # RAG Query
107
- # -------------------------
108
- def rag_query(chat_model, query, history, k=4):
109
- if not CHROMA_DIR.exists():
110
- return "❌ Vector DB missing. Build it first.", []
111
-
112
  embeddings = GoogleGenerativeAIEmbeddings(
113
- model=chat_model,
114
- google_api_key=GOOGLE_API_KEY
115
  )
116
- db = Chroma(persist_directory=str(CHROMA_DIR), embedding_function=embeddings)
117
 
118
- docs = db.similarity_search(query, k=k)
119
- ctx = "\n\n---\n".join([d.page_content for d in docs]) if docs else "[No context found]"
120
 
121
- model = genai.GenerativeModel(chat_model)
 
 
 
 
 
122
 
123
- prompt = [
124
- SYSTEM_PROMPT,
125
- "\n\nPrevious conversation:\n",
126
- str(history),
127
- "\n\nContext:\n",
128
- ctx,
129
- "\n\nUser Query:\n",
130
- query
131
- ]
132
 
133
- response = model.generate_content(prompt)
134
- return response.text, docs
135
 
 
 
136
 
137
- # -------------------------
138
- # Streamlit UI
139
- # -------------------------
140
- st.title("⚑ EV RAG Assistant – Hugging Face Space")
141
 
142
- embed_model, chat_model = pick_models()
 
 
143
 
144
- with st.expander("Detected Models"):
145
- st.write("Embedding model:", embed_model)
146
- st.write("Chat model:", chat_model)
147
 
148
- if st.button("πŸ“˜ Build Vector Store"):
149
- build_vectorstore(embed_model)
150
 
151
- st.divider()
 
 
 
152
 
153
- if "messages" not in st.session_state:
154
- st.session_state.messages = []
 
 
 
 
155
 
156
- query = st.text_input("Enter your EV issue:")
 
157
 
158
  if st.button("Submit") and query.strip():
159
- answer, docs = rag_query(chat_model, query, st.session_state.messages)
160
- st.session_state.messages.append({"role": "user", "content": query})
161
- st.session_state.messages.append({"role": "assistant", "content": answer, "sources": docs})
162
-
163
- for m in st.session_state.messages:
164
- if m["role"] == "user":
165
- st.markdown(f"**User:** {m['content']}")
166
- else:
167
- st.markdown(f"**Assistant:** {m['content']}")
168
- if m.get("sources"):
169
- with st.expander("Retrieved Context"):
170
- for d in m["sources"]:
171
- st.write(d.page_content)
 
1
  import os
2
  from pathlib import Path
 
3
  import streamlit as st
4
+ from dotenv import load_dotenv
5
 
 
6
  from langchain_community.document_loaders import PyPDFLoader
7
  from langchain_text_splitters import RecursiveCharacterTextSplitter
8
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
9
+ from langchain_community.vectorstores import FAISS
10
 
 
11
  import google.generativeai as genai
12
 
13
+ # -----------------------------
14
+ # Load API key
15
+ # -----------------------------
16
  load_dotenv()
17
+ GOOGLE_API = os.getenv("GOOGLE_API")
18
+ if not GOOGLE_API:
19
+ st.error("❌ GOOGLE_API key missing. Add it in Space β†’ Settings β†’ Secrets")
 
 
 
 
20
  st.stop()
21
 
22
+ genai.configure(api_key=GOOGLE_API)
 
 
 
 
23
 
24
+ # -----------------------------
25
+ # File paths
26
+ # -----------------------------
27
+ DATA_FILE = Path("350_QA_dataset.pdf") # upload this PDF to Space root
28
+ DB_DIR = Path("vectorstore")
29
 
30
+ # -----------------------------
31
+ # System Prompt
32
+ # -----------------------------
33
+ SYSTEM_PROMPT = """
34
+ You are an EV Service Expert Assistant for a customer support team of an electric vehicle manufacturer.
35
+ Your primary knowledge source is an internal 350-entry complaint and resolution knowledge base extracted from "350_QA_dataset.pdf".
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ You will receive retrieved chunks and the user's query. Use ONLY the retrieved context.
 
38
 
39
+ Respond using this structure:
40
+ 1. Issue summary
41
+ 2. Likely cause / explanation
42
+ 3. Recommended solution / actions
43
+ 4. When to visit the service center
44
 
45
+ If no matching context exists, say:
46
+ "This specific issue is not covered in my internal EV complaint database. Based on general patterns, here are some safe next steps..."
47
+ """
48
 
49
+ # -----------------------------
50
+ # Build vector store
51
+ # -----------------------------
52
+ def build_store():
53
  if not DATA_FILE.exists():
54
+ st.error("❌ PDF file missing. Upload '350_QA_dataset.pdf' in the Space root.")
55
  return
56
 
 
57
  loader = PyPDFLoader(str(DATA_FILE))
58
  docs = loader.load()
59
 
60
+ splitter = RecursiveCharacterTextSplitter(
61
+ chunk_size=800,
62
+ chunk_overlap=150,
63
+ separators=["\n\n", "\n", " ", ""]
64
+ )
65
  chunks = splitter.split_documents(docs)
66
 
 
67
  embeddings = GoogleGenerativeAIEmbeddings(
68
+ model="models/text-embedding-004",
69
+ google_api_key=GOOGLE_API
70
  )
71
 
72
+ vectorstore = FAISS.from_documents(chunks, embeddings)
73
+ DB_DIR.mkdir(exist_ok=True)
74
+ vectorstore.save_local(str(DB_DIR))
 
 
 
 
75
 
76
+ st.success("βœ… Vector store built successfully!")
77
 
78
 
79
+ # -----------------------------
80
+ # Load vector store
81
+ # -----------------------------
82
+ def load_store():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  embeddings = GoogleGenerativeAIEmbeddings(
84
+ model="models/text-embedding-004",
85
+ google_api_key=GOOGLE_API
86
  )
87
+ return FAISS.load_local(str(DB_DIR), embeddings, allow_dangerous_deserialization=True)
88
 
 
 
89
 
90
+ # -----------------------------
91
+ # Query function
92
+ # -----------------------------
93
+ def answer_query(query):
94
+ vectorstore = load_store()
95
+ docs = vectorstore.similarity_search(query, k=5)
96
 
97
+ context = "\n\n---\n\n".join([d.page_content for d in docs]) if docs else "[No matching context]"
 
 
 
 
 
 
 
 
98
 
99
+ model = genai.GenerativeModel("gemini-2.5-flash")
 
100
 
101
+ prompt = f"""
102
+ {SYSTEM_PROMPT}
103
 
104
+ Retrieved context:
105
+ {context}
 
 
106
 
107
+ User question:
108
+ {query}
109
+ """
110
 
111
+ response = model.generate_content(prompt)
112
+ return response.text
 
113
 
 
 
114
 
115
+ # -----------------------------
116
+ # Streamlit UI
117
+ # -----------------------------
118
+ st.title("πŸ”‹ EV Service Expert β€” RAG Chatbot")
119
 
120
+ # Build vector store button
121
+ if not DB_DIR.exists():
122
+ st.warning("Vector store missing. Click the button below to build it.")
123
+ if st.button("Build Vector Store"):
124
+ with st.spinner("Building vector store..."):
125
+ build_store()
126
 
127
+ # Query input
128
+ query = st.text_input("Ask a question about EV issues:")
129
 
130
  if st.button("Submit") and query.strip():
131
+ with st.spinner("Searching knowledge base..."):
132
+ answer = answer_query(query)
133
+ st.markdown("### 🧠 Assistant Response")
134
+ st.write(answer)