abhivsh committed on
Commit
e77448e
·
verified ·
1 Parent(s): 4db6d40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -70
app.py CHANGED
@@ -165,71 +165,71 @@ def get_file(source_documents):
165
  return references, files_in_order
166
 
167
 
168
- def build_chain(vectordb: Chroma):
169
- system_instruction = (
170
- "You are an expert **Electrical Engineer AI Assistant**, specialized in power systems "
171
- "and substation design (AIS/GIS up to 765kV), providing insights strictly from the provided context.\n\n"
172
- "**Formatting Guidelines:**\n"
173
- "1. Organize using **bullet points or numbered lists** where appropriate.\n"
174
- "2. **Bold** key technical terms, parameters, and essential facts.\n"
175
- "3. Use **technical language** consistent with IEC/IEEE/POWERGRID standards.\n"
176
- "4. For multi-step explanations, use **sub-headings** (e.g., `## Sub-section`).\n"
177
- "5. **Always include clause references (e.g., Clause XX.XX) for every piece of information.**\n"
178
- "6. **CRITICAL: If context contains a table, reproduce it EXACTLY — preserve all rows, "
179
- "columns, headers, and alignment. Never paraphrase table data.**\n\n"
180
- "**Context Prioritization:**\n"
181
- "1. Prioritize documents directly related to the queried equipment type.\n"
182
- "2. 'Specific Requirements' clauses **supersede** all other documents — reflect modified clauses first.\n"
183
- "3. If context is insufficient: 'The available documents do not contain information regarding [detail].'\n"
184
- "4. **Do not invent information** outside the provided context."
185
- )
186
-
187
- prompt = ChatPromptTemplate.from_messages([
188
- SystemMessagePromptTemplate.from_template(system_instruction),
189
- MessagesPlaceholder(variable_name="chat_history"),
190
- HumanMessagePromptTemplate.from_template(
191
- "Context:\n{context}\n\nQuestion:\n{question}"
192
- ),
193
- ])
194
-
195
- # ── Groq LLM ───────────────────────────────────────────────────────────────
196
- llm = ChatGroq(
197
- model=GROQ_MODEL,
198
- temperature=0.1,
199
- max_tokens=2048,
200
- api_key=GROQ_API_KEY,
201
- )
202
-
203
- # ── Retriever ──────────────────────────────────────────────────────────────
204
- retriever = vectordb.as_retriever(
205
- search_type="mmr",
206
- search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
207
- )
208
-
209
- def format_docs(docs):
210
- return "\n\n---\n\n".join(doc.page_content for doc in docs)
211
-
212
- rag_core = (
213
- RunnablePassthrough.assign(
214
- context=lambda x: format_docs(retriever.invoke(x["question"]))
215
- )
216
- | prompt
217
- | llm
218
- | StrOutputParser()
219
- )
220
-
221
- chain_with_history = RunnableWithMessageHistory(
222
- rag_core,
223
- get_session_history,
224
- input_messages_key="question",
225
- history_messages_key="chat_history",
226
- )
227
-
228
- return chain_with_history, retriever
229
-
230
-
231
- # ── Build once at startup (not per Gradio call) ───────────────────────────────
232
- chain, retriever = build_chain(vectordb) # vectordb initialised elsewhere
233
 
234
 
235
  # Query Re-write
@@ -242,17 +242,17 @@ def rewrite_query(question: str, llm) -> str:
242
  rewrite_prompt = PromptTemplate.from_template("""
243
  You are an expert query rewriter for a POWERGRID technical document retrieval system.
244
  The document corpus contains:
245
- - Model Technical Specifications for GIS/AIS substations (220kV / 400kV / 765kV)
246
  - IEC and IEEE standards referenced in POWERGRID specs
247
- - Equipment-specific specs: Circuit Breakers, Isolators, Surge Arresters, CTs, VTs,
248
- Power Transformers, Reactors, Protection Relays, Control & Relay Panels
249
- - Specific Requirements documents (which supersede other docs)
250
 
251
  Your task:
252
  1. Expand abbreviations (e.g., CB → Circuit Breaker, SA → Surge Arrester, CT → Current Transformer)
253
  2. Add relevant technical keywords likely present in the documents
254
  3. Include clause/section indicators if the query implies a specific requirement
255
- 4. If the query is vague, make it specific to power system substation context
256
  5. Preserve the original intent — do NOT change what is being asked
257
  6. Output ONLY the rewritten query, nothing else
258
 
 
165
  return references, files_in_order
166
 
167
 
168
+ # def build_chain(vectordb: Chroma):
169
+ # system_instruction = (
170
+ # "You are an expert **Electrical Engineer AI Assistant**, specialized in power systems "
171
+ # "and substation design (AIS/GIS up to 765kV), providing insights strictly from the provided context.\n\n"
172
+ # "**Formatting Guidelines:**\n"
173
+ # "1. Organize using **bullet points or numbered lists** where appropriate.\n"
174
+ # "2. **Bold** key technical terms, parameters, and essential facts.\n"
175
+ # "3. Use **technical language** consistent with IEC/IEEE/POWERGRID standards.\n"
176
+ # "4. For multi-step explanations, use **sub-headings** (e.g., `## Sub-section`).\n"
177
+ # "5. **Always include clause references (e.g., Clause XX.XX) for every piece of information.**\n"
178
+ # "6. **CRITICAL: If context contains a table, reproduce it EXACTLY — preserve all rows, "
179
+ # "columns, headers, and alignment. Never paraphrase table data.**\n\n"
180
+ # "**Context Prioritization:**\n"
181
+ # "1. Prioritize documents directly related to the queried equipment type.\n"
182
+ # "2. 'Specific Requirements' clauses **supersede** all other documents — reflect modified clauses first.\n"
183
+ # "3. If context is insufficient: 'The available documents do not contain information regarding [detail].'\n"
184
+ # "4. **Do not invent information** outside the provided context."
185
+ # )
186
+
187
+ # prompt = ChatPromptTemplate.from_messages([
188
+ # SystemMessagePromptTemplate.from_template(system_instruction),
189
+ # MessagesPlaceholder(variable_name="chat_history"),
190
+ # HumanMessagePromptTemplate.from_template(
191
+ # "Context:\n{context}\n\nQuestion:\n{question}"
192
+ # ),
193
+ # ])
194
+
195
+ # # ── Groq LLM ───────────────────────────────────────────────────────────────
196
+ # llm = ChatGroq(
197
+ # model=GROQ_MODEL,
198
+ # temperature=0.1,
199
+ # max_tokens=2048,
200
+ # api_key=GROQ_API_KEY,
201
+ # )
202
+
203
+ # # ── Retriever ──────────────────────────────────────────────────────────────
204
+ # retriever = vectordb.as_retriever(
205
+ # search_type="mmr",
206
+ # search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
207
+ # )
208
+
209
+ # def format_docs(docs):
210
+ # return "\n\n---\n\n".join(doc.page_content for doc in docs)
211
+
212
+ # rag_core = (
213
+ # RunnablePassthrough.assign(
214
+ # context=lambda x: format_docs(retriever.invoke(x["question"]))
215
+ # )
216
+ # | prompt
217
+ # | llm
218
+ # | StrOutputParser()
219
+ # )
220
+
221
+ # chain_with_history = RunnableWithMessageHistory(
222
+ # rag_core,
223
+ # get_session_history,
224
+ # input_messages_key="question",
225
+ # history_messages_key="chat_history",
226
+ # )
227
+
228
+ # return chain_with_history, retriever
229
+
230
+
231
+ # # ── Build once at startup (not per Gradio call) ───────────────────────────────
232
+ # chain, retriever = build_chain(vectordb) # vectordb initialised elsewhere
233
 
234
 
235
  # Query Re-write
 
242
  rewrite_prompt = PromptTemplate.from_template("""
243
  You are an expert query rewriter for a POWERGRID technical document retrieval system.
244
  The document corpus contains:
245
+ - Model Technical Specifications for various equipments used in GIS/AIS substations (132kV /220kV / 400kV / 765kV)
246
  - IEC and IEEE standards referenced in POWERGRID specs
247
+ - Equipment-specific specs: Circuit Breakers, Isolators, Surge Arresters, CTs, VTs, Gas Insulated Switchgears,
248
+ Power Transformers, Reactors, Protection Relays, Control & Relay Panels, Visual Monitoring Systems (VMS), Switchyard Erection
249
+ - Specific Requirements Document (which supersede other docs)
250
 
251
  Your task:
252
  1. Expand abbreviations (e.g., CB → Circuit Breaker, SA → Surge Arrester, CT → Current Transformer)
253
  2. Add relevant technical keywords likely present in the documents
254
  3. Include clause/section indicators if the query implies a specific requirement
255
+ 4. If the query is vague, make it specific to power system Substation context
256
  5. Preserve the original intent — do NOT change what is being asked
257
  6. Output ONLY the rewritten query, nothing else
258