Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -165,71 +165,71 @@ def get_file(source_documents):
|
|
| 165 |
return references, files_in_order
|
| 166 |
|
| 167 |
|
| 168 |
-
def build_chain(vectordb: Chroma):
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
# ── Build once at startup (not per Gradio call) ───────────────────────────────
|
| 232 |
-
chain, retriever = build_chain(vectordb) # vectordb initialised elsewhere
|
| 233 |
|
| 234 |
|
| 235 |
# Query Re-write
|
|
@@ -242,17 +242,17 @@ def rewrite_query(question: str, llm) -> str:
|
|
| 242 |
rewrite_prompt = PromptTemplate.from_template("""
|
| 243 |
You are an expert query rewriter for a POWERGRID technical document retrieval system.
|
| 244 |
The document corpus contains:
|
| 245 |
-
- Model Technical Specifications for GIS/AIS substations (220kV / 400kV / 765kV)
|
| 246 |
- IEC and IEEE standards referenced in POWERGRID specs
|
| 247 |
-
- Equipment-specific specs: Circuit Breakers, Isolators, Surge Arresters, CTs, VTs,
|
| 248 |
-
Power Transformers, Reactors, Protection Relays, Control & Relay Panels
|
| 249 |
-
- Specific Requirements
|
| 250 |
|
| 251 |
Your task:
|
| 252 |
1. Expand abbreviations (e.g., CB → Circuit Breaker, SA → Surge Arrester, CT → Current Transformer)
|
| 253 |
2. Add relevant technical keywords likely present in the documents
|
| 254 |
3. Include clause/section indicators if the query implies a specific requirement
|
| 255 |
-
4. If the query is vague, make it specific to power system
|
| 256 |
5. Preserve the original intent — do NOT change what is being asked
|
| 257 |
6. Output ONLY the rewritten query, nothing else
|
| 258 |
|
|
|
|
| 165 |
return references, files_in_order
|
| 166 |
|
| 167 |
|
| 168 |
+
# def build_chain(vectordb: Chroma):
|
| 169 |
+
# system_instruction = (
|
| 170 |
+
# "You are an expert **Electrical Engineer AI Assistant**, specialized in power systems "
|
| 171 |
+
# "and substation design (AIS/GIS up to 765kV), providing insights strictly from the provided context.\n\n"
|
| 172 |
+
# "**Formatting Guidelines:**\n"
|
| 173 |
+
# "1. Organize using **bullet points or numbered lists** where appropriate.\n"
|
| 174 |
+
# "2. **Bold** key technical terms, parameters, and essential facts.\n"
|
| 175 |
+
# "3. Use **technical language** consistent with IEC/IEEE/POWERGRID standards.\n"
|
| 176 |
+
# "4. For multi-step explanations, use **sub-headings** (e.g., `## Sub-section`).\n"
|
| 177 |
+
# "5. **Always include clause references (e.g., Clause XX.XX) for every piece of information.**\n"
|
| 178 |
+
# "6. **CRITICAL: If context contains a table, reproduce it EXACTLY — preserve all rows, "
|
| 179 |
+
# "columns, headers, and alignment. Never paraphrase table data.**\n\n"
|
| 180 |
+
# "**Context Prioritization:**\n"
|
| 181 |
+
# "1. Prioritize documents directly related to the queried equipment type.\n"
|
| 182 |
+
# "2. 'Specific Requirements' clauses **supersede** all other documents — reflect modified clauses first.\n"
|
| 183 |
+
# "3. If context is insufficient: 'The available documents do not contain information regarding [detail].'\n"
|
| 184 |
+
# "4. **Do not invent information** outside the provided context."
|
| 185 |
+
# )
|
| 186 |
+
|
| 187 |
+
# prompt = ChatPromptTemplate.from_messages([
|
| 188 |
+
# SystemMessagePromptTemplate.from_template(system_instruction),
|
| 189 |
+
# MessagesPlaceholder(variable_name="chat_history"),
|
| 190 |
+
# HumanMessagePromptTemplate.from_template(
|
| 191 |
+
# "Context:\n{context}\n\nQuestion:\n{question}"
|
| 192 |
+
# ),
|
| 193 |
+
# ])
|
| 194 |
+
|
| 195 |
+
# # ── Groq LLM ───────────────────────────────────────────────────────────────
|
| 196 |
+
# llm = ChatGroq(
|
| 197 |
+
# model=GROQ_MODEL,
|
| 198 |
+
# temperature=0.1,
|
| 199 |
+
# max_tokens=2048,
|
| 200 |
+
# api_key=GROQ_API_KEY,
|
| 201 |
+
# )
|
| 202 |
+
|
| 203 |
+
# # ── Retriever ──────────────────────────────────────────────────────────────
|
| 204 |
+
# retriever = vectordb.as_retriever(
|
| 205 |
+
# search_type="mmr",
|
| 206 |
+
# search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
|
| 207 |
+
# )
|
| 208 |
+
|
| 209 |
+
# def format_docs(docs):
|
| 210 |
+
# return "\n\n---\n\n".join(doc.page_content for doc in docs)
|
| 211 |
+
|
| 212 |
+
# rag_core = (
|
| 213 |
+
# RunnablePassthrough.assign(
|
| 214 |
+
# context=lambda x: format_docs(retriever.invoke(x["question"]))
|
| 215 |
+
# )
|
| 216 |
+
# | prompt
|
| 217 |
+
# | llm
|
| 218 |
+
# | StrOutputParser()
|
| 219 |
+
# )
|
| 220 |
+
|
| 221 |
+
# chain_with_history = RunnableWithMessageHistory(
|
| 222 |
+
# rag_core,
|
| 223 |
+
# get_session_history,
|
| 224 |
+
# input_messages_key="question",
|
| 225 |
+
# history_messages_key="chat_history",
|
| 226 |
+
# )
|
| 227 |
+
|
| 228 |
+
# return chain_with_history, retriever
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
# # ── Build once at startup (not per Gradio call) ───────────────────────────────
|
| 232 |
+
# chain, retriever = build_chain(vectordb) # vectordb initialised elsewhere
|
| 233 |
|
| 234 |
|
| 235 |
# Query Re-write
|
|
|
|
| 242 |
rewrite_prompt = PromptTemplate.from_template("""
|
| 243 |
You are an expert query rewriter for a POWERGRID technical document retrieval system.
|
| 244 |
The document corpus contains:
|
| 245 |
+
- Model Technical Specifications for various equipment used in GIS/AIS substations (132kV / 220kV / 400kV / 765kV)
|
| 246 |
- IEC and IEEE standards referenced in POWERGRID specs
|
| 247 |
+
- Equipment-specific specs: Circuit Breakers, Isolators, Surge Arresters, CTs, VTs, Gas Insulated Switchgears,
|
| 248 |
+
Power Transformers, Reactors, Protection Relays, Control & Relay Panels, Visual Monitoring Systems (VMS), Switchyard Erection
|
| 249 |
+
- Specific Requirements Document (which supersedes other docs)
|
| 250 |
|
| 251 |
Your task:
|
| 252 |
1. Expand abbreviations (e.g., CB → Circuit Breaker, SA → Surge Arrester, CT → Current Transformer)
|
| 253 |
2. Add relevant technical keywords likely present in the documents
|
| 254 |
3. Include clause/section indicators if the query implies a specific requirement
|
| 255 |
+
4. If the query is vague, make it specific to the power-system substation context
|
| 256 |
5. Preserve the original intent — do NOT change what is being asked
|
| 257 |
6. Output ONLY the rewritten query, nothing else
|
| 258 |
|