Spaces:

abhivsh
/

ModelTS_SearchEngine

Running

App Files Files Community

abhivsh committed 12 days ago

Commit

f3eaf2c

verified ·

1 Parent(s): eb6a5e8

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -63

app.py CHANGED Viewed

@@ -165,76 +165,76 @@ def get_file(source_documents):
     return references, files_in_order
-# def build_chain(vectordb: Chroma):
-#     system_instruction = (
-#         "You are an expert **Electrical Engineer AI Assistant**, specialized in power systems "
-#         "and substation design (AIS/GIS up to 765kV), providing insights strictly from the provided context.\n\n"
-#         "**Formatting Guidelines:**\n"
-#         "1. Organize using **bullet points or numbered lists** where appropriate.\n"
-#         "2. **Bold** key technical terms, parameters, and essential facts.\n"
-#         "3. Use **technical language** consistent with IEC/IEEE/POWERGRID standards.\n"
-#         "4. For multi-step explanations, use **sub-headings** (e.g., `## Sub-section`).\n"
-#         "5. **Always include clause references (e.g., Clause XX.XX) for every piece of information.**\n"
-#         "6. **CRITICAL: If context contains a table, reproduce it EXACTLY — preserve all rows, "
-#         "columns, headers, and alignment. Never paraphrase table data.**\n\n"
-#         "**Context Prioritization:**\n"
-#         "1. Prioritize documents directly related to the queried equipment type.\n"
-#         "2. 'Specific Requirements' clauses **supersede** all other documents — reflect modified clauses first.\n"
-#         "3. If context is insufficient: 'The available documents do not contain information regarding [detail].'\n"
-#         "4. **Do not invent information** outside the provided context."
-#     )
-#     prompt = ChatPromptTemplate.from_messages([
-#         SystemMessagePromptTemplate.from_template(system_instruction),
-#         MessagesPlaceholder(variable_name="chat_history"),
-#         HumanMessagePromptTemplate.from_template(
-#             "Context:\n{context}\n\nQuestion:\n{question}"
-#         ),
-#     ])
-#     # ── Groq LLM ───────────────────────────────────────────────────────────────
-#     llm = ChatGroq(
-#         model=GROQ_MODEL,
-#         temperature=0.1,
-#         max_tokens=2048,
-#         api_key=GROQ_API_KEY,
-#     )
-#     # ── Retriever ──────────────────────────────────────────────────────────────
-#     retriever = vectordb.as_retriever(
-#         search_type="mmr",
-#         search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
-#     )
-#     def format_docs(docs):
-#         return "\n\n---\n\n".join(doc.page_content for doc in docs)
-#     rag_core = (
-#         RunnablePassthrough.assign(
-#             context=lambda x: format_docs(retriever.invoke(x["question"]))
-#         )
-#         | prompt
-#         | llm
-#         | StrOutputParser()
-#     )
-#     chain_with_history = RunnableWithMessageHistory(
-#         rag_core,
-#         get_session_history,
-#         input_messages_key="question",
-#         history_messages_key="chat_history",
-#     )
-#     return chain_with_history, retriever
-# # ── Build once at startup (not per Gradio call) ───────────────────────────────
-# chain, retriever = build_chain(vectordb)   # vectordb initialised elsewhere
-retriever = vectordb.as_retriever(
-        search_type="mmr",
-        search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
-)
 # Query Re-write

     return references, files_in_order
+def build_chain(vectordb: Chroma):  # Build the history-aware RAG chain; returns (chain_with_history, retriever).
+    system_instruction = (  # System prompt; the adjacent string literals below are concatenated into one string.
+        "You are an expert **Electrical Engineer AI Assistant**, specialized in power systems "
+        "and substation design (AIS/GIS up to 765kV), providing insights strictly from the provided context.\n\n"
+        "**Formatting Guidelines:**\n"
+        "1. Organize using **bullet points or numbered lists** where appropriate.\n"
+        "2. **Bold** key technical terms, parameters, and essential facts.\n"
+        "3. Use **technical language** consistent with IEC/IEEE/POWERGRID standards.\n"
+        "4. For multi-step explanations, use **sub-headings** (e.g., `## Sub-section`).\n"
+        "5. **Always include clause references (e.g., Clause XX.XX) for every piece of information.**\n"
+        "6. **CRITICAL: If context contains a table, reproduce it EXACTLY — preserve all rows, "
+        "columns, headers, and alignment. Never paraphrase table data.**\n\n"
+        "**Context Prioritization:**\n"
+        "1. Prioritize documents directly related to the queried equipment type.\n"
+        "2. 'Specific Requirements' clauses **supersede** all other documents — reflect modified clauses first.\n"
+        "3. If context is insufficient: 'The available documents do not contain information regarding [detail].'\n"
+        "4. **Do not invent information** outside the provided context."
+    )
+    prompt = ChatPromptTemplate.from_messages([  # Message order: system prompt, prior chat turns, then the current human turn.
+        SystemMessagePromptTemplate.from_template(system_instruction),
+        MessagesPlaceholder(variable_name="chat_history"),  # Must match history_messages_key passed to the wrapper below.
+        HumanMessagePromptTemplate.from_template(
+            "Context:\n{context}\n\nQuestion:\n{question}"  # {context} is injected by rag_core; {question} comes from the caller's input.
+        ),
+    ])
+    # ── Groq LLM ───────────────────────────────────────────────────────────────
+    llm = ChatGroq(
+        model=GROQ_MODEL,  # NOTE(review): GROQ_MODEL / GROQ_API_KEY are not defined in this block; presumably module-level config.
+        temperature=0.1,
+        max_tokens=2048,
+        api_key=GROQ_API_KEY,
+    )
+    # ── Retriever ──────────────────────────────────────────────────────────────
+    retriever = vectordb.as_retriever(
+        search_type="mmr",  # Maximal marginal relevance search.
+        search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},  # Per LangChain docs: fetch 15 candidates, return 3; lambda_mult trades relevance vs. diversity.
+    )
+    def format_docs(docs):  # Join the page_content of each retrieved document into one context string.
+        return "\n\n---\n\n".join(doc.page_content for doc in docs)
+    rag_core = (  # Pipeline: add "context" to the input dict -> render prompt -> call LLM -> parse output to str.
+        RunnablePassthrough.assign(
+            context=lambda x: format_docs(retriever.invoke(x["question"]))  # Retrieval uses only the current question, not the chat history.
+        )
+        | prompt
+        | llm
+        | StrOutputParser()
+    )
+    chain_with_history = RunnableWithMessageHistory(  # Wraps rag_core so chat history is supplied under "chat_history".
+        rag_core,
+        get_session_history,  # NOTE(review): defined outside this block; presumably maps a session id to a message-history store.
+        input_messages_key="question",
+        history_messages_key="chat_history",
+    )
+    return chain_with_history, retriever  # The retriever is returned as well, so callers can use it independently of the chain.
+# ── Build once at startup (not per Gradio call) ───────────────────────────────
+chain, retriever = build_chain(vectordb)   # vectordb initialised elsewhere; binds module-level `chain` and `retriever`
+# retriever = vectordb.as_retriever(
+#         search_type="mmr",
+#         search_kwargs={"k": 3, "lambda_mult": 0.5, "fetch_k": 15},
+# )
 # Query Re-write