Spaces:

botInfinity
/

NEPAL_Constitution_Assistant_AI

Running

App Files Community

botInfinity committed on Jan 20

Commit

c7de1c8

verified ·

1 Parent(s): a6369b0

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -34

app.py CHANGED Viewed

@@ -1,13 +1,17 @@
 import os
 import streamlit as st
 from qdrant_client import QdrantClient
-from langchain_qdrant import QdrantVectorStore, RetrievalMode
 from langchain_huggingface import HuggingFaceEmbeddings
 from sentence_transformers import CrossEncoder
 from langchain_groq import ChatGroq
 # ------------------------------
-# Streamlit Config
 # ------------------------------
 st.set_page_config(
     page_title="Nepal Constitution AI",
@@ -16,7 +20,17 @@ st.set_page_config(
 )
 st.title("🧑‍⚖️ Nepal Constitution – AI Legal Assistant")
-st.caption("Hybrid RAG + Cross-Encoder Reranking (Demo)")
 # ------------------------------
 # User Input
@@ -27,7 +41,7 @@ query = st.text_input(
 )
 # ------------------------------
-# Cached Models (VERY IMPORTANT)
 # ------------------------------
 @st.cache_resource
 def load_embeddings():
@@ -37,19 +51,24 @@ def load_embeddings():
         encode_kwargs={"normalize_embeddings": True}
     )
 @st.cache_resource
 def load_reranker():
     return CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
 @st.cache_resource
 def load_vector_store():
-    client = QdrantClient(path="./qdrant_db")
     embeddings = load_embeddings()
     return QdrantVectorStore(
-        path = "./qdrant_db",
         collection_name="nepal_law",
         embedding=embeddings,
         retrieval_mode=RetrievalMode.HYBRID
     )
@@ -62,9 +81,9 @@ def load_llm():
     )
 # ------------------------------
-# Reranking Function
 # ------------------------------
-def rerank(query, docs, top_k=6):
     reranker = load_reranker()
     pairs = [(query, d.page_content) for d in docs]
     scores = reranker.predict(pairs)
@@ -77,38 +96,27 @@ def rerank(query, docs, top_k=6):
     return [doc for doc, _ in ranked[:top_k]]
-# ------------------------------
-# Main Logic
-# ------------------------------
 if query:
-    with st.spinner("🔍 Searching constitutional knowledge..."):
         vector_store = load_vector_store()
-        # Step 1: Retrieve
-        retrieved_docs = vector_store.similarity_search(query, k=20)
-        # Step 2: Rerank
-        reranked_docs = rerank(query, retrieved_docs, top_k=8)
-        # Build context
         context = "\n\n".join(
-            [f"[Source {i+1}]\n{doc.page_content}"
-             for i, doc in enumerate(reranked_docs)]
         )
-    # ------------------------------
-    # Improved Legal Prompt
-    # ------------------------------
     prompt = f"""
 You are a constitutional law assistant for Nepal.
-INSTRUCTIONS:
-- Answer ONLY using the provided context.
-- If the answer is not clearly found in the context, say:
-  "The provided constitutional text does not explicitly answer this question."
 - Do NOT invent articles, clauses, or interpretations.
-- Use clear, formal, and neutral legal language.
-- When relevant, reference article numbers/section numbers mentioned in the context.
 CONTEXT:
 {context}
@@ -123,14 +131,11 @@ ANSWER:
         llm = load_llm()
         response = llm.invoke(prompt)
-    # ------------------------------
-    # Output
-    # ------------------------------
     st.markdown("### ✅ Answer")
     st.write(response.content)
     with st.expander("📚 Retrieved Constitutional Sources"):
-        for i, doc in enumerate(reranked_docs):
             st.markdown(f"**Source {i+1}**")
             st.write(doc.page_content)
             st.markdown("---")

 import os
 import streamlit as st
 from qdrant_client import QdrantClient
+from langchain_qdrant import (
+    QdrantVectorStore,
+    RetrievalMode,
+    FastEmbedSparse
+)
 from langchain_huggingface import HuggingFaceEmbeddings
 from sentence_transformers import CrossEncoder
 from langchain_groq import ChatGroq
 # ------------------------------
+# Streamlit Config (MUST RUN FAST)
 # ------------------------------
 st.set_page_config(
     page_title="Nepal Constitution AI",
 )
 st.title("🧑‍⚖️ Nepal Constitution – AI Legal Assistant")
+st.caption("Hybrid RAG (Dense + BM25) + Cross-Encoder Reranking")
+# 🔥 EARLY VISIBILITY (HF health check helper)
+st.write("✅ App booted successfully.")
+# ------------------------------
+# Hard stop if DB missing (NO SILENT FAIL)
+# ------------------------------
+if not os.path.exists("./qdrant_db"):
+    st.error("❌ qdrant_db folder not found. You must commit it to the repo.")
+    st.stop()
 # ------------------------------
 # User Input
 )
 # ------------------------------
+# Cached Heavy Stuff
 # ------------------------------
 @st.cache_resource
 def load_embeddings():
         encode_kwargs={"normalize_embeddings": True}
     )
+@st.cache_resource
+def load_sparse_embeddings():
+    return FastEmbedSparse(model_name="Qdrant/bm25")
 @st.cache_resource
 def load_reranker():
     return CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
 @st.cache_resource
 def load_vector_store():
     embeddings = load_embeddings()
+    sparse_embeddings = load_sparse_embeddings()
     return QdrantVectorStore(
+        path="./qdrant_db",
         collection_name="nepal_law",
         embedding=embeddings,
+        sparse_embedding=sparse_embeddings,
         retrieval_mode=RetrievalMode.HYBRID
     )
     )
 # ------------------------------
+# Reranking
 # ------------------------------
+def rerank(query, docs, top_k=8):
     reranker = load_reranker()
     pairs = [(query, d.page_content) for d in docs]
     scores = reranker.predict(pairs)
     return [doc for doc, _ in ranked[:top_k]]
 if query:
+    with st.spinner("🔍 Searching constitution..."):
         vector_store = load_vector_store()
+        retrieved = vector_store.similarity_search(query, k=20)
+        reranked = rerank(query, retrieved)
         context = "\n\n".join(
+            f"[Source {i+1}]\n{doc.page_content}"
+            for i, doc in enumerate(reranked)
         )
     prompt = f"""
 You are a constitutional law assistant for Nepal.
+RULES:
+- Use ONLY the provided context.
 - Do NOT invent articles, clauses, or interpretations.
+- If the answer is not found, say so explicitly.
+- Use formal, neutral legal language.
+- Reference article/section numbers when mentioned.
 CONTEXT:
 {context}
         llm = load_llm()
         response = llm.invoke(prompt)
     st.markdown("### ✅ Answer")
     st.write(response.content)
     with st.expander("📚 Retrieved Constitutional Sources"):
+        for i, doc in enumerate(reranked):
             st.markdown(f"**Source {i+1}**")
             st.write(doc.page_content)
             st.markdown("---")