botInfinity committed on
Commit
c7de1c8
Β·
verified Β·
1 Parent(s): a6369b0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -34
app.py CHANGED
@@ -1,13 +1,17 @@
1
  import os
2
  import streamlit as st
3
  from qdrant_client import QdrantClient
4
- from langchain_qdrant import QdrantVectorStore, RetrievalMode
 
 
 
 
5
  from langchain_huggingface import HuggingFaceEmbeddings
6
  from sentence_transformers import CrossEncoder
7
  from langchain_groq import ChatGroq
8
 
9
  # ------------------------------
10
- # Streamlit Config
11
  # ------------------------------
12
  st.set_page_config(
13
  page_title="Nepal Constitution AI",
@@ -16,7 +20,17 @@ st.set_page_config(
16
  )
17
 
18
  st.title("πŸ§‘β€βš–οΈ Nepal Constitution – AI Legal Assistant")
19
- st.caption("Hybrid RAG + Cross-Encoder Reranking (Demo)")
 
 
 
 
 
 
 
 
 
 
20
 
21
  # ------------------------------
22
  # User Input
@@ -27,7 +41,7 @@ query = st.text_input(
27
  )
28
 
29
  # ------------------------------
30
- # Cached Models (VERY IMPORTANT)
31
  # ------------------------------
32
  @st.cache_resource
33
  def load_embeddings():
@@ -37,19 +51,24 @@ def load_embeddings():
37
  encode_kwargs={"normalize_embeddings": True}
38
  )
39
 
 
 
 
 
40
  @st.cache_resource
41
  def load_reranker():
42
  return CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
43
 
44
  @st.cache_resource
45
  def load_vector_store():
46
- client = QdrantClient(path="./qdrant_db")
47
  embeddings = load_embeddings()
 
48
 
49
  return QdrantVectorStore(
50
- path = "./qdrant_db",
51
  collection_name="nepal_law",
52
  embedding=embeddings,
 
53
  retrieval_mode=RetrievalMode.HYBRID
54
  )
55
 
@@ -62,9 +81,9 @@ def load_llm():
62
  )
63
 
64
  # ------------------------------
65
- # Reranking Function
66
  # ------------------------------
67
- def rerank(query, docs, top_k=6):
68
  reranker = load_reranker()
69
  pairs = [(query, d.page_content) for d in docs]
70
  scores = reranker.predict(pairs)
@@ -77,38 +96,27 @@ def rerank(query, docs, top_k=6):
77
 
78
  return [doc for doc, _ in ranked[:top_k]]
79
 
80
- # ------------------------------
81
- # Main Logic
82
- # ------------------------------
83
  if query:
84
- with st.spinner("πŸ” Searching constitutional knowledge..."):
85
  vector_store = load_vector_store()
 
 
86
 
87
- # Step 1: Retrieve
88
- retrieved_docs = vector_store.similarity_search(query, k=20)
89
-
90
- # Step 2: Rerank
91
- reranked_docs = rerank(query, retrieved_docs, top_k=8)
92
-
93
- # Build context
94
  context = "\n\n".join(
95
- [f"[Source {i+1}]\n{doc.page_content}"
96
- for i, doc in enumerate(reranked_docs)]
97
  )
98
 
99
- # ------------------------------
100
- # Improved Legal Prompt
101
- # ------------------------------
102
  prompt = f"""
103
  You are a constitutional law assistant for Nepal.
104
 
105
- INSTRUCTIONS:
106
- - Answer ONLY using the provided context.
107
- - If the answer is not clearly found in the context, say:
108
- "The provided constitutional text does not explicitly answer this question."
109
  - Do NOT invent articles, clauses, or interpretations.
110
- - Use clear, formal, and neutral legal language.
111
- - When relevant, reference article numbers/section numbers mentioned in the context.
 
112
 
113
  CONTEXT:
114
  {context}
@@ -123,14 +131,11 @@ ANSWER:
123
  llm = load_llm()
124
  response = llm.invoke(prompt)
125
 
126
- # ------------------------------
127
- # Output
128
- # ------------------------------
129
  st.markdown("### βœ… Answer")
130
  st.write(response.content)
131
 
132
  with st.expander("πŸ“š Retrieved Constitutional Sources"):
133
- for i, doc in enumerate(reranked_docs):
134
  st.markdown(f"**Source {i+1}**")
135
  st.write(doc.page_content)
136
  st.markdown("---")
 
1
  import os
2
  import streamlit as st
3
  from qdrant_client import QdrantClient
4
+ from langchain_qdrant import (
5
+ QdrantVectorStore,
6
+ RetrievalMode,
7
+ FastEmbedSparse
8
+ )
9
  from langchain_huggingface import HuggingFaceEmbeddings
10
  from sentence_transformers import CrossEncoder
11
  from langchain_groq import ChatGroq
12
 
13
  # ------------------------------
14
+ # Streamlit Config (MUST RUN FAST)
15
  # ------------------------------
16
  st.set_page_config(
17
  page_title="Nepal Constitution AI",
 
20
  )
21
 
22
  st.title("πŸ§‘β€βš–οΈ Nepal Constitution – AI Legal Assistant")
23
st.caption("Hybrid RAG (Dense + BM25) + Cross-Encoder Reranking")

# Emit visible output as early as possible so the hosting platform's
# health check sees the page responding before heavy models load.
st.write("✅ App booted successfully.")
27
+
28
# ------------------------------
# Fail loudly (and immediately) when the bundled Qdrant store is absent,
# rather than crashing later inside load_vector_store().
# ------------------------------
db_present = os.path.exists("./qdrant_db")
if not db_present:
    st.error("❌ qdrant_db folder not found. You must commit it to the repo.")
    st.stop()
34
 
35
  # ------------------------------
36
  # User Input
 
41
  )
42
 
43
  # ------------------------------
44
+ # Cached Heavy Stuff
45
  # ------------------------------
46
  @st.cache_resource
47
  def load_embeddings():
 
51
  encode_kwargs={"normalize_embeddings": True}
52
  )
53
 
54
@st.cache_resource
def load_sparse_embeddings():
    """Build and cache (once per session) the BM25 sparse embedder used for hybrid retrieval."""
    sparse_model = FastEmbedSparse(model_name="Qdrant/bm25")
    return sparse_model
57
+
58
@st.cache_resource
def load_reranker():
    """Load and cache the cross-encoder model used to rerank retrieved passages."""
    model_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    return CrossEncoder(model_id)
61
 
62
@st.cache_resource
def load_vector_store():
    """Open the local on-disk Qdrant collection in hybrid (dense + BM25 sparse) mode.

    Returns a cached QdrantVectorStore bound to the committed ./qdrant_db folder.
    """
    embeddings = load_embeddings()
    sparse_embeddings = load_sparse_embeddings()

    # BUG FIX: QdrantVectorStore's constructor takes a `client`, not `path=`
    # (passing path= raises a TypeError). Build the embedded/local-file client
    # explicitly and hand it to the store.
    client = QdrantClient(path="./qdrant_db")

    return QdrantVectorStore(
        client=client,
        collection_name="nepal_law",
        embedding=embeddings,
        sparse_embedding=sparse_embeddings,
        retrieval_mode=RetrievalMode.HYBRID,
    )
74
 
 
81
  )
82
 
83
  # ------------------------------
84
+ # Reranking
85
  # ------------------------------
86
+ def rerank(query, docs, top_k=8):
87
  reranker = load_reranker()
88
  pairs = [(query, d.page_content) for d in docs]
89
  scores = reranker.predict(pairs)
 
96
 
97
  return [doc for doc, _ in ranked[:top_k]]
98
 
99
+
 
 
100
  if query:
101
+ with st.spinner("πŸ” Searching constitution..."):
102
  vector_store = load_vector_store()
103
+ retrieved = vector_store.similarity_search(query, k=20)
104
+ reranked = rerank(query, retrieved)
105
 
 
 
 
 
 
 
 
106
  context = "\n\n".join(
107
+ f"[Source {i+1}]\n{doc.page_content}"
108
+ for i, doc in enumerate(reranked)
109
  )
110
 
 
 
 
111
  prompt = f"""
112
  You are a constitutional law assistant for Nepal.
113
 
114
+ RULES:
115
+ - Use ONLY the provided context.
 
 
116
  - Do NOT invent articles, clauses, or interpretations.
117
+ - If the answer is not found, say so explicitly.
118
+ - Use formal, neutral legal language.
119
+ - Reference article/section numbers when mentioned.
120
 
121
  CONTEXT:
122
  {context}
 
131
  llm = load_llm()
132
  response = llm.invoke(prompt)
133
 
 
 
 
134
  st.markdown("### βœ… Answer")
135
  st.write(response.content)
136
 
137
  with st.expander("πŸ“š Retrieved Constitutional Sources"):
138
+ for i, doc in enumerate(reranked):
139
  st.markdown(f"**Source {i+1}**")
140
  st.write(doc.page_content)
141
  st.markdown("---")