botInfinity commited on
Commit
d074d09
·
verified ·
1 Parent(s): 80dfc88

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +136 -0
app.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from qdrant_client import QdrantClient
4
+ from langchain_qdrant import QdrantVectorStore, RetrievalMode
5
+ from langchain_huggingface import HuggingFaceEmbeddings
6
+ from sentence_transformers import CrossEncoder
7
+ from langchain_groq import ChatGroq
8
+
9
# ------------------------------
# Streamlit page setup
# ------------------------------
st.set_page_config(
    page_title="Nepal Constitution AI",
    page_icon="🧑‍⚖️",
    layout="wide",
)

st.title("🧑‍⚖️ Nepal Constitution – AI Legal Assistant")
st.caption("Hybrid RAG + Cross-Encoder Reranking (Demo)")

# ------------------------------
# User question input
# ------------------------------
query = st.text_input(
    "Ask a constitutional or legal question:",
    placeholder="e.g. What does Article 275 say about local governance?",
)
28
+
29
+ # ------------------------------
30
+ # Cached Models (VERY IMPORTANT)
31
+ # ------------------------------
32
@st.cache_resource
def load_embeddings():
    """Build the dense BGE-M3 embedding model on CPU, cached for the session."""
    settings = {
        "model_name": "BAAI/bge-m3",
        "model_kwargs": {"device": "cpu"},
        # Normalized vectors so cosine similarity behaves as expected.
        "encode_kwargs": {"normalize_embeddings": True},
    }
    return HuggingFaceEmbeddings(**settings)
39
+
40
@st.cache_resource
def load_reranker():
    """Load the MS MARCO MiniLM cross-encoder used for reranking (cached)."""
    model_id = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    return CrossEncoder(model_id)
43
+
44
@st.cache_resource
def load_vector_store():
    """Open the local Qdrant collection as a hybrid (dense + sparse) store.

    Returns a QdrantVectorStore bound to the on-disk database in ./qdrant_db.

    Fixes vs. the original:
    - The QdrantClient that was constructed was never used, and ``path=`` is
      not a QdrantVectorStore constructor argument (it would raise a
      TypeError). The client is now passed explicitly via ``client=``.
    - ``RetrievalMode.HYBRID`` requires a sparse embedding; without one the
      constructor raises a ValueError. A FastEmbed BM25 sparse model is
      supplied — this assumes the "nepal_law" collection was indexed with
      matching sparse vectors (TODO: confirm against the ingestion script).
    """
    # Local import keeps the extra dependency scoped to this factory.
    from langchain_qdrant import FastEmbedSparse

    client = QdrantClient(path="./qdrant_db")

    return QdrantVectorStore(
        client=client,
        collection_name="nepal_law",
        embedding=load_embeddings(),
        sparse_embedding=FastEmbedSparse(model_name="Qdrant/bm25"),
        retrieval_mode=RetrievalMode.HYBRID,
    )
55
+
56
@st.cache_resource
def load_llm():
    """Create the Groq-hosted Llama 3.1 chat model, cached for the session."""
    config = {
        "model": "llama-3.1-8b-instant",
        # Low temperature: legal answers should stay close to the context.
        "temperature": 0.2,
        "max_tokens": 600,
    }
    return ChatGroq(**config)
63
+
64
+ # ------------------------------
65
+ # Reranking Function
66
+ # ------------------------------
67
def rerank(query, docs, top_k=6):
    """Rescore *docs* against *query* with the cross-encoder.

    Returns the ``top_k`` documents ordered from most to least relevant.
    Ties keep their original retrieval order (stable sort).
    """
    model = load_reranker()
    scores = model.predict([(query, doc.page_content) for doc in docs])

    # Sort indices by score, highest first; stability preserves input order
    # for equal scores, matching the original zip-and-sort behavior.
    best = sorted(range(len(docs)), key=lambda i: scores[i], reverse=True)
    return [docs[i] for i in best[:top_k]]
79
+
80
+ # ------------------------------
81
+ # Main Logic
82
+ # ------------------------------
83
# ------------------------------
# Main Logic
# ------------------------------
if query:
    with st.spinner("🔍 Searching constitutional knowledge..."):
        vector_store = load_vector_store()

        # Step 1: broad retrieval from the hybrid index.
        retrieved_docs = vector_store.similarity_search(query, k=20)

        # Step 2: cross-encoder rerank down to the strongest passages.
        reranked_docs = rerank(query, retrieved_docs, top_k=8)

        # Assemble the numbered context the LLM is allowed to cite.
        context = "\n\n".join(
            f"[Source {idx + 1}]\n{doc.page_content}"
            for idx, doc in enumerate(reranked_docs)
        )

        # ------------------------------
        # Grounded legal prompt
        # ------------------------------
        prompt = f"""
You are a constitutional law assistant for Nepal.

INSTRUCTIONS:
- Answer ONLY using the provided context.
- If the answer is not clearly found in the context, say:
"The provided constitutional text does not explicitly answer this question."
- Do NOT invent articles, clauses, or interpretations.
- Use clear, formal, and neutral legal language.
- When relevant, reference article numbers/section numbers mentioned in the context.

CONTEXT:
{context}

QUESTION:
{query}

ANSWER:
"""

    with st.spinner("🧠 Generating answer..."):
        llm = load_llm()
        response = llm.invoke(prompt)

    # ------------------------------
    # Output
    # ------------------------------
    st.markdown("### ✅ Answer")
    st.write(response.content)

    with st.expander("📚 Retrieved Constitutional Sources"):
        for idx, doc in enumerate(reranked_docs):
            st.markdown(f"**Source {idx + 1}**")
            st.write(doc.page_content)
            st.markdown("---")