hashirlodhi commited on
Commit
e46711a
·
verified ·
1 Parent(s): 97f00b4

Upload 11 files

Browse files
Files changed (12) hide show
  1. .env +4 -0
  2. .gitattributes +2 -0
  3. Constitution.pdf +3 -0
  4. Pakistan Penal Code.pdf +3 -0
  5. app.py +253 -0
  6. config.py +33 -0
  7. ingestion.py +36 -0
  8. logic.py +44 -0
  9. rag_engine.py +72 -0
  10. requirements.txt +11 -0
  11. search_engine.py +74 -0
  12. vector_store.py +42 -0
.env ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # SECURITY: real API keys were committed here. Treat every key below as
+ # compromised — revoke/rotate them immediately and keep .env out of version
+ # control (add it to .gitignore). Placeholders only:
+ GROQ_API_KEY="your-groq-api-key"
+ GOOGLE_API_KEY=your-google-api-key
+ SEARCH_ENGINE_ID=your-search-engine-id
+ GEMINI_API_KEY=your-gemini-api-key
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Constitution.pdf filter=lfs diff=lfs merge=lfs -text
37
+ Pakistan[[:space:]]Penal[[:space:]]Code.pdf filter=lfs diff=lfs merge=lfs -text
Constitution.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb6c227d78847d1826d53bdb27e40bfb5cc065e7822fa27453872b21fe11c489
3
+ size 1546102
Pakistan Penal Code.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38fecd375cfd1e566c25cfe6f2c989eabc0fc06d807f811e8fa36ce00709695c
3
+ size 457396
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio as gr
4
+ import config
5
+ import ingestion
6
+ import vector_store
7
+ import rag_engine
8
+ import logic
9
+
10
+ # Global variable to store the chain
11
+ rag_chain = None
12
+
13
def initialize_system_once():
    """
    Lazily build the RAG chain on first call; subsequent calls reuse the
    cached chain stored in the module-level ``rag_chain`` global.

    Returns:
        The RAG chain, or None when no documents could be ingested (the UI
        stays up in that case, but local search will not work).
    """
    global rag_chain
    if rag_chain is not None:
        return rag_chain

    print("Initializing LegalizeAI System...")

    # The embedding model is required both for loading an existing store
    # and for building a new one.
    print("Loading embedding model...")
    embedding_model = vector_store.get_embedding_model()

    # Reuse a persisted vector store when one exists; otherwise ingest PDFs.
    db_dir = config.CHROMA_DB_DIR
    if os.path.exists(db_dir) and os.listdir(db_dir):
        print(f"Loading existing vector store from {db_dir}...")
        v_store = vector_store.get_vector_store(embedding_model)
    else:
        print("No existing vector store found. Starting ingestion process...")
        docs = ingestion.load_documents()
        if not docs:
            # Keep the UI alive even with no documents, but warn hard.
            print("CRITICAL WARNING: No documents loaded. App will run but local search will fail.")
            return None
        chunks = ingestion.split_documents(docs)
        print("Creating vector store...")
        v_store = vector_store.create_vector_store(chunks, embedding_model)

    # Wire the retriever into the RAG chain and cache it globally.
    retriever = vector_store.get_retriever(v_store)
    print("Initializing RAG chain...")
    rag_chain = rag_engine.create_rag_chain(retriever)

    print("System initialization complete!")
    return rag_chain
54
+
55
def chat_response(message, history):
    """
    Handle one user message from the chat UI.

    Runs the hybrid RAG + web pipeline and returns the assistant's reply as
    a string. Errors are reported inline rather than raised so the chat
    widget never crashes.
    """
    try:
        chain = initialize_system_once()
        if not chain:
            return "System Error: Failed to initialize AI chain. Please check server logs."
        return logic.generate_hybrid_response(message, chain)
    except Exception as e:
        return f"An error occurred: {str(e)}"
68
+
69
# Custom CSS for a professional look.
# Hides Gradio's built-in footer and styles the custom ".dev-footer" block
# that is injected further down via gr.HTML(footer_html).
custom_css = """
body { background-color: #f0f2f5; }
footer { visibility: hidden !important; }

/* Custom Developer Footer */
.dev-footer {
text-align: center;
padding: 20px;
margin-top: 30px;
border-top: 1px solid #e5e7eb;
color: #4b5563;
background-color: transparent !important;
}

.dev-footer a {
display: inline-flex;
align-items: center;
justify-content: center;
margin: 0 10px;
color: #4b5563;
text-decoration: none;
transition: color 0.2s;
}

.dev-footer a:hover {
color: #1f2937;
}

.dev-footer svg {
margin-right: 5px;
width: 20px;
height: 20px;
fill: currentColor;
}
"""
105
+
106
# HTML for the footer: developer credit plus GitHub / Medium / LinkedIn links
# with inline SVG icons. Rendered via gr.HTML inside create_ui().
footer_html = """
<div class="dev-footer">
<p>Developed by <strong>Muhammad Hashir Lodhi</strong></p>
<div>
<a href="https://github.com/HashirLodhi" target="_blank">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.05-.015-2.055-3.33.72-4.035-1.605-4.035-1.605-.54-1.38-1.32-1.74-1.32-1.74-1.095-.75.09-.735.09-.735 1.2.09 1.845 1.245 1.845 1.245 1.065 1.83 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405 1.02 0 2.04.135 3 .405 2.28-1.545 3.285-1.23 3.285-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.285 0 .315.225.69.825.57A12.02 12.02 0 0024 12c0-6.63-5.37-12-12-12z"/></svg>
GitHub
</a>
<a href="https://medium.com/@hashirlodhi145" target="_blank">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M13.54 12a6.8 6.8 0 01-6.77 6.82A6.8 6.8 0 010 12a6.8 6.8 0 016.77-6.82A6.8 6.8 0 0113.54 12zM20.96 12c0 3.54-1.51 6.42-3.38 6.42-1.87 0-3.39-2.88-3.39-6.42s1.52-6.42 3.39-6.42 3.38 2.88 3.38 6.42M24 12c0 3.17-.53 5.75-1.19 5.75-.66 0-1.19-2.58-1.19-5.75s.53-5.75 1.19-5.75C23.47 6.25 24 8.83 24 12z"/></svg>
Medium
</a>
<a href="https://www.linkedin.com/in/hashir-lodhi/" target="_blank">
<svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433c-1.144 0-2.063-.926-2.063-2.065 0-1.138.92-2.063 2.063-2.063 1.14 0 2.064.925 2.064 2.063 0 1.139-.925 2.065-2.064 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/></svg>
LinkedIn
</a>
</div>
</div>
"""
126
+
127
# Create the Gradio Interface
def create_ui():
    """
    Build the Gradio Blocks UI: chat window, input row, retry/clear buttons,
    example prompts, and the custom HTML footer.

    Returns:
        The assembled gr.Blocks object (not yet launched).
    """
    # Warm up the backend so the first chat message is not slow.
    initialize_system_once()

    # Define interaction functions
    def interact(message, history):
        # Appends the user message and the assistant reply to `history`
        # in OpenAI-style {"role", "content"} dicts, and clears the textbox.
        if not message:
            return "", history

        # Initialize history if strictly None (though usually empty list)
        if history is None:
            history = []

        # Add user message
        history.append({"role": "user", "content": message})

        # Get response
        try:
            # Note: chat_response doesn't typically check history in current logic,
            # but if it did, we'd need to ensure it handles the new format or pass just strings.
            response = chat_response(message, history)
        except Exception as e:
            response = f"Error: {str(e)}"

        # Add assistant response
        history.append({"role": "assistant", "content": response})
        return "", history

    def retry_last(history):
        # Removes the last assistant reply (if any) and moves the last user
        # message back into the textbox so it can be edited and resent.
        if not history:
            return history, ""

        # Pop last message if it's assistant
        if history and history[-1]["role"] == "assistant":
            history.pop()

        # Pop user message to edit
        if history and history[-1]["role"] == "user":
            last_msg = history.pop()
            return history, last_msg["content"]

        return history, ""

    # Create the Gradio Blocks
    with gr.Blocks(title="⚖️ LegalizeAI") as demo:
        gr.Markdown(
            """
            # ⚖️ LegalizeAI
            **Professional Assistant for Pakistani Law**

            Consulting Constitution of Pakistan, Pakistan Penal Code, and Real-time Web Sources.
            """
        )

        # NOTE(review): interact() uses the {"role","content"} messages format,
        # which assumes gr.Chatbot defaults to type="messages" (Gradio 5/6).
        # On older Gradio the default is tuple pairs — confirm version or pass
        # type="messages" explicitly.
        # NOTE(review): avatar_images normally expects an image path/URL; an
        # emoji string may not render as an avatar — confirm.
        chatbot = gr.Chatbot(
            height=500,
            elem_id="chatbot",
            avatar_images=(None, "⚖️")
        )

        with gr.Row():
            txt = gr.Textbox(
                scale=4,
                show_label=False,
                placeholder="Ask a legal question...",
                container=False,
                autofocus=True
            )
            submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")

        with gr.Row():
            retry_btn = gr.Button("Retry 🔄", size="sm")
            clear_btn = gr.Button("Clear 🗑️", size="sm")

        # Example buttons logic
        examples = [
            "What is the punishment for theft in Pakistan?",
            "Explain Article 62 of the Constitution.",
            "Who is the current Prime Minister?",
            "What are my fundamental rights?"
        ]

        gr.Examples(
            examples=examples,
            inputs=txt
        )

        # Footer
        gr.HTML(footer_html)

        # Event Wiring: both the Send button and pressing Enter submit.
        submit_btn.click(interact, [txt, chatbot], [txt, chatbot])
        txt.submit(interact, [txt, chatbot], [txt, chatbot])

        retry_btn.click(retry_last, [chatbot], [chatbot, txt])  # Pop last and put in text
        # Setting the chatbot value to None clears the conversation.
        clear_btn.click(lambda: None, None, chatbot, queue=False)

    return demo
225
+
226
def main():
    """
    Entry point: initialize the backend, build the UI, and launch the server.

    Exits with status 1 on any fatal startup error.
    """
    try:
        # Initialize system once before UI creation if needed, or let UI do it
        initialize_system_once()

        # Using a professional theme
        theme = gr.themes.Soft(
            primary_hue="slate",
            secondary_hue="stone",
            neutral_hue="zinc",
            font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
        )

        # Create UI
        demo = create_ui()

        # Launching - Pass theme and css here for Gradio 6.0+ compatibility
        # NOTE(review): historically `theme` and `css` are gr.Blocks(...)
        # constructor arguments, not launch() arguments — confirm the installed
        # Gradio version accepts them here, otherwise they are ignored or raise.
        # NOTE(review): server_name="127.0.0.1" binds localhost only; hosted
        # deployments (e.g. HF Spaces) typically need "0.0.0.0" — confirm.
        demo.launch(
            server_name="127.0.0.1",
            theme=theme,
            css=custom_css
        )
    except Exception as e:
        print(f"Fatal Error: {e}")
        sys.exit(1)

if __name__ == "__main__":
    main()
config.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import sys
from dotenv import load_dotenv

# Load environment variables from a local .env file (if present) into os.environ.
load_dotenv()

# API Keys — read from the environment. Missing keys only warn here so the
# app can still start; downstream API calls will fail without them.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")

if not GROQ_API_KEY:
    print("Warning: GROQ_API_KEY not found in .env file", file=sys.stderr)
if not GEMINI_API_KEY:
    print("Warning: GEMINI_API_KEY not found in .env file", file=sys.stderr)

# Paths — source PDFs ingested into the vector store (relative to the CWD).
PDF_FILES = [
    "Constitution.pdf",
    "Pakistan Penal Code.pdf"
]

# Directory where the persistent Chroma database is stored.
CHROMA_DB_DIR = "./chroma_db_legal"

# Models
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # HF sentence-embedding model
LLM_MODEL_NAME = "llama-3.3-70b-versatile"  # Groq-hosted LLM for the local RAG chain
GEMINI_MODEL_NAME = "gemini-2.5-flash"  # Gemini model for search-grounded synthesis

# RAG Configuration
CHUNK_SIZE = 1000  # characters per chunk
CHUNK_OVERLAP = 200  # characters of overlap between consecutive chunks
RETRIEVER_K = 6  # number of chunks retrieved per query
ingestion.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ import config
5
+
6
def load_documents(pdf_paths=None):
    """
    Load pages from the given PDF files (defaults to config.PDF_FILES).

    Missing files are skipped with a warning instead of raising, so a partial
    corpus still loads. Returns a flat list of per-page Document objects.
    """
    paths = config.PDF_FILES if pdf_paths is None else pdf_paths

    pages = []
    for path in paths:
        if not os.path.exists(path):
            print(f"File not found: {path} - Skipping")
            continue
        print(f"Loading: {path}")
        pages.extend(PyPDFLoader(path).load())

    print(f"Loaded {len(pages)} pages total.")
    return pages
24
+
25
def split_documents(docs):
    """
    Break page-level documents into overlapping chunks sized for embedding.

    Chunk size and overlap come from config; the separator list prefers
    paragraph, then line, then word boundaries.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=config.CHUNK_SIZE,
        chunk_overlap=config.CHUNK_OVERLAP,
        separators=["\n\n", "\n", " ", ""],
    )
    pieces = splitter.split_documents(docs)
    print(f"Created {len(pieces)} chunks.")
    return pieces
logic.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import search_engine
2
+ import random
3
+
4
def generate_hybrid_response(question, rag_chain):
    """
    Answer a question by combining local RAG context with Gemini web synthesis.

    Phase 1 gets local context from the RAG chain (errors degrade gracefully),
    phase 2 synthesizes the final answer via search_engine, phase 3 maps the
    SERVER_BUSY sentinel to a user-facing notice and appends a closing line.
    """
    print(f"\nAnalyzing: {question}...")

    # Phase 1: local retrieval. A failure here is non-fatal — Gemini can
    # still answer from web search alone, so we pass an error marker instead.
    try:
        rag_response = rag_chain.invoke(question)
    except Exception as e:
        print(f"RAG Error: {e}")
        rag_response = "Error retrieving local context."

    # Phase 2: combined synthesis (local context + live web) via Gemini.
    print("Fetching information from Gemini (Context + Web)...")
    final_answer = search_engine.search_and_synthesize(question, rag_response)

    # Phase 3: error handling & formatting.
    if final_answer == "SERVER_BUSY":
        return "⚠️ **Service Unavailable**: The AI server is currently busy. Please try again in a few moments."

    # Off-topic denials (the Pakistan-only filter) are returned verbatim,
    # without a chatty closing line.
    if "I specialize only in Pakistani Law" in final_answer:
        return final_answer

    # Pick a random follow-up suggestion to close the answer.
    closings = [
        "Need clarification on any point?",
        "Shall we explore related case laws?",
        "I can help draft a legal notice based on this.",
        "Would you like to know about relevant court procedures?",
        "Ask me if you need further details on this topic!"
    ]
    next_step = random.choice(closings)

    return f"{final_answer}\n\n_{next_step}_"
rag_engine.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_groq import ChatGroq
2
+ from langchain_core.prompts import ChatPromptTemplate
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables import RunnablePassthrough
5
+ import config
6
+
7
def initialize_llm():
    """
    Build the Groq chat model used by the local RAG chain.

    Low temperature keeps legal answers deterministic; max_tokens caps
    response length.
    """
    llm = ChatGroq(
        model=config.LLM_MODEL_NAME,
        api_key=config.GROQ_API_KEY,
        temperature=0.1,
        max_tokens=2000,
    )
    return llm
+
18
def get_rag_prompt():
    """
    Build the chat prompt template for the local RAG chain.

    The template expects two variables: `context` (formatted retrieved
    documents, see format_docs) and `question` (the user's query).
    """
    # FIX: corrected "Adoption a formal" -> "Adopt a formal" in the
    # instruction text sent to the LLM.
    return ChatPromptTemplate.from_template("""
You are a Senior Legal Consultant specializing in the laws of Pakistan.

CONTEXT:
1. Constitution of Pakistan
2. Pakistan Penal Code

INSTRUCTIONS:
- Adopt a formal, professional, and authoritative tone suitable for legal memoranda.
- Cite specific Articles, Sections, or Clauses extensively.
- If the information is present: Provide a direct, concise legal opinion.
- If the information is MISSING: State clearly "The provided legal documents do not contain specific provisions regarding [topic]." Do not apologize.
- Structure your response with clear headings if necessary.

LEGAL CONTEXT:
{context}

QUERY: {question}

LEGAL OPINION:
""")
43
+
44
def format_docs(docs):
    """
    Render retrieved documents as a labelled plain-text context block.

    Each document contributes a "[Document i: source, Page p]" header, up to
    800 characters of its content, and a dashed separator line. Returns the
    joined text (empty string for an empty input).
    """
    lines = []
    for idx, doc in enumerate(docs, start=1):
        src = doc.metadata.get('source', 'Unknown Document')
        pg = doc.metadata.get('page', 'N/A')
        lines.append(f"[Document {idx}: {src}, Page {pg}]")
        # Truncate each document to keep the assembled context bounded.
        lines.append(doc.page_content[:800])
        lines.append("-" * 50)
    return "\n".join(lines)
58
+
59
def create_rag_chain(retriever):
    """
    Assemble the retrieval -> prompt -> LLM -> string pipeline.

    The retriever output is formatted into plain text for the `context`
    slot while the raw question passes through unchanged.
    """
    llm = initialize_llm()
    prompt = get_rag_prompt()

    setup = {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    return setup | prompt | llm | StrOutputParser()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ langchain-groq
4
+ google-genai
5
+ python-dotenv
6
+ chromadb
7
+ pypdf
8
+ sentence-transformers
9
+ gradio
10
+ langchain-huggingface
11
+ langchain-chroma
search_engine.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from google import genai
2
+ from google.genai import types
3
+ import config
4
+
5
def initialize_gemini_client():
    """Create a google-genai client authenticated with the configured key."""
    client = genai.Client(api_key=config.GEMINI_API_KEY)
    return client
10
+
11
def search_and_synthesize(query, rag_context):
    """
    Answer `query` with Gemini, grounded by Google Search and the local RAG
    context, enforcing the Pakistan-only content filter via the system prompt.

    Returns:
        The model's text on success; the sentinel "SERVER_BUSY" when the
        upstream service is overloaded or out of quota (logic.py special-cases
        this); or a generic user-facing error message for any other failure.
    """
    try:
        client = initialize_gemini_client()

        # Enable Gemini's built-in Google Search grounding tool.
        grounding_tool = types.Tool(
            google_search=types.GoogleSearch()
        )

        # Configuration with grounding tool, low temperature for factual tone.
        generate_config = types.GenerateContentConfig(
            tools=[grounding_tool],
            temperature=0.2,
            system_instruction="""You are a specialized Legal Assistant for Pakistan.
Your primary job is to answer the user's legal question by combining:
1. The User's Question.
2. The provided 'Legal Context' (which comes from local legal documents like the Constitution and PPC).
3. Real-time information from Google Search.

CRITICAL RULES:
- FILTER: You must ONLY answer questions related to Pakistan Law, the Pakistani Legal System, or general legal queries applicable in Pakistan.
- DENIAL: If the user asks about anything else (e.g., "Capital of Peru", "Movie reviews", "Laws of France"), query unrelated to Pakistan, you MUST REFUSE to answer. Say exactly: "I specialize only in Pakistani Law. I cannot assist with this query."
- SYNTHESIS: Provide a single, cohesive answer. citations are encouraged.
- Do not treat 'Legal Context' as the only truth if Search reveals it's outdated, but prioritize the Constitution/Acts if they are standard texts.
- If the user asks for a specific section, quote it if available in Context or Search.
"""
        )

        prompt = f"""
User Query: {query}

Legal Context from Local Documents:
{rag_context}

Please provide a comprehensive answer based on the above instructions.
"""

        # Generate response with web search grounding.
        response = client.models.generate_content(
            model=config.GEMINI_MODEL_NAME,
            contents=prompt,
            config=generate_config,
        )

        if response and response.text:
            return response.text
        # Fallback if the model returns empty but no error is raised.
        return "No information could be generated. Please try again."

    except Exception as e:
        print(f"Gemini search error: {e}")
        error_msg = str(e).lower()
        # Overload / quota errors map to the SERVER_BUSY sentinel so the UI
        # can show a "try again shortly" notice.
        if "503" in error_msg or "429" in error_msg or "busy" in error_msg or "quota" in error_msg:
            return "SERVER_BUSY"

        # BUG FIX: previously every other failure also returned "SERVER_BUSY",
        # mislabelling e.g. auth/config errors as transient overload. Return a
        # distinct generic message instead (details stay in the server log).
        return "⚠️ An unexpected error occurred while generating the answer. Please check the server logs."
vector_store.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_huggingface import HuggingFaceEmbeddings
2
+ from langchain_chroma import Chroma
3
+ import config
4
+
5
def get_embedding_model():
    """
    Build the HuggingFace sentence-embedding model used for indexing and queries.
    """
    # CPU keeps the app deployable on machines without a GPU.
    model_kwargs = {'device': 'cpu'}
    embeddings = HuggingFaceEmbeddings(
        model_name=config.EMBEDDING_MODEL_NAME,
        model_kwargs=model_kwargs,
    )
    return embeddings
13
+
14
def create_vector_store(chunks, embedding_model):
    """
    Embed the given chunks and persist them as a new Chroma store on disk.

    The store is written under config.CHROMA_DB_DIR so later runs can reload
    it without re-ingesting the PDFs.
    """
    return Chroma.from_documents(
        documents=chunks,
        embedding=embedding_model,
        persist_directory=config.CHROMA_DB_DIR,
    )
24
+
25
def get_vector_store(embedding_model):
    """
    Open the Chroma store previously persisted under config.CHROMA_DB_DIR.
    """
    # Constructing Chroma with persist_directory loads the existing collection.
    store = Chroma(
        embedding_function=embedding_model,
        persist_directory=config.CHROMA_DB_DIR,
    )
    return store
34
+
35
def get_retriever(vectorstore):
    """
    Wrap the vector store as a similarity retriever returning the top
    config.RETRIEVER_K chunks per query.
    """
    search_kwargs = {"k": config.RETRIEVER_K}
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs=search_kwargs,
    )