Saim-11 committed on
Commit
de1e26e
·
verified ·
1 Parent(s): 0a2761a

Update constitution_py.py

Browse files
Files changed (1) hide show
  1. constitution_py.py +171 -171
constitution_py.py CHANGED
@@ -1,171 +1,171 @@
1
- import warnings
2
- warnings.filterwarnings("ignore")
3
-
4
- import re
5
- import os
6
- import numpy as np
7
- import faiss
8
- from sentence_transformers import SentenceTransformer
9
- from langchain_groq import ChatGroq
10
- from langchain.chains import LLMChain
11
- from langchain_core.prompts import ChatPromptTemplate
12
- from pydantic import BaseModel, Field
13
- from langchain.output_parsers import PydanticOutputParser
14
- from lm import get_query_llm, get_answer_llm # Your custom LLM wrapper functions
15
- from functools import lru_cache
16
-
17
- # Initialize LLMs
18
- q_llm = get_query_llm()
19
- a_llm = get_answer_llm()
20
-
21
- # Load sentence transformer model once globally
22
- embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
23
- save_dir = "saved_data"
24
-
25
- from functools import lru_cache
26
-
27
- # Cache embeddings and index loading
28
- @lru_cache(maxsize=1)
29
- def load_embeddings_and_index(save_dir="saved_data"):
30
- embedding = np.load(os.path.join(save_dir, "embeddings.npy"))
31
- index = faiss.read_index(os.path.join(save_dir, "index.faiss"))
32
- with open(os.path.join(save_dir, "chunks.txt"), "r", encoding="utf-8") as f:
33
- chunks = [line.strip() for line in f.readlines()]
34
- return embedding, index, chunks
35
-
36
-
37
- similar_words = [
38
- "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
39
- "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
40
- "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
41
- "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
42
- "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
43
- ]
44
-
45
- def is_explanation_query(query):
46
- return not any(word in query.lower() for word in similar_words)
47
-
48
- def retrieve_relevant_chunks(query, index, chunks, top_k=5):
49
- sub_str = "article"
50
- numbers = re.findall(r'\d+', query)
51
- flag = False
52
- if sub_str in query.lower() and numbers:
53
- article_number = str(numbers[0])
54
- for i, chunk in enumerate(chunks):
55
- if chunk.lower().startswith(f"article;{article_number}"):
56
- flag = is_explanation_query(query)
57
-
58
- return [chunk], flag
59
- print(flag)
60
- query_embedding = embedding_model.encode([query])
61
- query_embedding = np.array(query_embedding).astype("float32")
62
- distances, indices = index.search(query_embedding, top_k)
63
- relevant_chunks = [chunks[i] for i in indices[0]]
64
- return relevant_chunks, flag
65
-
66
- # Prompt to refine the query
67
- refine_prompt_template = ChatPromptTemplate.from_messages([
68
- ('system',
69
- "You are a legal assistant specialized in cleaning user queries. "
70
- "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
71
- "Do not correct grammar, punctuation, or capitalization. "
72
- "Do not restructure or rephrase the query in any way. "
73
- "Do not add or remove words. "
74
- "If the input is already clean or does not make sense, return it exactly as it is. "
75
- "Only return one corrected query."),
76
- ('human', '{query}')
77
- ])
78
- refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)
79
-
80
- # Define response schema
81
- class LegalResponse(BaseModel):
82
- title: str = Field (...,description='Return the title')
83
- answer: str = Field(..., description="The assistant's answer to the user's query")
84
- is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
85
- article_number: str = Field(..., description="Mentioned article number if available, else empty string")
86
-
87
- parser = PydanticOutputParser(pydantic_object=LegalResponse)
88
-
89
- # Prompt for direct article wording
90
- answer_prompt_template_query = ChatPromptTemplate.from_messages([
91
- ("system",
92
- "You are a legal assistant with expertise in the Constitution of Pakistan. "
93
- "Return answer in structure format."
94
- "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording"
95
- "Especially return the title"),
96
- ("human",
97
- "User Query: {query}\n\n"
98
- "Instructions:\n"
99
- "0. Return Title"
100
- "1. Return the exact wording from the Constitution.\n"
101
- "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution — do not include the full article unless required by structure\n"
102
- "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).ar\n"
103
- "4. Extract and return the article number if it is mentioned. with sub-clause if its mentioned like 1,2 or 1(a)\n\n"
104
- "Context:\n{context}\n\n"
105
- "{format_instructions}\n")
106
- ])
107
-
108
- answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
109
-
110
- # Prompt for explanation-style answers
111
- explanation_prompt_template_query = ChatPromptTemplate.from_messages([
112
- ("system",
113
- "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
114
- "You will receive a user query and a set of context chunks from the Constitution. "
115
- "Your task is to determine if the query is answerable based strictly on the information provided in the context. "
116
- "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
117
- "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating `Is Relevant: False`, and do not fabricate any information."
118
- ),
119
-
120
- ("human",
121
- "User Query: {query}\n\n"
122
- "Context (Extracted Chunks):\n{context}\n\n"
123
- "Instructions:\n"
124
- "1. Use only the information in the context to determine if the query can be answered.\n"
125
- "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
126
- "3. If the query is answerable based on the context, explain the related article, clause, or provision clearly and precisely:\n"
127
- " - Include the Article number if available.\n"
128
- " - Describe its meaning and how it functions within the Constitution.\n"
129
- "4. Do NOT use real-world references, court cases, or examples.\n"
130
- "5. Conclude your response with:\n"
131
- " - `Is Relevant: True/False`\n"
132
- " - `Related Article(s)`: List article number(s) if any.\n\n"
133
- "{format_instructions}\n")
134
- ])
135
-
136
- answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)
137
-
138
- # Load data
139
- embeddings, index, chunks = load_embeddings_and_index(save_dir)
140
-
141
- # Main function
142
- def get_legal_response(query):
143
- try:
144
- refined_query = refine_chain.run(query=query)
145
- except Exception as e:
146
- print(f"[Refinement Error] Using raw query instead: {e}")
147
- refined_query = query
148
-
149
- print("\nRefined Query:", refined_query)
150
-
151
- relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
152
-
153
- print("\nTop Relevant Chunks:")
154
- for i, chunk in enumerate(relevant_chunks, 1):
155
- print(f"\nChunk {i}:\n{'-'*50}\n{chunk}")
156
-
157
- context = "\n\n".join(relevant_chunks)
158
-
159
- if flag==True:
160
- print('okokokokokokokokokokok')
161
- response = answer_chain_article.run(query=refined_query,context=context,format_instructions=parser.get_format_instructions())
162
- else:
163
- print('nononononononononono')
164
- response = answer_chain_explanation.run(query=refined_query,context=context,format_instructions=parser.get_format_instructions())
165
-
166
- return {
167
- "title":response.title,
168
- "answer": response.answer,
169
- "is_relevant": response.is_relevant,
170
- "article_number": response.article_number
171
- }
 
1
import warnings
warnings.filterwarnings("ignore")

import re
import os
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain.chains import LLMChain
from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
from lm import get_query_llm, get_answer_llm  # Your custom LLM wrapper functions
from functools import lru_cache  # was imported twice; second duplicate removed

# Initialize LLMs: one model for query refinement, one for answering.
q_llm = get_query_llm()
a_llm = get_answer_llm()

# Load sentence transformer model once globally (reused for every query embedding).
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Directory holding the prebuilt embeddings / FAISS index / chunk text files.
save_dir = "."
26
+
27
# Loader for the precomputed artifacts; lru_cache memoizes so the files are
# read at most once per process.
@lru_cache(maxsize=1)
def load_embeddings_and_index(save_dir="."):
    """Load the saved embedding matrix, FAISS index, and text chunks.

    Returns a ``(embeddings, index, chunks)`` tuple read from *save_dir*.
    """
    emb_path = os.path.join(save_dir, "embeddings.npy")
    idx_path = os.path.join(save_dir, "index.faiss")
    txt_path = os.path.join(save_dir, "chunks.txt")

    vectors = np.load(emb_path)
    faiss_index = faiss.read_index(idx_path)
    with open(txt_path, "r", encoding="utf-8") as handle:
        text_chunks = [row.strip() for row in handle]
    return vectors, faiss_index, text_chunks
35
+
36
+
37
# Keyword/phrase list used to detect "explanation"-style queries.
# NOTE(review): matching is done by plain substring in is_explanation_query,
# so e.g. "state" also matches inside "statement" — confirm this is intended.
similar_words = [
    "explain", "elaborate", "describe", "clarify", "detail", "break down", "simplify", "outline",
    "demonstrate", "illustrate", "interpret", "expand on", "go over", "walk through", "define",
    "unpack", "decode", "shed light on", "analyze", "discuss", "make clear", "reveal", "disclose",
    "comment on", "talk about", "lay out", "spell out", "express", "delve into", "explore",
    "enlighten", "present", "review", "report", "state", "point out", "inform", "highlight"
]
44
+
45
def is_explanation_query(query):
    """Return True when *query* contains none of the explanation keywords.

    NOTE(review): the name reads inverted — a True result means the user is
    NOT asking for an explanation (callers use True to return exact wording).
    """
    lowered = query.lower()
    return all(word not in lowered for word in similar_words)
47
+
48
def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Return ``(relevant_chunks, flag)`` for *query*.

    If the query names a specific article number, the matching article chunk
    is returned directly; otherwise we fall back to FAISS similarity search.
    *flag* is True only on a direct article hit where the user did not ask
    for an explanation (i.e. the exact wording should be returned).
    """
    numbers = re.findall(r'\d+', query)
    flag = False
    if "article" in query.lower() and numbers:
        article_number = numbers[0]
        # (?!\d) stops e.g. article 1 from also matching "article;11" or
        # "article;100" — the old bare startswith() had that false-match bug.
        pattern = re.compile(rf"article;{re.escape(article_number)}(?!\d)")
        for chunk in chunks:
            if pattern.match(chunk.lower()):
                flag = is_explanation_query(query)
                return [chunk], flag

    # Fallback: semantic search over the FAISS index.
    query_embedding = np.array(embedding_model.encode([query])).astype("float32")
    distances, indices = index.search(query_embedding, top_k)
    relevant_chunks = [chunks[i] for i in indices[0]]
    return relevant_chunks, flag
65
+
66
# Prompt to refine the query.
# Refinement is deliberately narrow: spelling fixes and number-word -> digit
# conversion only, so the downstream article-number regex can find digits.
refine_prompt_template = ChatPromptTemplate.from_messages([
    ('system',
     "You are a legal assistant specialized in cleaning user queries. "
     "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
     "Do not correct grammar, punctuation, or capitalization. "
     "Do not restructure or rephrase the query in any way. "
     "Do not add or remove words. "
     "If the input is already clean or does not make sense, return it exactly as it is. "
     "Only return one corrected query."),
    ('human', '{query}')
])
# Chain that runs the refinement prompt against the query LLM.
refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)
79
+
80
# Define response schema
class LegalResponse(BaseModel):
    """Structured answer emitted by both answer chains."""

    # Title of the matched article/provision.
    title: str = Field (...,description='Return the title')
    # The assistant's answer text.
    answer: str = Field(..., description="The assistant's answer to the user's query")
    # Whether the query concerns the Constitution of Pakistan at all.
    is_relevant: bool = Field(..., description="True if the query is relevant to the Constitution of Pakistan, otherwise False")
    # Article number (with sub-clause when present), or "" if none mentioned.
    article_number: str = Field(..., description="Mentioned article number if available, else empty string")

# Parser that forces LLM output into a LegalResponse instance.
parser = PydanticOutputParser(pydantic_object=LegalResponse)
88
+
89
# Prompt for direct article wording (used when the user wants exact text).
# Fixes over the previous version: implicit string concatenation had fused
# sentences together ("format.Your", "wordingEspecially", "Title1."), the
# system text said "structure format", and instruction 3 ended in a stray
# ".ar" typo.
answer_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant with expertise in the Constitution of Pakistan. "
     "Return answer in structured format. "
     "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording. "
     "Especially return the title"),
    ("human",
     "User Query: {query}\n\n"
     "Instructions:\n"
     "0. Return Title\n"
     "1. Return the exact wording from the Constitution.\n"
     "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution — do not include the full article unless required by structure\n"
     "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
     "4. Extract and return the article number if it is mentioned. with sub-clause if its mentioned like 1,2 or 1(a)\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n")
])

# Chain producing exact-wording answers, parsed into LegalResponse.
answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
109
+
110
# Prompt for explanation-style answers (used when the query contains an
# explanation keyword — see is_explanation_query / retrieve_relevant_chunks).
explanation_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
     "You will receive a user query and a set of context chunks from the Constitution. "
     "Your task is to determine if the query is answerable based strictly on the information provided in the context. "
     "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating `Is Relevant: False`, and do not fabricate any information."
    ),

    ("human",
     "User Query: {query}\n\n"
     "Context (Extracted Chunks):\n{context}\n\n"
     "Instructions:\n"
     "1. Use only the information in the context to determine if the query can be answered.\n"
     "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
     "3. If the query is answerable based on the context, explain the related article, clause, or provision clearly and precisely:\n"
     "   - Include the Article number if available.\n"
     "   - Describe its meaning and how it functions within the Constitution.\n"
     "4. Do NOT use real-world references, court cases, or examples.\n"
     "5. Conclude your response with:\n"
     "   - `Is Relevant: True/False`\n"
     "   - `Related Article(s)`: List article number(s) if any.\n\n"
     "{format_instructions}\n")
])

# Chain producing grounded explanations, parsed into LegalResponse.
answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template_query, output_parser=parser)
137
+
138
# Load data once at import time (the loader is lru_cached, so re-imports are
# cheap). NOTE(review): this does file I/O on import — confirm acceptable.
embeddings, index, chunks = load_embeddings_and_index(save_dir)
140
+
141
# Main function
def get_legal_response(query):
    """Answer *query* from the Constitution of Pakistan index.

    Refines the query, retrieves relevant chunks, then routes to the
    exact-wording chain (direct article hit, no explanation requested) or the
    explanation chain. Returns a plain dict with title / answer / is_relevant
    / article_number. Leftover debug prints ('okokok…', 'nononono…', chunk
    dumps) from the previous version have been removed.
    """
    try:
        refined_query = refine_chain.run(query=query)
    except Exception as e:
        # Refinement is best-effort: fall back to the raw query on any failure.
        print(f"[Refinement Error] Using raw query instead: {e}")
        refined_query = query

    relevant_chunks, flag = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
    context = "\n\n".join(relevant_chunks)
    format_instructions = parser.get_format_instructions()

    # flag is True only for a direct article hit with no explanation keyword.
    if flag:
        response = answer_chain_article.run(
            query=refined_query, context=context, format_instructions=format_instructions
        )
    else:
        response = answer_chain_explanation.run(
            query=refined_query, context=context, format_instructions=format_instructions
        )

    return {
        "title": response.title,
        "answer": response.answer,
        "is_relevant": response.is_relevant,
        "article_number": response.article_number,
    }