Spaces:
Sleeping
Sleeping
| import warnings | |
| warnings.filterwarnings("ignore") | |
| import re | |
| import os | |
| import numpy as np | |
| import faiss | |
| from sentence_transformers import SentenceTransformer | |
| from langchain_groq import ChatGroq | |
| from langchain.chains import LLMChain | |
| from langchain_core.prompts import ChatPromptTemplate | |
| from pydantic import BaseModel, Field | |
| from langchain.output_parsers import PydanticOutputParser | |
| from lm import get_query_llm, get_answer_llm # Your custom LLM wrapper functions | |
| from functools import lru_cache | |
# Both chat models are built once at import time and shared by the chains
# defined further down in this module.
q_llm = get_query_llm()  # drives the query-cleaning chain
a_llm = get_answer_llm()  # drives the three answer chains

# One sentence-embedding model instance, reused for every query encoding.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Directory passed to load_embeddings_and_index() when the data is loaded.
save_dir = "."
| from functools import lru_cache | |
def load_embeddings_and_index(save_dir="saved_data"):
    """Read the persisted retrieval artefacts from ``save_dir``.

    No caching is applied: every call re-reads all three files from disk.

    Args:
        save_dir: Directory containing ``embeddings.npy``, ``index.faiss``
            and ``chunks.txt``.

    Returns:
        A ``(embedding, index, chunks)`` tuple: the array loaded from
        ``embeddings.npy``, the FAISS index read from ``index.faiss`` and the
        whitespace-stripped lines of ``chunks.txt`` (one list entry per line).
    """
    embedding_path = os.path.join(save_dir, "embeddings.npy")
    index_path = os.path.join(save_dir, "index.faiss")
    chunks_path = os.path.join(save_dir, "chunks.txt")

    embedding = np.load(embedding_path)
    index = faiss.read_index(index_path)
    with open(chunks_path, "r", encoding="utf-8") as handle:
        # Blank lines are kept (as empty strings); entry N is line N of the file.
        chunks = [raw_line.strip() for raw_line in handle]
    return embedding, index, chunks
# Words/phrases that signal the user wants an explanation rather than the
# verbatim text. They are matched case-insensitively as plain substrings of
# the query, so every entry is kept in lower case.
similar_words = [
    "explain", "elaborate", "describe", "clarify", "detail", "break down",
    "simplify", "outline", "in simple words",
    "demonstrate", "illustrate", "interpret", "expand on", "go over",
    "walk through", "define",
    "unpack", "decode", "shed light on", "analyze", "discuss", "make clear",
    "reveal", "disclose",
    "comment on", "talk about", "lay out", "spell out", "express",
    "delve into", "explore",
    "enlighten", "present", "review", "report", "state", "point out",
    "inform", "highlight", "brief",
]


def is_explanation_query(query):
    """Report whether ``query`` is free of explanation keywords.

    NOTE: despite the name, the result is inverted. The function returns
    ``True`` when NONE of ``similar_words`` occurs in the query and ``False``
    when at least one does. The caller in this file tests for ``False`` to
    pick the explanation route, so the contract is kept unchanged.

    Args:
        query: The user query text.

    Returns:
        ``False`` if any keyword appears (case-insensitively, as a substring)
        in ``query``; ``True`` otherwise.
    """
    lowered = query.lower()
    # Lower-case the keyword as well so an entry with capitals (the original
    # list had "Brief") can still match the lower-cased query.
    return not any(word.lower() in lowered for word in similar_words)
def retrieve_relevant_chunks(query, index, chunks, top_k=5):
    """Pick the context chunks for ``query`` and say how they were found.

    Two strategies are tried in order:

    1. Direct article lookup: if the query mentions "article" and contains a
       number, the first chunk whose text starts with ``article;<number>``
       is returned on its own.
    2. Semantic search: otherwise (or if no such chunk exists) the query is
       embedded with the module-level ``embedding_model`` and the ``top_k``
       nearest chunks from ``index`` are returned.

    Args:
        query: The (already refined) user query.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``, as a FAISS index does.
        chunks: Chunk texts; position ``i`` must correspond to id ``i`` in
            ``index``.
        top_k: Number of neighbours requested from the semantic search.

    Returns:
        ``(relevant_chunks, mode)`` where ``mode`` is
        ``1`` for a direct article hit with no explanation keyword in the query,
        ``2`` for a direct article hit with an explanation keyword, and
        ``3`` for semantic-search results.
    """
    lowered_query = query.lower()
    numbers = re.findall(r"\d+", query)

    if "article" in lowered_query and numbers:
        # Prefer the number written right after the word "article" so that an
        # unrelated number earlier in the query (e.g. a year) is not mistaken
        # for the article number; fall back to the first number otherwise.
        adjacent = re.search(r"articles?\W*(\d+)", lowered_query)
        article_number = adjacent.group(1) if adjacent else numbers[0]

        # (?!\d) keeps "article;1" from matching the chunks of Article 10,
        # 11, 100, ... which a plain prefix test would accept.
        header = re.compile(rf"article;{re.escape(article_number)}(?!\d)")
        for chunk in chunks:
            if header.match(chunk.lower()):
                # is_explanation_query() returns False when an explanation
                # keyword IS present (see its docstring) -> mode 2.
                mode = 1 if is_explanation_query(query) else 2
                return [chunk], mode

    query_embedding = embedding_model.encode([query])
    query_embedding = np.array(query_embedding).astype("float32")
    _, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with -1 when fewer than top_k vectors are found;
    # without this filter chunks[-1] would silently return the last chunk.
    relevant_chunks = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]
    return relevant_chunks, 3
# System instructions for the query-cleaning step: fix spelling and turn
# number words into digits, nothing else.
_REFINE_SYSTEM_MESSAGE = (
    "You are a legal assistant specialized in cleaning user queries. "
    "Your task is to fix spelling mistakes and convert number words to digits only (e.g., 'three' to '3'). "
    "Do not correct grammar, punctuation, or capitalization. "
    "Do not restructure or rephrase the query in any way. "
    "Do not add or remove words. "
    "If the input is already clean or does not make sense, return it exactly as it is. "
    "Only return one corrected query."
)

# Template takes a single variable, {query}, holding the raw user text.
refine_prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", _REFINE_SYSTEM_MESSAGE),
        ("human", "{query}"),
    ]
)

# Chain that runs the cleaning prompt on the query LLM.
refine_chain = LLMChain(llm=q_llm, prompt=refine_prompt_template)
# Structured payload every answer chain is parsed into.
# Documented with comments instead of a class docstring on purpose: a model
# docstring ends up in the generated JSON schema and would therefore alter
# the format instructions sent to the LLM.
class LegalResponse(BaseModel):
    # Title returned by the model for the answer.
    title: str = Field(
        ...,
        description="Return the title",
    )
    # The answer text itself.
    answer: str = Field(
        ...,
        description="The assistant's answer to the user's query",
    )
    # Relevance flag as judged by the model.
    is_relevant: bool = Field(
        ...,
        description="True if the query is relevant to the Constitution of Pakistan, otherwise False",
    )
    # Article number mentioned in the query/answer; empty string when absent.
    article_number: str = Field(
        ...,
        description="Mentioned article number if available, else empty string",
    )


# Shared by all answer chains: supplies the format instructions for the
# prompts and parses the model output into a LegalResponse.
parser = PydanticOutputParser(pydantic_object=LegalResponse)
# Prompt for returning the exact wording of an article (mode 1 in
# get_legal_response). Template variables: {query}, {context},
# {format_instructions}.
#
# Adjacent string literals are concatenated with no separator, so each piece
# must carry its own trailing space or newline. The original pieces ran
# together ("format.Your task", "wordingEspecially", "0. Return Title1.") and
# instruction 3 ended in a stray ".ar"; both are corrected here.
answer_prompt_template_query = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal assistant with expertise in the Constitution of Pakistan. "
     "Return answer in structure format. "
     "Your task is to extract and present the exact constitutional text, without paraphrasing, ensuring accuracy and fidelity to the original wording. "
     "Especially return the title"),
    ("human",
     "User Query: {query}\n\n"
     "Instructions:\n"
     "0. Return Title\n"
     "1. Return the exact wording from the Constitution.\n"
     "2. If a query references a specific article or sub-clause (e.g., Article 11(3)(b), Article 11(b), or 11(i)), return only the exact wording of that clause from the Constitution — do not include the full article unless required by structure\n"
     "3. Indicate whether the query is related to the Constitution of Pakistan (Yes/No).\n"
     "4. Extract and return the article number if it is mentioned. with sub-clause if its mentioned like 1,2 or 1(a)\n\n"
     "Context:\n{context}\n\n"
     "{format_instructions}\n"),
])

# Chain for verbatim article answers; output is parsed into LegalResponse.
answer_chain_article = LLMChain(llm=a_llm, prompt=answer_prompt_template_query, output_parser=parser)
# Prompt for explaining a single article that was found by direct lookup
# (mode 2 in get_legal_response). Template variables: {query}, {context},
# {format_instructions}.
#
# The "not found" value for article_number is an empty string so that this
# prompt agrees with the LegalResponse schema, whose description is included
# in {format_instructions}. The original text asked for 'None', giving the
# model two contradictory instructions in the same call. The field list also
# lacked the colon after "article_number".
explain_article_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a helpful assistant that analyzes human-written legal or constitutional text. "
     "Your task is to return a structured response with the following fields:\n"
     "- title: The title of the article, if available or derivable.\n"
     "- answer: A clear explanation or summary of the content.\n"
     "- is_relevant: true if the content is relevant to the legal or constitutional domain, otherwise false.\n"
     "- article_number: Extract the article number (e.g., Article 11 or Article 3(a)), or return an empty string if not found."
     ),
    ("human",
     "query:\n{query}\n\n"
     "Context:\n{context}\n\n"
     "Return your response in the following format:\n\n"
     "title:\n"
     "answer:\n"
     "is_relevant:\n"
     "article_number:\n\n"
     "{format_instructions}"),
])

# Chain for article explanations; output is parsed into LegalResponse.
explain_chain_article = LLMChain(llm=a_llm, prompt=explain_article_prompt_template, output_parser=parser)
| # Prompt for explanation-style answers | |
| from langchain.prompts import ChatPromptTemplate | |
| from langchain.prompts import ChatPromptTemplate | |
# Prompt for answers built from semantic-search results (mode 3 in
# get_legal_response). Template variables: {query}, {context},
# {format_instructions}.
#
# NOTE(review): the original wording presented the assistant as an expert on
# the Pakistan Penal Code and spoke of "Sections", while the response schema,
# the other two prompts and the "article;N" chunk lookup in this file all
# target the Constitution of Pakistan. The text below is aligned with the
# rest of the file, and the requested output fields now use the schema's own
# names. Confirm that no PPC data is served through this chain.
explanation_prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a legal expert assistant with deep knowledge of the Constitution of Pakistan. "
     "You will receive a user query and a set of context chunks from the Constitution. "
     "Your task is to determine if the query is answerable strictly based on the provided context. "
     "If it is, provide a structured explanation based on that context—without copying or repeating the context text verbatim. "
     "If the information needed to answer is not found in the provided chunks, respond with a structured message indicating is_relevant: false, and do not fabricate any information."
     ),
    ("human",
     "User Query: {query}\n\n"
     "Context (Extracted Chunks):\n{context}\n\n"
     "Instructions:\n"
     "1. Use only the information in the context to determine if the query can be answered.\n"
     "2. DO NOT include or repeat the context text directly in your answer. Summarize or paraphrase when needed.\n"
     "3. If the query is answerable based on the context, explain the related article or clause clearly and precisely:\n"
     " - Include the Article number if available.\n"
     " - Describe its meaning and how it functions within the Constitution.\n"
     "4. Do NOT use real-world references, court cases, or examples.\n"
     "5. Your final output must include the following structured return:\n"
     " - answer: A *detailed explanation* of the relevant article or provision.\n"
     " - is_relevant: true/false\n"
     " - article_number: The related article number(s), if any.\n\n"
     "{format_instructions}\n"),
])

# Chain for search-based explanations; output is parsed into LegalResponse.
answer_chain_explanation = LLMChain(llm=a_llm, prompt=explanation_prompt_template, output_parser=parser)
# Read the retrieval artefacts once, at import time. get_legal_response()
# passes the module-level `index` and `chunks` bound here to the retriever.
embeddings, index, chunks = load_embeddings_and_index(save_dir)
def get_legal_response(query):
    """Answer a legal query end to end: refine, retrieve, then generate.

    Steps:
        1. Clean the query with ``refine_chain``; on any failure, or if the
           model returns blank text, the raw query is used instead.
        2. Fetch context with ``retrieve_relevant_chunks`` using the
           module-level ``index`` and ``chunks``.
        3. Run the answer chain selected by the retrieval mode
           (1: verbatim article, 2: article explanation, anything else:
           search-based explanation).

    Progress information is printed to stdout.

    Args:
        query: Raw user query text.

    Returns:
        A dict with the keys ``title``, ``answer``, ``is_relevant`` and
        ``article_number`` taken from the parsed ``LegalResponse``.

    Raises:
        Whatever the selected answer chain or its output parser raises; only
        the refinement step has a fallback.
    """
    try:
        refined_query = refine_chain.run(query=query)
    except Exception as e:
        # Refinement is best-effort: never let it block answering.
        print(f"[Refinement Error] Using raw query instead: {e}")
        refined_query = query
    # A blank refinement would turn into an empty search query, so treat it
    # like a failed refinement and fall back to the raw text.
    refined_query = (refined_query or "").strip() or query
    print("\nRefined Query:", refined_query)

    relevant_chunks, mode = retrieve_relevant_chunks(refined_query, index, chunks, top_k=5)
    print("\nTop Relevant Chunks:")
    for i, chunk in enumerate(relevant_chunks, 1):
        print(f"\nChunk {i}:\n{'-'*50}\n{chunk}")
    context = "\n\n".join(relevant_chunks)

    # Mode -> chain; any mode other than 1 or 2 uses the search-based chain,
    # matching the original if/elif/else.
    chains_by_mode = {
        1: answer_chain_article,
        2: explain_chain_article,
    }
    chain = chains_by_mode.get(mode, answer_chain_explanation)
    print(f"\nRetrieval mode: {mode}")
    response = chain.run(
        query=refined_query,
        context=context,
        format_instructions=parser.get_format_instructions(),
    )

    return {
        "title": response.title,
        "answer": response.answer,
        "is_relevant": response.is_relevant,
        "article_number": response.article_number,
    }