Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import chromadb | |
| from openai import OpenAI | |
| import json | |
| from sentence_transformers import SentenceTransformer | |
| from loguru import logger | |
| from test_embeddings import test_chromadb_content, initialize_chromadb | |
class SentenceTransformerEmbeddings:
    """Callable wrapper that turns texts into embedding vectors.

    An instance holds a ``SentenceTransformer`` model and, when called with
    a list of strings, returns one list of floats per string.
    """

    def __init__(self, model_name: str = 'all-MiniLM-L6-v2'):
        """Load the sentence-transformers model identified by *model_name*."""
        self.model = SentenceTransformer(model_name)

    # NOTE: the parameter is deliberately still called ``input`` (shadowing
    # the builtin) because it is part of this callable's interface and may
    # be passed by keyword by whoever invokes the embedding function.
    def __call__(self, input: list[str]) -> list[list[float]]:
        """Encode *input* with the model and return plain nested lists."""
        return self.model.encode(input).tolist()
class LegalAssistant:
    """Question answering over the Bharateeya Nyaya Sanhita (BNS).

    A query is validated, the most similar passages are fetched from the
    ``legal_documents`` ChromaDB collection, and those passages are sent to
    the Mistral chat-completions endpoint together with a prompt demanding a
    JSON reply. The reply is parsed, checked and normalised into a dict with
    the keys ``answer``, ``references``, ``summary`` and ``confidence``.
    """

    MODEL_NAME = "mistral-medium"
    COLLECTION_NAME = "legal_documents"
    N_RESULTS = 3
    MIN_QUERY_LENGTH = 10
    MAX_QUERY_LENGTH = 500
    CONFIDENCE_LEVELS = ("HIGH", "MEDIUM", "LOW")
    REQUIRED_FIELDS = ("answer", "reference_sections", "summary", "confidence")
    NO_REFERENCES = "No specific references from Bharateeya Nyaya Sanhita"
    ANSWER_PREFIX = "The Bharateeya Nyaya Sanhita"
    SUMMARY_PREFIX = "In the Bharateeya Nyaya Sanhita"

    # The prompt body is kept flush-left so that no source indentation leaks
    # into the text sent to the model.
    SYSTEM_PROMPT = '''You are a specialized legal assistant for the Bharateeya Nyaya Sanhita (BNS) that MUST follow these STRICT rules:
1. You MUST ONLY use information from the provided context.
2. DO NOT use any external knowledge about laws, IPC, Constitution, or legal matters.
3. Your response MUST be in this EXACT JSON format:
{
"answer": "Your detailed answer explaining BNS sections in simple, easy-to-understand language. Start with 'The Bharateeya Nyaya Sanhita...'",
"reference_sections": ["List of relevant BNS section titles"],
"summary": "Provide a user-friendly summary that explains:\n1. What BNS sections were found\n2. What each section covers\n3. How these sections relate to the query\nStart with 'In the Bharateeya Nyaya Sanhita...'",
"confidence": "HIGH/MEDIUM/LOW"
}

Confidence Level Rules:
- HIGH: When exact matching BNS sections and their details are found
- MEDIUM: When partially relevant BNS sections are found
- LOW: When sections are not clearly relevant or not found

Response Guidelines:
1. Always mention "Bharateeya Nyaya Sanhita" when referencing sections
2. Explain legal terms in simple language
3. Make the summary easy to understand for non-legal persons
4. Break down complex legal concepts into simple explanations
5. Use everyday examples where appropriate

If information is not in context, respond with:
{
"answer": "The Bharateeya Nyaya Sanhita sections related to your query are not present in the provided document.",
"reference_sections": [],
"summary": "No relevant sections found in the Bharateeya Nyaya Sanhita document",
"confidence": "LOW"
}'''

    def __init__(self):
        """Connect to ChromaDB and create the Mistral API client.

        Raises:
            ValueError: if ``MISTRAL_API_KEY`` is not set, or if the ChromaDB
                content can be neither verified nor initialised.
            Exception: any other start-up error is logged and re-raised
                unchanged.
        """
        try:
            logger.info("Initializing Bharateeya Nyaya Sanhita Assistant...")

            # The key must come from the environment. A credential embedded
            # in the source is readable by anyone who can see the file, so
            # there is intentionally no literal fallback; fail fast instead.
            api_key = os.environ.get("MISTRAL_API_KEY")
            if not api_key:
                raise ValueError("MISTRAL_API_KEY environment variable is not set")

            # Verify the stored content; if that fails, try to (re)build it.
            if not test_chromadb_content():
                logger.warning("ChromaDB verification failed, attempting to initialize...")
                if not initialize_chromadb():
                    raise ValueError("Failed to initialize ChromaDB with BNS content")

            # The database directory lives next to this source file.
            base_path = os.path.dirname(os.path.abspath(__file__))
            chroma_path = os.path.join(base_path, 'chroma_db')
            self.chroma_client = chromadb.PersistentClient(path=chroma_path)
            self.embedding_function = SentenceTransformerEmbeddings()

            # Open the existing collection (it is not created here).
            self.collection = self.chroma_client.get_collection(
                name=self.COLLECTION_NAME,
                embedding_function=self.embedding_function,
            )
            logger.info(f"BNS Collection loaded with {self.collection.count()} sections")

            # The OpenAI client is pointed at the Mistral base URL.
            self.mistral_client = OpenAI(
                api_key=api_key,
                base_url="https://api.mistral.ai/v1",
            )
            logger.info("BNS Assistant initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing BNS Assistant: {str(e)}")
            raise

    def validate_query(self, query: str) -> tuple[bool, str]:
        """Check that *query* has an acceptable length.

        Both limits are applied to the whitespace-stripped text, so padding
        can neither satisfy the minimum nor trip the maximum.

        Returns:
            ``(True, "")`` when the query is acceptable, otherwise
            ``(False, message)`` with a user-facing explanation.
        """
        text = query.strip() if isinstance(query, str) else ""
        if len(text) < self.MIN_QUERY_LENGTH:
            return False, (
                "Please provide a more detailed question about the Bharateeya Nyaya Sanhita "
                f"(minimum {self.MIN_QUERY_LENGTH} characters)."
            )
        if len(text) > self.MAX_QUERY_LENGTH:
            return False, (
                "Please make your question more concise "
                f"(maximum {self.MAX_QUERY_LENGTH} characters)."
            )
        return True, ""

    def get_response(self, query: str) -> dict:
        """Answer *query* from the indexed BNS text.

        Returns:
            A dict with ``answer`` (str), ``references`` (list of str),
            ``summary`` (str) and ``confidence`` (``"HIGH"``, ``"MEDIUM"``
            or ``"LOW"``). This method does not raise: every failure is
            logged and reported through the same dict with ``"LOW"``
            confidence.
        """
        try:
            is_valid, error_message = self.validate_query(query)
            if not is_valid:
                return self._fallback(error_message, "Query validation failed")
            query = query.strip()

            # Retrieve the passages most similar to the query.
            results = self.collection.query(
                query_texts=[query],
                n_results=self.N_RESULTS,
            )
            documents = results['documents'][0]
            if not documents:
                return self._fallback(
                    "No relevant information found in the Bharateeya Nyaya Sanhita.",
                    "No matching content in BNS",
                )

            context, references = self._build_context(documents, results['metadatas'][0])

            response = self.mistral_client.chat.completions.create(
                model=self.MODEL_NAME,
                messages=[
                    {"role": "system", "content": self.SYSTEM_PROMPT},
                    {"role": "user", "content": self._build_user_prompt(context, query)},
                ],
                temperature=0.3,
                max_tokens=1500,
                response_format={"type": "json_object"},
            )

            if not (response.choices and response.choices[0].message.content):
                return self._fallback(
                    "Error: No valid response received from the system",
                    "Could not generate summary due to system error",
                )
            return self._parse_model_reply(response.choices[0].message.content, references)
        except Exception as e:
            logger.error(f"Error in get_response: {str(e)}")
            return self._fallback(
                f"Error: {str(e)}",
                "Could not generate summary due to system error",
            )

    @classmethod
    def _fallback(cls, answer: str, summary: str) -> dict:
        """Build a LOW-confidence response that carries no real references."""
        return {
            "answer": answer,
            # A fresh list per call so callers cannot mutate shared state.
            "references": [cls.NO_REFERENCES],
            "summary": summary,
            "confidence": "LOW",
        }

    @staticmethod
    def _build_context(documents: list, metadatas: list) -> tuple[str, list]:
        """Join retrieved passages under their titles and collect the titles."""
        references = [meta['title'] for meta in metadatas]
        parts = [f"{meta['title']}:\n{doc}" for doc, meta in zip(documents, metadatas)]
        return "\n\n".join(parts), references

    @staticmethod
    def _build_user_prompt(context: str, query: str) -> str:
        """Compose the user message: retrieved context, question, reminders."""
        return "\n".join((
            "Context Sections from Bharateeya Nyaya Sanhita:",
            context,
            "",
            f"Question: {query}",
            "",
            "IMPORTANT:",
            "1. Use ONLY the information from the above BNS context",
            "2. Format your response as a valid JSON object",
            '3. Always reference "Bharateeya Nyaya Sanhita" in your response',
            "4. Explain each section in simple, user-friendly language",
            "5. Make the summary comprehensive but easy to understand",
            "6. Break down legal concepts for non-legal persons",
            "7. Ensure proper JSON formatting with double quotes",
        ))

    @staticmethod
    def _decapitalize(text: str) -> str:
        """Lower-case only the first letter so *text* can continue a sentence.

        The rest of the text is left alone so section numbers, proper nouns
        and acronyms keep their casing. A leading acronym (second character
        upper-case, e.g. "BNS") is left untouched as well.
        """
        if len(text) > 1 and text[1].isupper():
            return text
        return text[:1].lower() + text[1:]

    def _parse_model_reply(self, raw: str, references: list) -> dict:
        """Parse and normalise the model's JSON reply.

        *references* holds the titles of the passages that were actually
        sent to the model. If the reply cites anything outside that list,
        all citations are dropped and confidence is forced to ``"LOW"``.
        Malformed replies produce a LOW-confidence error response.
        """
        try:
            result = json.loads(raw)

            if not isinstance(result, dict) or not all(
                field in result for field in self.REQUIRED_FIELDS
            ):
                raise ValueError("Missing required fields in response")

            cited = result["reference_sections"]
            if isinstance(cited, str):
                # A single title returned as a bare string is still usable.
                cited = [cited]
            elif not isinstance(cited, list):
                raise ValueError("reference_sections must be a list")

            # Accept any casing ("high", "High"); unknown values become LOW.
            confidence = str(result["confidence"]).strip().upper()
            if confidence not in self.CONFIDENCE_LEVELS:
                confidence = "LOW"

            # List membership (not a set) so unhashable items cannot raise.
            valid_references = [ref for ref in cited if ref in references]
            if len(valid_references) != len(cited):
                formatted_references = [self.NO_REFERENCES]
                confidence = "LOW"
            else:
                formatted_references = [
                    f"Bharateeya Nyaya Sanhita - {ref}" for ref in valid_references
                ]

            answer = str(result["answer"]).strip()
            if not answer.startswith(self.ANSWER_PREFIX):
                answer = f"{self.ANSWER_PREFIX} states that {self._decapitalize(answer)}"

            summary = str(result["summary"]).strip()
            if not summary.startswith(self.SUMMARY_PREFIX):
                summary = f"{self.SUMMARY_PREFIX}, {self._decapitalize(summary)}"

            return {
                "answer": answer,
                "references": formatted_references,
                "summary": summary,
                "confidence": confidence,
            }
        # JSONDecodeError subclasses ValueError, so it must be handled first.
        except json.JSONDecodeError as e:
            logger.error(f"JSON parsing error: {str(e)}")
            return self._fallback(
                "Error: Unable to process the response format",
                "Could not generate summary due to processing error",
            )
        except ValueError as e:
            logger.error(f"Validation error: {str(e)}")
            return self._fallback(
                "Error: Response structure was invalid",
                "Could not generate summary due to validation error",
            )
# Build the single module-level assistant used by the request handler.
# A start-up failure is logged and then re-raised so the module does not
# finish importing without a usable assistant.
try:
    assistant = LegalAssistant()
except Exception as init_error:
    logger.error(f"Failed to initialize BNS Assistant: {init_error}")
    raise
def process_query(query: str) -> tuple:
    """Run *query* through the assistant and flatten the result for the UI.

    Returns a 4-tuple ``(answer, references, summary, confidence)`` in which
    the list of references has been joined into one comma-separated string.
    """
    result = assistant.get_response(query)
    answer_text = result["answer"]
    references_text = ", ".join(result["references"])
    summary_text = result["summary"]
    confidence_text = result["confidence"]
    return (answer_text, references_text, summary_text, confidence_text)
# Gradio user interface.
# The long Markdown texts are named up front so the layout below reads as
# structure only.
_INTRO_MARKDOWN = """
# Bharateeya Nyaya Sanhita Assistant

## Your Guide to Understanding the BNS

This assistant helps you understand sections and provisions of the Bharateeya Nyaya Sanhita (BNS) in simple, clear language.

## Guidelines for Queries:
1. Ask specific questions about BNS sections or topics
2. End questions with a question mark
3. Keep queries between 10-500 characters
4. Example queries:
   - "What does the BNS say about theft?"
   - "Explain the provisions related to property offenses in BNS."
   - "What are the sections dealing with criminal breach of trust?"
"""

_NOTES_MARKDOWN = """
### Important Notes:
- All information is sourced directly from the Bharateeya Nyaya Sanhita
- Responses are based only on the official BNS document
- The assistant explains legal concepts in simple, understandable language
- Reliability level indicates how well your query matches BNS content
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Query entry and submit button, each in its own row.
    with gr.Row():
        query_input = gr.Textbox(
            placeholder="e.g., What are the main provisions about theft in BNS?",
            label="Enter your query about Bharateeya Nyaya Sanhita",
        )
    with gr.Row():
        submit_btn = gr.Button("Get BNS Information", variant="primary")

    # Result widgets: reliability first, then the explanation, then the
    # references and summary side by side.
    with gr.Row():
        confidence_output = gr.Textbox(label="Information Reliability Level")
    with gr.Row():
        answer_output = gr.Textbox(lines=5, label="Detailed Explanation")
    with gr.Row():
        with gr.Column():
            references_output = gr.Textbox(lines=2, label="BNS Section References")
        with gr.Column():
            summary_output = gr.Textbox(lines=2, label="Simple Summary")

    gr.Markdown(_NOTES_MARKDOWN)

    # The output order must match the tuple returned by process_query:
    # (answer, references, summary, confidence).
    result_widgets = [
        answer_output,
        references_output,
        summary_output,
        confidence_output,
    ]
    submit_btn.click(fn=process_query, inputs=[query_input], outputs=result_widgets)

if __name__ == "__main__":
    demo.launch()