import warnings
import os
import nltk
import re


# Make the bundled NLTK corpora take priority over default download
# locations — presumably /app/nltk_data is baked into the container image
# so the app can run offline. TODO confirm path against the Dockerfile.
nltk.data.path = ['/app/nltk_data'] + nltk.data.path


from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    ServiceContext,
    load_index_from_storage,
    Document
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.groq import Groq
import pdfplumber


# Silence all library warnings for cleaner server logs.
warnings.filterwarnings('ignore')
|
|
| |
app = FastAPI()


class QueryRequest(BaseModel):
    """Request body for POST /query/: a single free-text user question."""
    query: str


# Read the Groq API key from the environment; None when unset (the Groq
# client would then fail when a request is made, not at import time).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
| |
| input_files = [ |
| "civil.pdf", |
| "constitution.pdf", |
| "criminal.pdf", |
| "family.pdf", |
| "civil_1.pdf", |
| "civil_2.pdf", |
| "property_final.pdf", |
| "criminal_final.pdf", |
| "family_final.pdf", |
| "civil_book.pdf", |
| "criminal_book.pdf", |
| "penal_code_book.pdf", |
| "family_law_ordinance_book.pdf", |
| "west_family_book.pdf" |
| ] |
|
|
| |
def extract_text_from_pdf(file_path):
    """Extract plain text from every page of a PDF.

    Returns the non-empty page texts joined with newlines, or "" when the
    file cannot be opened or parsed (errors are logged, never raised).
    """
    try:
        with pdfplumber.open(file_path) as pdf:
            pages = [page.extract_text() for page in pdf.pages]
        # Pages with no extractable text come back falsy; drop them.
        return "\n".join(text for text in pages if text)
    except Exception as e:
        print(f"Error extracting text from {file_path}: {str(e)}")
        return ""
|
|
| |
def initialize_index():
    """Load the persisted vector index, or build it from the source PDFs.

    Returns a VectorStoreIndex backed by HuggingFace embeddings and a Groq
    LLM. The index is persisted to ./storage_law_app so later startups can
    skip re-embedding.
    """
    persist_dir = "./storage_law_app"

    def _build_service_context():
        # Embedding model + LLM are identical for both the load and build
        # paths; previously this setup was duplicated in each branch.
        embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
        llm = Groq(model="llama-3.1-8b-instant", api_key=GROQ_API_KEY)
        return ServiceContext.from_defaults(embed_model=embed_model, llm=llm)

    # Reuse a previously persisted index when the storage dir is non-empty.
    if os.path.exists(persist_dir) and os.listdir(persist_dir):
        print("Loading existing index...")
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        return load_index_from_storage(storage_context, service_context=_build_service_context())

    print("Creating new index...")
    os.makedirs(persist_dir, exist_ok=True)

    # Gather text from every available source PDF; missing files are
    # reported but do not abort startup.
    documents = []
    for file in input_files:
        if os.path.exists(file):
            content = extract_text_from_pdf(file)
            if content:
                documents.append(Document(text=content))
        else:
            print(f"Warning: File {file} not found")

    if not documents:
        print("Warning: No documents were loaded")
        # Keep the index buildable even with no source material.
        documents = [Document(text="This is a placeholder document as no actual documents were found.")]

    vector_index = VectorStoreIndex.from_documents(
        documents=documents,
        service_context=_build_service_context(),
        show_progress=True
    )
    vector_index.storage_context.persist(persist_dir=persist_dir)
    return vector_index
|
|
| |
| LEGAL_CONTEXT = """ |
| Context: Provide legal guidance based on the Pakistani legal framework. |
| Task: Analyze the query and provide a structured response with headings and bullet points. |
| The format should be: |
| 1. **Introduction/Overview**: A brief overview of the law or section. |
| 2. **Section Description**: Explain what this section does, including its purpose and scope. |
| 3. **Legal Provisions**: Highlight the key legal provisions or clauses under the specified section. |
| 4. **Punishments**: Explicitly mention the punishments with references if applicable. |
| 5. **Related Precedents**: Summarize any relevant legal precedents or landmark cases along with their results. |
| 6. **Conclusion/Recommendations**: Conclude with advice or recommendations tailored to the query. |
| """ |
|
|
| |
# Canned replies for non-legal queries, keyed by the response_type that
# is_legal_query returns ("greeting" | "capabilities" | "default").
GENERAL_RESPONSES = {
    "greeting": "Hello! I'm your Pakistani Legal Assistant. I can help you with questions about Pakistani law, including civil law, criminal law, family law, and constitutional matters. How can I assist you today?",

    "capabilities": "I'm a specialized Pakistani Legal Assistant that can help you with:\n\n"
                    "- Information about Pakistani civil, criminal, family, and constitutional law\n"
                    "- Legal provisions and sections with detailed explanations\n"
                    "- Applicable punishments under various legal provisions\n"
                    "- Legal precedents and relevant case law\n"
                    "- Recommendations on legal matters\n\n"
                    "Just ask me any legal question, and I'll provide a structured response based on Pakistani law.",

    "default": "I'm your Pakistani Legal Assistant. I can help answer questions about Pakistani law. "
               "For legal queries, I'll provide detailed information with proper structure. "
               "How can I assist you with your legal questions today?"
}
|
|
| |
def is_legal_query(query):
    """Classify a user query as legal vs. general small talk.

    Args:
        query: Raw user input.

    Returns:
        (True, None) when the query should go to the RAG query engine;
        (False, response_type) otherwise, where response_type is a key of
        GENERAL_RESPONSES ("greeting", "capabilities", or "default").
    """
    query_lower = query.lower()

    # Greetings are checked first and win over everything else.
    greeting_patterns = [
        r'\b(hi|hello|hey|greetings|howdy|salam|assalam|namaste)\b',
        r'\bhow are you\b',
        r'\bnice to meet you\b'
    ]

    # Questions about the assistant itself.
    capability_patterns = [
        r'\bwhat can you do\b',
        r'\bwhat are your capabilities\b',
        r'\bhow can you help\b',
        r'\bwhat do you know\b',
        r'\bwhat are you\b',
        r'\bwho are you\b',
        r'\bwhat is your purpose\b',
        r'\bhow do you work\b'
    ]

    if any(re.search(pattern, query_lower) for pattern in greeting_patterns):
        return False, "greeting"

    if any(re.search(pattern, query_lower) for pattern in capability_patterns):
        return False, "capabilities"

    legal_keywords = [
        'law', 'legal', 'court', 'justice', 'right', 'constitution', 'section',
        'crime', 'criminal', 'civil', 'family', 'divorce', 'marriage', 'inheritance',
        'punishment', 'penalty', 'fine', 'jail', 'prison', 'arrest', 'police',
        'judge', 'lawyer', 'attorney', 'defendant', 'plaintiff', 'accused',
        'trial', 'case', 'lawsuit', 'petition', 'appeal', 'witness', 'evidence',
        'contract', 'property', 'damages', 'compensation', 'regulation', 'statute',
        'act', 'provision', 'legislation', 'parliament', 'supreme court', 'high court',
        'district court', 'tribunal', 'code', 'penal', 'procedure'
    ]

    # Match keywords on word boundaries so substrings inside unrelated
    # words (e.g. 'act' in 'contact', 'law' in 'flaw') don't misclassify
    # short queries as legal. Previously this used plain substring checks.
    keyword_pattern = r'\b(?:' + '|'.join(re.escape(k) for k in legal_keywords) + r')\b'
    if re.search(keyword_pattern, query_lower):
        return True, None

    # Heuristic: longer queries are assumed to be substantive legal questions.
    if len(query) > 20:
        return True, None

    return False, "default"
|
|
| |
| @app.on_event("startup") |
| async def startup_event(): |
| global index |
| index = initialize_index() |
|
|
| |
| @app.post("/query/") |
| async def query_model(request: QueryRequest): |
| try: |
| |
| is_legal, response_type = is_legal_query(request.query) |
| |
| if not is_legal: |
| |
| print(f"General query detected: '{request.query}'") |
| print(f"Response type: {response_type}") |
| return {"response": GENERAL_RESPONSES.get(response_type, GENERAL_RESPONSES["default"])} |
| else: |
| |
| query_engine = index.as_query_engine( |
| similarity_top_k=5, |
| response_mode="tree_summarize", |
| |
| include_similarity=True, |
| ) |
| |
| |
| full_query = f"{LEGAL_CONTEXT}\n\nQuery: {request.query}" |
| |
| |
| response = query_engine.query(full_query) |
| |
| |
| similarity_scores = [] |
| source_nodes_info = [] |
| |
| if hasattr(response, 'source_nodes') and response.source_nodes: |
| print(f"\n--- Diagnostic Information for Query: '{request.query}' ---") |
| for i, node in enumerate(response.source_nodes): |
| if hasattr(node, 'score') and node.score is not None: |
| similarity_scores.append(node.score) |
| |
| text_preview = node.node.text[:200] + "..." if len(node.node.text) > 200 else node.node.text |
| source_nodes_info.append({ |
| "score": node.score, |
| "text_preview": text_preview |
| }) |
| |
| print(f"Source Node {i+1}: Score={node.score}") |
| print(f"Preview: {text_preview[:100]}...\n") |
| |
| |
| avg_similarity = sum(similarity_scores) / len(similarity_scores) if similarity_scores else None |
| |
| |
| response_text = response.response |
| |
| |
| if avg_similarity is not None: |
| confidence_percentage = round(avg_similarity * 100, 2) |
| confidence_level = "High" if confidence_percentage > 80 else \ |
| "Medium" if confidence_percentage > 60 else "Low" |
| |
| |
| |
| |
| |
| print(f"Confidence Summary: {confidence_level} ({confidence_percentage}%)") |
| print("--- End of Diagnostic Information ---\n") |
| |
| |
| return {"response": response_text} |
| |
| except Exception as e: |
| print(f"Error processing query: {str(e)}") |
| raise HTTPException(status_code=500, detail=str(e)) |
|
|
| |
| @app.get("/") |
| async def root(): |
| return { |
| "message": "Pakistani Legal Assistant API", |
| "usage": "Send POST requests to /query/ with a JSON body containing the 'query' field" |
| } |
|
|
|
|
| if __name__ == "__main__": |
| import uvicorn |
| uvicorn.run(app, host="0.0.0.0", port=7860) |