Upload 11 files
- .env +4 -0
- .gitattributes +2 -0
- Constitution.pdf +3 -0
- Pakistan Penal Code.pdf +3 -0
- app.py +253 -0
- config.py +33 -0
- ingestion.py +36 -0
- logic.py +44 -0
- rag_engine.py +72 -0
- requirements.txt +11 -0
- search_engine.py +74 -0
- vector_store.py +42 -0
.env
ADDED
@@ -0,0 +1,4 @@
+GROQ_API_KEY = "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" # ← REPLACE THIS!
+GOOGLE_API_KEY=AIzaSyD3qjA3zpWisKDa1KIMYF_fWfyaW9XpSUs
+SEARCH_ENGINE_ID=57981799ad3044dfc
+GEMINI_API_KEY=AIzaSyC8-w33K6dVIhNXxNQHS7Eknm03Gm17Hl4
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+Constitution.pdf filter=lfs diff=lfs merge=lfs -text
+Pakistan[[:space:]]Penal[[:space:]]Code.pdf filter=lfs diff=lfs merge=lfs -text
Constitution.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6c227d78847d1826d53bdb27e40bfb5cc065e7822fa27453872b21fe11c489
+size 1546102
Pakistan Penal Code.pdf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38fecd375cfd1e566c25cfe6f2c989eabc0fc06d807f811e8fa36ce00709695c
+size 457396
app.py
ADDED
@@ -0,0 +1,253 @@
+import os
+import sys
+import gradio as gr
+import config
+import ingestion
+import vector_store
+import rag_engine
+import logic
+
+# Global variable to store the chain
+rag_chain = None
+
+def initialize_system_once():
+    """
+    Initialize the complete system only once.
+    """
+    global rag_chain
+    if rag_chain is not None:
+        return rag_chain
+
+    print("Initializing LegalizeAI System...")
+
+    # Initialize Embedding Model
+    print("Loading embedding model...")
+    embedding_model = vector_store.get_embedding_model()
+
+    # Check if Vector Store exists
+    if os.path.exists(config.CHROMA_DB_DIR) and os.listdir(config.CHROMA_DB_DIR):
+        print(f"Loading existing vector store from {config.CHROMA_DB_DIR}...")
+        v_store = vector_store.get_vector_store(embedding_model)
+    else:
+        print("No existing vector store found. Starting ingestion process...")
+        docs = ingestion.load_documents()
+        if not docs:
+            # Create empty placeholder if no docs found to prevent crash,
+            # but warn hard.
+            print("CRITICAL WARNING: No documents loaded. App will run but local search will fail.")
+            # In a real app we might want to fail, but for UI it's better to stay up
+            return None
+
+        chunks = ingestion.split_documents(docs)
+        print("Creating vector store...")
+        v_store = vector_store.create_vector_store(chunks, embedding_model)
+
+    # Setup Retriever
+    retriever = vector_store.get_retriever(v_store)
+
+    # Setup RAG Chain
+    print("Initializing RAG chain...")
+    rag_chain = rag_engine.create_rag_chain(retriever)
+
+    print("System initialization complete!")
+    return rag_chain
+
+def chat_response(message, history):
+    """
+    Gradio chat function.
+    """
+    try:
+        chain = initialize_system_once()
+        if not chain:
+            return "System Error: Failed to initialize AI chain. Please check server logs."
+
+        response = logic.generate_hybrid_response(message, chain)
+        return response
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
+
+# Custom CSS for a professional look
+custom_css = """
+body { background-color: #f0f2f5; }
+footer { visibility: hidden !important; }
+
+/* Custom Developer Footer */
+.dev-footer {
+    text-align: center;
+    padding: 20px;
+    margin-top: 30px;
+    border-top: 1px solid #e5e7eb;
+    color: #4b5563;
+    background-color: transparent !important;
+}
+
+.dev-footer a {
+    display: inline-flex;
+    align-items: center;
+    justify-content: center;
+    margin: 0 10px;
+    color: #4b5563;
+    text-decoration: none;
+    transition: color 0.2s;
+}
+
+.dev-footer a:hover {
+    color: #1f2937;
+}
+
+.dev-footer svg {
+    margin-right: 5px;
+    width: 20px;
+    height: 20px;
+    fill: currentColor;
+}
+"""
+
+# HTML for the footer
+footer_html = """
+<div class="dev-footer">
+    <p>Developed by <strong>Muhammad Hashir Lodhi</strong></p>
+    <div>
+        <a href="https://github.com/HashirLodhi" target="_blank">
+            <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.05-.015-2.055-3.33.72-4.035-1.605-4.035-1.605-.54-1.38-1.32-1.74-1.32-1.74-1.095-.75.09-.735.09-.735 1.2.09 1.845 1.245 1.845 1.245 1.065 1.83 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405 1.02 0 2.04.135 3 .405 2.28-1.545 3.285-1.23 3.285-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.285 0 .315.225.69.825.57A12.02 12.02 0 0024 12c0-6.63-5.37-12-12-12z"/></svg>
+            GitHub
+        </a>
+        <a href="https://medium.com/@hashirlodhi145" target="_blank">
+            <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M13.54 12a6.8 6.8 0 01-6.77 6.82A6.8 6.8 0 010 12a6.8 6.8 0 016.77-6.82A6.8 6.8 0 0113.54 12zM20.96 12c0 3.54-1.51 6.42-3.38 6.42-1.87 0-3.39-2.88-3.39-6.42s1.52-6.42 3.39-6.42 3.38 2.88 3.38 6.42M24 12c0 3.17-.53 5.75-1.19 5.75-.66 0-1.19-2.58-1.19-5.75s.53-5.75 1.19-5.75C23.47 6.25 24 8.83 24 12z"/></svg>
+            Medium
+        </a>
+        <a href="https://www.linkedin.com/in/hashir-lodhi/" target="_blank">
+            <svg viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg"><path d="M20.447 20.452h-3.554v-5.569c0-1.328-.027-3.037-1.852-3.037-1.853 0-2.136 1.445-2.136 2.939v5.667H9.351V9h3.414v1.561h.046c.477-.9 1.637-1.85 3.37-1.85 3.601 0 4.267 2.37 4.267 5.455v6.286zM5.337 7.433c-1.144 0-2.063-.926-2.063-2.065 0-1.138.92-2.063 2.063-2.063 1.14 0 2.064.925 2.064 2.063 0 1.139-.925 2.065-2.064 2.065zm1.782 13.019H3.555V9h3.564v11.452zM22.225 0H1.771C.792 0 0 .774 0 1.729v20.542C0 23.227.792 24 1.771 24h20.451C23.2 24 24 23.227 24 22.271V1.729C24 .774 23.2 0 22.222 0h.003z"/></svg>
+            LinkedIn
+        </a>
+    </div>
+</div>
+"""
+
+# Create the Gradio Interface
+def create_ui():
+    initialize_system_once()
+
+    # Define interaction functions
+    def interact(message, history):
+        if not message:
+            return "", history
+
+        # Initialize history if strictly None (though usually an empty list)
+        if history is None:
+            history = []
+
+        # Add user message
+        history.append({"role": "user", "content": message})
+
+        # Get response
+        try:
+            # Note: chat_response doesn't currently inspect history,
+            # but if it did, it would need to handle the dict format or receive plain strings.
+            response = chat_response(message, history)
+        except Exception as e:
+            response = f"Error: {str(e)}"
+
+        # Add assistant response
+        history.append({"role": "assistant", "content": response})
+        return "", history
+
+    def retry_last(history):
+        if not history:
+            return history, ""
+
+        # Pop last message if it's assistant
+        if history and history[-1]["role"] == "assistant":
+            history.pop()
+
+        # Pop user message to edit
+        if history and history[-1]["role"] == "user":
+            last_msg = history.pop()
+            return history, last_msg["content"]
+
+        return history, ""
+
+    # Create the Gradio Blocks
+    with gr.Blocks(title="⚖️ LegalizeAI") as demo:
+        gr.Markdown(
+            """
+            # ⚖️ LegalizeAI
+            **Professional Assistant for Pakistani Law**
+
+            Consulting the Constitution of Pakistan, the Pakistan Penal Code, and real-time web sources.
+            """
+        )
+
+        chatbot = gr.Chatbot(
+            height=500,
+            elem_id="chatbot",
+            type="messages",  # history entries below are role/content dicts, not tuples
+            avatar_images=(None, "⚖️")
+        )
+
+        with gr.Row():
+            txt = gr.Textbox(
+                scale=4,
+                show_label=False,
+                placeholder="Ask a legal question...",
+                container=False,
+                autofocus=True
+            )
+            submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")
+
+        with gr.Row():
+            retry_btn = gr.Button("Retry 🔄", size="sm")
+            clear_btn = gr.Button("Clear 🗑️", size="sm")
+
+        # Example buttons logic
+        examples = [
+            "What is the punishment for theft in Pakistan?",
+            "Explain Article 62 of the Constitution.",
+            "Who is the current Prime Minister?",
+            "What are my fundamental rights?"
+        ]
+
+        gr.Examples(
+            examples=examples,
+            inputs=txt
+        )
+
+        # Footer
+        gr.HTML(footer_html)
+
+        # Event Wiring
+        submit_btn.click(interact, [txt, chatbot], [txt, chatbot])
+        txt.submit(interact, [txt, chatbot], [txt, chatbot])
+
+        retry_btn.click(retry_last, [chatbot], [chatbot, txt])  # Pop last and put in text
+        clear_btn.click(lambda: None, None, chatbot, queue=False)
+
+    return demo
+
+def main():
+    try:
+        # Initialize system once before UI creation if needed, or let UI do it
+        initialize_system_once()
+
+        # Using a professional theme
+        theme = gr.themes.Soft(
+            primary_hue="slate",
+            secondary_hue="stone",
+            neutral_hue="zinc",
+            font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]
+        )
+
+        # Create UI
+        demo = create_ui()
+
+        # Launching - Pass theme and css here for Gradio 6.0+ compatibility
+        demo.launch(
+            server_name="0.0.0.0",  # bind to all interfaces so the app is reachable inside the Spaces container
+            theme=theme,
+            css=custom_css
+        )
+    except Exception as e:
+        print(f"Fatal Error: {e}")
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
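For a quick end-to-end check, a minimal smoke-test sketch (assuming the two PDFs and a populated .env are in place) can drive the same path the UI uses, without starting the Gradio server:

import app

print(app.chat_response("What is the punishment for theft in Pakistan?", []))

This exercises initialization, local retrieval, and the Gemini synthesis step in one call.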
config.py
ADDED
@@ -0,0 +1,33 @@
+import os
+import sys
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# API Keys
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
+
+if not GROQ_API_KEY:
+    print("Warning: GROQ_API_KEY not found in .env file", file=sys.stderr)
+if not GEMINI_API_KEY:
+    print("Warning: GEMINI_API_KEY not found in .env file", file=sys.stderr)
+
+# Paths
+PDF_FILES = [
+    "Constitution.pdf",
+    "Pakistan Penal Code.pdf"
+]
+
+CHROMA_DB_DIR = "./chroma_db_legal"
+
+# Models
+EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+LLM_MODEL_NAME = "llama-3.3-70b-versatile"
+GEMINI_MODEL_NAME = "gemini-2.5-flash"
+
+# RAG Configuration
+CHUNK_SIZE = 1000
+CHUNK_OVERLAP = 200
+RETRIEVER_K = 6
ingestion.py
ADDED
@@ -0,0 +1,36 @@
+import os
+from langchain_community.document_loaders import PyPDFLoader
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+import config
+
+def load_documents(pdf_paths=None):
+    """
+    Load PDF documents from the specified paths.
+    """
+    if pdf_paths is None:
+        pdf_paths = config.PDF_FILES
+
+    docs = []
+    for path in pdf_paths:
+        if os.path.exists(path):
+            print(f"Loading: {path}")
+            loader = PyPDFLoader(path)
+            docs.extend(loader.load())
+        else:
+            print(f"File not found: {path} - Skipping")
+
+    print(f"Loaded {len(docs)} pages total.")
+    return docs
+
+def split_documents(docs):
+    """
+    Split documents into smaller chunks for proper processing.
+    """
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=config.CHUNK_SIZE,
+        chunk_overlap=config.CHUNK_OVERLAP,
+        separators=["\n\n", "\n", " ", ""]
+    )
+    chunks = text_splitter.split_documents(docs)
+    print(f"Created {len(chunks)} chunks.")
+    return chunks
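As a rough standalone sketch (assuming both PDFs sit in the working directory), the module can be run on its own to sanity-check chunking:

import ingestion

docs = ingestion.load_documents()           # one Document per PDF page
chunks = ingestion.split_documents(docs)    # ~1000-char chunks with 200-char overlap
print(chunks[0].page_content[:200])         # preview the first chunk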
logic.py
ADDED
@@ -0,0 +1,44 @@
+import search_engine
+import random
+
+def generate_hybrid_response(question, rag_chain):
+    """
+    Generate response using RAG context + Gemini Search synthesis.
+    """
+    print(f"\nAnalyzing: {question}...")
+
+    # Phase 1: Local RAG
+    # We always get RAG context even if it's empty, to pass to Gemini
+    try:
+        rag_response = rag_chain.invoke(question)
+    except Exception as e:
+        print(f"RAG Error: {e}")
+        rag_response = "Error retrieving local context."
+
+    # Phase 2: Combined Synthesis via Gemini
+    print("Fetching information from Gemini (Context + Web)...")
+    final_answer = search_engine.search_and_synthesize(question, rag_response)
+
+    # Phase 3: Error Handling & Formatting
+    if final_answer == "SERVER_BUSY":
+        return "⚠️ **Service Unavailable**: The AI server is currently busy. Please try again in a few moments."
+
+    # If the answer is a denial (Pakistan filter), return it as is.
+    if "I specialize only in Pakistani Law" in final_answer:
+        return final_answer
+
+    # Creative closing generator (optional, appended only when the answer isn't a denial)
+    closings = [
+        "Need clarification on any point?",
+        "Shall we explore related case laws?",
+        "I can help draft a legal notice based on this.",
+        "Would you like to know about relevant court procedures?",
+        "Ask me if you need further details on this topic!"
+    ]
+    next_step = random.choice(closings)
+
+    # Construct final output
+    # The 'final_answer' from Gemini is already comprehensive.
+    # We just add the closing.
+    return f"{final_answer}\n\n_{next_step}_"
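A minimal sketch of driving this hybrid path directly, assuming the Chroma index in ./chroma_db_legal has already been built and the API keys are set:

import vector_store
import rag_engine
import logic

embedding_model = vector_store.get_embedding_model()
store = vector_store.get_vector_store(embedding_model)   # loads ./chroma_db_legal
chain = rag_engine.create_rag_chain(vector_store.get_retriever(store))
print(logic.generate_hybrid_response("Explain Article 62 of the Constitution.", chain))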
rag_engine.py
ADDED
@@ -0,0 +1,72 @@
+from langchain_groq import ChatGroq
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+import config
+
+def initialize_llm():
+    """
+    Initialize Groq LLM.
+    """
+    return ChatGroq(
+        model=config.LLM_MODEL_NAME,
+        temperature=0.1,
+        max_tokens=2000,
+        api_key=config.GROQ_API_KEY
+    )
+
+def get_rag_prompt():
+    """
+    Create the prompt template for RAG.
+    """
+    return ChatPromptTemplate.from_template("""
+You are a Senior Legal Consultant specializing in the laws of Pakistan.
+
+CONTEXT:
+1. Constitution of Pakistan
+2. Pakistan Penal Code
+
+INSTRUCTIONS:
+- Adopt a formal, professional, and authoritative tone suitable for legal memoranda.
+- Cite specific Articles, Sections, or Clauses extensively.
+- If the information is present: Provide a direct, concise legal opinion.
+- If the information is MISSING: State clearly "The provided legal documents do not contain specific provisions regarding [topic]." Do not apologize.
+- Structure your response with clear headings if necessary.
+
+LEGAL CONTEXT:
+{context}
+
+QUERY: {question}
+
+LEGAL OPINION:
+""")
+
+def format_docs(docs):
+    """
+    Format retrieved documents for the prompt.
+    """
+    formatted = []
+    for i, doc in enumerate(docs):
+        source = doc.metadata.get('source', 'Unknown Document')
+        page = doc.metadata.get('page', 'N/A')
+        # Limit content length to avoid context window issues, though Groq usually has large context
+        content = doc.page_content[:800]
+        formatted.append(f"[Document {i+1}: {source}, Page {page}]")
+        formatted.append(content)
+        formatted.append("-" * 50)
+    return "\n".join(formatted)
+
+def create_rag_chain(retriever):
+    """
+    Build the primary RAG chain.
+    """
+    llm = initialize_llm()
+    prompt = get_rag_prompt()
+
+    chain = (
+        {"context": retriever | format_docs, "question": RunnablePassthrough()}
+        | prompt
+        | llm
+        | StrOutputParser()
+    )
+    return chain
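Since format_docs only reads page_content and metadata, it can be sanity-checked in isolation; the Document contents and page number below are purely illustrative:

from langchain_core.documents import Document
import rag_engine

doc = Document(
    page_content="62. (1) A person shall not be qualified to be elected...",
    metadata={"source": "Constitution.pdf", "page": 40}
)
print(rag_engine.format_docs([doc]))
# [Document 1: Constitution.pdf, Page 40]
# 62. (1) A person shall not be qualified to be elected...
# --------------------------------------------------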
requirements.txt
ADDED
@@ -0,0 +1,11 @@
+langchain
+langchain-community
+langchain-groq
+google-genai
+python-dotenv
+chromadb
+pypdf
+sentence-transformers
+gradio
+langchain-huggingface
+langchain-chroma
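None of these are version-pinned, so a fresh environment resolves to the latest releases and installs with the usual:

pip install -r requirements.txt

Note that google-genai is the package behind the "from google import genai" client used in search_engine.py.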
search_engine.py
ADDED
@@ -0,0 +1,74 @@
+from google import genai
+from google.genai import types
+import config
+
+def initialize_gemini_client():
+    """
+    Initialize Gemini client.
+    """
+    return genai.Client(api_key=config.GEMINI_API_KEY)
+
+def search_and_synthesize(query, rag_context):
+    """
+    Search the web using Gemini's Google Search grounding tool and combine with RAG context.
+    Enforces Pakistan-specific content filtering.
+    """
+    try:
+        client = initialize_gemini_client()
+
+        # Create grounding tool with Google Search
+        grounding_tool = types.Tool(
+            google_search=types.GoogleSearch()
+        )
+
+        # Configuration with grounding tool
+        # Using the standard configuration approach compatible with google-genai
+        generate_config = types.GenerateContentConfig(
+            tools=[grounding_tool],
+            temperature=0.2,
+            system_instruction="""You are a specialized Legal Assistant for Pakistan.
+Your primary job is to answer the user's legal question by combining:
+1. The User's Question.
+2. The provided 'Legal Context' (which comes from local legal documents like the Constitution and PPC).
+3. Real-time information from Google Search.
+
+CRITICAL RULES:
+- FILTER: You must ONLY answer questions related to Pakistan Law, the Pakistani Legal System, or general legal queries applicable in Pakistan.
+- DENIAL: If the user asks about anything else (e.g., "Capital of Peru", "Movie reviews", "Laws of France") or any query unrelated to Pakistan, you MUST REFUSE to answer. Say exactly: "I specialize only in Pakistani Law. I cannot assist with this query."
+- SYNTHESIS: Provide a single, cohesive answer. Citations are encouraged.
+- Do not treat 'Legal Context' as the only truth if Search reveals it's outdated, but prioritize the Constitution/Acts if they are standard texts.
+- If the user asks for a specific section, quote it if available in Context or Search.
+"""
+        )
+
+        prompt = f"""
+User Query: {query}
+
+Legal Context from Local Documents:
+{rag_context}
+
+Please provide a comprehensive answer based on the above instructions.
+"""
+
+        # Generate response with web search
+        response = client.models.generate_content(
+            model=config.GEMINI_MODEL_NAME,
+            contents=prompt,
+            config=generate_config,
+        )
+
+        if response and response.text:
+            return response.text
+        else:
+            # Fallback if model returns empty but no error raised
+            return "No information could be generated. Please try again."
+
+    except Exception as e:
+        print(f"Gemini search error: {e}")
+        error_msg = str(e).lower()
+        # Check for common "server busy" or quota errors
+        if "503" in error_msg or "429" in error_msg or "busy" in error_msg or "quota" in error_msg:
+            return "SERVER_BUSY"
+
+        # Generic error handling to avoid crashing
+        return "SERVER_BUSY"
vector_store.py
ADDED
@@ -0,0 +1,42 @@
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_chroma import Chroma
+import config
+
+def get_embedding_model():
+    """
+    Initialize the embedding model.
+    """
+    return HuggingFaceEmbeddings(
+        model_name=config.EMBEDDING_MODEL_NAME,
+        model_kwargs={'device': 'cpu'}
+    )
+
+def create_vector_store(chunks, embedding_model):
+    """
+    Create and persist a Chroma vector store from document chunks.
+    """
+    vectorstore = Chroma.from_documents(
+        documents=chunks,
+        embedding=embedding_model,
+        persist_directory=config.CHROMA_DB_DIR
+    )
+    return vectorstore
+
+def get_vector_store(embedding_model):
+    """
+    Load existing vector store.
+    """
+    # Simply initializing with persist_directory attempts to load it
+    return Chroma(
+        persist_directory=config.CHROMA_DB_DIR,
+        embedding_function=embedding_model
+    )
+
+def get_retriever(vectorstore):
+    """
+    Get a retriever from the vector store.
+    """
+    return vectorstore.as_retriever(
+        search_type="similarity",
+        search_kwargs={"k": config.RETRIEVER_K}
+    )
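Finally, a rebuild-from-scratch sketch (assumes the PDFs are present; re-embedding everything takes a while on CPU):

import ingestion
import vector_store

embedding_model = vector_store.get_embedding_model()
chunks = ingestion.split_documents(ingestion.load_documents())
store = vector_store.create_vector_store(chunks, embedding_model)   # persists to ./chroma_db_legal
hits = vector_store.get_retriever(store).invoke("fundamental rights")
print(len(hits))   # RETRIEVER_K documents (6) once the index is populated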