Spaces:

Mishal23
/

Policy-Navigator

Runtime error

App Files Files Community

Mishal23 committed on Jun 26

Commit

d3bc1c4

verified ·

1 Parent(s): 06a947a

Create app.py

Browse files

Files changed (1) hide show

app.py +91 -0

app.py ADDED Viewed

	@@ -0,0 +1,91 @@

+import json
+import gradio as gr
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+from langchain.embeddings import HuggingFaceEmbeddings
+from langchain.schema import Document
+from huggingface_hub import InferenceClient
+import os
+# ✅ Step 1: Load and Chunk JSON with Metadata
+# Reads `file_path`, splits every entry's "text" into overlapping chunks and
+# tags each chunk with a coarse `section` / `law_type` label.
+# NOTE(review): assumes the file holds a JSON array of objects that each carry
+# a string "text" field — entries of any other shape are skipped.
+file_path = "pdf_data.json"
+documents = []
+splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
+try:
+    # Only the read/parse step is guarded, so the file is closed before chunking
+    # and unrelated bugs are not silently swallowed.
+    with open(file_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+except (OSError, ValueError) as e:
+    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
+    print(f"❌ Failed to load: {e}")
+    data = []
+if not isinstance(data, list):
+    print(f"❌ Failed to load: expected a JSON list in {file_path}, got {type(data).__name__}")
+    data = []
+for item in data:
+    # Skip malformed entries instead of aborting the whole load.
+    text = item.get("text") if isinstance(item, dict) else None
+    if not isinstance(text, str) or not text.strip():
+        continue
+    lowered = text.lower()
+    # Keyword heuristic: any mention of "punishment" or "section" marks the entry as PPC.
+    section = "PPC" if "punishment" in lowered or "section" in lowered else "other"
+    law_type = "criminal" if section == "PPC" else "general"
+    documents.extend(
+        Document(
+            page_content=chunk,
+            metadata={"section": section, "law_type": law_type},
+        )
+        for chunk in splitter.split_text(text)
+    )
+if documents:
+    print(f"✅ Loaded {len(documents)} chunks with metadata")
+else:
+    # Do not report success when nothing was loaded.
+    print(f"⚠️ No chunks were loaded from {file_path}")
+# ✅ Step 2: Create Embeddings & FAISS Vector Store
+if not documents:
+    # Building a FAISS index from an empty list fails with an opaque error deep
+    # inside the library, so stop here with an actionable message instead.
+    raise SystemExit(
+        f"❌ No documents were loaded from {file_path}; cannot build the search index."
+    )
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+db = FAISS.from_documents(documents, embedding_model)
+# ✅ Step 3: Load Zephyr-7B via Hugging Face Inference API
+client = InferenceClient(
+    model="HuggingFaceH4/zephyr-7b-beta",
+    token=os.getenv("HF_TOKEN")  # set your token in environment variable
+)
+# ✅ Step 4: QA Function using chat_completion with formatting
+def ask_law_bot(query):
+    """Answer a legal question from retrieved context.
+
+    Searches the module-level ``db`` for the 5 chunks most similar to
+    ``query`` (restricted to chunks tagged ``section == "PPC"``), builds a
+    prompt from the chunks longer than 100 characters and sends it to the
+    module-level ``client`` via ``chat_completion``.
+
+    Args:
+        query: The user's question as plain text.
+
+    Returns:
+        The model's answer, or a message starting with "❌" when the question
+        is empty, no usable context is found, the model returns nothing, or
+        any step raises.
+    """
+    if not isinstance(query, str) or not query.strip():
+        return "❌ Please enter a question."
+    query = query.strip()
+    try:
+        results = db.similarity_search(query, k=5, filter={"section": "PPC"})
+        # Very short chunks are dropped; check *after* filtering so the model
+        # is never prompted with an empty context.
+        passages = [doc.page_content for doc in results if len(doc.page_content.strip()) > 100]
+        if not passages:
+            return "❌ No relevant content found for this topic."
+        context = "\n\n".join(passages)
+        prompt = f"""You are a legal assistant helping users understand Pakistani law.
+Respond to the question using the given legal context. Your answer must follow these rules:
+- Use numbered bullet points (1. 2. 3.)
+- Reference relevant law sections like (section 220(b))
+- Be concise, clear, and avoid repetition
+- Use "YES" or "NO" if the question requires binary response
+Context:
+{context}
+Question: {query}
+Answer:"""
+        response = client.chat_completion(
+            messages=[
+                {"role": "system", "content": "You are a helpful and concise legal assistant for Pakistani law."},
+                {"role": "user", "content": prompt}
+            ],
+            max_tokens=512
+        )
+        answer = response.choices[0].message.content
+        # The content can be missing or blank; avoid calling .strip() on None.
+        if not answer or not answer.strip():
+            return "❌ The model returned an empty answer. Please try again."
+        return answer.strip()
+    except Exception as e:
+        # UI boundary: surface the failure to the user instead of crashing the app.
+        return f"❌ Error: {e}"
+# ✅ Step 5: Gradio UI
+# One-question-in, one-answer-out interface around `ask_law_bot`.
+question_box = gr.Textbox(lines=2, placeholder="e.g., What is the punishment for theft?")
+answer_box = gr.Textbox(label="📘 Legal Answer")
+# Sample questions offered to the user as clickable examples.
+example_questions = [
+    "What is the punishment for theft?",
+    "What are the duties of the Commission?",
+    "What is the process of appeal under this law?",
+]
+demo = gr.Interface(
+    fn=ask_law_bot,
+    inputs=question_box,
+    outputs=answer_box,
+    title="⚖️ Ask Pakistan Law — Powered by Zephyr 7B",
+    description="Ask questions from Pakistan's law using FAISS retrieval + Zephyr-7B via Hugging Face API.",
+    examples=example_questions,
+)
+demo.launch(share=True, debug=True)