Spaces:

Mishal23
/

Policy-Navigator

Runtime error

App Files Files Community

Policy-Navigator / app.py

Mishal23

Create app.py

d3bc1c4 verified 6 months ago

raw

history blame

3.4 kB

	import json
	import gradio as gr
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.schema import Document
	from huggingface_hub import InferenceClient
	import os

	# ✅ Step 1: Load and Chunk JSON with Metadata
	# Reads a JSON array of records from `file_path`, splits each record's
	# "text" value into overlapping chunks and wraps every chunk in a Document
	# tagged with a coarse section / law-type label.
	file_path = "pdf_data.json"
	documents = []

	splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

	# Only the read/parse step is guarded, so a problem in one record can no
	# longer abort processing of every record that follows it.
	try:
	    with open(file_path, "r", encoding="utf-8") as f:
	        data = json.load(f)
	except (OSError, ValueError) as e:
	    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	    # Best effort: report the failure and continue with no records.
	    print(f"❌ Failed to load: {e}")
	    data = []

	if not isinstance(data, list):
	    # The loop below expects a list of record dicts; anything else is unusable.
	    print(f"❌ Failed to load: expected a JSON array, got {type(data).__name__}")
	    data = []

	for item in data:
	    # Skip records that are not dicts or that have no usable "text" string.
	    text = item.get("text") if isinstance(item, dict) else None
	    if not isinstance(text, str):
	        continue

	    # Keyword heuristic: any mention of "punishment" or "section" marks the
	    # record as PPC / criminal; everything else is other / general.
	    lowered = text.lower()
	    is_ppc = "punishment" in lowered or "section" in lowered
	    section = "PPC" if is_ppc else "other"
	    law_type = "criminal" if is_ppc else "general"

	    documents.extend(
	        Document(
	            page_content=chunk,
	            metadata={"section": section, "law_type": law_type},
	        )
	        for chunk in splitter.split_text(text)
	    )

	print(f"✅ Loaded {len(documents)} chunks with metadata")

	# ✅ Step 2: Create Embeddings & FAISS Vector Store
	# Fail early with an actionable message: with no chunks there is nothing to
	# index, and checking first avoids loading the embedding model for nothing.
	if not documents:
	    raise RuntimeError(
	        f"No document chunks were loaded from {file_path!r}; cannot build the "
	        "FAISS index. Check that the file exists and contains records with a "
	        "'text' field."
	    )

	embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	db = FAISS.from_documents(documents, embedding_model)

	# ✅ Step 3: Load Zephyr-7B via Hugging Face Inference API
	# os.getenv returns None when HF_TOKEN is unset.
	# NOTE(review): confirm how InferenceClient behaves with token=None in the
	# deployed huggingface_hub version.
	client = InferenceClient(
	    model="HuggingFaceH4/zephyr-7b-beta",
	    token=os.getenv("HF_TOKEN"),  # set your token in environment variable
	)

	# ✅ Step 4: QA Function using chat_completion with formatting
	def ask_law_bot(query):
	    """Answer a legal question from retrieved context via the chat model.

	    Runs a similarity search over ``db`` restricted to chunks whose
	    ``section`` metadata is ``"PPC"``, keeps passages longer than 100
	    characters (after stripping), and sends them with the question to
	    ``client.chat_completion``.

	    Args:
	        query: The user's question as entered in the UI.

	    Returns:
	        The model's answer with surrounding whitespace stripped, or a
	        message starting with "❌" when the query is blank, no usable
	        passage is found, or any step raises.
	    """
	    # Do not spend a retrieval + LLM round trip on blank input.
	    if not query or not query.strip():
	        return "❌ Please enter a question."

	    try:
	        # NOTE(review): the filter is hard-coded to PPC chunks, so chunks
	        # tagged "other" are never retrieved — confirm this is intended.
	        results = db.similarity_search(query, k=5, filter={"section": "PPC"})

	        # Drop very short fragments, then check what is left: if every hit
	        # was filtered out the model would otherwise be asked to answer
	        # with an empty context.
	        passages = [
	            doc.page_content
	            for doc in results
	            if len(doc.page_content.strip()) > 100
	        ]
	        if not passages:
	            return "❌ No relevant content found for this topic."

	        context = "\n\n".join(passages)

	        prompt = f"""You are a legal assistant helping users understand Pakistani law.
	Respond to the question using the given legal context. Your answer must follow these rules:
	- Use numbered bullet points (1. 2. 3.)
	- Reference relevant law sections like (section 220(b))
	- Be concise, clear, and avoid repetition
	- Use "YES" or "NO" if the question requires binary response

	Context:
	{context}

	Question: {query}
	Answer:"""

	        response = client.chat_completion(
	            messages=[
	                {"role": "system", "content": "You are a helpful and concise legal assistant for Pakistani law."},
	                {"role": "user", "content": prompt},
	            ],
	            max_tokens=512,
	        )

	        return response.choices[0].message["content"].strip()

	    except Exception as e:
	        # UI boundary: surface any failure as text in the output box
	        # instead of raising.
	        return f"❌ Error: {e}"

	# ✅ Step 5: Gradio UI
	# One question textbox in, one answer textbox out, wired to ask_law_bot.
	question_box = gr.Textbox(lines=2, placeholder="e.g., What is the punishment for theft?")
	answer_box = gr.Textbox(label="📘 Legal Answer")

	# Sample questions offered by the interface.
	example_questions = [
	    "What is the punishment for theft?",
	    "What are the duties of the Commission?",
	    "What is the process of appeal under this law?",
	]

	demo = gr.Interface(
	    fn=ask_law_bot,
	    inputs=question_box,
	    outputs=answer_box,
	    title="⚖️ Ask Pakistan Law — Powered by Zephyr 7B",
	    description="Ask questions from Pakistan's law using FAISS retrieval + Zephyr-7B via Hugging Face API.",
	    examples=example_questions,
	)

	# Same launch arguments as before.
	demo.launch(share=True, debug=True)