Mishal23 committed on
Commit
d3bc1c4
·
verified ·
1 Parent(s): 06a947a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import json
import os

import gradio as gr
from huggingface_hub import InferenceClient
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Step 1: Load the JSON corpus and split it into metadata-tagged chunks.
file_path = "pdf_data.json"
documents = []

splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)


def _classify(text):
    """Return the ``(section, law_type)`` tags for one source text.

    Text containing "punishment" or "section" (case-insensitive) is tagged
    ``("PPC", "criminal")``; everything else is ``("other", "general")``.
    """
    lowered = text.lower()  # lower-case once instead of once per keyword
    if "punishment" in lowered or "section" in lowered:
        return "PPC", "criminal"
    return "other", "general"


# Only the file read / JSON parse is guarded: those are the operations that
# are expected to fail (missing file, corrupt JSON).
try:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
except (OSError, json.JSONDecodeError) as e:
    print(f"❌ Failed to load: {e}")
    data = []

if not isinstance(data, list):
    # The loop below expects a list of {"text": ...} records.
    print(f"❌ Failed to load: expected a JSON list in {file_path}, got {type(data).__name__}")
    data = []

for item in data:
    # Skip malformed records instead of letting one bad entry abort the load.
    if not isinstance(item, dict) or not isinstance(item.get("text"), str):
        continue
    section, law_type = _classify(item["text"])
    for chunk in splitter.split_text(item["text"]):
        documents.append(Document(
            page_content=chunk,
            metadata={"section": section, "law_type": law_type},
        ))

if not documents:
    # Stop here with an actionable message; continuing would only fail later
    # when the vector store is built from an empty document list.
    raise SystemExit(f"❌ No text chunks could be loaded from {file_path}; cannot build the index.")

print(f"✅ Loaded {len(documents)} chunks with metadata")

# Step 2: Embed every chunk and index the vectors in a FAISS store.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)
db = FAISS.from_documents(documents, embedding=embedding_model)

# Step 3: Client for Zephyr-7B served through the Hugging Face Inference API.
# The access token is read from the HF_TOKEN environment variable.
_hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta", token=_hf_token)

# Step 4: QA function - retrieve context from FAISS, then ask the chat model.
_PROMPT_TEMPLATE = (
    "You are a legal assistant helping users understand Pakistani law.\n"
    "Respond to the question using the given legal context. Your answer must follow these rules:\n"
    "- Use numbered bullet points (1. 2. 3.)\n"
    "- Reference relevant law sections like (section 220(b))\n"
    "- Be concise, clear, and avoid repetition\n"
    '- Use "YES" or "NO" if the question requires binary response\n'
    "\n"
    "Context:\n"
    "{context}\n"
    "\n"
    "Question: {query}\n"
    "Answer:"
)


def ask_law_bot(query):
    """Answer a legal question using retrieved context and the chat model.

    Args:
        query: The user's question as free text.

    Returns:
        The model's answer as a string, or a message starting with "❌" when
        the question is empty, nothing usable was retrieved, the model
        returned no text, or any step raised an exception.
    """
    # Do not spend a retrieval and an API call on an empty question.
    if not query or not query.strip():
        return "❌ Please enter a question."

    try:
        # Retrieval is restricted to chunks whose metadata section is "PPC".
        results = db.similarity_search(query, k=5, filter={"section": "PPC"})

        # Keep only chunks longer than 100 characters. The emptiness check
        # runs AFTER this filter so the model is never called with a blank
        # context.
        passages = [
            doc.page_content
            for doc in results
            if len(doc.page_content.strip()) > 100
        ]
        if not passages:
            return "❌ No relevant content found for this topic."

        prompt = _PROMPT_TEMPLATE.format(context="\n\n".join(passages), query=query)

        response = client.chat_completion(
            messages=[
                {"role": "system", "content": "You are a helpful and concise legal assistant for Pakistani law."},
                {"role": "user", "content": prompt},
            ],
            max_tokens=512,
        )

        answer = response.choices[0].message["content"]
        if not answer or not answer.strip():
            return "❌ The model returned an empty answer. Please try again."
        return answer.strip()

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return f"❌ Error: {e}"

# Step 5: Gradio UI
# The interface is bound to a name so it can be imported or reused, and it is
# launched only when this file is run as a script, not on import.
demo = gr.Interface(
    fn=ask_law_bot,
    inputs=gr.Textbox(lines=2, placeholder="e.g., What is the punishment for theft?"),
    outputs=gr.Textbox(label="📘 Legal Answer"),
    title="⚖️ Ask Pakistan Law — Powered by Zephyr 7B",
    description="Ask questions from Pakistan's law using FAISS retrieval + Zephyr-7B via Hugging Face API.",
    examples=[
        "What is the punishment for theft?",
        "What are the duties of the Commission?",
        "What is the process of appeal under this law?",
    ],
)

if __name__ == "__main__":
    demo.launch(share=True, debug=True)