import json
import os

import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document
from huggingface_hub import InferenceClient
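
# Note (added): the imports above assume these packages are installed, e.g. via the
# Space's requirements.txt. Exact versions/pins are an assumption, not from the original repo:
#   gradio, langchain (plus langchain-community on newer releases), faiss-cpu,
#   sentence-transformers, huggingface_hub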

# Step 1: Load and chunk the JSON source with metadata
file_path = "pdf_data.json"
documents = []
splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)

try:
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    for item in data:
        if "text" in item:
            # Naive keyword heuristic: chunks mentioning "punishment" or "section" are tagged as PPC
            section = "PPC" if "punishment" in item["text"].lower() or "section" in item["text"].lower() else "other"
            law_type = "criminal" if section == "PPC" else "general"
            chunks = splitter.split_text(item["text"])
            for chunk in chunks:
                documents.append(Document(
                    page_content=chunk,
                    metadata={"section": section, "law_type": law_type}
                ))
except Exception as e:
    print(f"Failed to load {file_path}: {e}")

print(f"Loaded {len(documents)} chunks with metadata")

# Step 2: Create embeddings and the FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db = FAISS.from_documents(documents, embedding_model)
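
# Optional (added, not part of the original app): persist the index so it is not
# re-embedded on every restart. save_local/load_local are standard LangChain FAISS
# methods; newer LangChain versions require allow_dangerous_deserialization=True on load.
# db.save_local("faiss_index")
# db = FAISS.load_local("faiss_index", embedding_model)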

# Step 3: Load Zephyr-7B via the Hugging Face Inference API
client = InferenceClient(
    model="HuggingFaceH4/zephyr-7b-beta",
    token=os.getenv("HF_TOKEN")  # set HF_TOKEN as an environment variable / Space secret
)
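
# Added check (not in the original script): without a token the Inference API call
# usually fails with an auth error, so warn early to make debugging easier.
if not os.getenv("HF_TOKEN"):
    print("Warning: HF_TOKEN is not set; calls to the Inference API will likely fail.")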

# Step 4: QA function using chat_completion with answer-formatting rules
def ask_law_bot(query):
    try:
        results = db.similarity_search(query, k=5, filter={"section": "PPC"})
        if not results:
            return "No relevant content found for this topic."

        # Prefer substantial chunks; fall back to everything retrieved if all are short
        context = "\n\n".join(doc.page_content for doc in results if len(doc.page_content.strip()) > 100)
        if not context:
            context = "\n\n".join(doc.page_content for doc in results)

        prompt = f"""You are a legal assistant helping users understand Pakistani law.
Respond to the question using the given legal context. Your answer must follow these rules:
- Use numbered bullet points (1. 2. 3.)
- Reference relevant law sections, e.g. (section 220(b))
- Be concise, clear, and avoid repetition
- Use "YES" or "NO" if the question requires a binary response

Context:
{context}

Question: {query}
Answer:"""

        response = client.chat_completion(
            messages=[
                {"role": "system", "content": "You are a helpful and concise legal assistant for Pakistani law."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=512
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        return f"Error: {e}"
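
# Quick sanity check (commented out; assumes the index above was built and HF_TOKEN is set):
# print(ask_law_bot("What is the punishment for theft?"))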

# Step 5: Gradio UI
gr.Interface(
    fn=ask_law_bot,
    inputs=gr.Textbox(lines=2, placeholder="e.g., What is the punishment for theft?"),
    outputs=gr.Textbox(label="Legal Answer"),
    title="Ask Pakistan Law - Powered by Zephyr 7B",
    description="Ask questions about Pakistani law using FAISS retrieval and Zephyr-7B via the Hugging Face Inference API.",
    examples=[
        "What is the punishment for theft?",
        "What are the duties of the Commission?",
        "What is the process of appeal under this law?"
    ]
).launch(share=True, debug=True)