"""Gradio chat app serving VakilAI, a LoRA-adapted LLaMA-3.2-3B legal
assistant for Indian law, running on CPU."""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL = "unsloth/llama-3.2-3b-bnb-4bit"
ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print("Loading base model on CPU...")
# float32 on CPU: bnb 4-bit quantized weights are dequantized for CPU use.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,
)

print("Loading VakilAI adapter...")
model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
model.eval()  # inference only — disable dropout etc.
print("Model ready")


def build_prompt(question: str) -> str:
    """Wrap a user question in the VakilAI instruction template."""
    return f"""
You are VakilAI, an AI legal assistant for Indian law.
Explain the answer clearly and simply.

Question: {question}

Answer:
"""


def vakil_ai(message: str, history) -> str:
    """Gradio ChatInterface callback: generate an answer for `message`.

    `history` is supplied by Gradio but unused — each turn is answered
    independently from a fresh prompt.
    """
    prompt = build_prompt(message)
    inputs = tokenizer(prompt, return_tensors="pt")
    # inference_mode: skip autograd bookkeeping during generation.
    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            # temperature only takes effect with sampling enabled;
            # without do_sample=True, generate() ignores it (greedy).
            do_sample=True,
            temperature=0.5,
            # LLaMA tokenizers have no pad token; silence the per-call warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only what follows "Answer:".
    if "Answer:" in response:
        response = response.split("Answer:")[-1].strip()
    return response


demo = gr.ChatInterface(
    fn=vakil_ai,
    title="⚖️ AI Vakil – Indian Legal Assistant",
    description="Ask questions about IPC, Indian law, and legal concepts.",
)

if __name__ == "__main__":
    demo.launch()