import gradio as gr
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base checkpoint the LoRA adapter was trained on, and the adapter itself.
BASE_MODEL = "unsloth/llama-3.2-3b-bnb-4bit"
ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
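
# Not in the original script (a common safeguard): Llama-family tokenizers
# often ship without a pad token, which triggers warnings in generate().
# Reusing the EOS token as pad is a standard, generation-safe default.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token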

print("Loading base model on CPU...")
# Note: the base repo is a pre-quantized bitsandbytes 4-bit checkpoint; whether
# it loads on CPU in float32 depends on the installed transformers/bitsandbytes
# versions. If this errors, point BASE_MODEL at an unquantized Llama 3.2 3B
# checkpoint instead.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,
)
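# Optional sketch (assumes a CUDA GPU, unlike the CPU setup above): load the
# checkpoint in its native 4-bit form instead, which is faster and far lighter
# on memory. BitsAndBytesConfig is the standard transformers quantization config.
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       BASE_MODEL,
#       device_map="auto",
#       quantization_config=BitsAndBytesConfig(load_in_4bit=True),
#   )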

print("Loading VakilAI adapter...")
# Attach the fine-tuned LoRA weights on top of the frozen base model.
model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
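model.eval()  # inference only: disables dropout and other training-mode behavior

# Optional sketch (not in the original flow): merging the adapter into the
# base weights removes the PEFT forward-hook overhead, which helps a little
# on CPU. merge_and_unload() is a standard PeftModel method.
# model = model.merge_and_unload()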

print("Model ready")

def build_prompt(question):
    """Wrap the user's question in the instruction format used for inference."""
    return f"""You are VakilAI, an AI legal assistant for Indian law.

Explain the answer clearly and simply.

Question:
{question}

Answer:
"""
def vakil_ai(message, history):
    # `history` is supplied by gr.ChatInterface; each question is answered
    # independently here, so it is intentionally unused.
    prompt = build_prompt(message)

    inputs = tokenizer(prompt, return_tensors="pt")

    # do_sample=True is required for `temperature` to have any effect;
    # without it, generate() falls back to greedy decoding and ignores it.
    output = model.generate(
        **inputs,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.5,
        pad_token_id=tokenizer.pad_token_id,
    )

    # The decoded text includes the prompt, so keep only what follows "Answer:".
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    if "Answer:" in response:
        response = response.split("Answer:")[-1].strip()

    return response
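# Alternative sketch (an assumption, not the original approach): decode only
# the newly generated tokens, which stays correct even if "Answer:" appears
# inside the user's question:
#   new_tokens = output[0][inputs["input_ids"].shape[-1]:]
#   response = tokenizer.decode(new_tokens, skip_special_tokens=True)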

# gr.ChatInterface wires vakil_ai into a chat UI and manages the history list.
demo = gr.ChatInterface(
    fn=vakil_ai,
    title="⚖️ AI Vakil – Indian Legal Assistant",
    description="Ask questions about IPC, Indian law, and legal concepts.",
)

demo.launch()