File size: 1,347 Bytes
c1731e2
090538e
 
 
 
 
65b3a86
090538e
 
243a44a
090538e
 
1300286
090538e
 
1300286
 
090538e
 
243a44a
090538e
 
1300286
243a44a
 
1300286
243a44a
1300286
243a44a
1300286
243a44a
1300286
 
243a44a
 
 
 
 
1300286
243a44a
1300286
090538e
1300286
090538e
 
 
1300286
 
090538e
 
243a44a
 
 
 
 
 
c1731e2
 
243a44a
090538e
243a44a
1300286
c1731e2
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr
import torch

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base Llama 3.2 3B checkpoint and the LoRA adapter fine-tuned for Indian law.
BASE_MODEL = "unsloth/llama-3.2-3b-bnb-4bit"
ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print("Loading base model on CPU...")
# NOTE(review): the repo name suggests a bitsandbytes 4-bit checkpoint, yet it
# is loaded here in full float32 on CPU — presumably because bitsandbytes
# quantization requires a GPU. Confirm this is intentional; fp32 for a 3B
# model needs roughly 12 GB of RAM.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32
)

print("Loading VakilAI adapter...")
# Wrap the base model with the fine-tuned LoRA weights (PEFT); `model` is the
# global the chat handler below calls .generate() on.
model = PeftModel.from_pretrained(model, ADAPTER_MODEL)

print("Model ready")


def build_prompt(question):
    """Wrap *question* in the VakilAI instruction template.

    Returns the full prompt string fed to the model. The template ends with
    an ``Answer:`` marker so the completion can be split off afterwards.
    """
    # Explicit per-line template; .format() substitutes the question without
    # re-interpreting any braces the user may have typed.
    template = (
        "\n"
        "You are VakilAI, an AI legal assistant for Indian law.\n"
        "\n"
        "Explain the answer clearly and simply.\n"
        "\n"
        "Question:\n"
        "{question}\n"
        "\n"
        "Answer:\n"
    )
    return template.format(question=question)


def vakil_ai(message, history):
    """Chat handler for gr.ChatInterface: answer one legal question.

    Parameters
    ----------
    message : str
        The user's question for the current turn.
    history : list
        Prior chat turns supplied by Gradio (unused; the prompt is rebuilt
        from scratch every turn, so there is no conversational memory).

    Returns
    -------
    str
        The model's completion with the prompt scaffolding stripped.
    """
    prompt = build_prompt(message)

    inputs = tokenizer(prompt, return_tensors="pt")

    # Inference only — no_grad avoids building an autograd graph, which
    # wastes memory and time on this CPU-bound deployment.
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            # Bug fix: temperature is silently ignored under the default
            # greedy decoding; do_sample=True is required for it to apply.
            do_sample=True,
            temperature=0.5,
            # Llama tokenizers typically define no pad token; fall back to
            # EOS to silence generate()'s pad_token_id warning.
            pad_token_id=(
                tokenizer.pad_token_id
                if tokenizer.pad_token_id is not None
                else tokenizer.eos_token_id
            ),
        )

    # decode() returns prompt + completion; keep only the text after the
    # final "Answer:" marker emitted by build_prompt.
    response = tokenizer.decode(output[0], skip_special_tokens=True)

    if "Answer:" in response:
        response = response.split("Answer:")[-1].strip()

    return response


# Gradio chat UI: each submitted message is routed to vakil_ai(message, history).
demo = gr.ChatInterface(
    fn=vakil_ai,
    title="⚖️ AI Vakil – Indian Legal Assistant",
    description="Ask questions about IPC, Indian law, and legal concepts.",
)

# Starts the web server on the default local port and blocks until stopped.
demo.launch()