"""Gradio chat app serving VakilAI, a LoRA-adapted LLaMA-3.2-3B legal
assistant for Indian law, running on CPU."""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

BASE_MODEL = "unsloth/llama-3.2-3b-bnb-4bit"
ADAPTER_MODEL = "devNaam/vakilai-llama32-3b-v1"

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

print("Loading base model on CPU...")
# float32 on CPU: bnb 4-bit quantized weights are dequantized for CPU use.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    device_map="cpu",
    torch_dtype=torch.float32,
)

print("Loading VakilAI adapter...")
model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
model.eval()  # inference only — disable dropout etc.
print("Model ready")


def build_prompt(question: str) -> str:
    """Wrap a user question in the VakilAI instruction template."""
    return f"""
You are VakilAI, an AI legal assistant for Indian law.
Explain the answer clearly and simply.

Question: {question}

Answer:
"""


def vakil_ai(message: str, history) -> str:
    """Gradio ChatInterface callback: generate an answer for `message`.

    `history` is supplied by Gradio but unused — each turn is answered
    independently from a fresh prompt.
    """
    prompt = build_prompt(message)
    inputs = tokenizer(prompt, return_tensors="pt")
    # inference_mode: skip autograd bookkeeping during generation.
    with torch.inference_mode():
        output = model.generate(
            **inputs,
            max_new_tokens=200,
            # temperature only takes effect with sampling enabled;
            # without do_sample=True, generate() ignores it (greedy).
            do_sample=True,
            temperature=0.5,
            # LLaMA tokenizers have no pad token; silence the per-call warning.
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only what follows "Answer:".
    if "Answer:" in response:
        response = response.split("Answer:")[-1].strip()
    return response


demo = gr.ChatInterface(
    fn=vakil_ai,
    title="⚖️ AI Vakil – Indian Legal Assistant",
    description="Ask questions about IPC, Indian law, and legal concepts.",
)

if __name__ == "__main__":
    demo.launch()