import os
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the GGUF model weights from the Hugging Face Hub (cached locally).
REPO_ID = "Enfysyz/JurisPrae"
FILENAME = "JurisPrae_Q4_K_M.gguf"
model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME)

# Load the quantized Llama model for local CPU inference.
llm = Llama(
    model_path=model_path,
    n_ctx=2048,
    n_threads=4,
)

# System prompt steering the model toward careful, caveated legal answers.
SYSTEM_PROMPT = (
    "You are a legal expert. Provide accurate, well-reasoned legal insights using proper legal terminology. "
    "Maintain a professional, objective tone. Be specific about which laws or legal principles apply. "
    "Explain the person's rights, cite the relevant statute(s), and give a clear legal opinion. "
    "When unsure, advise consulting a qualified attorney."
)


def _build_prompt(message, history):
    """Serialize the system prompt, prior turns, and the new user message
    into the chat-template string this model expects.

    history: prior turns as supplied by Gradio — either (user, bot) tuple
    pairs (tuples format) or {"role": ..., "content": ...} dicts
    (messages format, the Gradio >= 5 default). Both are supported.
    """
    parts = [f"<|system|>\n{SYSTEM_PROMPT}\n\n"]
    for turn in history:
        if isinstance(turn, dict):
            # Messages format: one dict per utterance.
            tag = "user" if turn.get("role") == "user" else "assistant"
            parts.append(f"<|{tag}|>\n{turn.get('content', '')}\n\n")
        else:
            # Tuples format: one (user_msg, bot_msg) pair per exchange.
            user_msg, bot_msg = turn
            parts.append(f"<|user|>\n{user_msg}\n\n<|assistant|>\n{bot_msg}\n\n")
    parts.append(f"<|user|>\n{message}\n\n<|assistant|>\n")
    return "".join(parts)


def chat_fn(message, history):
    """Gradio ChatInterface callback: return the model's reply to *message*.

    Gradio tracks the conversation itself, so only the new assistant text
    is returned.
    """
    prompt = _build_prompt(message, history)
    # Fix: the original used stop=[""], which is useless (and on some
    # llama.cpp builds halts generation immediately). Stop on the next
    # role tag so the model cannot run on and fabricate further turns.
    output = llm(
        prompt,
        max_tokens=512,
        stop=["<|user|>", "<|system|>"],
    )
    return output["choices"][0]["text"].strip()


# Launch at module level (not behind a __main__ guard) so the app also
# starts when the file is executed by a hosting platform such as HF Spaces.
gr.ChatInterface(
    fn=chat_fn,
    title="Legal Expert Assistant",
    description="An AI legal expert (not a substitute for a real lawyer).",
).launch()