# app.py (Gradio – Original / Research Demo)
import re
from functools import lru_cache

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "Psychotherapy-LLM/PsychoCounsel-Llama3-8B"


# -----------------------------
# Load model once (cached)
# -----------------------------
@lru_cache(maxsize=1)
def get_model():
    """
    Load PsychoCounsel-Llama3-8B on GPU (ZeroGPU) with device_map='auto'.

    Called lazily on the first request and cached for the process lifetime
    (lru_cache(maxsize=1) on a zero-arg function acts as a singleton).

    Returns:
        tuple: (tokenizer, model) ready for generation.
    """
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # No bitsandbytes here: ZeroGPU gives you a GPU so we let Transformers
    # place layers automatically with device_map="auto".
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    return tokenizer, model


# -----------------------------
# Core generation logic
# -----------------------------
def build_original_prompt(client_text: str, context: str, mode: str) -> str:
    """
    Build the raw text prompt sent to the model.

    Args:
        client_text: The client's speech / vignette (truncated to 2000 chars).
        context: Optional extra instruction about the therapist's stance;
            appended to the system instruction when non-empty.
        mode: One of the UI radio values; "Brief (5–7 sentences)" selects the
            short instruction, anything else selects the extended one.

    Returns:
        The fully assembled prompt ending with "Therapist:".
    """
    client_text = (client_text or "").strip()
    context = (context or "").strip()

    # Hard cap length so extremely long vignettes don't explode cost/time
    MAX_CHARS = 2000
    if len(client_text) > MAX_CHARS:
        client_text = client_text[:MAX_CHARS] + " [...]"

    if mode == "Brief (5–7 sentences)":
        instruction = (
            "You are a professional psychotherapist conducting a session with a client. "
            "Write 5–7 sentences in a warm, empathic, reflective tone, similar to the "
            "PsychoCounsel-Llama3-8B Appendix case studies. You may ask some open-ended "
            "questions and use gentle cognitive and reflective exploration. "
            "Only output what the therapist says to the client."
        )
    else:
        # BUGFIX: in the original file this literal was broken across a raw
        # newline ("...PsychoCounsel-Llama3-8B. <newline> Start with"), which is
        # an unterminated string literal (SyntaxError). Rejoined here.
        instruction = (
            "You are a professional psychotherapist conducting a session with a client. "
            "Generate a detailed, multi-paragraph therapeutic response in the tone and "
            "structure of the Appendix case study for PsychoCounsel-Llama3-8B. Start with "
            "validation and normalization, explore fears and beliefs, reflect on self-trust "
            "and values, consider introducing a simple exercise, and close by inviting the "
            "client to share what resonates. Only output what the therapist says."
        )

    if context:
        instruction += " Consider this additional context about the therapist's stance: " + context

    prompt = f"""{instruction}

Client Speech:
{client_text}

Therapist:
"""
    return prompt


def generate_response(
    client_speech: str,
    therapist_context: str,
    mode: str,
    temperature: float,
    top_p: float,
):
    """
    Generate a therapist-style response for the given client speech.

    Args:
        client_speech: Client vignette text; empty input short-circuits with a
            user-facing message instead of calling the model.
        therapist_context: Optional stance/modality hint folded into the prompt.
        mode: Response style radio value (controls prompt + max_new_tokens).
        temperature: Sampling temperature (UI range 0.1–1.0).
        top_p: Nucleus-sampling cutoff (UI range 0.5–1.0).

    Returns:
        str: Cleaned model output (or a prompt-for-input message).
    """
    if not client_speech or not client_speech.strip():
        return "Please enter some client speech."

    tokenizer, model = get_model()
    prompt = build_original_prompt(client_speech, therapist_context, mode)

    # Tokenize on the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Keep generation lengths moderate to avoid timeouts
    if mode == "Brief (5–7 sentences)":
        max_tokens = 140
    else:
        max_tokens = 260

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,  # use sampling for some variability
            eos_token_id=tokenizer.eos_token_id,
            # Llama-3 has no dedicated pad token; set it explicitly to avoid
            # the transformers runtime warning / implicit fallback.
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated continuation, not the prompt.
    generated = outputs[0][inputs["input_ids"].shape[1]:]
    raw = tokenizer.decode(generated, skip_special_tokens=True)

    # Light cleanup of known artifacts
    clean = raw.split("Note:")[0].split("FINAL ANSWER")[0].strip()

    if mode == "Brief (5–7 sentences)":
        # Enforce the advertised 5–7 sentence cap by keeping at most 7.
        sents = re.split(r'(?<=[.!?])\s+', clean)
        sents = [s.strip() for s in sents if s.strip()]
        clean = " ".join(sents[:7])

    return clean


# -----------------------------
# Gradio UI
# -----------------------------
DESCRIPTION = """
This app uses **Psychotherapy-LLM/PsychoCounsel-Llama3-8B** in a style similar to the paper's Appendix case studies.

> ⚠️ **Important:** This version does *not* include additional safety logic for paranoia / harm content.
> It is intended for research, benchmarking, and model analysis by professionals.
> It is **not** a standalone clinical tool, nor a substitute for real-world psychiatric or psychological care.
"""

default_example = (
    "Anxiety often strikes when I’m faced with making decisions. The fear of making "
    "the wrong choice or disappointing others paralyzes me, leaving me stuck in indecision. "
    "I want to learn how to trust myself and make confident choices."
)

with gr.Blocks(title="PsychoCounsel-Llama3-8B — Original / Research Demo") as demo:
    gr.Markdown("# 🧠 PsychoCounsel-Llama3-8B — Original / Research Demo")
    gr.Markdown(DESCRIPTION)

    with gr.Row():
        with gr.Column(scale=1):
            mode = gr.Radio(
                ["Brief (5–7 sentences)", "Extended (Appendix-style)"],
                value="Brief (5–7 sentences)",
                label="Response Style",
            )
            temperature = gr.Slider(
                0.1, 1.0, value=0.6, step=0.05, label="Temperature"
            )
            top_p = gr.Slider(
                0.5, 1.0, value=0.9, step=0.05, label="Top-p"
            )
            gr.Markdown(
                "This version is for **research / replication** and may generate content "
                "that is not appropriate for direct use with vulnerable clients."
            )
        with gr.Column(scale=2):
            client_speech_box = gr.Textbox(
                label="Client Speech",
                value=default_example,
                lines=10,
                placeholder="Paste or type the client's speech / vignette here…",
            )
            therapist_context_box = gr.Textbox(
                label="Optional: Therapist context (e.g., modality, goals)",
                value="",
                lines=5,
            )
            generate_btn = gr.Button("Generate Therapist Response", variant="primary")
            output_box = gr.Markdown(label="Therapist Response (Model Output)")

    generate_btn.click(
        fn=generate_response,
        inputs=[
            client_speech_box,
            therapist_context_box,
            mode,
            temperature,
            top_p,
        ],
        outputs=output_box,
    )

if __name__ == "__main__":
    demo.launch()