"""
Gradio App for HF Space Deployment
Comment Classification Skill powered by Qwen2.5-1.5B fine-tuned model.
"""

import os
import json
import time
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# ============================================================
# Load Model
# ============================================================
# Model repo id (or path) to load; the MODEL_ID environment variable overrides
# the default.
MODEL_ID = os.getenv("MODEL_ID", "jovincia/qwen25-comment-classifier")

print(f"Loading model from {MODEL_ID}...")
try:
    # NOTE(review): trust_remote_code=True lets the loaded repo run its own
    # Python code - confirm this is required for every MODEL_ID in use.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.float32,  # CPU-safe for HF Spaces free tier
    )
    # Inference only: switch the module to evaluation mode.
    model.eval()
    # Class-index -> label-name mapping taken from the model config.
    id2label = model.config.id2label
    print(f"Model loaded. Labels: {id2label}")
except Exception as load_error:
    # The app cannot do anything without a model, so report and exit non-zero.
    print(
        f"ERROR: Failed to load model from '{MODEL_ID}': {load_error}",
        "Make sure the model has been trained (02_finetune.py) or the HF repo exists.",
        sep="\n",
    )
    raise SystemExit(1)

# Hex color associated with each of the four sentiment labels.
LABEL_COLORS = dict(
    positive="#4CAF50",
    negative="#F44336",
    neutral="#9E9E9E",
    ambiguous="#FF9800",
)

def classify_comment(text: str) -> dict:
    """Classify a single comment and return per-label probabilities.

    Args:
        text: Raw comment text. It is tokenized with truncation at 256 tokens.

    Returns:
        A dict mapping each label name to its softmax probability, rounded to
        four decimal places. For empty or whitespace-only input the model is
        not run and every label in ``id2label`` maps to ``0.0``.

    Side effects:
        Prints one log line containing the latency in milliseconds, a preview
        of the comment, and the top prediction with its probability.
    """
    if not text or not text.strip():
        return {label: 0.0 for label in id2label.values()}

    start_time = time.perf_counter()

    inputs = tokenizer(
        text,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )
    # Keep the input tensors on the same device as the model weights so the
    # forward pass does not fail if the model is ever placed off-CPU.
    inputs = inputs.to(model.device)

    with torch.no_grad():
        outputs = model(**inputs)
        # Single input -> take row 0 of the (batch, num_labels) probabilities.
        probs = torch.softmax(outputs.logits, dim=-1)[0]

    latency_ms = (time.perf_counter() - start_time) * 1000

    # Build the result dict. id2label may be keyed by str or by int depending
    # on how the config was loaded, so try both before falling back to a
    # generic "class_<idx>" name.
    result = {}
    for idx, prob in enumerate(probs.tolist()):
        key = str(idx)
        label = id2label[key] if key in id2label else id2label.get(idx, f"class_{idx}")
        result[label] = round(prob, 4)

    # Print latency for monitoring. Only mark the preview as truncated when it
    # really was, and use repr() so embedded newlines in user text cannot
    # split the log entry across lines.
    predicted = max(result, key=result.get)
    preview = text[:50] + ("..." if len(text) > 50 else "")
    print(f"[{latency_ms:.1f}ms] {preview!r} -> {predicted} ({result[predicted]:.3f})")

    return result


def batch_classify(texts: str) -> str:
    """Classify every non-blank line of *texts* as a separate comment.

    Args:
        texts: Newline-separated comments. Blank lines are skipped and
            surrounding whitespace on each line is removed.

    Returns:
        One ``[LABEL] (confidence%) comment`` line per classified comment,
        joined with newlines, or a prompt message when *texts* is empty or
        contains only whitespace.
    """
    if not (texts and texts.strip()):
        return "Please enter at least one comment."

    report = []
    for raw_line in texts.strip().split("\n"):
        comment = raw_line.strip()
        if not comment:
            continue
        scores = classify_comment(comment)
        # The highest-probability label is the reported prediction.
        top_label = max(scores, key=scores.get)
        report.append(f"[{top_label.upper()}] ({scores[top_label]:.1%}) {comment}")

    return "\n".join(report)


# ============================================================
# Gradio Interface
# ============================================================
# Build the two-tab UI. Components are created in layout order inside the
# nested context managers, so statement order here determines the page layout.
with gr.Blocks(
    title="Comment Classification Skill",
    theme=gr.themes.Soft(),
) as demo:
    # Page header.
    gr.Markdown(
        """
        # Comment Classification Skill
        **Fine-tuned Qwen2.5-1.5B** for 4-class comment sentiment classification.

        Classes: **positive** | **negative** | **neutral** | **ambiguous**
        """
    )

    # Tab 1: classify one comment and show the per-label probabilities.
    with gr.Tab("Single Comment"):
        with gr.Row():
            # Left column: input box and submit button.
            with gr.Column():
                input_text = gr.Textbox(
                    label="Enter a comment",
                    placeholder="Type your comment here...",
                    lines=3,
                )
                classify_btn = gr.Button("Classify", variant="primary")
            # Right column: label widget fed by classify_comment's dict.
            with gr.Column():
                output_label = gr.Label(label="Classification Result", num_top_classes=4)

        # Clicking the button sends the textbox value to classify_comment.
        classify_btn.click(
            fn=classify_comment,
            inputs=input_text,
            outputs=output_label,
        )

        # Sample comments that fill the input box when selected.
        gr.Examples(
            examples=[
                ["This product is amazing! Best purchase I've ever made."],
                ["Terrible quality. Broke after one day of use."],
                ["It arrived on time. Standard packaging."],
                ["I'm not sure if this is working correctly or not..."],
                ["The customer service was incredibly helpful and kind!"],
                ["What a waste of money. Never buying from here again."],
            ],
            inputs=input_text,
        )

    # Tab 2: classify several comments at once, one per input line.
    with gr.Tab("Batch Classification"):
        gr.Markdown("Enter one comment per line for batch processing.")
        batch_input = gr.Textbox(
            label="Comments (one per line)",
            placeholder="Comment 1\nComment 2\nComment 3",
            lines=8,
        )
        batch_btn = gr.Button("Classify All", variant="primary")
        # Read-only box that receives batch_classify's newline-joined report.
        batch_output = gr.Textbox(label="Results", lines=10, interactive=False)

        batch_btn.click(
            fn=batch_classify,
            inputs=batch_input,
            outputs=batch_output,
        )

    # Page footer with model and task details.
    gr.Markdown(
        """
        ---
        **Model:** Qwen2.5-1.5B fine-tuned with LoRA on GoEmotions dataset (58k+ comments)
        **Task:** 4-class comment sentiment classification
        """
    )

# Start the server only when run as a script; host 0.0.0.0 and port 7860 are
# passed explicitly to launch().
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)