import os

import spaces
import torch
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from preprocess import clean_text, count_words

# Hub identifiers: CHECKPOINT supplies the classifier weights, BASE_MODEL
# supplies the tokenizer.
CHECKPOINT = "pangram/editlens_roberta-large"
BASE_MODEL = "FacebookAI/roberta-large"
# Token budget handed to the tokenizer; longer inputs are truncated to it.
MAX_LENGTH = 512
# Inputs with fewer words than this trigger a reliability warning in the UI.
MIN_WORDS = 50

# Hub access token read from the environment; None when the variable is unset.
HF_TOKEN = os.getenv("HF_TOKEN")

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
model = AutoModelForSequenceClassification.from_pretrained(
    CHECKPOINT,
    token=HF_TOKEN,
)
# Inference only: switch to eval mode, then place the weights on the GPU.
model.eval()
model.to("cuda")

# Number of output classes reported by the loaded checkpoint's config.
N_BUCKETS = model.config.num_labels

# A 4-bucket model maps a continuous "extent of AI editing" axis onto ordinal
# buckets: 0 = fully human, N-1 = fully AI-generated, with intermediate buckets
# representing increasing amounts of AI editing. Any other bucket count falls
# back to generic "Bucket i" names.
if N_BUCKETS == 4:
    BUCKET_NAMES = [
        "Human-written",
        "Lightly AI-edited",
        "Heavily AI-edited",
        "Fully AI-generated",
    ]
else:
    BUCKET_NAMES = [f"Bucket {i}" for i in range(N_BUCKETS)]


@spaces.GPU(duration=30)
def _predict_bucket_probs(cleaned):
    """Run the classifier on already-cleaned text and return bucket probabilities.

    Only this helper carries the ``spaces.GPU`` decorator, so requests that
    ``analyze`` rejects during validation return without ever entering the
    GPU-decorated call.

    Args:
        cleaned: Text to score, as returned by ``clean_text``.

    Returns:
        A list of ``N_BUCKETS`` Python floats: the softmax over the model's
        logits for the single input sequence. Plain floats (rather than a
        tensor) keep the value handed back to the caller trivially
        serializable.
    """
    inputs = tokenizer(
        cleaned,
        truncation=True,  # inputs longer than MAX_LENGTH tokens are cut off
        max_length=MAX_LENGTH,
        return_tensors="pt",
    ).to("cuda")

    with torch.no_grad():
        logits = model(**inputs).logits

    # Softmax in float32 regardless of the model's dtype; [0] selects the only
    # sequence in the batch.
    return torch.softmax(logits.float(), dim=-1)[0].cpu().tolist()


def analyze(text):
    """Score a passage for the extent of AI editing.

    Validation and word counting happen here, before the GPU-decorated helper
    is called, so empty submissions fail fast.

    Args:
        text: Raw text from the input textbox.

    Returns:
        A ``(summary, label_probs)`` tuple: ``summary`` is a Markdown string
        with the overall score and most likely bucket; ``label_probs`` maps
        each name in ``BUCKET_NAMES`` to its probability.

    Raises:
        gr.Error: If ``text`` is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise gr.Error("Please paste some text to analyze.")

    n_words = count_words(text)
    if n_words < MIN_WORDS:
        # Non-fatal: the passage is still scored, the user is just cautioned.
        gr.Warning(
            f"Only {n_words} words detected. EditLens is most reliable on passages "
            f"of at least {MIN_WORDS} words; treat this result with caution."
        )

    probs = _predict_bucket_probs(clean_text(text))

    # First index of the maximum probability (ties resolve to the lowest index).
    bucket_idx = max(range(N_BUCKETS), key=probs.__getitem__)

    # Expected bucket index under the predicted distribution, rescaled to
    # [0, 1]. The max() guard avoids dividing by zero for a single-label
    # checkpoint, where the expected index is always 0.
    expected_bucket = sum(i * p for i, p in enumerate(probs))
    score = expected_bucket / max(N_BUCKETS - 1, 1)

    label_probs = dict(zip(BUCKET_NAMES, probs))

    summary = (
        f"## AI editing score: {score:.2f} / 1.00\n"
        f"**Most likely:** {BUCKET_NAMES[bucket_idx]}\n\n"
        f"0.00 means the text reads as fully human-written; "
        f"1.00 means it reads as fully AI-generated. The score is the expected "
        f"value over the bucket distribution below."
    )
    return summary, label_probs


# Both examples are real rows from the EditLens test set (pangram/editlens_iclr),
# not text written for this demo — a human_written review and an ai_generated review.
# They are offered as one-click inputs through the gr.Examples widget in the UI.
# The text is kept verbatim (including the original wording and punctuation);
# do not "correct" it.

# Human-written review; two paragraphs separated by a blank line.
HUMAN_EXAMPLE = (
    "We enjoyed our first visit to enroute coffee. Place was very easy to find and "
    "had a very warm and cozy feel for the place. Walking in you quickly view the "
    "delicious looking pastries they have lined up along the counter. Trying those "
    "next visit. They had a large section of coffee choices and looks like they sell "
    "additional merchandise. Giving them a 5 for outstanding 1st impression.\n\n"
    "Note for owner: possible define the cash register station or separate it a bit "
    "more from coffee pickup station. Seemed a bit crowded in that area."
)

# AI-generated review; single paragraph, uses typographic (curly) apostrophes.
AI_EXAMPLE = (
    "McDonald’s offers a consistent fast-food experience with widely recognized "
    "favorites like the Big Mac, fries, and McNuggets. The service is generally "
    "quick, and the menu provides options for breakfast, lunch, and dinner, as well "
    "as some healthier choices. While the food isn’t gourmet, it’s reliably "
    "tasty and convenient, making it a popular choice for people on the go. Prices "
    "are reasonable, but quality can vary slightly between locations. Overall, "
    "McDonald’s delivers on expectations for fast, accessible, and familiar meals."
)

# UI layout: intro text, then an input column (textbox, button, examples)
# beside an output column (Markdown summary and per-bucket label widget).
with gr.Blocks(title="EditLens") as demo:
    # f-string so the advertised minimum length is read from MIN_WORDS, the
    # same constant that drives the short-input warning in analyze().
    gr.Markdown(
        f"""
        # EditLens — Quantifying the Extent of AI Editing in Text

        EditLens scores text on a continuous scale according to *how much AI
        intervention* it contains, rather than a simple human-vs-AI binary. This demo
        runs the RoBERTa-large model from the
        [EditLens ICLR 2026 paper](https://arxiv.org/abs/2510.03154)
        ([code](https://github.com/pangramlabs/EditLens) ·
        [model](https://huggingface.co/pangram/editlens_roberta-large)).

        Paste a passage (≥ {MIN_WORDS} words works best) and get an AI-editing score in [0, 1].
        """
    )
    with gr.Row():
        with gr.Column():
            text_in = gr.Textbox(
                label="Text to analyze",
                placeholder="Paste a paragraph or more here…",
                lines=12,
            )
            btn = gr.Button("Analyze", variant="primary")
            gr.Examples(
                examples=[[HUMAN_EXAMPLE], [AI_EXAMPLE]],
                inputs=text_in,
                label="Examples (real human-written / AI-generated reviews from the EditLens test set)",
            )
        with gr.Column():
            summary_out = gr.Markdown()
            # Show every bucket, not just the top few.
            label_out = gr.Label(label="Bucket distribution", num_top_classes=N_BUCKETS)

    # The button click and the textbox submit event run the same handler and
    # write to the same two output components.
    outputs = [summary_out, label_out]
    btn.click(analyze, inputs=text_in, outputs=outputs)
    text_in.submit(analyze, inputs=text_in, outputs=outputs)

    gr.Markdown(
        "Model and dataset are licensed CC BY-NC-SA 4.0. EditLens is a research "
        "system and can be wrong — do not use it to make consequential judgments "
        "about real people."
    )

demo.launch()