"""
OCS Semantic Scoring - Hugging Face Space

Scores creativity of divergent thinking responses using semantic distance
in word embedding space. Part of the Open Creativity Scoring project.

See: https://openscoring.du.edu
"""

import gradio as gr
import pandas as pd
import tempfile
import os
from scoring import SemanticScorer, download_model, ensure_spacy_model, MODELS, DEFAULT_MODEL

# Name of the model most recently activated by load_model(); get_scorer()
# falls back to it when called without an explicit model name.
current_model = DEFAULT_MODEL

# Process-wide cache of loaded scorer instances, keyed by model name.
scorers: dict = {}


def get_scorer(model_name=None):
    """Return the already-loaded scorer for ``model_name``, if there is one.

    This is a pure lookup in the module-level ``scorers`` cache: nothing is
    downloaded or created here.

    Args:
        model_name: Key to look up. ``None`` means "whatever model is
            currently active" (the module-level ``current_model``).

    Returns:
        The cached scorer, or ``None`` when that model has not been loaded.
    """
    lookup_key = current_model if model_name is None else model_name
    return scorers.get(lookup_key)


def _build_scorer(model_name, progress):
    """Fetch the files for ``model_name`` and return a scorer loaded with them."""
    progress(0, desc="Ensuring spaCy model is available...")
    ensure_spacy_model()

    progress(0.1, desc=f"Downloading {model_name} from Hugging Face Hub...")
    model_path = download_model(model_name)

    progress(0.5, desc="Loading model into memory (this may take a moment)...")
    new_scorer = SemanticScorer(model_name=model_name)
    new_scorer.load_model(model_path)
    return new_scorer


def load_model(model_name=None, progress=gr.Progress()):
    """Make ``model_name`` the active model, loading it first if necessary.

    Args:
        model_name: Key into ``MODELS``; ``None`` selects ``DEFAULT_MODEL``.
        progress: Gradio progress tracker used to report loading stages.

    Returns:
        A status message: a refusal for models flagged as not hosted, an
        "already loaded" note for cached models, or a success message after
        a fresh load. On refusal nothing is loaded and the active model is
        left unchanged.
    """
    global current_model
    model_name = DEFAULT_MODEL if model_name is None else model_name

    # A missing "hosted" flag (or an unknown model name) counts as hosted.
    is_hosted = MODELS.get(model_name, {}).get("hosted", True)
    if not is_hosted:
        return (
            f"{model_name} is not hosted on this Space (model files are too large). "
            "See the About tab for self-hosting instructions."
        )

    # Cached models only need to be switched to, not reloaded.
    if model_name in scorers:
        current_model = model_name
        return f"{model_name} already loaded."

    scorers[model_name] = _build_scorer(model_name, progress)
    current_model = model_name

    progress(1.0, desc="Ready!")
    return f"{model_name} loaded successfully."


def score_single(prompt, response, model_name, stopword, term_weighting, exclude_target,
                 normalize, elab_method, progress=gr.Progress()):
    """Score a single prompt-response pair and return a readable result.

    Args:
        prompt: Prompt text (e.g. the object in an Alternate Uses Task).
        response: Response text to score.
        model_name: Key of the model to score with; it is loaded on demand
            if no scorer for it is cached yet.
        stopword: Forwarded to ``scorer.originality`` as ``stopword``.
        term_weighting: Forwarded to ``scorer.originality`` as
            ``term_weighting``.
        exclude_target: Forwarded to ``scorer.originality`` as
            ``exclude_target``.
        normalize: If truthy, the raw score is passed through the scorer's
            scaler and reported as a 1-7 value instead of a raw distance.
        elab_method: Elaboration method name; ``"none"`` or an empty value
            skips the elaboration line.
        progress: Gradio progress tracker, forwarded to ``load_model``.

    Returns:
        A message string: the score line(s), a hint when prompt or response
        is missing, or the status from ``load_model`` when the requested
        model could not be loaded.
    """
    # Validate first so an empty form never triggers an expensive model load.
    # Stripping up front also rejects whitespace-only input.
    prompt = prompt.strip() if prompt else ""
    response = response.strip() if response else ""
    if not prompt or not response:
        return "Please provide both a prompt and a response."

    scorer = get_scorer(model_name)
    if scorer is None:
        load_status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # load_model declined (e.g. the model is not hosted on this
            # Space); show its explanation instead of failing on None.
            return load_status

    orig = scorer.originality(
        prompt, response,
        stopword=stopword,
        term_weighting=term_weighting,
        exclude_target=exclude_target,
    )

    if orig is None:
        result = "Could not score - no recognized words found in response."
    elif normalize:
        import numpy as np
        # The scaler's transform is called with a 2-D array, hence the
        # wrap/unwrap. NOTE(review): relies on the private ``_scaler``
        # attribute being set for every loadable model - confirm in scoring.
        orig = scorer._scaler.transform(np.array([[orig]]))[0, 0]
        result = f"Originality: {orig:.1f} (on 1-7 scale)"
    else:
        result = f"Originality: {orig:.4f} (cosine distance, 0-1 scale)"

    # Elaboration is reported even when originality could not be computed.
    if elab_method and elab_method != "none":
        elab = scorer.elaboration(response, method=elab_method)
        result += f"\nElaboration ({elab_method}): {elab}"

    return result


def score_batch(file, model_name, stopword, term_weighting, exclude_target, normalize,
                elab_method, progress=gr.Progress()):
    """Score a CSV file of prompt-response pairs.

    Args:
        file: The upload from ``gr.File``: either a filesystem path or an
            object whose ``.name`` attribute is the path. ``None`` means
            nothing was uploaded.
        model_name: Key of the model to score with; loaded on demand.
        stopword: Forwarded to ``scorer.score_batch`` as ``stopword``.
        term_weighting: Forwarded as ``term_weighting``.
        exclude_target: Forwarded as ``exclude_target``.
        normalize: Forwarded as ``normalize``.
        elab_method: Elaboration method name; ``"none"`` or an empty value
            is passed on as ``None``.
        progress: Gradio progress tracker.

    Returns:
        A ``(path, text)`` tuple. On success ``path`` is the scored CSV and
        ``text`` is a plain-text preview of its first 20 rows. On failure
        ``path`` is ``None`` and ``text`` explains the problem.
    """
    # Validate the upload before touching the model so a bad request never
    # triggers an expensive download/load.
    if file is None:
        return None, "Please upload a CSV file."

    # Accept both forms an upload can arrive in: a plain path, or a file-like
    # object exposing the path as ``.name``.
    csv_path = file if isinstance(file, (str, os.PathLike)) else file.name

    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        return None, f"Error reading CSV: {e}"

    # Normalize column names (str() guards against non-string labels).
    df.columns = [str(c).strip().lower() for c in df.columns]

    if "prompt" not in df.columns or "response" not in df.columns:
        # Fall back to treating the first two columns as prompt/response.
        # NOTE: read_csv has already consumed the first row as the header, so
        # for a truly header-less file that first row is not scored.
        if len(df.columns) >= 2:
            df.columns = ["prompt", "response"] + list(df.columns[2:])
        else:
            return None, "CSV must have at least two columns (prompt, response)."

    scorer = get_scorer(model_name)
    if scorer is None:
        load_status = load_model(model_name, progress)
        scorer = get_scorer(model_name)
        if scorer is None:
            # load_model declined (e.g. the model is not hosted on this
            # Space); report why instead of failing on None below.
            return None, load_status

    elab = elab_method if elab_method and elab_method != "none" else None

    progress(0.2, desc=f"Scoring {len(df)} responses...")
    scored = scorer.score_batch(
        df, stopword=stopword, term_weighting=term_weighting,
        exclude_target=exclude_target, normalize=normalize,
        elab_method=elab,
    )
    progress(0.9, desc="Preparing output...")

    # Write each result into its own fresh directory: a single fixed path in
    # the shared temp dir would let concurrent requests overwrite (or be
    # served) each other's results. The download keeps its familiar name.
    output_dir = tempfile.mkdtemp(prefix="ocs_scored_")
    output_path = os.path.join(output_dir, "scored_output.csv")
    scored.to_csv(output_path, index=False)

    return output_path, scored.head(20).to_string(index=False)


# Markdown listing the papers to cite when using these scores, plus a pointer
# to the newer LLM-based scoring. Rendered on the About tab after ABOUT_TEXT.
CITATION_TEXT = """
**Citations:**

Dumas, D., Organisciak, P., & Doherty, M. D. (2020). Measuring divergent thinking
originality with human raters and text-mining models: A psychometric comparison of
methods. *Psychology of Aesthetics, Creativity, and the Arts*.
https://doi.org/10/ghcsqq

Organisciak, P., Acar, S., Dumas, D., & Berthiaume, K. (2023). Beyond semantic
distance: Automated scoring of divergent thinking greatly improves with large
language models. *Thinking Skills and Creativity*, 49, 101356.

**Note:** For LLM-based scoring (the newer, recommended approach), see
[openscoring.du.edu](https://openscoring.du.edu) and the
[ocsai library](https://github.com/massivetexts/ocsai).
"""

# Markdown body of the About tab: what semantic-distance scoring is, which
# embedding models and scoring options are offered, and the English-only caveat.
ABOUT_TEXT = """
# OCS Semantic Scoring

Scores creativity of divergent thinking responses (e.g., Alternate Uses Task)
by measuring **semantic distance** between a prompt and response in word
embedding space.

**How it works:**
1. Looks up word vectors for the prompt and response in the selected embedding model
2. Computes cosine similarity between them
3. Subtracts from 1 to get a distance score (higher = more original)

**Available models:**
- **MOTES 100k** (default): Children's writing embeddings (ages 10–12) from the MOTES study
- **GloVe 840B**: General-purpose embeddings trained on 840B Common Crawl tokens (Pennington et al. 2014). Large vocabulary, good for adult responses. English only.

**Options:**
- **Stopword filtering**: Skip common functional words (the, and, etc.)
- **Term weighting**: Weight words by IDF (rarer words matter more)
- **Exclude target**: Don't count prompt words in the response
- **Normalize**: Map scores to a 1-7 scale (model-specific calibration)
- **Elaboration**: Measure response length/complexity

**Note:** These models are English-only. Non-English words (e.g. Chinese, Arabic) will not be found in the vocabulary and will be skipped. For multilingual scoring, use [Ocsai](https://openscoring.du.edu/ocsai).
"""


# OCS color theme — matches openscoring.du.edu semantic theme (vibrant green).
# Shade ramp for the custom primary hue, from lightest (c50) to darkest (c950).
_OCS_GREEN_SHADES = {
    "c50": "#f0faf0",
    "c100": "#dcf5dc",
    "c200": "#b8ecb8",
    "c300": "#85dc85",
    "c400": "#4cc44c",
    "c500": "#2ea82e",  # vibrant green (original note: hsl(145, 65%, 42%) approx)
    "c600": "#258c25",
    "c700": "#1d701d",
    "c800": "#165416",
    "c900": "#0e380e",
    "c950": "#071c07",
}
_ocs_green = gr.themes.colors.Color(name="ocs-green", **_OCS_GREEN_SHADES)

# Default Gradio theme with the green ramp as primary hue, slate neutrals,
# and Inter (falling back to a generic sans-serif) as the font.
_ocs_base_theme = gr.themes.Default(
    primary_hue=_ocs_green,
    neutral_hue=gr.themes.colors.slate,
    font=[gr.themes.GoogleFont("Inter"), "sans-serif"],
)

# Primary buttons use shade 500, darkening to 600 on hover; block labels are small.
OCS_THEME = _ocs_base_theme.set(
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    block_label_text_size="sm",
)


# Build UI
def _scoring_option_controls():
    """Create the scoring-option widgets inside the current layout context.

    Both scoring tabs show the same five controls. They are created here in a
    fixed order (two rows of checkboxes, then the elaboration dropdown).

    Returns:
        Tuple of components: (stopword, term weighting, exclude target,
        normalize, elaboration method).
    """
    with gr.Row():
        stopword_box = gr.Checkbox(label="Stopword filtering", value=True)
        weighting_box = gr.Checkbox(label="Term weighting (IDF)", value=True)
    with gr.Row():
        exclude_box = gr.Checkbox(label="Exclude target words", value=True)
        normalize_box = gr.Checkbox(label="Normalize (1-7)", value=False)

    elab_dropdown = gr.Dropdown(
        label="Elaboration method",
        choices=["none", "whitespace", "stoplist", "idf", "pos"],
        value="none",
    )
    return stopword_box, weighting_box, exclude_box, normalize_box, elab_dropdown


with gr.Blocks(title="OCS Semantic Scoring", theme=OCS_THEME) as demo:
    # Page header and short introduction.
    gr.Markdown("# OCS Semantic Scoring")
    gr.Markdown(
        "Score divergent thinking originality using semantic distance in word embedding space. "
        "Part of [Open Creativity Scoring](https://openscoring.du.edu) · "
        "For higher accuracy, try [Ocsai LLM scoring](https://openscoring.du.edu/ocsai)."
    )

    # Dropdown entries as (label shown to the user, model key) pairs.
    model_choices = [(info["description"], key) for key, info in MODELS.items()]

    # Model picker shared by both scoring tabs, with an explicit load button.
    with gr.Row():
        model_selector = gr.Dropdown(
            label="Model",
            choices=model_choices,
            value=DEFAULT_MODEL,
        )
        load_btn = gr.Button("Load Model", variant="primary")
    load_status = gr.Textbox(
        label="Model Status",
        value="Model not loaded yet. Click 'Load Model' or score something to auto-load.",
        interactive=False,
    )
    load_btn.click(fn=load_model, inputs=model_selector, outputs=load_status)

    with gr.Tabs():
        # --- Tab 1: score one prompt/response pair ---
        with gr.TabItem("Single Score"):
            with gr.Row():
                with gr.Column():
                    prompt_input = gr.Textbox(
                        label="Prompt (object)", placeholder="e.g., brick", lines=1
                    )
                    response_input = gr.Textbox(
                        label="Response", placeholder="e.g., modern art sculpture", lines=2
                    )

                    stopword, term_weight, exclude_tgt, norm, elab = _scoring_option_controls()
                    score_btn = gr.Button("Score", variant="primary")

                with gr.Column():
                    result_output = gr.Textbox(label="Result", lines=4, interactive=False)

            # Input order must match score_single's positional parameters.
            single_inputs = [
                prompt_input, response_input, model_selector,
                stopword, term_weight, exclude_tgt, norm, elab,
            ]
            score_btn.click(fn=score_single, inputs=single_inputs, outputs=result_output)

            # Clickable sample pairs that fill in the prompt and response boxes.
            gr.Examples(
                examples=[
                    ["brick", "doorstop"],
                    ["brick", "modern art sculpture displayed in a gallery"],
                    ["paperclip", "emergency lockpick for escaping a submarine"],
                    ["shoe", "flower pot for a tiny cactus"],
                ],
                inputs=[prompt_input, response_input],
            )

        # --- Tab 2: score an uploaded CSV ---
        with gr.TabItem("Batch Score (CSV)"):
            gr.Markdown(
                "Upload a CSV with `prompt` and `response` columns. "
                "If no headers, the first two columns are used."
            )
            with gr.Row():
                with gr.Column():
                    file_input = gr.File(label="Upload CSV", file_types=[".csv"])

                    (b_stopword, b_term_weight, b_exclude_tgt,
                     b_norm, b_elab) = _scoring_option_controls()
                    batch_btn = gr.Button("Score File", variant="primary")

                with gr.Column():
                    file_output = gr.File(label="Download scored CSV")
                    preview = gr.Textbox(
                        label="Preview (first 20 rows)", lines=10, interactive=False
                    )

            # Input order must match score_batch's positional parameters.
            batch_inputs = [
                file_input, model_selector,
                b_stopword, b_term_weight, b_exclude_tgt, b_norm, b_elab,
            ]
            batch_btn.click(fn=score_batch, inputs=batch_inputs, outputs=[file_output, preview])

        # --- Tab 3: background information and citations ---
        with gr.TabItem("About"):
            gr.Markdown(ABOUT_TEXT)
            gr.Markdown(CITATION_TEXT)

# Start the Gradio server only when this file is run as a script; importing
# the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()