"""
Hugging Face Space: Red Hat product/version NER.

Run NER on text to extract Red Hat products and versions. The model is loaded
from the Hub (set MODEL_ID in the Space settings, or leave it unset to use the default).
"""

import os
import gradio as gr
from transformers import pipeline

# Hub repository the NER model is loaded from. Define MODEL_ID under the
# Space's "Repository variables" to override it; otherwise the default is used.
MODEL_ID = os.getenv("MODEL_ID", "Neda7/ner-linux-product-version")

# Entity types that are shown to the user. Any other label the model emits
# is filtered out of the results.
ALLOWED_ENTITY_GROUPS = {"PRODUCT", "VERSION"}


def load_ner():
    """Create the token-classification pipeline for ``MODEL_ID``.

    The pipeline is built with ``aggregation_strategy="simple"``.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    pipeline_options = {
        "model": MODEL_ID,
        "aggregation_strategy": "simple",
    }
    return pipeline("token-classification", **pipeline_options)


# Cached pipeline instance. It stays None until the first call to get_ner(),
# so the Space can start without loading the (possibly large) model.
_ner = None


def get_ner():
    """Return the shared NER pipeline, building it via ``load_ner`` on first use."""
    global _ner
    if _ner is not None:
        return _ner
    _ner = load_ner()
    return _ner


def run_ner(text: str, min_confidence: float = 0.5) -> str:
    """Run NER over ``text`` and render the kept entities as a Markdown list.

    Args:
        text: User input; surrounding whitespace is stripped before inference.
        min_confidence: Entities whose score is below this value are dropped.
            Entities that carry no score are always kept.

    Returns:
        One ``- **LABEL**: word (score)`` line per kept entity, joined with
        newlines. A plain message is returned instead when the input is blank,
        when loading or running the pipeline raises, or when no entity passes
        the label and confidence filters.
    """
    stripped = text.strip() if text else ""
    if not stripped:
        return "Enter some text to extract Red Hat products and versions."

    try:
        predictions = get_ner()(stripped)
    except Exception as exc:
        return f"Error: {exc}"

    def label_of(entity):
        # Prefer "entity_group"; fall back to "entity", then to "?".
        return entity.get("entity_group", entity.get("entity", "?"))

    def is_kept(entity):
        if label_of(entity) not in ALLOWED_ENTITY_GROUPS:
            return False
        confidence = entity.get("score")
        # Unscored entities pass; scored ones must not fall below the threshold.
        return confidence is None or not confidence < min_confidence

    def as_bullet(entity):
        label = label_of(entity)
        token = entity.get("word", "").strip()
        confidence = entity.get("score")
        suffix = "" if confidence is None else f" ({confidence:.2f})"
        return f"- **{label}**: {token}{suffix}"

    # Filter everything first, then format, so an empty result is reported
    # before any formatting happens.
    kept = [entity for entity in predictions if is_kept(entity)]
    if not kept:
        return (
            "No Red Hat products or versions detected above the confidence threshold. "
            "Try lowering the **Min. confidence** slider or check that the text contains product/version names."
        )
    return "\n".join(as_bullet(entity) for entity in kept)


# Build the Gradio UI. Components are declared inside the Blocks context in
# this order: heading, input textbox, confidence slider, results area,
# submit button, footer note.
with gr.Blocks(title="Red Hat NER", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "## Red Hat product & version NER\n"
        "Paste text below to detect Red Hat **products** (e.g. RHEL, OpenShift) and **versions**."
    )
    # Free-text input that is passed to run_ner as `text`.
    inp = gr.Textbox(
        label="Input text",
        placeholder="e.g. We use RHEL 8 and OpenShift 4.10 in production.",
        lines=4,
    )
    # Threshold passed to run_ner as `min_confidence`; the initial value (0.5)
    # matches run_ner's own default.
    min_conf = gr.Slider(
        minimum=0.1,
        maximum=0.95,
        value=0.5,
        step=0.05,
        label="Min. confidence",
        info="Hide entities below this score (higher = fewer, more precise results).",
    )
    # run_ner returns Markdown-formatted text, so the result goes to a Markdown component.
    out = gr.Markdown(label="Entities")
    btn = gr.Button("Extract entities")
    # On click, call run_ner with the textbox and slider values and write the
    # returned string to `out`.
    btn.click(fn=run_ner, inputs=[inp, min_conf], outputs=out)
    gr.Markdown(
        "Model is loaded from the Hugging Face Hub. "
        "To use your own model, set the `MODEL_ID` variable in this Space's settings."
    )

# Start the app. There is no `if __name__ == "__main__"` guard, so this also
# runs when the module is imported.
demo.launch()