"""Gradio Space: named-entity recognition demo for lujin/search-ner-lora-model.

Loads the token-classification model once at import time and serves a
simple text-in / JSON-out interface.
"""

import gradio as gr
import spaces
import torch
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

# Hugging Face Hub model identifier for the fine-tuned NER model.
MODEL_ID = "lujin/search-ner-lora-model"

# transformers pipeline device convention: CUDA device index, or -1 for CPU.
DEVICE = 0 if torch.cuda.is_available() else -1

# Load model and tokenizer once so the interface stays responsive.
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# "simple" aggregation merges sub-word tokens into whole-entity spans.
ner_pipe = pipeline(
    "token-classification",
    model=model,
    tokenizer=tokenizer,
    aggregation_strategy="simple",
    device=DEVICE,
)


@spaces.GPU
def run_ner(text: str) -> list:
    """Return aggregated entity predictions for the provided text.

    Args:
        text: Raw input string; leading/trailing whitespace is ignored.

    Returns:
        A list of entity dicts from the transformers pipeline
        (entity_group, score, word, start, end), or an empty list
        when the input is empty or whitespace-only.
    """
    # Guard against None as well as empty/whitespace-only input so the
    # pipeline is never called with nothing to tag.
    text = (text or "").strip()
    if not text:
        return []
    return ner_pipe(text)


demo = gr.Interface(
    fn=run_ner,
    inputs=gr.Textbox(lines=4, label="Input Text"),
    outputs=gr.JSON(label="Entities"),
    title="Search NER",
    description="Named-entity recognition using lujin/search-ner-lora-model.",
    examples=[
        ["OpenAI总部位于旧金山。"],
        ["小明毕业于清华大学,现在在阿里巴巴工作。"],
        ["明天在北京故宫博物院举行长城文化论坛。"],
        ["AgentBuilder Catalog 2.0 发布时间"],
        ["苹果公司发布了最新的iPhone。"],
        ["查找开源智能体开发平台"],
    ],
)

# Guard the launch so importing this module (e.g. for tests) does not
# start a server; HF Spaces runs app.py as __main__, so behavior on
# Spaces is unchanged.
if __name__ == "__main__":
    demo.launch()