import os
import gradio as gr
from transformers import pipeline

# Hugging Face Hub identifier of the model handed to `pipeline(model=...)` below.
MODEL_ID = "Badhon/Bangla_punctuation_restore"

# Maps a predicted label name to the punctuation mark that
# `restore_punctuation` inserts after the labelled span. Labels without an
# entry here (including "O") result in no insertion.
# NOTE(review): the key set is presumably the model's label set — confirm
# against the model's config (`id2label`).
LABEL_TO_PUNCT = {
    # Bangla text uses the ordinary comma (U+002C); the previous value was
    # U+060C ARABIC COMMA, which is a different script's punctuation mark.
    "COMMA": ",",
    "DARI": "।",
    "QUESTION": "?",
    "EXCLAMATION": "!",
    "SEMICOLON": ";",
    "COLON": ":",
    "HYPHEN": "-",
}

# Token-classification pipeline built once at import time and reused by every
# call to `restore_punctuation`. The access token is read from the HF_TOKEN
# environment variable and is None when that variable is unset.
punctuator = pipeline(
    task="token-classification",
    model=MODEL_ID,
    token=os.environ.get("HF_TOKEN"),
    # "simple" makes the pipeline return grouped predictions carrying an
    # "entity_group" key, which `restore_punctuation` reads.
    aggregation_strategy="simple",
)

def restore_punctuation(text: str) -> str:
    """Insert predicted punctuation marks into unpunctuated text.

    Runs the module-level ``punctuator`` pipeline on ``text`` and, for every
    prediction whose ``entity_group`` label has an entry in
    ``LABEL_TO_PUNCT``, inserts the mapped mark immediately after that
    prediction's ``end`` character offset.

    Args:
        text: Raw input text. ``None``, empty and whitespace-only input are
            all treated as "nothing to punctuate".

    Returns:
        ``text`` with the punctuation marks inserted, or ``""`` when there is
        no usable input.
    """
    # Guard against None as well as blank strings so the UI callback never
    # fails with AttributeError on a missing value.
    if not text or not text.strip():
        return ""

    insertions = []
    for pred in punctuator(text):
        # "O" and any other unmapped label have no entry, so .get() yields
        # None and the prediction is skipped.
        mark = LABEL_TO_PUNCT.get(pred["entity_group"])
        end = pred.get("end")
        # `end` can be absent/None when the tokenizer provides no character
        # offsets; there is then no position to insert at.
        if not mark or end is None:
            continue
        insertions.append((end, mark))

    # Sort by position so the result does not depend on the order in which
    # the pipeline happens to return predictions (stable for equal offsets).
    insertions.sort(key=lambda item: item[0])

    # Build the output in one pass over the original text instead of
    # re-slicing a growing string for every inserted mark.
    pieces = []
    cursor = 0
    for end, mark in insertions:
        pieces.append(text[cursor:end])
        pieces.append(mark)
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces)

# Gradio UI: one multi-line textbox in, plain text out, with
# `restore_punctuation` as the callback.
demo = gr.Interface(
    restore_punctuation,
    gr.Textbox(placeholder="বাংলা টেক্সট লিখুন (যতিচিহ্ন ছাড়া)", lines=4),
    "text",
    description="sagor-bert-base based Bangla punctuation restoration",
    title="Bangla Punctuation Restoration",
)

# Launch the interface as soon as this module is executed.
demo.launch()