"""Gradio demo that restores punctuation in unpunctuated Bangla text.

A Hugging Face token-classification pipeline tags spans of the input with
punctuation labels; the matching punctuation mark is then inserted right
after each tagged span.
"""

import os

import gradio as gr
from transformers import pipeline

MODEL_ID = "Badhon/Bangla_punctuation_restore"

# Maps the label predicted for a span to the mark inserted after that span.
LABEL_TO_PUNCT = {
    # Bangla uses the ASCII comma (U+002C), not the Arabic comma (U+060C).
    "COMMA": ",",
    "DARI": "।",
    "QUESTION": "?",
    "EXCLAMATION": "!",
    "SEMICOLON": ";",
    "COLON": ":",
    "HYPHEN": "-",
}

# Built once at import time so every request reuses the loaded model.
# HF_TOKEN is optional: os.getenv returns None when it is not set.
punctuator = pipeline(
    "token-classification",
    model=MODEL_ID,
    aggregation_strategy="simple",
    token=os.getenv("HF_TOKEN"),
)


def _insert_punctuation(text, predictions):
    """Return *text* with a mark inserted after every labelled span.

    Args:
        text: The original, unpunctuated input string.
        predictions: Iterable of dicts carrying an ``entity_group`` label and
            an ``end`` character offset into *text*.

    Returns:
        The text with the mapped punctuation spliced in. Predictions whose
        label has no entry in ``LABEL_TO_PUNCT`` (including ``"O"``) or that
        carry no ``end`` offset are ignored.
    """
    insertions = []
    for pred in predictions:
        mark = LABEL_TO_PUNCT.get(pred.get("entity_group"))
        end = pred.get("end")
        # Offsets may be absent (None) when the tokenizer cannot supply them.
        if not mark or end is None:
            continue
        insertions.append((end, mark))

    # sorted() is stable, so marks that share an offset keep their
    # prediction order; sorting guards against out-of-order predictions.
    insertions.sort(key=lambda item: item[0])

    pieces = []
    cursor = 0
    for end, mark in insertions:
        # Clamp so a malformed offset can never move backwards or run past
        # the end of the text.
        end = min(max(end, cursor), len(text))
        pieces.append(text[cursor:end])
        pieces.append(mark)
        cursor = end
    pieces.append(text[cursor:])
    # Assemble once instead of re-slicing the whole string per insertion.
    return "".join(pieces)


def restore_punctuation(text: str) -> str:
    """Restore punctuation in a piece of Bangla text.

    Args:
        text: Unpunctuated Bangla text as entered in the UI.

    Returns:
        The text with predicted punctuation inserted, or an empty string when
        the input is ``None``, empty, or only whitespace.
    """
    if not text or not text.strip():
        return ""
    return _insert_punctuation(text, punctuator(text))


demo = gr.Interface(
    fn=restore_punctuation,
    inputs=gr.Textbox(
        lines=4,
        placeholder="বাংলা টেক্সট লিখুন (যতিচিহ্ন ছাড়া)",
    ),
    outputs="text",
    title="Bangla Punctuation Restoration",
    description="sagor-bert-base based Bangla punctuation restoration",
)

# Start the web UI as soon as the script is executed.
demo.launch()