# Badhon's picture
# Update app.py
# ea715a3 verified
import os
import gradio as gr
from transformers import pipeline
# Hugging Face Hub id of the checkpoint handed to `pipeline(...)` below.
MODEL_ID = "Badhon/Bangla_punctuation_restore"

# Maps a predicted label (the pipeline's "entity_group") to the punctuation
# mark that is inserted right after the tagged span. Labels that are missing
# from this table are ignored by `restore_punctuation`.
# NOTE(review): the label names are assumed to match the model's id2label
# config — confirm against the checkpoint.
LABEL_TO_PUNCT = {
    # Bangla uses the ordinary comma (U+002C), not the Arabic comma (U+060C).
    "COMMA": ",",
    "DARI": "।",
    "QUESTION": "?",
    "EXCLAMATION": "!",
    "SEMICOLON": ";",
    "COLON": ":",
    "HYPHEN": "-",
}
# Build the token-classification pipeline once, at import time, so every
# request reuses the same loaded model.
# The access token is read from the HF_TOKEN environment variable and is
# None when that variable is unset.
punctuator = pipeline(
    task="token-classification",
    model=MODEL_ID,
    token=os.environ.get("HF_TOKEN"),
    # "simple" makes the pipeline return grouped predictions keyed by
    # "entity_group" instead of one entry per token.
    aggregation_strategy="simple",
)
def restore_punctuation(text: str) -> str:
    """Return *text* with the model's predicted punctuation marks inserted.

    The module-level ``punctuator`` pipeline is run on the raw text. For
    each prediction whose ``entity_group`` has an entry in
    ``LABEL_TO_PUNCT``, the mapped mark is inserted at the prediction's
    ``end`` offset, which is used as an index into *text*.

    Args:
        text: Unpunctuated input text. ``None`` or whitespace-only input
            is treated as empty.

    Returns:
        The input with punctuation inserted, or ``""`` for empty input.
    """
    # Defensive: a UI/API caller may hand over None instead of a string.
    if not text or not text.strip():
        return ""

    # Collect (offset, mark) pairs first so insertion order no longer
    # depends on the order in which the pipeline reports predictions.
    insertions = []
    for pred in punctuator(text):
        # Unknown labels (including "O") have no entry and are skipped.
        punct = LABEL_TO_PUNCT.get(pred["entity_group"])
        end = pred.get("end")
        # `end` is None when the tokenizer provides no character offsets.
        if not punct or end is None:
            continue
        insertions.append((end, punct))
    # Stable sort: marks sharing an offset keep their prediction order.
    insertions.sort(key=lambda item: item[0])

    # Stitch the result from slices of the original text in one pass rather
    # than re-copying the whole string for every inserted mark.
    pieces = []
    cursor = 0
    for end, punct in insertions:
        pieces.append(text[cursor:end])
        pieces.append(punct)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
# Single-textbox web UI: the submitted text goes through
# `restore_punctuation` and the returned string is shown as plain text.
demo = gr.Interface(
    title="Bangla Punctuation Restoration",
    description="sagor-bert-base based Bangla punctuation restoration",
    fn=restore_punctuation,
    inputs=gr.Textbox(
        placeholder="বাংলা টেক্সট লিখুন (যতিচিহ্ন ছাড়া)",
        lines=4,
    ),
    outputs="text",
)

# Start the app as soon as this module is executed.
demo.launch()