# Captured from a Spaces file view (status shown: Runtime error; file size: 1,228 bytes).
import os
import gradio as gr
from transformers import pipeline
# Identifier of the token-classification checkpoint handed to the pipeline.
MODEL_ID = "Badhon/Bangla_punctuation_restore"

# Maps a predicted label to the punctuation mark that is inserted after the
# labelled span.
# NOTE(review): the label names are assumed to match the model's label set —
# confirm against the checkpoint's config.
LABEL_TO_PUNCT = {
    # Bangla writes the comma as the ASCII "," — the previous value was the
    # Arabic comma (U+060C), which is the wrong script for Bangla output.
    "COMMA": ",",
    "DARI": "।",
    "QUESTION": "?",
    "EXCLAMATION": "!",
    "SEMICOLON": ";",
    "COLON": ":",
    "HYPHEN": "-",
}
# Token-classification pipeline for MODEL_ID, built once at import time and
# reused for every request. The access token is read from the HF_TOKEN
# environment variable and is None when that variable is unset.
punctuator = pipeline(
    task="token-classification",
    model=MODEL_ID,
    aggregation_strategy="simple",
    token=os.environ.get("HF_TOKEN"),
)
def restore_punctuation(text: str) -> str:
    """Insert predicted punctuation marks into *text*.

    The text is run through the module-level ``punctuator`` pipeline. For
    every prediction whose ``entity_group`` has an entry in
    ``LABEL_TO_PUNCT``, the mapped mark is inserted immediately after the
    predicted span (at its ``end`` character offset).

    Args:
        text: The unpunctuated input text. ``None`` is tolerated and treated
            like empty input.

    Returns:
        The text with punctuation inserted, or ``""`` when the input is
        ``None``, empty, or whitespace-only.
    """
    # Guard against None as well as blank input so the UI callback never
    # fails on ``None.strip()``.
    if text is None or not text.strip():
        return ""

    # Collect (offset, mark) pairs in original-text coordinates. Labels that
    # have no mapping (including "O") yield None from .get() and are skipped.
    insertions = []
    for pred in punctuator(text):
        mark = LABEL_TO_PUNCT.get(pred["entity_group"])
        end = pred.get("end")
        # NOTE(review): ``end`` appears to be None when the tokenizer cannot
        # report character offsets; such predictions cannot be placed.
        if not mark or end is None:
            continue
        insertions.append((end, mark))

    # Stable sort by offset: placement no longer depends on the order in
    # which predictions arrive, and ties keep their original relative order.
    insertions.sort(key=lambda item: item[0])

    # Stitch the result in a single pass instead of rebuilding the whole
    # string (and tracking a running offset) for every insertion.
    pieces = []
    cursor = 0
    for end, mark in insertions:
        pieces.append(text[cursor:end])
        pieces.append(mark)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
# Four-line text box used as the single input of the interface.
_input_box = gr.Textbox(
    lines=4,
    placeholder="বাংলা টেক্সট লিখুন (যতিচিহ্ন ছাড়া)",
)

# Gradio interface wiring the text box to restore_punctuation, with plain
# text output.
demo = gr.Interface(
    fn=restore_punctuation,
    inputs=_input_box,
    outputs="text",
    title="Bangla Punctuation Restoration",
    description="sagor-bert-base based Bangla punctuation restoration",
)

# Start the app as soon as the module is executed.
demo.launch()
|