Spaces:
Sleeping
Sleeping
import torch
from functools import partial

# Patch torch.load BEFORE fairseq is imported, so that later torch.load calls
# default to weights_only=False. A caller that passes weights_only explicitly
# still overrides this default (keyword arguments win over partial's).
# NOTE(review): presumably needed because recent PyTorch releases default to
# weights_only=True, which the fairseq checkpoint cannot satisfy — confirm.
# SECURITY: weights_only=False performs full unpickling, which can execute
# arbitrary code embedded in a checkpoint. Only load checkpoint files you trust.
original_torch_load = torch.load
torch.load = partial(original_torch_load, weights_only=False)

# Now rest of imports
import gradio as gr
import unicodedata
from fairseq.models.transformer import TransformerModel
# ── Load model once at startup ──────────────────────────────────────────────
# All paths are relative to the current working directory.
# NOTE(review): "en" / "ne" presumably stand for the romanized source side and
# the Devanagari target side of the transliteration pair — confirm.
model = TransformerModel.from_pretrained(
    model_name_or_path=".",
    checkpoint_file="fairseq_3lkh_best.pt",
    data_name_or_path=".",  # looks for dict.en.txt / dict.ne.txt here
    task="translation_multi_simple_epoch",
    source_lang="en",
    target_lang="ne",
    lang_dict="lang_list.txt",
    lang_pairs="en-ne",
    beam=5,
)
# Put the model in evaluation mode; this file only calls model.translate().
model.eval()
# ── Inference helpers ───────────────────────────────────────────────────────
def transliterate_word(word):
    """Transliterate a single romanized word to Devanagari.

    The word is passed to the module-level ``model`` as space-separated
    characters, and the space-separated prediction is joined back into a
    single string.

    Args:
        word: Romanized word; leading and trailing whitespace is ignored.

    Returns:
        The model's prediction with all whitespace removed and NFC
        normalization applied, or ``""`` when ``word`` is empty or consists
        only of whitespace (the model is not called in that case).
    """
    stripped = word.strip()
    if not stripped:
        # Nothing to transliterate; avoid sending an empty source to the model.
        return ""
    # A str iterates character by character, so no intermediate list is needed.
    char_separated = " ".join(stripped)
    prediction = model.translate(char_separated)
    # Join space-separated Devanagari chars back into a word.
    # split() with no arguments already ignores leading/trailing whitespace.
    result = "".join(prediction.split())
    return unicodedata.normalize("NFC", result)
def transliterate_sentence(sentence):
    """Transliterate a romanized sentence word by word.

    The sentence is split on whitespace, each token is separated into
    (prefix, core, suffix) by ``extract_punctuation``, the core is passed to
    ``transliterate_word``, and the pieces are reassembled. Tokens without an
    alphabetic core are kept unchanged.

    Args:
        sentence: Input text. ``None``, empty, and whitespace-only input are
            all treated as "nothing to do".

    Returns:
        The transliterated tokens joined by single spaces. Any run of
        whitespace in the input (including newlines) becomes one space.
        Returns ``""`` for ``None``, empty, or whitespace-only input.
    """
    if not sentence or not sentence.strip():
        return ""

    transliterated = []
    # split() with no arguments already discards leading/trailing whitespace.
    for word in sentence.split():
        # Separate leading/trailing punctuation from the word
        prefix, core, suffix = extract_punctuation(word)
        if core:
            transliterated.append(prefix + transliterate_word(core) + suffix)
        else:
            transliterated.append(word)  # punctuation-only token, keep as is
    return " ".join(transliterated)
def extract_punctuation(word):
    """Split a token into its non-alphabetic edges and its core.

    For example ``'ghar,'`` becomes ``('', 'ghar', ',')``, so punctuation is
    not fed into the model. "Punctuation" here means any character for which
    ``str.isalpha()`` is false, so digits and symbols at the edges are
    stripped as well. Non-alphabetic characters inside the core (for example
    the hyphen in ``'ghar-ma'``) are left in place.

    Args:
        word: A single whitespace-free token.

    Returns:
        A ``(prefix, core, suffix)`` tuple whose concatenation equals
        ``word``. For a token with no alphabetic character the whole token is
        returned as ``prefix`` and both ``core`` and ``suffix`` are ``""``.
    """
    length = len(word)

    # Index of the first alphabetic character (== length if there is none).
    start = 0
    while start < length and not word[start].isalpha():
        start += 1

    # One past the last alphabetic character; never moves left of `start`,
    # so an all-punctuation token ends up entirely in the prefix.
    end = length
    while end > start and not word[end - 1].isalpha():
        end -= 1

    return word[:start], word[start:end], word[end:]
# ── Gradio UI ───────────────────────────────────────────────────────────────
def run(sentence):
    """Gradio callback: transliterate ``sentence`` and never raise.

    Args:
        sentence: Text taken from the input textbox.

    Returns:
        The result of ``transliterate_sentence(sentence)``, or the string
        ``"Error: <message>"`` if any exception is raised while computing it.
    """
    import logging

    try:
        return transliterate_sentence(sentence)
    except Exception as e:
        # UI boundary: show the message to the user, but also record the full
        # traceback so failures are not silently lost.
        logging.getLogger(__name__).exception("Transliteration failed")
        return f"Error: {e}"
# Single-function UI: one multi-line textbox in, one multi-line textbox out,
# with `run` as the callback.
iface = gr.Interface(
    fn=run,
    inputs=gr.Textbox(
        lines=3,
        placeholder="Type romanized Nepali sentence here... e.g. ma ghar janxu",
        label="Romanized Nepali (Input)",
    ),
    outputs=gr.Textbox(
        lines=3,
        label="Devanagari (Output)",
    ),
    title="Nepali Transliteration",
    description="Type a sentence in romanized Nepali and get the Devanagari output.",
    examples=[
        ["ma ghar janxu"],
        ["aama ra baa ghar ma xan"],
        ["nepali basa sajilo xa"],
    ],
)

# Start the web server at import/run time, listening on all interfaces.
# NOTE(review): 0.0.0.0:7860 is presumably what the hosting container
# expects — confirm before changing.
iface.launch(server_name="0.0.0.0", server_port=7860)