Spaces:

Sagar32
/

fairseqonly

Sleeping

App Files Files Community

Sagar32 committed on Mar 19

Commit

8848631

verified ·

1 Parent(s): c31cc7c

Create app.py

Browse files

Files changed (1) hide show

app.py +105 -0

app.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import gradio as gr
+import unicodedata
+from fairseq.models.transformer import TransformerModel
+# ── Load model once at startup ──────────────────────────────────────────────
+model = TransformerModel.from_pretrained(
+    model_dir=".",
+    checkpoint_file="fairseq_3lkh_best.pt",
+    data_name_or_path=".",        # looks for dict.en.txt / dict.ne.txt here
+    task="translation_multi_simple_epoch",
+    source_lang="en",
+    target_lang="ne",
+    lang_dict="lang_list.txt",
+    lang_pairs="en-ne",
+    beam=5,
+)
+model.eval()
+# ── Inference helpers ───────────────────────────────────────────────────────
+def transliterate_word(word):
+    """Transliterate a single romanized word to Devanagari."""
+    char_separated = " ".join(list(word.strip()))
+    prediction = model.translate(char_separated)
+    # Join space-separated Devanagari chars back into a word
+    result = "".join(prediction.strip().split())
+    return unicodedata.normalize("NFC", result)
+def transliterate_sentence(sentence):
+    """
+    Split sentence into words, transliterate each, then rejoin.
+    Preserves punctuation attached to words.
+    """
+    if not sentence.strip():
+        return ""
+    words = sentence.strip().split()
+    transliterated = []
+    for word in words:
+        # Separate leading/trailing punctuation from the word
+        prefix, core, suffix = extract_punctuation(word)
+        if core:
+            deva = transliterate_word(core)
+            transliterated.append(prefix + deva + suffix)
+        else:
+            transliterated.append(word)   # punctuation-only token, keep as is
+    return " ".join(transliterated)
+def extract_punctuation(word):
+    """
+    Split a token like 'ghar,' into ('', 'ghar', ',')
+    so punctuation is not fed into the model.
+    """
+    prefix = ""
+    suffix = ""
+    # Strip leading punctuation
+    while word and not word[0].isalpha():
+        prefix += word[0]
+        word = word[1:]
+    # Strip trailing punctuation
+    while word and not word[-1].isalpha():
+        suffix = word[-1] + suffix
+        word = word[:-1]
+    return prefix, word, suffix
+# ── Gradio UI ───────────────────────────────────────────────────────────────
+def run(sentence):
+    try:
+        return transliterate_sentence(sentence)
+    except Exception as e:
+        return f"Error: {str(e)}"
+iface = gr.Interface(
+    fn=run,
+    inputs=gr.Textbox(
+        lines=3,
+        placeholder="Type romanized Nepali sentence here... e.g. ma ghar janxu",
+        label="Romanized Nepali (Input)"
+    ),
+    outputs=gr.Textbox(
+        lines=3,
+        label="Devanagari (Output)"
+    ),
+    title="Nepali Transliteration",
+    description="Type a sentence in romanized Nepali and get the Devanagari output.",
+    examples=[
+        ["ma ghar janxu"],
+        ["aama ra baa ghar ma xan"],
+        ["nepali basa sajilo xa"],
+    ],
+    allow_flagging="never",
+)
+iface.launch()