# fairseqonly / app.py
# Sagar32's picture
# Update app.py
# a09935f verified
import torch
from functools import partial
# Replace torch.load process-wide with a wrapper that defaults to
# weights_only=False, keeping the original callable reachable under
# `original_torch_load`. This is done ahead of the remaining imports.
# NOTE(review): presumably needed because the fairseq checkpoint pickles
# non-tensor objects that a weights_only=True load would reject — confirm.
# SECURITY: weights_only=False performs a full unpickle, which can execute
# arbitrary code; only load checkpoint files from a trusted source.
original_torch_load = torch.load
torch.load = partial(original_torch_load, weights_only=False)
# Now rest of imports
import gradio as gr
import unicodedata
from fairseq.models.transformer import TransformerModel
# ── Load model once at startup ──────────────────────────────────────────────
def _load_model():
    """Load the en-ne fairseq transformer from the working directory in eval mode."""
    loaded = TransformerModel.from_pretrained(
        model_name_or_path=".",
        checkpoint_file="fairseq_3lkh_best.pt",
        data_name_or_path=".",  # looks for dict.en.txt / dict.ne.txt here
        task="translation_multi_simple_epoch",
        source_lang="en",
        target_lang="ne",
        lang_dict="lang_list.txt",
        lang_pairs="en-ne",
        beam=5,
    )
    loaded.eval()
    return loaded


# Single shared instance used by the inference helpers.
model = _load_model()
# ── Inference helpers ───────────────────────────────────────────────────────
def transliterate_word(word):
    """Transliterate a single romanized word to Devanagari.

    The word is split into space-separated characters before being passed to
    ``model.translate``; the whitespace-separated pieces of the prediction
    are then concatenated and NFC-normalized.

    Args:
        word: Romanized word; surrounding whitespace is ignored.

    Returns:
        The NFC-normalized transliteration, or ``""`` when ``word`` is empty
        or whitespace-only (the model is not invoked in that case).
    """
    stripped = word.strip()
    if not stripped:
        # Nothing to transliterate: avoid decoding from an empty source.
        return ""
    char_separated = " ".join(stripped)
    prediction = model.translate(char_separated)
    # Join space-separated Devanagari chars back into a word
    result = "".join(prediction.split())
    return unicodedata.normalize("NFC", result)
def transliterate_sentence(sentence):
    """
    Transliterate a whitespace-separated sentence token by token.

    Each token is split into leading punctuation, an alphabetic core and
    trailing punctuation; only the core is transliterated and the punctuation
    is re-attached around it. Tokens without a core are passed through as is.
    The results are joined with single spaces; blank input yields "".
    """
    stripped = sentence.strip()
    if not stripped:
        return ""

    pieces = []
    for token in stripped.split():
        lead, stem, trail = extract_punctuation(token)
        if not stem:
            # Punctuation-only token: nothing for the model to do.
            pieces.append(token)
            continue
        pieces.append(lead + transliterate_word(stem) + trail)
    return " ".join(pieces)
def extract_punctuation(word):
    """
    Peel non-alphabetic characters off both ends of a token.

    Returns a (prefix, core, suffix) tuple, e.g. 'ghar,' -> ('', 'ghar', ','),
    so that punctuation is not fed into the model. A token without any
    alphabetic character comes back entirely in the prefix.
    """
    length = len(word)

    # Index of the first alphabetic character (or `length` if there is none).
    start = 0
    while start < length and not word[start].isalpha():
        start += 1

    # One past the last alphabetic character, never moving left of `start`.
    stop = length
    while stop > start and not word[stop - 1].isalpha():
        stop -= 1

    return word[:start], word[start:stop], word[stop:]
# ── Gradio UI ───────────────────────────────────────────────────────────────
def run(sentence):
    """
    Transliterate `sentence`, reporting any failure as text.

    Any exception raised during transliteration is caught and returned as an
    "Error: ..." string instead of propagating to the caller.
    """
    try:
        output = transliterate_sentence(sentence)
    except Exception as exc:
        output = f"Error: {str(exc)}"
    return output
# Input box for the romanized text.
_romanized_box = gr.Textbox(
    lines=3,
    placeholder="Type romanized Nepali sentence here... e.g. ma ghar janxu",
    label="Romanized Nepali (Input)",
)

# Output box for the transliterated text.
_devanagari_box = gr.Textbox(
    lines=3,
    label="Devanagari (Output)",
)

# Example sentences offered in the UI.
_sample_sentences = [
    ["ma ghar janxu"],
    ["aama ra baa ghar ma xan"],
    ["nepali basa sajilo xa"],
]

iface = gr.Interface(
    fn=run,
    inputs=_romanized_box,
    outputs=_devanagari_box,
    title="Nepali Transliteration",
    description="Type a sentence in romanized Nepali and get the Devanagari output.",
    examples=_sample_sentences,
)

# Listen on all interfaces, port 7860.
iface.launch(server_name="0.0.0.0", server_port=7860)