Sagar32 committed on
Commit
8848631
·
verified ·
1 Parent(s): c31cc7c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +105 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import unicodedata
3
+ from fairseq.models.transformer import TransformerModel
4
+
5
+ # ── Load model once at startup ──────────────────────────────────────────────
6
# The checkpoint directory is passed positionally. fairseq's
# ``from_pretrained`` names its first (required) parameter
# ``model_name_or_path``; a ``model_dir=`` keyword would not fill it and the
# call would fail before anything is loaded.
model = TransformerModel.from_pretrained(
    ".",
    checkpoint_file="fairseq_3lkh_best.pt",
    data_name_or_path=".",  # looks for dict.en.txt / dict.ne.txt here
    task="translation_multi_simple_epoch",
    source_lang="en",
    target_lang="ne",
    lang_dict="lang_list.txt",
    lang_pairs="en-ne",
    beam=5,
)
# Put the loaded model into evaluation mode before serving requests.
model.eval()
18
+
19
+
20
+ # ── Inference helpers ───────────────────────────────────────────────────────
21
+
22
def transliterate_word(word):
    """Convert one romanized word into its Devanagari spelling.

    The word is stripped of surrounding whitespace and handed to the
    module-level ``model`` as space-separated characters. The
    whitespace-separated pieces of the prediction are glued back together
    and the result is returned in Unicode NFC form.
    """
    # The model is fed one character per token, e.g. "ghar" -> "g h a r".
    spaced_chars = " ".join(word.strip())
    raw_output = model.translate(spaced_chars)
    # split() already discards leading/trailing whitespace.
    pieces = raw_output.split()
    return unicodedata.normalize("NFC", "".join(pieces))
29
+
30
+
31
def transliterate_sentence(sentence):
    """Transliterate a whitespace-separated sentence word by word.

    Each token is split into leading punctuation, a core word and trailing
    punctuation. Only the core is transliterated; the punctuation is
    re-attached unchanged. Tokens without any core are kept verbatim.
    Returns the converted tokens joined by single spaces, or an empty
    string when the input contains no tokens.
    """
    tokens = sentence.split()
    if not tokens:
        return ""

    def convert(token):
        lead, body, trail = extract_punctuation(token)
        if not body:
            # Punctuation-only token: nothing to send to the model.
            return token
        return lead + transliterate_word(body) + trail

    return " ".join(convert(token) for token in tokens)
52
+
53
+
54
def extract_punctuation(word):
    """Split a token into ``(prefix, core, suffix)``.

    ``prefix`` is the leading run of non-alphabetic characters, ``suffix``
    the trailing run, and ``core`` whatever lies between them, so that
    ``prefix + core + suffix == word``. For example ``'ghar,'`` becomes
    ``('', 'ghar', ',')``. A token with no alphabetic character at all is
    returned entirely as the prefix.
    """
    total = len(word)

    # Advance past the leading non-alphabetic characters.
    start = 0
    while start < total and not word[start].isalpha():
        start += 1

    # Walk back over the trailing ones, never crossing ``start``.
    end = total
    while end > start and not word[end - 1].isalpha():
        end -= 1

    return word[:start], word[start:end], word[end:]
73
+
74
+
75
+ # ── Gradio UI ───────────────────────────────────────────────────────────────
76
+
77
def run(sentence):
    """Gradio callback: transliterate ``sentence`` and never raise.

    Returns the transliterated text, or a string starting with ``"Error: "``
    followed by the exception message if transliteration fails.
    """
    try:
        output = transliterate_sentence(sentence)
    except Exception as exc:
        # Report the failure in the output box instead of raising.
        return "Error: " + str(exc)
    return output
82
+
83
+
84
# Build the input and output widgets first so the Interface call stays short.
_input_box = gr.Textbox(
    lines=3,
    placeholder="Type romanized Nepali sentence here... e.g. ma ghar janxu",
    label="Romanized Nepali (Input)",
)
_output_box = gr.Textbox(lines=3, label="Devanagari (Output)")

# Single-function interface: text in, text out, with a few clickable examples.
iface = gr.Interface(
    fn=run,
    inputs=_input_box,
    outputs=_output_box,
    title="Nepali Transliteration",
    description="Type a sentence in romanized Nepali and get the Devanagari output.",
    examples=[
        ["ma ghar janxu"],
        ["aama ra baa ghar ma xan"],
        ["nepali basa sajilo xa"],
    ],
    allow_flagging="never",
)

# Launched unconditionally at module level, as in the original script.
iface.launch()