Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

.ipynb_checkpoints/README-checkpoint.md +62 -0
README.md +62 -0
config.json +12 -0
modeling_transliterator.py +14 -0
pytorch_model.bin +3 -0
src_vocab.json +32 -0
tgt_vocab.json +76 -0

.ipynb_checkpoints/README-checkpoint.md ADDED Viewed

	@@ -0,0 +1,62 @@

+# 🅰️ Akshara-ML — Malayalam Transliteration Model
+**Akshara-ML** is a neural transliteration model that converts **Manglish (Romanized Malayalam)** into **Malayalam script**.
+Developed by **EnduraSolution**, in association with **Aksharakuppy**.
+🌐 https://aksharakuppy.com
+---
+[![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-yellow)](https://huggingface.co/endurasolution/akshara-ml)
+## ✨ Features
+- 🔤 Manglish → Malayalam transliteration
+- ⚡ Fast inference (greedy decoding)
+- 🎯 High accuracy (beam search decoding)
+- 🧠 Transformer-based architecture
+- 🇮🇳 Built specifically for the Malayalam language
+---
+## 🧪 Example
+| Manglish | Malayalam |
+|--------|----------|
+| namaskaram | നമസ്കാരം |
+| sugam aano | സുഖം ആണോ |
+| ente peru | എന്റെ പേര് |
+---
+## 🚀 Usage (Python)
+```python
+from model import build_model
+from train import load_checkpoint
+from dataset import load_vocab, get_inverse_vocab
+from config import Config
+import torch
+# Load the source and target vocabularies from the JSON files shipped with
+# the model, and build the inverse of the target vocabulary, which
+# transliterate() below uses to turn predicted ids back into characters.
+src_vocab = load_vocab("src_vocab.json")
+tgt_vocab = load_vocab("tgt_vocab.json")
+inv_vocab = get_inverse_vocab(tgt_vocab)
+# Build the model sized to the two vocabularies, load the released weights
+# from pytorch_model.bin into it, and call eval() before running inference.
+model = build_model(len(src_vocab), len(tgt_vocab))
+load_checkpoint("pytorch_model.bin", model)
+model.eval()
+def transliterate(text):
+    ids = [Config.SOS_IDX] + [src_vocab.get(c, Config.UNK_IDX) for c in text] + [Config.EOS_IDX]
+    src = torch.tensor([ids])
+    pred_ids = model.greedy_decode(src)
+    output = ""
+    for i in pred_ids:
+        if i == Config.EOS_IDX:
+            break
+        output += inv_vocab.get(i, "")
+    return output
+print(transliterate("namaskaram"))
+```

README.md ADDED Viewed

	@@ -0,0 +1,62 @@

+# 🅰️ Akshara-ML — Malayalam Transliteration Model
+**Akshara-ML** is a neural transliteration model that converts **Manglish (Romanized Malayalam)** into **Malayalam script**.
+Developed by **EnduraSolution**, in association with **Aksharakuppy**.
+🌐 https://aksharakuppy.com
+---
+[![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-yellow)](https://huggingface.co/endurasolution/akshara-ml)
+## ✨ Features
+- 🔤 Manglish → Malayalam transliteration
+- ⚡ Fast inference (greedy decoding)
+- 🎯 High accuracy (beam search decoding)
+- 🧠 Transformer-based architecture
+- 🇮🇳 Built specifically for the Malayalam language
+---
+## 🧪 Example
+| Manglish | Malayalam |
+|--------|----------|
+| namaskaram | നമസ്കാരം |
+| sugam aano | സുഖം ആണോ |
+| ente peru | എന്റെ പേര് |
+---
+## 🚀 Usage (Python)
+```python
+from model import build_model
+from train import load_checkpoint
+from dataset import load_vocab, get_inverse_vocab
+from config import Config
+import torch
+# Load the source and target vocabularies from the JSON files shipped with
+# the model, and build the inverse of the target vocabulary, which
+# transliterate() below uses to turn predicted ids back into characters.
+src_vocab = load_vocab("src_vocab.json")
+tgt_vocab = load_vocab("tgt_vocab.json")
+inv_vocab = get_inverse_vocab(tgt_vocab)
+# Build the model sized to the two vocabularies, load the released weights
+# from pytorch_model.bin into it, and call eval() before running inference.
+model = build_model(len(src_vocab), len(tgt_vocab))
+load_checkpoint("pytorch_model.bin", model)
+model.eval()
+def transliterate(text):
+    ids = [Config.SOS_IDX] + [src_vocab.get(c, Config.UNK_IDX) for c in text] + [Config.EOS_IDX]
+    src = torch.tensor([ids])
+    pred_ids = model.greedy_decode(src)
+    output = ""
+    for i in pred_ids:
+        if i == Config.EOS_IDX:
+            break
+        output += inv_vocab.get(i, "")
+    return output
+print(transliterate("namaskaram"))
+```

config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "model_type": "malayalam-transliteration-transformer",
+  "d_model": 512,
+  "nhead": 8,
+  "num_encoder_layers": 6,
+  "num_decoder_layers": 6,
+  "dim_feedforward": 2048,
+  "dropout": 0.1,
+  "src_vocab_size": 30,
+  "tgt_vocab_size": 74,
+  "max_position_embeddings": 1024
+}

modeling_transliterator.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import torch
+import torch.nn as nn
+from model import TransliterationTransformer
+class HFTransliterator(nn.Module):
+    def __init__(self, config):
+        super().__init__()
+        self.model = TransliterationTransformer(
+            config["src_vocab_size"],
+            config["tgt_vocab_size"]
+        )
+    def forward(self, src, tgt):
+        return self.model(src, tgt)

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:866a807590fcfdb9c6dc7290033f9cd9c087286adf843561bc2475b7075f1d09
+size 181196059

src_vocab.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "<pad>": 0,
+  "<sos>": 1,
+  "<eos>": 2,
+  "<unk>": 3,
+  "a": 4,
+  "n": 5,
+  "i": 6,
+  "u": 7,
+  "h": 8,
+  "k": 9,
+  "t": 10,
+  "l": 11,
+  "e": 12,
+  "m": 13,
+  "r": 14,
+  "y": 15,
+  "d": 16,
+  "o": 17,
+  "p": 18,
+  "s": 19,
+  "v": 20,
+  "c": 21,
+  "g": 22,
+  "b": 23,
+  "j": 24,
+  "z": 25,
+  "w": 26,
+  "f": 27,
+  "x": 28,
+  "q": 29
+}

tgt_vocab.json ADDED Viewed

	@@ -0,0 +1,76 @@

+{
+  "<pad>": 0,
+  "<sos>": 1,
+  "<eos>": 2,
+  "<unk>": 3,
+  "്": 4,
+  "ി": 5,
+  "ക": 6,
+  "ന": 7,
+  "ു": 8,
+  "ാ": 9,
+  "ത": 10,
+  "യ": 11,
+  "ട": 12,
+  "ര": 13,
+  "മ": 14,
+  "ല": 15,
+  "െ": 16,
+  "പ": 17,
+  "വ": 18,
+  "ണ": 19,
+  "ള": 20,
+  "ം": 21,
+  "റ": 22,
+  "സ": 23,
+  "ച": 24,
+  "ോ": 25,
+  "ങ": 26,
+  "േ": 27,
+  "ദ": 28,
+  "ീ": 29,
+  "ൂ": 30,
+  "ശ": 31,
+  "ഷ": 32,
+  "അ": 33,
+  "ർ": 34,
+  "ഗ": 35,
+  "ൊ": 36,
+  "ൽ": 37,
+  "ജ": 38,
+  "ബ": 39,
+  "ധ": 40,
+  "ഞ": 41,
+  "ഹ": 42,
+  "ഴ": 43,
+  "ഭ": 44,
+  "ൾ": 45,
+  "ൻ": 46,
+  "ആ": 47,
+  "ഡ": 48,
+  "ഇ": 49,
+  "ൈ": 50,
+  "ഥ": 51,
+  "ഉ": 52,
+  "ഫ": 53,
+  "എ": 54,
+  "ൃ": 55,
+  "ഖ": 56,
+  "ഒ": 57,
+  "ഘ": 58,
+  "ൌ": 59,
+  "ഓ": 60,
+  "ഠ": 61,
+  "ഏ": 62,
+  "ൺ": 63,
+  "ഈ": 64,
+  "ഊ": 65,
+  "ഐ": 66,
+  "ഛ": 67,
+  "ഔ": 68,
+  "ഢ": 69,
+  "ഃ": 70,
+  "ഋ": 71,
+  "ൗ": 72,
+  "ഝ": 73
+}