# KavyaBansal
# Initial commit - English to Hindi Transliteration
# 8e00d1b
import torch
import gradio as gr
from transformers import logging
# Suppress transformers warnings
logging.set_verbosity_error()
# Import the model and tokenizer classes
from transliterator import EnglishHindiTransliterator, CharacterTokenizer
# Character-level tokenizers, one per script. Their lengths set the model's
# input and output dimensions below.
eng_tokenizer = CharacterTokenizer(is_hindi=False)
hindi_tokenizer = CharacterTokenizer(is_hindi=True)

# Prefer the GPU when torch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network. NOTE(review): these hyperparameters presumably mirror
# the ones used to train the checkpoint loaded below - confirm before
# changing any of them.
model = EnglishHindiTransliterator(
    input_dim=len(eng_tokenizer),
    output_dim=len(hindi_tokenizer),
    emb_dim=256,
    hid_dim=512,
    n_layers=3,
    dropout=0.3,
)
model = model.to(device)

# Restore the trained weights. map_location remaps stored tensors onto the
# device chosen above.
# NOTE: torch.load unpickles the file, so only load checkpoints that come
# from a trusted source.
checkpoint = torch.load("best_transliteration_model.pt", map_location=device)
model.load_state_dict(checkpoint["model_state_dict"])

# Inference only: switch the model to evaluation mode.
model.eval()
# Define transliteration function
def transliterate_text(text, *, max_len=50):
    """Transliterate English text into Hindi by greedy decoding.

    The input is encoded with ``eng_tokenizer`` and passed to ``model``
    together with a target sequence that starts as the single Hindi
    start-of-sequence token. On every step the highest-scoring token at the
    last target position is appended to the target; decoding stops as soon
    as the end-of-sequence token is produced, or after ``max_len`` steps.

    Args:
        text: English text to transliterate.
        max_len: Maximum number of decoding steps, i.e. tokens generated
            after the start token. Keyword-only; defaults to 50.

    Returns:
        Whatever ``hindi_tokenizer.decode`` yields for the generated token
        ids, or ``""`` when ``text`` is ``None``, empty, or whitespace only.
    """
    # Nothing to transliterate (e.g. an empty textbox): skip the model.
    if not text or not text.strip():
        return ""

    # Look the special-token ids up once instead of on every decode step.
    sos_idx = hindi_tokenizer.char2idx[hindi_tokenizer.sos_token]
    eos_idx = hindi_tokenizer.char2idx[hindi_tokenizer.eos_token]

    with torch.no_grad():
        # unsqueeze(0) adds a leading batch dimension of size 1.
        src_tensor = torch.tensor(eng_tokenizer.encode(text)).unsqueeze(0).to(device)
        tgt_tensor = torch.tensor([sos_idx]).unsqueeze(0).to(device)

        for _ in range(max_len):
            output = model(src_tensor, tgt_tensor)
            # Greedy choice: argmax over dim 2, keeping only the prediction
            # for the last target position.
            pred_token = output.argmax(2)[:, -1]
            tgt_tensor = torch.cat([tgt_tensor, pred_token.unsqueeze(1)], dim=1)
            if pred_token.item() == eos_idx:
                break

    # squeeze(0) removes only the batch dimension, so decode always receives
    # a 1-D array regardless of how many tokens were generated.
    return hindi_tokenizer.decode(tgt_tensor.squeeze(0).cpu().numpy())
# Gradio UI: one textbox in, one textbox out, wired to transliterate_text.
_input_box = gr.Textbox(label="Enter English Word")
_output_box = gr.Textbox(label="Hindi Transliteration")

interface = gr.Interface(
    fn=transliterate_text,
    inputs=_input_box,
    outputs=_output_box,
    title="English to Hindi Transliteration",
    description="Enter an English word and get its Hindi transliteration using a deep learning model.",
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    interface.launch()