Spaces:

KavyaBansal
/

EnglishHindiTransliteration

Build error

KavyaBansal

Initial commit - English to Hindi Transliteration

8e00d1b 11 months ago

1.95 kB

	import torch
	import gradio as gr
	from transformers import logging

	# Suppress transformers warnings
	logging.set_verbosity_error()

	# Import the model and tokenizer classes
	from transliterator import EnglishHindiTransliterator, CharacterTokenizer

	# Run on the GPU when torch can see one; otherwise fall back to the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# One tokenizer per side of the transliteration: English source, Hindi target.
	hindi_tokenizer = CharacterTokenizer(is_hindi=True)
	eng_tokenizer = CharacterTokenizer(is_hindi=False)

	# Build the network. Input/output dimensions are the tokenizer lengths; the
	# remaining hyperparameters have to agree with the saved checkpoint, because
	# load_state_dict() below is strict about parameter names and shapes.
	model = EnglishHindiTransliterator(
	    input_dim=len(eng_tokenizer),
	    output_dim=len(hindi_tokenizer),
	    emb_dim=256,
	    hid_dim=512,
	    n_layers=3,
	    dropout=0.3,
	).to(device)

	# Restore the trained weights onto the selected device.
	# NOTE(review): torch.load unpickles the file; only load checkpoints from a
	# trusted source (consider weights_only=True if the checkpoint allows it).
	checkpoint = torch.load("best_transliteration_model.pt", map_location=device)
	model.load_state_dict(checkpoint["model_state_dict"])

	# Inference only: put the module in evaluation mode.
	model.eval()

	# Define transliteration function
	def transliterate_text(text, max_len=50):
	    """Transliterate English text to Hindi using greedy decoding.

	    The input is encoded with ``eng_tokenizer``. The target sequence starts
	    with the Hindi start-of-sequence token and is extended one token at a
	    time with the model's highest-scoring prediction for the last position,
	    until the end-of-sequence token is produced or ``max_len`` tokens have
	    been generated.

	    Args:
	        text: English input string (the value of the Gradio textbox).
	        max_len: Maximum number of tokens to generate. Defaults to 50, the
	            limit that used to be hard-coded.

	    Returns:
	        The result of ``hindi_tokenizer.decode`` applied to the generated
	        token ids (the ids handed over start with the start-of-sequence
	        id). An empty string is returned when ``text`` is ``None``, empty
	        or whitespace-only.
	    """
	    # Nothing to transliterate: do not feed an empty sequence to the model.
	    if text is None or not text.strip():
	        return ""

	    # Loop-invariant special-token ids, looked up once.
	    sos_id = hindi_tokenizer.char2idx[hindi_tokenizer.sos_token]
	    eos_id = hindi_tokenizer.char2idx[hindi_tokenizer.eos_token]

	    with torch.no_grad():
	        # unsqueeze(0) adds a leading axis of size 1 in front of the ids.
	        src_tensor = (
	            torch.tensor(eng_tokenizer.encode(text), dtype=torch.long)
	            .unsqueeze(0)
	            .to(device)
	        )
	        tgt_tensor = torch.tensor([sos_id], dtype=torch.long).unsqueeze(0).to(device)

	        for _ in range(max_len):
	            output = model(src_tensor, tgt_tensor)
	            # argmax over dim 2 (presumably the vocabulary axis — confirm
	            # against the model), then keep only the newest position.
	            pred_token = output.argmax(2)[:, -1]
	            tgt_tensor = torch.cat([tgt_tensor, pred_token.unsqueeze(1)], dim=1)

	            if pred_token.item() == eos_id:
	                break

	    # squeeze(0) drops only the leading axis, so the result stays 1-D even
	    # if a single token is present.
	    return hindi_tokenizer.decode(tgt_tensor.squeeze(0).cpu().numpy())

	# Gradio UI: a single text input wired to transliterate_text, with a single
	# text output showing the result.
	interface = gr.Interface(
	    title="English to Hindi Transliteration",
	    description="Enter an English word and get its Hindi transliteration using a deep learning model.",
	    fn=transliterate_text,
	    inputs=gr.Textbox(label="Enter English Word"),
	    outputs=gr.Textbox(label="Hindi Transliteration"),
	)

	# Start the web app only when this file is executed as a script.
	if __name__ == "__main__":
	    interface.launch()