""" Simple inference examples for English to Kannada translation Quick start guide for using the model """ import torch from tokenizers import Tokenizer from main import Transformer, greedy_decode def simple_translate(english_sentence): """ Translate a single English sentence to Kannada Args: english_sentence: English text to translate Returns: Kannada translation """ # Setup device device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Load model checkpoint = torch.load('best_model.pt', map_location=device) vocab_info = checkpoint['vocab_info'] # Initialize model model = Transformer( d_model=384, ffn_hidden=1536, num_heads=6, drop_prob=0.1, num_layers=4, max_sequence_length=75, src_vocab_size=vocab_info['source_vocab_size'], tgt_vocab_size=vocab_info['target_vocab_size'] ) # Load weights model.load_state_dict(checkpoint['model_state_dict']) model.to(device) model.eval() # Load tokenizers src_tokenizer = Tokenizer.from_file('source_tokenizer.json') tgt_tokenizer = Tokenizer.from_file('target_tokenizer.json') # Translate translation = greedy_decode( model=model, src_sentence=english_sentence, source_tokenizer=src_tokenizer, target_tokenizer=tgt_tokenizer, vocab_info=vocab_info, device=device, max_length=75 ) return translation class TranslationPipeline: """ Reusable translation pipeline for multiple translations Loads model once and reuses it """ def __init__(self, model_path='best_model.pt', src_tokenizer_path='source_tokenizer.json', tgt_tokenizer_path='target_tokenizer.json'): """ Initialize the translation pipeline Args: model_path: Path to model checkpoint src_tokenizer_path: Path to source tokenizer tgt_tokenizer_path: Path to target tokenizer """ self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') print(f"Initializing translation pipeline on {self.device}...") # Load model checkpoint = torch.load(model_path, map_location=self.device) self.vocab_info = checkpoint['vocab_info'] self.model = Transformer( d_model=384, ffn_hidden=1536, num_heads=6, drop_prob=0.1, num_layers=4, max_sequence_length=75, src_vocab_size=self.vocab_info['source_vocab_size'], tgt_vocab_size=self.vocab_info['target_vocab_size'] ) self.model.load_state_dict(checkpoint['model_state_dict']) self.model.to(self.device) self.model.eval() # Load tokenizers self.src_tokenizer = Tokenizer.from_file(src_tokenizer_path) self.tgt_tokenizer = Tokenizer.from_file(tgt_tokenizer_path) print("Pipeline ready!") def translate(self, english_text): """ Translate English text to Kannada Args: english_text: English sentence or text Returns: Kannada translation """ return greedy_decode( model=self.model, src_sentence=english_text, source_tokenizer=self.src_tokenizer, target_tokenizer=self.tgt_tokenizer, vocab_info=self.vocab_info, device=self.device, max_length=75 ) def translate_batch(self, english_texts): """ Translate multiple English texts Args: english_texts: List of English sentences Returns: List of Kannada translations """ return [self.translate(text) for text in english_texts] # Example 1: Simple one-time translation def example_1_simple(): """Example: Translate a single sentence""" print("Example 1: Simple Translation") print("-" * 50) sentence = "Good morning, have a nice day!" translation = simple_translate(sentence) print(f"English: {sentence}") print(f"Kannada: {translation}") print() # Example 2: Using the pipeline for multiple translations def example_2_pipeline(): """Example: Translate multiple sentences efficiently""" print("Example 2: Pipeline Translation") print("-" * 50) # Initialize pipeline once pipeline = TranslationPipeline() # Translate multiple sentences sentences = [ "Hello world!", "How are you today?", "What is your name?", "I am learning Kannada.", "Thank you for your help." ] print("Translating sentences:\n") for eng in sentences: kan = pipeline.translate(eng) print(f" EN: {eng}") print(f" KN: {kan}") print() # Example 3: Batch translation def example_3_batch(): """Example: Batch translation""" print("Example 3: Batch Translation") print("-" * 50) pipeline = TranslationPipeline() sentences = [ "The weather is beautiful.", "Where is the nearest hospital?", "Can you help me please?", "I love this city.", "See you tomorrow!" ] translations = pipeline.translate_batch(sentences) print("Batch translation results:\n") for eng, kan in zip(sentences, translations): print(f"EN: {eng}") print(f"KN: {kan}") print() # Example 4: Custom usage with error handling def example_4_with_error_handling(): """Example: Translation with error handling""" print("Example 4: Translation with Error Handling") print("-" * 50) try: pipeline = TranslationPipeline() test_cases = [ "Hello!", "", # Empty string "This is a very long sentence that might exceed the maximum token length and we need to see how the model handles it when processing.", "Short.", ] for text in test_cases: try: if not text.strip(): print("Skipping empty input") continue translation = pipeline.translate(text) print(f"✓ EN: {text}") print(f" KN: {translation}") print() except Exception as e: print(f"✗ Error translating '{text}': {e}") print() except Exception as e: print(f"Failed to initialize pipeline: {e}") if __name__ == "__main__": print("="*70) print("English to Kannada Translation - Inference Examples") print("="*70) print() # Run examples example_1_simple() print("\n" + "="*70 + "\n") example_2_pipeline() print("\n" + "="*70 + "\n") example_3_batch() print("\n" + "="*70 + "\n") example_4_with_error_handling() print("\n" + "="*70) print("All examples completed!")