yashwan2003
/

engtokantranslation

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2c40048d0ec6ab5898a45298e3e974d786054b4becafa92f44763a912d51341
 size 726511201

 version https://git-lfs.github.com/spec/v1
+oid sha256:faa1c740cfc878fcbbf4495d80e1648e108c990029ead1b5145014d06e2c8e0a
 size 726511201

main.py CHANGED Viewed

@@ -689,7 +689,7 @@ def greedy_decode(
     model = model.to(device)
     # Encode source sentence
-    src_ids = source_tokenizer.encode(src_sentence).ids
     src_ids = src_ids[:max_length]
     src_padded = src_ids + [vocab_info['src_pad_idx']] * (max_length - len(src_ids))
     src = torch.tensor([src_padded], dtype=torch.long).to(device)
@@ -749,30 +749,6 @@ def greedy_decode(
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-print(f"Using device: {device}")
-with open(f'dataset/train.en', 'r') as file:
-    english_texts = file.readlines()
-with open(f'dataset/train.kn', 'r') as file:
-    kannada_texts = file.readlines()
-english_texts= [sentence.rstrip('\n').lower() for sentence in english_texts]
-kannada_texts = [sentence.rstrip('\n') for sentence in kannada_texts]
-# Prepare data with OPTIMIZED settings for faster training
-# CRITICAL: Reduced vocab sizes from 64k/40k to 16k/12k for 3-5x speedup
-# The final linear layer size is vocab_size × d_model, so smaller vocab = much faster
-train_loader, val_loader, src_tok, tgt_tok, vocab_info = prepare_data(
-    english_texts,
-    kannada_texts,
-    source_vocab_size=50000,  # Reduced from 64000 - still captures most words
-    target_vocab_size=32000,  # Reduced from 40000 - 3x faster computation
-    max_length=75,  # Reduced from 100 - fewer tokens to process
-    batch_size=500  # Reduced from 300 - better GPU utilization
-)
 class TransformerTrainer:
     """
@@ -993,47 +969,73 @@ class TransformerTrainer:
         return self.train_losses, self.val_losses
-# Initialize model with optimized size
-# PERFORMANCE: Smaller model = faster training, often better generalization
-model = Transformer(
-    d_model=384,  # Reduced from 512 for faster computation
-    ffn_hidden=1536,  # Reduced from 2048 (4x d_model ratio maintained)
-    num_heads=6,  # Reduced from 8 (d_model must be divisible by num_heads)
-    drop_prob=0.1,
-    num_layers=4,  # Reduced from 6 - still effective for translation
-    max_sequence_length=75,  # Match the max_length from data prep
-    src_vocab_size=vocab_info['source_vocab_size'],
-    tgt_vocab_size=vocab_info['target_vocab_size']
-)
-# Initialize trainer with performance optimizations
-trainer = TransformerTrainer(
-    model=model,
-    train_loader=train_loader,
-    val_loader=val_loader,
-    vocab_info=vocab_info,
-    device=device,
-    learning_rate=0.0001,
-    use_amp=True,  # Enable mixed precision for 2-3x speedup
-    gradient_accumulation_steps=1  # Increase if you get OOM errors
-)
-# Train
-train_losses, val_losses = trainer.train(num_epochs=50, save_path='best_model.pt')
-# Inference example
-test_sentence = "Hello, how are you?"
-translation = greedy_decode(
-    model,
-    test_sentence,
-    src_tok,
-    tgt_tok,
-    vocab_info,
-    device=device  # Explicitly pass device
-)
-print(f"Source: {test_sentence}")
-print(f"Translation: {translation}")
-print("Training pipeline ready with fixed device handling!")

     model = model.to(device)
     # Encode source sentence
+    src_ids = source_tokenizer.encode(src_sentence)
     src_ids = src_ids[:max_length]
     src_padded = src_ids + [vocab_info['src_pad_idx']] * (max_length - len(src_ids))
     src = torch.tensor([src_padded], dtype=torch.long).to(device)
 class TransformerTrainer:
     """
         return self.train_losses, self.val_losses
+if __name__ == "__main__":
+    # Script entry point: load the parallel English/Kannada corpus, build the
+    # data loaders and the model, train, then translate one sample sentence.
+    # Guarded so that importing this module does not start a training run.
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    print(f"Using device: {device}")
+    # Decode the corpus explicitly instead of relying on the platform's
+    # locale encoding, which is not UTF-8 everywhere and would fail on or
+    # corrupt the Kannada text.
+    # NOTE(review): assumes both corpus files are stored as UTF-8 - confirm.
+    with open('dataset/train.en', 'r', encoding='utf-8') as file:
+        # English side is lower-cased; only the trailing newline is stripped.
+        english_texts = [sentence.rstrip('\n').lower() for sentence in file]
+    with open('dataset/train.kn', 'r', encoding='utf-8') as file:
+        kannada_texts = [sentence.rstrip('\n') for sentence in file]
+    # Build loaders, tokenizers and vocabulary metadata from the raw text.
+    # NOTE(review): earlier comments quoted vocab sizes of 16k/12k and a
+    # batch size "reduced from 300"; the values actually passed are below.
+    train_loader, val_loader, src_tok, tgt_tok, vocab_info = prepare_data(
+        english_texts,
+        kannada_texts,
+        source_vocab_size=50000,  # requested English vocabulary size
+        target_vocab_size=32000,  # requested Kannada vocabulary size
+        max_length=75,  # keep equal to max_sequence_length given to the model
+        batch_size=500  # sentence pairs per batch
+    )
+    # Model hyperparameters; vocabulary sizes come from prepare_data's
+    # vocab_info rather than from the requested sizes above.
+    model = Transformer(
+        d_model=384,
+        ffn_hidden=1536,  # 4 x d_model
+        num_heads=6,  # 384 / 6 = 64, so d_model divides evenly across heads
+        drop_prob=0.1,
+        num_layers=4,
+        max_sequence_length=75,  # same value as max_length in prepare_data
+        src_vocab_size=vocab_info['source_vocab_size'],
+        tgt_vocab_size=vocab_info['target_vocab_size']
+    )
+    # Trainer wiring: the same device object is reused for inference below.
+    trainer = TransformerTrainer(
+        model=model,
+        train_loader=train_loader,
+        val_loader=val_loader,
+        vocab_info=vocab_info,
+        device=device,
+        learning_rate=0.0001,
+        use_amp=True,  # presumably enables mixed-precision training - confirm
+        gradient_accumulation_steps=1  # original note: increase on OOM errors
+    )
+    # Train
+    train_losses, val_losses = trainer.train(num_epochs=50, save_path='best_model.pt')
+    # Inference example on a single sentence using the just-trained model
+    test_sentence = "Hello, how are you?"
+    translation = greedy_decode(
+        model,
+        test_sentence,
+        src_tok,
+        tgt_tok,
+        vocab_info,
+        device=device  # pass the training device explicitly
+    )
+    print(f"Source: {test_sentence}")
+    print(f"Translation: {translation}")
+    print("Training pipeline ready with fixed device handling!")