tamoghna
/

encoder-decoder-eng2bn-eng2hi

@@ -1,126 +1,3 @@
-# import math
-# import torch
-# import torch.nn as nn
-# from transformers import PretrainedConfig, PreTrainedModel
-# import warnings
-# # Use the Hugging Face base configuration class for compatibility
-# class TransformerConfig(PretrainedConfig):
-#     # Model type must match the one found in your config.json (small_transformer)
-#     model_type = "small_transformer"
-#     def __init__(self,
-#                  vocab_size=80000,
-#                  d_model=256,
-#                  nhead=8,
-#                  num_encoder_layers=3,
-#                  num_decoder_layers=3,
-#                  dim_feedforward=512,
-#                  dropout=0.1,
-#                  pad_token_id=0,
-#                  bos_token_id=1,  # Assuming <s> is 1
-#                  eos_token_id=2,  # Assuming </s> is 2
-#                  max_position_embeddings=512,
-#                  **kwargs):
-#         super().__init__(pad_token_id=pad_token_id,
-#                          bos_token_id=bos_token_id,
-#                          eos_token_id=eos_token_id,
-#                          **kwargs)
-#         self.vocab_size = vocab_size
-#         self.d_model = d_model
-#         self.nhead = nhead
-#         self.num_encoder_layers = num_encoder_layers
-#         self.num_decoder_layers = num_decoder_layers
-#         self.dim_feedforward = dim_feedforward
-#         self.dropout = dropout
-#         self.max_position_embeddings = max_position_embeddings
-#         # Add a placeholder for decoder_start_token_id, which is needed for generation
-#         if not hasattr(self, "decoder_start_token_id"):
-#              # For a multilingual model, this is often the target language token ID
-#              # You will set this explicitly during generation in your Gradio app (as shown previously)
-#              self.decoder_start_token_id = None
-# # Use the Hugging Face base model class for compatibility
-# class SmallTransformer(PreTrainedModel):
-#     # Link the model to its configuration class
-#     config_class = TransformerConfig
-#     def __init__(self, config):
-#         super().__init__(config)
-#         self.config = config
-#         # --- Model Components (from your training code) ---
-#         self.embedding = nn.Embedding(config.vocab_size, config.d_model, padding_idx=config.pad_token_id)
-#         self.pos_encoder = nn.Embedding(config.max_position_embeddings, config.d_model)
-#         self.pos_decoder = nn.Embedding(config.max_position_embeddings, config.d_model)
-#         self.embed_scale = math.sqrt(config.d_model)
-#         enc_layer = nn.TransformerEncoderLayer(d_model=config.d_model, nhead=config.nhead,
-#                                                dim_feedforward=config.dim_feedforward,
-#                                                dropout=config.dropout, batch_first=True)
-#         dec_layer = nn.TransformerDecoderLayer(d_model=config.d_model, nhead=config.nhead,
-#                                                dim_feedforward=config.dim_feedforward,
-#                                                dropout=config.dropout, batch_first=True)
-#         self.encoder = nn.TransformerEncoder(enc_layer, num_layers=config.num_encoder_layers)
-#         self.decoder = nn.TransformerDecoder(dec_layer, num_layers=config.num_decoder_layers)
-#         self.output_layer = nn.Linear(config.d_model, config.vocab_size)
-#         # Initialize weights
-#         self.post_init()
-#     # Implement the forward pass exactly as you had it
-#     def forward(self, input_ids=None, decoder_input_ids=None, **kwargs):
-#         src = input_ids
-#         tgt = decoder_input_ids
-#         assert src.dim() == 2 and tgt.dim() == 2
-#         # Your custom max_token check (omitting for brevity but keep if you need it)
-#         src_mask = (src == self.config.pad_token_id)
-#         tgt_mask_pad = (tgt == self.config.pad_token_id)
-#         T = tgt.size(1)
-#         # Create Causal Mask
-#         causal_mask = torch.triu(torch.ones((T, T), device=tgt.device), diagonal=1).bool()
-#         # Positional Encoding
-#         src_pos = torch.arange(0, src.size(1), device=src.device).unsqueeze(0).expand(src.size(0), -1).clamp(max=self.config.max_position_embeddings - 1)
-#         tgt_pos = torch.arange(0, tgt.size(1), device=tgt.device).unsqueeze(0).expand(tgt.size(0), -1).clamp(max=self.config.max_position_embeddings - 1)
-#         src_emb = self.embedding(src) * self.embed_scale + self.pos_encoder(src_pos)
-#         tgt_emb = self.embedding(tgt) * self.embed_scale + self.pos_decoder(tgt_pos)
-#         memory = self.encoder(src_emb, src_key_padding_mask=src_mask)
-#         output = self.decoder(tgt_emb, memory, tgt_mask=causal_mask,
-#                               tgt_key_padding_mask=tgt_mask_pad,
-#                               memory_key_padding_mask=src_mask)
-#         # The output must be the logits before the final softmax/loss
-#         logits = self.output_layer(output)
-#         # Return a dictionary/tuple of outputs compatible with PreTrainedModel
-#         return (logits,) # Return logits in a tuple for compatibility
-#     # Implement the input-preparation hook that .generate() relies on (minimal implementation)
-#     def prepare_inputs_for_generation(self, decoder_input_ids, **kwargs):
-#         # This method is required by the .generate() function
-#         return {"input_ids": kwargs.get("input_ids"), "decoder_input_ids": decoder_input_ids}
-#     def _prepare_decoder_input_ids_for_generation(self, decoder_input_ids, **kwargs):
-#         # A simple method to ensure the decoder input starts with the language token
-#         # This is typically handled by generation_config, but we include a check here
-#         if decoder_input_ids is None and self.config.decoder_start_token_id is not None:
-#              warnings.warn("Using decoder_start_token_id from config. This should be manually set during generation.")
-#              decoder_input_ids = torch.ones((kwargs["input_ids"].shape[0], 1), dtype=torch.long, device=self.device) * self.config.decoder_start_token_id
-#         return decoder_input_ids
-# # No registration needed - auto_map in config.json handles this
 """PyTorch Small Transformer model for English to Hindi/Bengali translation."""
 import math
@@ -299,7 +176,8 @@ class SmallTransformer(SmallTransformerPreTrainedModel):
     def generate(
         self,
         input_ids: torch.LongTensor,
-        max_length: int = 64,
         lang_token_id: int = None,
         eos_token_id: int = None,
         **kwargs
@@ -308,6 +186,12 @@ class SmallTransformer(SmallTransformerPreTrainedModel):
         if eos_token_id is None:
             eos_token_id = self.config.eos_token_id
         batch_size = input_ids.size(0)
         device = input_ids.device

 """PyTorch Small Transformer model for English to Hindi/Bengali translation."""
 import math
     def generate(
         self,
         input_ids: torch.LongTensor,
+        max_length: int = None,
+        max_new_tokens: int = None,
         lang_token_id: int = None,
         eos_token_id: int = None,
         **kwargs
         if eos_token_id is None:
             eos_token_id = self.config.eos_token_id
+        # max_new_tokens, when given, takes precedence over max_length; if neither is set, default to 64
+        if max_new_tokens is not None:
+            max_length = max_new_tokens
+        elif max_length is None:
+            max_length = 64
         batch_size = input_ids.size(0)
         device = input_ids.device