EvolphTech
/

Wildnerve-tlm01_Hybrid_Model

Text Generation

wildnerve_tlm01

Model card Files Files and versions

xet

Community

WildnerveAI committed on May 15, 2025

Commit

4c26014

verified ·

1 Parent(s): 812bb66

Upload model_Custm.py

Browse files

Files changed (1) hide show

model_Custm.py +48 -51

model_Custm.py CHANGED Viewed

@@ -393,78 +393,75 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
             # Calculate loss if labels are provided
             loss = None
             if labels is not None:
-                # Get shapes for debugging
-                logger.debug(f"Output shape: {output.shape}, Labels shape: {labels.shape}")
                 # Create loss function
                 loss_fct = nn.CrossEntropyLoss()
-                # Handle shape mismatches properly
-                if output.dim() == 3:  # [batch, seq, vocab]
                     batch_size, seq_len, vocab_size = output.size()
-                    # Reshape to [batch*seq, vocab]
-                    output_flat = output.reshape(-1, vocab_size)
-                    # If labels are way bigger than our batch size, something is wrong
-                    # in the training loop, but we'll try to handle it gracefully
-                    if labels.size(0) > batch_size * seq_len:
-                        # Calculate target size
-                        target_size = output_flat.size(0)
-                        # Take just enough labels to match our flattened output
-                        if labels.size(0) >= target_size:
-                            labels = labels[:target_size]
-                        else:
-                            # Pad labels if needed
-                            padding = torch.zeros(target_size - labels.size(0),
-                                                device=labels.device,
-                                                dtype=labels.dtype)
-                            labels = torch.cat([labels, padding])
-                    # Calculate loss with proper shapes
-                    loss = loss_fct(output_flat, labels.view(-1))
-                else:  # output is [batch, vocab]
-                    # Handle excessive label size similar to above
-                    if labels.size(0) > output.size(0):
-                        labels = labels[:output.size(0)]
                     loss = loss_fct(output, labels)
             # Return in HuggingFace format
             if loss is not None:
-                return (loss, output)
             else:
-                # Create a compatible output object
-                class SimpleModelOutput:
-                    def __init__(self, logits):
-                        self.logits = logits
-                    def __getitem__(self, idx):
-                        if idx == 0: return None  # Return None for loss
-                        elif idx == 1: return self.logits
-                        raise IndexError("Index out of range")
-                return SimpleModelOutput(output)
         except Exception as e:
-            # Detailed error logging for debugging
-            logger.error(f"Error in forward pass: {e}")
-            logger.error(f"Traceback: {traceback.format_exc()}")
-            logger.error(f"Input shapes - src: {src.shape if src is not None else None}, "
-                       f"input_ids: {input_ids.shape if input_ids is not None else None}")
-            # Create minimal output to prevent cascading errors, matching expected return format
-            batch_size = src.shape[0] if src is not None else (input_ids.shape[0] if input_ids is not None else 1)
-            dummy_output = torch.zeros((batch_size, self.output_size), device=next(self.parameters()).device)
-            # Return in expected format to avoid "too many values to unpack" errors
             if labels is not None:
-                # Match (loss, logits) format
-                dummy_loss = torch.tensor(999.0, device=next(self.parameters()).device)
                 return (dummy_loss, dummy_output)
             else:
-                # Match object with logits attribute
                 class SimpleModelOutput:
                     def __init__(self, logits):
                         self.logits = logits
                 return SimpleModelOutput(dummy_output)
     # Add sentence transformer methods
     def encode_sentences(self, sentences, batch_size=32, normalize_embeddings=True):
         """Encode sentences into vectors (sentence transformer functionality)"""

             # Calculate loss if labels are provided
             loss = None
             if labels is not None:
                 # Create loss function
                 loss_fct = nn.CrossEntropyLoss()
+                # CRITICAL FIX: Debug shape information
+                batch_size, seq_len = None, None
+                if output.dim() == 3:
                     batch_size, seq_len, vocab_size = output.size()
+                    logger.debug(f"3D Output shape: {output.shape}, Labels shape: {labels.shape}")
+                    # Fix for the target batch size mismatch (12 vs 9204, 16 vs 12272, etc.)
+                    # If labels are flattened but output isn't, reshape output to match
+                    if labels.size(0) == batch_size * seq_len:
+                        # This means labels are already flattened to [batch_size*seq_len]
+                        flattened_output = output.view(-1, output.size(-1))
+                        loss = loss_fct(flattened_output, labels)
+                        # Return explicitly formatted for HuggingFace compatibility
+                        return (loss, output)
+                    else:
+                        # Regular case - reshape both
+                        flattened_output = output.view(-1, output.size(-1))
+                        flattened_labels = labels.view(-1)
+                        loss = loss_fct(flattened_output, flattened_labels)
+                else:
+                    # For classification (2D output)
                     loss = loss_fct(output, labels)
+            # Simple object with logits attribute for HuggingFace compatibility
+            class SimpleModelOutput:
+                def __init__(self, logits):
+                    self.logits = logits
             # Return in HuggingFace format
             if loss is not None:
+                return (loss, output)  # Return tuple
             else:
+                return SimpleModelOutput(output)  # Return object with logits attribute
         except Exception as e:
+            logger.error(f"Error in forward pass: {e}", exc_info=True)
+            # Create fallback outputs that match expected formats
+            device = next(self.parameters()).device if hasattr(self, 'parameters') else torch.device('cpu')
+            # Get batch size from inputs
+            if src is not None:
+                batch_size = src.size(0)
+            elif input_ids is not None:
+                batch_size = input_ids.size(0)
+            else:
+                batch_size = 1
+            # Log input/target shapes for debugging
             if labels is not None:
+                logger.error(f"Input shapes - batch_size: {batch_size}, labels: {labels.shape}")
+            # Create dummy output with correct vocab size
+            vocab_size = self.output_size if hasattr(self, 'output_size') else 50257
+            dummy_output = torch.zeros((batch_size, vocab_size), device=device)
+            # Match the expected return format
+            if labels is not None:
+                dummy_loss = torch.tensor(999.0, device=device)
                 return (dummy_loss, dummy_output)
             else:
                 class SimpleModelOutput:
                     def __init__(self, logits):
                         self.logits = logits
                 return SimpleModelOutput(dummy_output)
     # Add sentence transformer methods
     def encode_sentences(self, sentences, batch_size=32, normalize_embeddings=True):
         """Encode sentences into vectors (sentence transformer functionality)"""