Upload model_Custm.py
Browse files — model_Custm.py (+37, -37)
model_Custm.py
CHANGED
|
@@ -469,9 +469,7 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 469 |
return f"Error generating response: {str(e)}"
|
| 470 |
|
| 471 |
def generate_tokens(self, input_ids, max_length=None, temperature=0.7, top_k=50, top_p=0.95, repetition_penalty=1.0, **kwargs):
|
| 472 |
-
"""
|
| 473 |
-
Generate tokens autoregressively without recursion.
|
| 474 |
-
"""
|
| 475 |
logger.info(f"generate_tokens called with tensor of shape: {input_ids.shape if hasattr(input_ids, 'shape') else 'unknown'}")
|
| 476 |
|
| 477 |
try:
|
|
@@ -485,51 +483,53 @@ class Wildnerve_tlm01(nn.Module, AbstractModel):
|
|
| 485 |
if input_ids.dim() == 1:
|
| 486 |
input_ids = input_ids.unsqueeze(0)
|
| 487 |
|
| 488 |
-
# Get device from input tensor
|
| 489 |
-
device = input_ids.device
|
| 490 |
-
|
| 491 |
# Set reasonable defaults for missing parameters
|
| 492 |
if max_length is None:
|
| 493 |
max_length = min(getattr(self, 'max_seq_length', 1024), 1024)
|
| 494 |
-
max_length = min(max_length, 1024)
|
| 495 |
-
|
| 496 |
-
# Check if we're already at or beyond max length
|
| 497 |
-
if input_ids.shape[1] >= max_length:
|
| 498 |
-
return input_ids # Return without change
|
| 499 |
|
| 500 |
-
# Create attention mask if needed
|
| 501 |
-
attention_mask = None
|
| 502 |
-
if hasattr(self, 'transformer') and getattr(self, 'transformer', None) is not None:
|
| 503 |
-
attention_mask = torch.ones((input_ids.shape[0], input_ids.shape[1]), dtype=torch.long, device=device)
|
| 504 |
-
|
| 505 |
# Initialize generated sequences with input_ids
|
| 506 |
generated_sequences = input_ids.clone()
|
| 507 |
|
| 508 |
-
#
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
|
| 525 |
-
|
| 526 |
-
return output_ids
|
| 527 |
|
| 528 |
except Exception as e:
|
| 529 |
logger.error(f"Error in generate_tokens: {e}")
|
| 530 |
-
|
| 531 |
-
# Return input as fallback to prevent errors
|
| 532 |
-
return input_ids
|
| 533 |
|
| 534 |
def generate_with_decoding(self, input_ids=None, prompt=None, **kwargs):
|
| 535 |
"""
|
|
|
|
| 469 |
return f"Error generating response: {str(e)}"
|
| 470 |
|
| 471 |
def generate_tokens(self, input_ids, max_length=None, temperature=0.7, top_k=50, top_p=0.95, repetition_penalty=1.0, **kwargs):
    """Generate tokens autoregressively from ``input_ids``.

    Args:
        input_ids: Token-id tensor of shape ``(seq_len,)`` or ``(batch, seq_len)``;
            a 1-D input is promoted to a batch of one.
        max_length: Maximum total sequence length (prompt + generated), capped at
            1024. Defaults to ``min(self.max_seq_length, 1024)`` when omitted.
        temperature: Softmax temperature; applied only when > 0.
        top_k: Keep only the ``top_k`` highest-probability tokens (0 disables).
        top_p: Nucleus-sampling threshold; keep the smallest set of tokens whose
            cumulative probability exceeds ``top_p`` (values outside (0, 1) disable).
        repetition_penalty: CTRL-style penalty > 1.0 discourages tokens already
            present in the sequence (1.0 disables).
        **kwargs: Ignored; accepted for interface compatibility.

    Returns:
        Tensor of shape ``(batch, <= max_length)`` containing the prompt followed
        by sampled tokens. On any exception, the (possibly unsqueezed) input is
        returned unchanged as a best-effort fallback.
    """
    logger.info(f"generate_tokens called with tensor of shape: {input_ids.shape if hasattr(input_ids, 'shape') else 'unknown'}")

    try:
        if input_ids.dim() == 1:
            input_ids = input_ids.unsqueeze(0)

        # Set reasonable defaults for missing parameters
        if max_length is None:
            max_length = min(getattr(self, 'max_seq_length', 1024), 1024)
        max_length = min(max_length, 1024)

        # Initialize generated sequences with input_ids
        generated_sequences = input_ids.clone()

        # Auto-regressive generation loop (no-op if the prompt already
        # reaches max_length, since the range is empty).
        for _ in range(max_length - input_ids.shape[1]):
            # Forward pass through the model
            with torch.no_grad():
                outputs = self(generated_sequences)

            # Handle both 2D and 3D output formats
            if outputs.dim() == 3:  # [batch_size, seq_length, vocab_size]
                next_token_logits = outputs[:, -1, :]
            else:  # [batch_size, vocab_size]
                next_token_logits = outputs

            # FIX: repetition_penalty was accepted but silently ignored.
            # CTRL-style: divide positive / multiply negative logits of tokens
            # already generated. Out-of-place scatter so the model's output
            # tensor is never mutated.
            if repetition_penalty != 1.0:
                prev_scores = torch.gather(next_token_logits, 1, generated_sequences)
                prev_scores = torch.where(
                    prev_scores > 0,
                    prev_scores / repetition_penalty,
                    prev_scores * repetition_penalty,
                )
                next_token_logits = next_token_logits.scatter(1, generated_sequences, prev_scores)

            # Apply temperature
            if temperature > 0:
                next_token_logits = next_token_logits / temperature

            # Apply top-k filtering.
            # FIX: clamp k to the vocab size (torch.topk raises when
            # k > dim size) and replace the per-batch Python loop with a
            # vectorized mask against the k-th largest logit.
            if top_k > 0:
                k = min(top_k, next_token_logits.size(-1))
                kth_value = torch.topk(next_token_logits, k)[0][..., -1, None]
                next_token_logits = next_token_logits.masked_fill(
                    next_token_logits < kth_value, float("-inf")
                )

            # FIX: top_p was accepted but silently ignored. Standard nucleus
            # sampling: sort descending, drop the tail once cumulative
            # probability exceeds top_p, always keeping the most likely token.
            if 0.0 < top_p < 1.0:
                sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_remove = cumulative_probs > top_p
                # Shift right so the token that crosses the threshold is kept.
                sorted_remove[..., 1:] = sorted_remove[..., :-1].clone()
                sorted_remove[..., 0] = False
                remove_mask = sorted_remove.scatter(1, sorted_indices, sorted_remove)
                next_token_logits = next_token_logits.masked_fill(remove_mask, float("-inf"))

            # Sample next token
            probs = torch.softmax(next_token_logits, dim=-1)
            next_tokens = torch.multinomial(probs, num_samples=1).squeeze(-1)

            # Add to sequence
            generated_sequences = torch.cat([generated_sequences, next_tokens.unsqueeze(-1)], dim=1)

            # Optional stopping criteria (e.g. EOS detection) could be added here

        return generated_sequences

    except Exception as e:
        logger.error(f"Error in generate_tokens: {e}")
        return input_ids  # Return input as fallback
|
|
|
|
|
|
|
| 533 |
|
| 534 |
def generate_with_decoding(self, input_ids=None, prompt=None, **kwargs):
|
| 535 |
"""
|