Upload app.py with huggingface_hub
app.py CHANGED

@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-OpenLLM Real Models App -
+OpenLLM Real Models App - Ultimate fixed version with correct attention bias handling
 """
 
 import gradio as gr

@@ -133,7 +133,7 @@ class Block(nn.Module):
         return x
 
 class CausalSelfAttention(nn.Module):
-    """Multi-head self-attention with causal masking -
+    """Multi-head self-attention with causal masking - ULTIMATE FIX"""
     def __init__(self, config):
         super().__init__()
         assert config.n_embd % config.n_head == 0

@@ -146,11 +146,15 @@ class CausalSelfAttention(nn.Module):
         self.dropout = config.dropout
         self.bias = config.bias
 
-        #
+        # REGISTER THE ATTENTION BIAS as a buffer (not parameter) to match saved model
+        # This is actually an attention mask, not a learnable bias
         if config.bias:
-
+            # Create a causal attention mask buffer
+            mask = torch.tril(torch.ones(config.block_size, config.block_size))
+            mask = mask.view(1, 1, config.block_size, config.block_size)
+            self.register_buffer('bias', mask)
         else:
-            self.
+            self.register_buffer('bias', None)
 
     def forward(self, x):
         B, T, C = x.size()

@@ -161,17 +165,19 @@ class CausalSelfAttention(nn.Module):
         q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
         v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
 
-        # Causal self-attention
-
+        # Causal self-attention using the bias mask
+        if self.bias is not None:
+            # Use the causal mask
+            attn_mask = self.bias[:, :, :T, :T]
+            y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, dropout_p=self.dropout if self.training else 0, is_causal=False)
+        else:
+            # Use built-in causal attention
+            y = F.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=True)
+
         y = y.transpose(1, 2).contiguous().view(B, T, C)
 
         # Output projection
         y = self.resid_dropout(self.c_proj(y))
-
-        # Add the bias if it exists
-        if self.bias is not None:
-            y = y + self.bias
-
         return y
 
 class MLP(nn.Module):
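Why the change works, as a minimal self-contained sketch (hypothetical Config values and a trimmed-down module, not the Space's actual GPTConfig or full app.py): registering the causal mask under the name 'bias' with register_buffer puts it in state_dict(), so a checkpoint saved with an attn.bias entry loads without missing/unexpected-key errors, while the forward pass slices the stored mask to the current sequence length before calling F.scaled_dot_product_attention.

from dataclasses import dataclass

import torch
import torch.nn as nn
import torch.nn.functional as F


@dataclass
class Config:
    # Hypothetical toy values for illustration only
    block_size: int = 8
    n_embd: int = 16
    n_head: int = 4
    dropout: float = 0.0
    bias: bool = True


class CausalSelfAttention(nn.Module):
    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        self.resid_dropout = nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        # Registering the mask as a buffer named 'bias' puts it in state_dict(),
        # so it lines up with a checkpoint that contains an 'attn.bias' entry.
        mask = torch.tril(torch.ones(config.block_size, config.block_size))
        self.register_buffer('bias', mask.view(1, 1, config.block_size, config.block_size))

    def forward(self, x):
        B, T, C = x.size()
        q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        # Slice the stored 0/1 mask to the current sequence length and pass it to SDPA
        # as a boolean mask (True = may attend).
        attn_mask = self.bias[:, :, :T, :T].bool()
        y = F.scaled_dot_product_attention(
            q, k, v, attn_mask=attn_mask,
            dropout_p=self.dropout if self.training else 0, is_causal=False)
        y = y.transpose(1, 2).contiguous().view(B, T, C)
        return self.resid_dropout(self.c_proj(y))


cfg = Config()
attn = CausalSelfAttention(cfg)
print('bias' in attn.state_dict())                    # True: the mask travels with the checkpoint
attn.load_state_dict(attn.state_dict(), strict=True)  # no missing/unexpected keys
out = attn(torch.randn(2, cfg.block_size, cfg.n_embd))
print(out.shape)                                      # torch.Size([2, 8, 16])

Two details the sketch handles explicitly: register_buffer refuses a name that already exists as a plain attribute, so the sketch keeps only the buffer under the name 'bias'; and scaled_dot_product_attention treats a floating-point attn_mask as an additive score offset rather than a mask, so the 0/1 buffer is converted to bool before being passed in.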