Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-OpenLLM Real Models App -
+OpenLLM Real Models App - Final fixed version with exact bias configuration
 """

 import gradio as gr
@@ -49,8 +49,8 @@ class GPT(nn.Module):
             ln_f = nn.LayerNorm(config.n_embd),
         ))

-        # Language model head
-        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+        # Language model head - MUST have bias to match saved model
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=True)

         # Initialize weights
         self.apply(self._init_weights)
@@ -133,7 +133,7 @@ class Block(nn.Module):
         return x

 class CausalSelfAttention(nn.Module):
-    """Multi-head self-attention with causal masking"""
+    """Multi-head self-attention with causal masking - EXACT bias configuration"""
     def __init__(self, config):
         super().__init__()
         assert config.n_embd % config.n_head == 0
@@ -144,7 +144,13 @@ class CausalSelfAttention(nn.Module):
         self.n_head = config.n_head
         self.n_embd = config.n_embd
         self.dropout = config.dropout
-        self.bias = config.bias
+        self.bias = config.bias
+
+        # ADD THE BIAS PARAMETER that the saved model expects
+        if config.bias:
+            self.bias = nn.Parameter(torch.zeros(config.n_embd))
+        else:
+            self.register_parameter('bias', None)

     def forward(self, x):
         B, T, C = x.size()
@@ -161,6 +167,11 @@ class CausalSelfAttention(nn.Module):

         # Output projection
         y = self.resid_dropout(self.c_proj(y))
+
+        # Add the bias if it exists
+        if self.bias is not None:
+            y = y + self.bias
+
         return y

 class MLP(nn.Module):
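The crux of the change above is that the model's parameters must line up key-for-key with the checkpoint's state_dict: a module built with bias=False has no lm_head.bias entry, so a checkpoint saved with one cannot load strictly. A minimal sketch of how such a mismatch surfaces (the checkpoint filename and config object are placeholders, not taken from this commit):

import torch

# Placeholder checkpoint path and config -- assumptions for illustration only.
state_dict = torch.load("model.pt", map_location="cpu")
model = GPT(config)  # with config.bias=True, lm_head.bias and the attn bias exist

# strict=True (the default) raises RuntimeError listing every
# "Missing key(s)" / "Unexpected key(s)" when the two sets disagree.
model.load_state_dict(state_dict)

# Diffing the key sets by hand shows exactly which parameters disagree:
ckpt_keys = set(state_dict)
model_keys = set(model.state_dict())
print("in model but not checkpoint:", sorted(model_keys - ckpt_keys))
print("in checkpoint but not model:", sorted(ckpt_keys - model_keys))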
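The commit message indicates the file was pushed with huggingface_hub. For reference, a minimal sketch of that kind of upload (the repo_id is a placeholder):

from huggingface_hub import upload_file

# Push the fixed app.py to a Space; repo_id here is a placeholder.
upload_file(
    path_or_fileobj="app.py",
    path_in_repo="app.py",
    repo_id="your-username/your-space",
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)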