Upload app.py with huggingface_hub
app.py CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-OpenLLM Real Models App -
+OpenLLM Real Models App - Final working version with correct attribute naming
 """
 
 import gradio as gr
@@ -133,7 +133,7 @@ class Block(nn.Module):
         return x
 
 class CausalSelfAttention(nn.Module):
-    """Multi-head self-attention with causal masking -
+    """Multi-head self-attention with causal masking - FINAL WORKING VERSION"""
     def __init__(self, config):
         super().__init__()
         assert config.n_embd % config.n_head == 0
@@ -144,7 +144,7 @@ class CausalSelfAttention(nn.Module):
         self.n_head = config.n_head
         self.n_embd = config.n_embd
         self.dropout = config.dropout
-        self.
+        self.use_bias = config.bias  # Use different name for the boolean flag
 
         # REGISTER THE ATTENTION BIAS as a buffer (not parameter) to match saved model
         # This is actually an attention mask, not a learnable bias
@@ -152,7 +152,7 @@ class CausalSelfAttention(nn.Module):
             # Create a causal attention mask buffer
             mask = torch.tril(torch.ones(config.block_size, config.block_size))
             mask = mask.view(1, 1, config.block_size, config.block_size)
-            self.register_buffer('bias', mask)
+            self.register_buffer('bias', mask)  # This matches the saved model's 'bias' key
         else:
             self.register_buffer('bias', None)
 
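Why the rename matters (context, not part of the commit): PyTorch's nn.Module.register_buffer raises a KeyError if a plain attribute with the same name was already assigned on the module, so storing the boolean config.bias flag as self.bias would break the later self.register_buffer('bias', mask) call. A minimal sketch of the clash and the fix; the Broken/Fixed class names and the 4x4 mask size are illustrative, not from app.py:

import torch
import torch.nn as nn

class Broken(nn.Module):
    def __init__(self):
        super().__init__()
        self.bias = True  # plain attribute named 'bias'
        mask = torch.tril(torch.ones(4, 4)).view(1, 1, 4, 4)
        # raises KeyError: attribute 'bias' already exists
        self.register_buffer('bias', mask)

class Fixed(nn.Module):
    def __init__(self):
        super().__init__()
        self.use_bias = True  # boolean flag stored under a different name
        mask = torch.tril(torch.ones(4, 4)).view(1, 1, 4, 4)
        # buffer key 'bias' now lines up with the checkpoint's state-dict entry
        self.register_buffer('bias', mask)

Fixed() constructs cleanly and its state_dict() contains a 'bias' entry that matches the saved model's key, while Broken() fails with the KeyError at construction time.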