lemms committed on
Commit 95d50fc · verified · 1 Parent(s): 04e75aa

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +16 -15
app.py CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-OpenLLM Real Models App - Final version with correct model architecture
+OpenLLM Real Models App - Corrected version with exact model architecture
 """
 
 import gradio as gr
@@ -33,21 +33,21 @@ class GPTConfig:
         self.bias = bias
 
 class GPT(nn.Module):
-    """GPT-style transformer model - matches the actual trained model architecture"""
+    """GPT-style transformer model - EXACT architecture matching the saved model"""
     def __init__(self, config):
         super().__init__()
         assert config.vocab_size is not None
         assert config.block_size is not None
         self.config = config
 
-        # Word and position embeddings
-        self.wte = nn.Embedding(config.vocab_size, config.n_embd)
-        self.wpe = nn.Embedding(config.block_size, config.n_embd)
-        self.drop = nn.Dropout(config.dropout)
-
-        # Transformer blocks
-        self.h = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
-        self.ln_f = nn.LayerNorm(config.n_embd)
+        # Create the transformer module with the exact naming convention
+        self.transformer = nn.ModuleDict(dict(
+            wte = nn.Embedding(config.vocab_size, config.n_embd),
+            wpe = nn.Embedding(config.block_size, config.n_embd),
+            drop = nn.Dropout(config.dropout),
+            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+            ln_f = nn.LayerNorm(config.n_embd),
+        ))
 
         # Language model head
         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=config.bias)
@@ -72,13 +72,13 @@ class GPT(nn.Module):
         assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
 
         pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
-        tok_emb = self.wte(idx)
-        pos_emb = self.wpe(pos)
-        x = self.drop(tok_emb + pos_emb)
+        tok_emb = self.transformer.wte(idx)
+        pos_emb = self.transformer.wpe(pos)
+        x = self.transformer.drop(tok_emb + pos_emb)
 
-        for block in self.h:
+        for block in self.transformer.h:
             x = block(x)
-        x = self.ln_f(x)
+        x = self.transformer.ln_f(x)
 
         if targets is not None:
             logits = self.lm_head(x)
@@ -144,6 +144,7 @@ class CausalSelfAttention(nn.Module):
         self.n_head = config.n_head
         self.n_embd = config.n_embd
         self.dropout = config.dropout
+        self.bias = config.bias  # Add bias parameter
 
     def forward(self, x):
         B, T, C = x.size()
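
The move to a nested `transformer` ModuleDict matters because PyTorch derives state_dict key names from attribute paths, and load_state_dict() only succeeds when those names match the checkpoint's keys. Below is a minimal, self-contained sketch (not part of app.py; the module sizes are arbitrary) illustrating the difference:

# Illustrative only: how the nn.ModuleDict wrapper changes state_dict key names.
import torch.nn as nn

class Flat(nn.Module):
    def __init__(self):
        super().__init__()
        self.wte = nn.Embedding(10, 4)          # key: "wte.weight"

class Nested(nn.Module):
    def __init__(self):
        super().__init__()
        self.transformer = nn.ModuleDict(dict(
            wte=nn.Embedding(10, 4),            # key: "transformer.wte.weight"
        ))

print(list(Flat().state_dict()))    # ['wte.weight']
print(list(Nested().state_dict()))  # ['transformer.wte.weight']
# A checkpoint saved from the original training code uses "transformer.*" keys,
# which is why app.py now mirrors that layout instead of flat attributes.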