Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

Really-Amazing committed on Mar 14

Commit

10eadd6

verified ·

1 Parent(s): 5c428ee

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import torch
 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
-from nanochat.gpt import GPT   # ← correct class
 MODEL_PATH = "model_000971.pt"
@@ -13,18 +13,20 @@ tokenizer = get_tokenizer()
 print("Creating GPT model skeleton (D12 fallback)...")
-# Create blank model — use positional arguments (common in nanochat forks)
-# Order usually: vocab_size, n_layer, n_head, n_embd, block_size, dropout, ...
-model = GPT(
-    vocab_size=50257,     # GPT-2 base — most common
     n_layer=12,
     n_head=12,
     n_embd=768,
     block_size=1024,
     dropout=0.1,
-    # If error about missing arg, add bias=True or other defaults here
 )
 print("Loading flat state_dict from checkpoint...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
@@ -34,7 +36,7 @@ for k in list(state_dict.keys()):
     if k.startswith(unwanted_prefix):
         state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
-# Load — strict=False ignores extra keys (value_embeds, lambdas, etc.)
 missing, unexpected = model.load_state_dict(state_dict, strict=False)
 print(f"Load info: {len(missing)} missing keys, {len(unexpected)} unexpected keys")
@@ -46,7 +48,6 @@ print("Model ready!")
 engine = Engine(model=model, tokenizer=tokenizer)
 def chat_fn(message, history):
-    # Use max_tokens as per your engine.py grep
     return engine.generate(message, max_tokens=512, temperature=0.85)
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:

 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
+from nanochat.gpt import GPT, GPTConfig   # ← Added GPTConfig here!
 MODEL_PATH = "model_000971.pt"
 print("Creating GPT model skeleton (D12 fallback)...")
+# 1. Create the config object first
+# (50304 is GPT-2's 50257-token vocabulary padded up to the next multiple of 64, as nanoGPT does for efficiency)
+config = GPTConfig(
+    vocab_size=50304,
     n_layer=12,
     n_head=12,
     n_embd=768,
     block_size=1024,
     dropout=0.1,
 )
+# 2. Pass the config object into the GPT class
+model = GPT(config)
 print("Loading flat state_dict from checkpoint...")
 state_dict = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
     if k.startswith(unwanted_prefix):
         state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
+# Load — strict=False ignores extra keys
 missing, unexpected = model.load_state_dict(state_dict, strict=False)
 print(f"Load info: {len(missing)} missing keys, {len(unexpected)} unexpected keys")
 engine = Engine(model=model, tokenizer=tokenizer)
 def chat_fn(message, history):
     return engine.generate(message, max_tokens=512, temperature=0.85)
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo: