Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

Really-Amazing committed on Mar 14

Commit

de657c0

verified ·

1 Parent(s): 00d2698

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -32

app.py CHANGED Viewed

@@ -2,7 +2,6 @@ import torch
 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
-from nanochat.gpt import GPT
 MODEL_PATH = "model_000971.pt"
@@ -10,37 +9,31 @@ print("Waking up the toddler (NanoChat-ClimbMix-D12)...")
 tokenizer = get_tokenizer()
-print("Building GPT model skeleton (D12 fallback)...")
-config = {
-    "n_layer": 12,
-    "n_head": 12,
-    "n_embd": 768,
-    "block_size": 1024,
-    "vocab_size": 50257,      # GPT-2 standard — safer bet
-    "dropout": 0.1,
-    "bias": True,
-}
-model = GPT(**config)
-print("Loading weights from checkpoint...")
 checkpoint = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
-state_dict = checkpoint if not isinstance(checkpoint, dict) else (
-    checkpoint.get('model') or checkpoint.get('state_dict') or checkpoint
-)
-unwanted_prefix = '_orig_mod.'
-for k in list(state_dict.keys()):
-    if k.startswith(unwanted_prefix):
-        state_dict[k[len(unwanted_prefix):]] = state_dict.pop(k)
-missing, unexpected = model.load_state_dict(state_dict, strict=False)
-if missing or unexpected:
-    print(f"Warning: Missing keys: {len(missing)} | Unexpected: {len(unexpected)}")
 model.to("cpu")
 model.eval()
-print("Model loaded successfully!")
 engine = Engine(model=model, tokenizer=tokenizer)
@@ -48,13 +41,8 @@ def chat_fn(message, history):
     return engine.generate(message, max_tokens=512, temperature=0.85)
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
-    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12 – The Confident Toddler LLM")
-    gr.Markdown("Karpathy nanochat fork. Preschool phase: bold, funny, often wrong. 😂\nRoadmap: D14 → D16 → D18 → D20+")
-    gr.ChatInterface(
-        fn=chat_fn,
-        examples=["Why is the sky blue?", "What is UPI?", "Write hello world Python code"],
-        title="Chat with the Toddler"
-    )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
 from nanochat.engine import Engine
 from nanochat.tokenizer import get_tokenizer
 MODEL_PATH = "model_000971.pt"
 tokenizer = get_tokenizer()
+print("Loading checkpoint directly...")
 checkpoint = torch.load(MODEL_PATH, map_location="cpu", weights_only=False)
+# Your checkpoint is a flat state_dict with 'transformer.' prefix
+# So we need the model class instance first
+# Option 1: If nanochat has a from_checkpoint or load method
+# (most likely in checkpoint_manager or engine)
+try:
+    from nanochat.checkpoint_manager import load_model
+    model, _ = load_model(".", checkpoint_name="model_000971.pt", device="cpu")
+except Exception as e:
+    print(f"checkpoint_manager failed: {e}")
+    # Option 2: Direct load if checkpoint is state_dict
+    state_dict = checkpoint
+    # We need a pre-initialized model to load into
+    # Since we can't build GPT without args, assume Engine can help or fallback
+    # For now, raise to see
+    raise ValueError("Cannot reconstruct model — checkpoint is flat state_dict. Need model skeleton or load method")
 model.to("cpu")
 model.eval()
+print("Model loaded!")
 engine = Engine(model=model, tokenizer=tokenizer)
     return engine.generate(message, max_tokens=512, temperature=0.85)
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
+    gr.Markdown("# 🧸 NanoChat-ClimbMix-D12")
+    gr.ChatInterface(fn=chat_fn)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)