HarleyCoops committed
Commit · 337c00c
Parent(s): 30a86a5

chore: update 7 added files

Files changed:
- README.md (+4, -0)
- app.py (+14, -12)
- configuration_nanochat.py (+1, -1)
- modeling_nanochat.py (+1, -0)
- tokenization_nanochat.py (+108, -0)
README.md CHANGED
@@ -8,8 +8,12 @@ sdk_version: 5.0.0
 app_file: app.py
 pinned: false
 license: mit
+suggested_hardware: cpu-basic
+models:
+- HarleyCooper/nanochat561
 ---
 
+
 # NanoChat 561M - Chat Interface
 
 A Gradio-based chat interface for the NanoChat 561M parameter language model.
app.py CHANGED
@@ -2,10 +2,12 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-# Ensure custom config/model are registered
+# Ensure custom config/model are registered with transformers
 import configuration_nanochat  # noqa: F401
 import modeling_nanochat  # noqa: F401
 
+# Updated: Fixed tokenizer, config, DynamicCache, weights, RoPE dims, and past_kv None handling
+
 
 MODEL_ID = "HarleyCooper/nanochat561"
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -17,17 +19,10 @@ else:
     TORCH_DTYPE = torch.float32
 
 
-try:
-    tokenizer = AutoTokenizer.from_pretrained(
-        MODEL_ID,
-        trust_remote_code=True,
-        use_fast=False,
-    )
-except Exception as exc:
-    raise RuntimeError(
-        "Failed to load the nanochat tokenizer. Make sure `tokenizer/tokenizer.pkl` "
-        "or the expected tokenizer assets are present in the repository."
-    ) from exc
+tokenizer = AutoTokenizer.from_pretrained(
+    MODEL_ID,
+    trust_remote_code=True,
+)
 
 # Ensure pad token exists for generation.
 if tokenizer.pad_token_id is None:
@@ -38,6 +33,13 @@ model = AutoModelForCausalLM.from_pretrained(
     torch_dtype=TORCH_DTYPE,
     trust_remote_code=True,
 )
+
+config = model.config
+if not hasattr(config, "num_hidden_layers") and hasattr(config, "n_layer"):
+    config.num_hidden_layers = config.n_layer
+if not hasattr(config, "hidden_size") and hasattr(config, "n_embd"):
+    config.hidden_size = config.n_embd
+
 model.to(DEVICE)
 model.eval()
 
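The config-aliasing block appended in the last hunk bridges nanochat's GPT-style attribute names (n_layer, n_embd) to the canonical names that parts of transformers' generation machinery expect (the commit comment cites DynamicCache among the fixes). A minimal sketch of the same pattern, using a hypothetical stand-in config with illustrative values rather than the real NanoChatConfig:

    from types import SimpleNamespace

    # Hypothetical GPT-style config that only defines the legacy names.
    config = SimpleNamespace(n_layer=20, n_embd=1280)  # illustrative values

    # Alias only when the canonical name is missing, so configs that already
    # define num_hidden_layers / hidden_size are left untouched.
    if not hasattr(config, "num_hidden_layers") and hasattr(config, "n_layer"):
        config.num_hidden_layers = config.n_layer
    if not hasattr(config, "hidden_size") and hasattr(config, "n_embd"):
        config.hidden_size = config.n_embd

    assert config.num_hidden_layers == 20 and config.hidden_size == 1280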
configuration_nanochat.py CHANGED
@@ -10,7 +10,7 @@ from transformers.configuration_utils import PretrainedConfig
 
 
 class NanoChatConfig(PretrainedConfig):
-    model_type = "nanochat"
+    model_type = "nanochat-gpt"
 
     def __init__(
         self,
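The model_type rename changes the key AutoConfig dispatches on and the value serialized into config.json. A quick sketch with a generic stand-in class (DemoConfig is hypothetical, not the real NanoChatConfig):

    from transformers import PretrainedConfig

    class DemoConfig(PretrainedConfig):
        model_type = "nanochat-gpt"

    cfg = DemoConfig()
    # model_type is written into config.json and is what AutoConfig matches
    # against when re-loading the checkpoint.
    print(cfg.to_dict()["model_type"])  # -> "nanochat-gpt"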
modeling_nanochat.py CHANGED
@@ -299,4 +299,5 @@ class NanoChatForCausalLM(PreTrainedModel):
 
 
 AutoConfig.register("nanochat", NanoChatConfig)
+AutoConfig.register("nanochat-gpt", NanoChatConfig)
 AutoModelForCausalLM.register(NanoChatConfig, NanoChatForCausalLM)
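Registering the config under both names is what keeps checkpoints whose config.json still says "nanochat" loadable after the rename to "nanochat-gpt". One caveat: recent transformers releases validate that the name passed to AutoConfig.register matches config.model_type, so a hedged way to express the same intent is a thin alias subclass (DemoConfig and LegacyDemoConfig are hypothetical stand-ins, not the real classes):

    from transformers import AutoConfig, PretrainedConfig

    class DemoConfig(PretrainedConfig):
        model_type = "nanochat-gpt"  # new canonical name

    class LegacyDemoConfig(DemoConfig):
        model_type = "nanochat"  # name older checkpoints carry in config.json

    # Each registered name matches its class's model_type, so the
    # consistency check in AutoConfig.register passes.
    AutoConfig.register("nanochat-gpt", DemoConfig)
    AutoConfig.register("nanochat", LegacyDemoConfig)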
tokenization_nanochat.py ADDED
@@ -0,0 +1,108 @@
+"""
+Hugging Face tokenizer wrapper for nanochat's rustbpe+tiktoken vocabulary.
+"""
+
+from __future__ import annotations
+
+import os
+import pickle
+from typing import Dict, List, Optional, Tuple
+
+import tiktoken
+from transformers.tokenization_utils import PreTrainedTokenizer
+
+SPECIAL_TOKENS = [
+    "<|bos|>",
+    "<|user_start|>",
+    "<|user_end|>",
+    "<|assistant_start|>",
+    "<|assistant_end|>",
+    "<|python_start|>",
+    "<|python_end|>",
+    "<|output_start|>",
+    "<|output_end|>",
+]
+
+
+class NanoChatTokenizer(PreTrainedTokenizer):
+    vocab_files_names = {"tokenizer_file": "tokenizer/tokenizer.pkl"}
+    model_input_names = ["input_ids", "attention_mask"]
+
+    def __init__(self, tokenizer_file: Optional[str] = None, **kwargs):
+        if tokenizer_file is None:
+            raise ValueError("tokenizer_file must be provided")
+
+        with open(tokenizer_file, "rb") as handle:
+            self._encoding: tiktoken.Encoding = pickle.load(handle)
+
+        self._id_to_token: List[str] = [self._encoding.decode([i]) for i in range(self._encoding.n_vocab)]
+        self.vocab: Dict[str, int] = {token: idx for idx, token in enumerate(self._id_to_token)}
+
+        super().__init__(
+            bos_token="<|bos|>",
+            eos_token="<|bos|>",
+            unk_token="<|bos|>",
+            pad_token="<|bos|>",
+            **kwargs,
+        )
+
+        self.bos_token_id = self.vocab[self.bos_token]
+        self.eos_token_id = self.vocab[self.eos_token]
+        self.unk_token_id = self.vocab[self.unk_token]
+        self.pad_token_id = self.vocab[self.pad_token]
+
+    @property
+    def vocab_size(self) -> int:
+        return len(self._id_to_token)
+
+    def get_vocab(self) -> Dict[str, int]:
+        return dict(self.vocab)
+
+    def _tokenize(self, text: str) -> List[str]:
+        token_ids = self._encoding.encode_ordinary(text)
+        return [self._id_to_token[token_id] for token_id in token_ids]
+
+    def _convert_token_to_id(self, token: str) -> int:
+        return self.vocab.get(token, self.unk_token_id)
+
+    def _convert_id_to_token(self, index: int) -> str:
+        return self._id_to_token[index]
+
+    def build_inputs_with_special_tokens(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None,
+    ) -> List[int]:
+        if token_ids_1 is not None:
+            raise ValueError("nanochat tokenizer only supports single sequences")
+        return [self.bos_token_id] + token_ids_0
+
+    def create_token_type_ids_from_sequences(
+        self,
+        token_ids_0: List[int],
+        token_ids_1: Optional[List[int]] = None,
+    ) -> List[int]:
+        del token_ids_1
+        return [0] * (len(token_ids_0) + 1)  # +1 for BOS
+
+    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
+        target_dir = os.path.join(save_directory, "tokenizer")
+        os.makedirs(target_dir, exist_ok=True)
+        filename = (filename_prefix + "-" if filename_prefix else "") + "tokenizer.pkl"
+        dest_file = os.path.join(target_dir, filename)
+        with open(dest_file, "wb") as handle:
+            pickle.dump(self._encoding, handle)
+        return (dest_file,)
+
+    def _decode(
+        self,
+        token_ids: List[int],
+        skip_special_tokens: bool = False,
+        clean_up_tokenization_spaces: Optional[bool] = None,
+        spaces_between_special_tokens: bool = True,
+        **kwargs,
+    ) -> str:
+        del clean_up_tokenization_spaces, spaces_between_special_tokens, kwargs
+        if skip_special_tokens:
+            token_ids = [tid for tid in token_ids if tid not in self.all_special_ids]
+        return self._encoding.decode(token_ids)