HarleyCoops committed on
Commit
337c00c
·
1 Parent(s): 30a86a5

chore: update 7 added files

Browse files
README.md CHANGED
@@ -8,8 +8,12 @@ sdk_version: 5.0.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
 
 
11
  ---
12
 
 
13
  # NanoChat 561M - Chat Interface
14
 
15
  A Gradio-based chat interface for the NanoChat 561M parameter language model.
 
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
+ suggested_hardware: cpu-basic
12
+ models:
13
+ - HarleyCooper/nanochat561
14
  ---
15
 
16
+
17
  # NanoChat 561M - Chat Interface
18
 
19
  A Gradio-based chat interface for the NanoChat 561M parameter language model.
app.py CHANGED
@@ -2,10 +2,12 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
- # Ensure custom config/model are registered before loading from Hub.
6
  import configuration_nanochat # noqa: F401
7
  import modeling_nanochat # noqa: F401
8
 
 
 
9
 
10
  MODEL_ID = "HarleyCooper/nanochat561"
11
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -17,17 +19,10 @@ else:
17
  TORCH_DTYPE = torch.float32
18
 
19
 
20
- try:
21
- tokenizer = AutoTokenizer.from_pretrained(
22
- MODEL_ID,
23
- trust_remote_code=True,
24
- use_fast=False,
25
- )
26
- except Exception as exc:
27
- raise RuntimeError(
28
- "Failed to load the nanochat tokenizer. Make sure `tokenizer/tokenizer.pkl` "
29
- "or the expected tokenizer assets are present in the repository."
30
- ) from exc
31
 
32
  # Ensure pad token exists for generation.
33
  if tokenizer.pad_token_id is None:
@@ -38,6 +33,13 @@ model = AutoModelForCausalLM.from_pretrained(
38
  torch_dtype=TORCH_DTYPE,
39
  trust_remote_code=True,
40
  )
 
 
 
 
 
 
 
41
  model.to(DEVICE)
42
  model.eval()
43
 
 
2
  import torch
3
  from transformers import AutoModelForCausalLM, AutoTokenizer
4
 
5
+ # Ensure custom config/model are registered with transformers
6
  import configuration_nanochat # noqa: F401
7
  import modeling_nanochat # noqa: F401
8
 
9
+ # Updated: Fixed tokenizer, config, DynamicCache, weights, RoPE dims, and past_kv None handling
10
+
11
 
12
  MODEL_ID = "HarleyCooper/nanochat561"
13
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
19
  TORCH_DTYPE = torch.float32
20
 
21
 
22
+ tokenizer = AutoTokenizer.from_pretrained(
23
+ MODEL_ID,
24
+ trust_remote_code=True,
25
+ )
 
 
 
 
 
 
 
26
 
27
  # Ensure pad token exists for generation.
28
  if tokenizer.pad_token_id is None:
 
33
  torch_dtype=TORCH_DTYPE,
34
  trust_remote_code=True,
35
  )
36
+
37
+ config = model.config
38
+ if not hasattr(config, "num_hidden_layers") and hasattr(config, "n_layer"):
39
+ config.num_hidden_layers = config.n_layer
40
+ if not hasattr(config, "hidden_size") and hasattr(config, "n_embd"):
41
+ config.hidden_size = config.n_embd
42
+
43
  model.to(DEVICE)
44
  model.eval()
45
 
configuration_nanochat.py CHANGED
@@ -10,7 +10,7 @@ from transformers.configuration_utils import PretrainedConfig
10
 
11
 
12
  class NanoChatConfig(PretrainedConfig):
13
- model_type = "nanochat"
14
 
15
  def __init__(
16
  self,
 
10
 
11
 
12
  class NanoChatConfig(PretrainedConfig):
13
+ model_type = "nanochat-gpt"
14
 
15
  def __init__(
16
  self,
modeling_nanochat.py CHANGED
@@ -299,4 +299,5 @@ class NanoChatForCausalLM(PreTrainedModel):
299
 
300
 
301
  AutoConfig.register("nanochat", NanoChatConfig)
 
302
  AutoModelForCausalLM.register(NanoChatConfig, NanoChatForCausalLM)
 
299
 
300
 
301
  AutoConfig.register("nanochat", NanoChatConfig)
302
+ AutoConfig.register("nanochat-gpt", NanoChatConfig)
303
  AutoModelForCausalLM.register(NanoChatConfig, NanoChatForCausalLM)
tokenization_nanochat.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face tokenizer wrapper for nanochat's rustbpe+tiktoken vocabulary.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import os
8
+ import pickle
9
+ from typing import Dict, List, Optional, Tuple
10
+
11
+ import tiktoken
12
+ from transformers.tokenization_utils import PreTrainedTokenizer
13
+
14
+ SPECIAL_TOKENS = [
15
+ "<|bos|>",
16
+ "<|user_start|>",
17
+ "<|user_end|>",
18
+ "<|assistant_start|>",
19
+ "<|assistant_end|>",
20
+ "<|python_start|>",
21
+ "<|python_end|>",
22
+ "<|output_start|>",
23
+ "<|output_end|>",
24
+ ]
25
+
26
+
27
+ class NanoChatTokenizer(PreTrainedTokenizer):
28
+ vocab_files_names = {"tokenizer_file": "tokenizer/tokenizer.pkl"}
29
+ model_input_names = ["input_ids", "attention_mask"]
30
+
31
+ def __init__(self, tokenizer_file: Optional[str] = None, **kwargs):
32
+ if tokenizer_file is None:
33
+ raise ValueError("tokenizer_file must be provided")
34
+
35
+ with open(tokenizer_file, "rb") as handle:
36
+ self._encoding: tiktoken.Encoding = pickle.load(handle)
37
+
38
+ self._id_to_token: List[str] = [self._encoding.decode([i]) for i in range(self._encoding.n_vocab)]
39
+ self.vocab: Dict[str, int] = {token: idx for idx, token in enumerate(self._id_to_token)}
40
+
41
+ super().__init__(
42
+ bos_token="<|bos|>",
43
+ eos_token="<|bos|>",
44
+ unk_token="<|bos|>",
45
+ pad_token="<|bos|>",
46
+ **kwargs,
47
+ )
48
+
49
+ self.bos_token_id = self.vocab[self.bos_token]
50
+ self.eos_token_id = self.vocab[self.eos_token]
51
+ self.unk_token_id = self.vocab[self.unk_token]
52
+ self.pad_token_id = self.vocab[self.pad_token]
53
+
54
+ @property
55
+ def vocab_size(self) -> int:
56
+ return len(self._id_to_token)
57
+
58
+ def get_vocab(self) -> Dict[str, int]:
59
+ return dict(self.vocab)
60
+
61
+ def _tokenize(self, text: str) -> List[str]:
62
+ token_ids = self._encoding.encode_ordinary(text)
63
+ return [self._id_to_token[token_id] for token_id in token_ids]
64
+
65
+ def _convert_token_to_id(self, token: str) -> int:
66
+ return self.vocab.get(token, self.unk_token_id)
67
+
68
+ def _convert_id_to_token(self, index: int) -> str:
69
+ return self._id_to_token[index]
70
+
71
+ def build_inputs_with_special_tokens(
72
+ self,
73
+ token_ids_0: List[int],
74
+ token_ids_1: Optional[List[int]] = None,
75
+ ) -> List[int]:
76
+ if token_ids_1 is not None:
77
+ raise ValueError("nanochat tokenizer only supports single sequences")
78
+ return [self.bos_token_id] + token_ids_0
79
+
80
+ def create_token_type_ids_from_sequences(
81
+ self,
82
+ token_ids_0: List[int],
83
+ token_ids_1: Optional[List[int]] = None,
84
+ ) -> List[int]:
85
+ del token_ids_1
86
+ return [0] * (len(token_ids_0) + 1) # +1 for BOS
87
+
88
+ def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str]:
89
+ target_dir = os.path.join(save_directory, "tokenizer")
90
+ os.makedirs(target_dir, exist_ok=True)
91
+ filename = (filename_prefix + "-" if filename_prefix else "") + "tokenizer.pkl"
92
+ dest_file = os.path.join(target_dir, filename)
93
+ with open(dest_file, "wb") as handle:
94
+ pickle.dump(self._encoding, handle)
95
+ return (dest_file,)
96
+
97
+ def _decode(
98
+ self,
99
+ token_ids: List[int],
100
+ skip_special_tokens: bool = False,
101
+ clean_up_tokenization_spaces: Optional[bool] = None,
102
+ spaces_between_special_tokens: bool = True,
103
+ **kwargs,
104
+ ) -> str:
105
+ del clean_up_tokenization_spaces, spaces_between_special_tokens, kwargs
106
+ if skip_special_tokens:
107
+ token_ids = [tid for tid in token_ids if tid not in self.all_special_ids]
108
+ return self._encoding.decode(token_ids)