trohrbaugh committed on
Commit
e549f26
·
verified ·
1 Parent(s): 9f532e7

starcoder2: add gpt_bigcode pattern for tiny_starcoder_py (234K dl)

Browse files
Files changed (1) hide show
  1. scan.py +2 -1
scan.py CHANGED
@@ -233,8 +233,9 @@ KNOWN_BASES = {
233
  "starcoder2": {
234
  "name": "BigCode StarCoder2",
235
  "vocab_size": 49152,
236
- "model_type_patterns": ["starcoder2"],
237
  # 3B: hidden=3072/30L (97K dl), 7B: hidden=4608/32L, 15B: hidden=6144/40L
 
238
  # Code-specialized tokenizer (49152 tokens)
239
  },
240
  "zaya": {
 
233
  "starcoder2": {
234
  "name": "BigCode StarCoder2",
235
  "vocab_size": 49152,
236
+ "model_type_patterns": ["starcoder2", "gpt_bigcode"],
237
  # 3B: hidden=3072/30L (97K dl), 7B: hidden=4608/32L, 15B: hidden=6144/40L
238
+ # gpt_bigcode = tiny_starcoder_py and early StarCoder variants (234K dl)
239
  # Code-specialized tokenizer (49152 tokens)
240
  },
241
  "zaya": {