Fedor Krasnov committed on
Commit
2123e77
·
1 Parent(s): 2bf7408

python3.10 c2_mamba_mlm_train.py -e 2 -b 32 -hs 256 -hl 8 -is 128 -ss 16 --no-mlm -mp data/models/mamba8_preds_black --tokenizer_model_path data/C2MPD -lp q

Browse files
Files changed (3) hide show
  1. config.json +2 -2
  2. model.safetensors +2 -2
  3. tokenizer.json +0 -0
config.json CHANGED
@@ -12,7 +12,7 @@
12
  "intermediate_size": 128,
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "mamba",
15
- "num_hidden_layers": 6,
16
  "pad_token_id": 0,
17
  "rescale_prenorm_residual": false,
18
  "residual_in_fp32": true,
@@ -29,5 +29,5 @@
29
  "use_cache": true,
30
  "use_conv_bias": true,
31
  "use_mambapy": true,
32
- "vocab_size": 32000
33
  }
 
12
  "intermediate_size": 128,
13
  "layer_norm_epsilon": 1e-05,
14
  "model_type": "mamba",
15
+ "num_hidden_layers": 8,
16
  "pad_token_id": 0,
17
  "rescale_prenorm_residual": false,
18
  "residual_in_fp32": true,
 
29
  "use_cache": true,
30
  "use_conv_bias": true,
31
  "use_mambapy": true,
32
+ "vocab_size": 32768
33
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:699fc6f0e73f31cb3a8cedf97733cfa67a7254cc82d9fdc42ab6865352b36390
3
- size 35408216
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98d33fc899d5d2281ee1507404c7d2f6a727634bad55e476910f4d7f9b85bc0e
3
+ size 37074304
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff