jumelet committed on
Commit
8d4a01c
·
verified ·
1 Parent(s): 5aff23c

Add main & ema weights for fas

Browse files
README.md CHANGED
@@ -27,13 +27,13 @@ ema, main
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
- "hidden_size": 384,
31
- "intermediate_size": 1280,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
- "num_attention_heads": 6,
35
  "num_hidden_layers": 12,
36
- "vocab_size": 8192,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
@@ -41,7 +41,7 @@ ema, main
41
  "num_labels": 2
42
  }
43
  ```
44
- Tokenizer file: `tokenizer_fas_vs8192.json`
45
 
46
  ## Quick Usage
47
  ```python
@@ -68,6 +68,6 @@ print(outputs.logits)
68
  ```
69
 
70
  ## Notes
71
- - Converted on 2025-10-06T00:31:37.440575+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
 
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
+ "hidden_size": 768,
31
+ "intermediate_size": 2560,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
+ "num_attention_heads": 12,
35
  "num_hidden_layers": 12,
36
+ "vocab_size": 16384,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
 
41
  "num_labels": 2
42
  }
43
  ```
44
+ Tokenizer file: `tokenizer_fas_vs16384.json`
45
 
46
  ## Quick Usage
47
  ```python
 
68
  ```
69
 
70
  ## Notes
71
+ - Converted on 2025-10-07T00:51:49.351833+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
config.json CHANGED
@@ -18,16 +18,16 @@
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
- "hidden_size": 384,
22
- "intermediate_size": 1280,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
- "num_attention_heads": 6,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
- "vocab_size": 8192
33
  }
 
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
+ "hidden_size": 768,
22
+ "intermediate_size": 2560,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
+ "num_attention_heads": 12,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
+ "vocab_size": 16384
33
  }
fas-2gpu-500steps.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e922c32f665605d12b06913cb6c05f6ddcfd8a667d1e4f0ff63b6f74ea1926a
3
  size 503042738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2344aad24c7ca5d361ce2143d2d12d8b5c993fa8838534b145fdd3b5e2e6c6e
3
  size 503042738
fas-2gpu-500steps_ema.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dccddb5e2398a8a953c390eeb3dcf1878d1798a774801ff7e418ac77320272ba
3
  size 503043438
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b531f5e01fcbcae3ae3aeb14fc9f1f30e8cd232de580a9c580d51476977387bd
3
  size 503043438
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5dc3b5552de3aca100bcc961519367e8926752d32f14221b05b91e20f238c27
3
  size 553332392
model_ema.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5dc3b5552de3aca100bcc961519367e8926752d32f14221b05b91e20f238c27
3
  size 553332392
original_project_config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
- "hidden_size": 384,
5
- "intermediate_size": 1280,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
- "num_attention_heads": 6,
9
  "num_hidden_layers": 12,
10
- "vocab_size": 8192,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
 
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
+ "hidden_size": 768,
5
+ "intermediate_size": 2560,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
+ "num_attention_heads": 12,
9
  "num_hidden_layers": 12,
10
+ "vocab_size": 16384,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4462e964cf00ec32e745e3f89f60d755476453e010a2033bd91aaa5a2f178df
3
  size 503029622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0dea2a8eb8d1b7f730f52a79fa0fac9d2518590862b8c8b1c6f6be93dc20a6c
3
  size 503029622
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff