jumelet committed on
Commit
a5ea7c5
·
verified ·
1 Parent(s): 8227b3b

Add main & ema weights for ind

Browse files
README.md CHANGED
@@ -27,13 +27,13 @@ ema, main
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
- "hidden_size": 384,
31
- "intermediate_size": 1280,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
- "num_attention_heads": 6,
35
  "num_hidden_layers": 12,
36
- "vocab_size": 8192,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
@@ -41,7 +41,7 @@ ema, main
41
  "num_labels": 2
42
  }
43
  ```
44
- Tokenizer file: `tokenizer_ind_vs8192.json`
45
 
46
  ## Quick Usage
47
  ```python
@@ -68,6 +68,6 @@ print(outputs.logits)
68
  ```
69
 
70
  ## Notes
71
- - Converted on 2025-10-06T00:34:41.843970+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
 
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
+ "hidden_size": 768,
31
+ "intermediate_size": 2560,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
+ "num_attention_heads": 12,
35
  "num_hidden_layers": 12,
36
+ "vocab_size": 16384,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
 
41
  "num_labels": 2
42
  }
43
  ```
44
+ Tokenizer file: `tokenizer_ind_vs16384.json`
45
 
46
  ## Quick Usage
47
  ```python
 
68
  ```
69
 
70
  ## Notes
71
+ - Converted on 2025-10-07T00:53:45.582592+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
config.json CHANGED
@@ -18,16 +18,16 @@
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
- "hidden_size": 384,
22
- "intermediate_size": 1280,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
- "num_attention_heads": 6,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
- "vocab_size": 8192
33
  }
 
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
+ "hidden_size": 768,
22
+ "intermediate_size": 2560,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
+ "num_attention_heads": 12,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
+ "vocab_size": 16384
33
  }
ind-2gpu-500steps.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:165fc9cceee621c3df426cf19e1f1e59e2953cbabe7adc77fc790cd2e18e3d03
3
  size 503042738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8c14e75b5394a0b626176164485e3aba0ce0e30a9c2d6dcff9607cd76a318e8
3
  size 503042738
ind-2gpu-500steps_ema.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:196ebeb7663d5870b64f8616d9be904e5e86bb3a356ac89356ca82edad290a31
3
  size 503043438
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e889b748317cb4d78527bf37bbf5ab96ee8ec885fdd23fbdfbb34be56862d5e5
3
  size 503043438
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c37d0c0bc6109234271fae3ffcd0db3c735ff423ebd9bbb68fe1ae1e3350bda
3
  size 553332392
model_ema.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c37d0c0bc6109234271fae3ffcd0db3c735ff423ebd9bbb68fe1ae1e3350bda
3
  size 553332392
original_project_config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
- "hidden_size": 384,
5
- "intermediate_size": 1280,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
- "num_attention_heads": 6,
9
  "num_hidden_layers": 12,
10
- "vocab_size": 8192,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
 
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
+ "hidden_size": 768,
5
+ "intermediate_size": 2560,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
+ "num_attention_heads": 12,
9
  "num_hidden_layers": 12,
10
+ "vocab_size": 16384,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4462e964cf00ec32e745e3f89f60d755476453e010a2033bd91aaa5a2f178df
3
  size 503029622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b0dd172c62009de0354151925d73195e03da1054801df10c0a9d524817816e3
3
  size 503029622
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff