jumelet committed on
Commit
8d4a01c
·
verified ·
1 Parent(s): 5aff23c

Add main & ema weights for fas

Browse files
README.md CHANGED
@@ -27,13 +27,13 @@ ema, main
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
- "hidden_size": 384,
31
- "intermediate_size": 1280,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
- "num_attention_heads": 6,
35
  "num_hidden_layers": 12,
36
- "vocab_size": 8192,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
@@ -41,7 +41,7 @@ ema, main
41
  "num_labels": 2
42
  }
43
  ```
44
- Tokenizer file: `tokenizer_fas_vs8192.json`
45
 
46
  ## Quick Usage
47
  ```python
@@ -68,6 +68,6 @@ print(outputs.logits)
68
  ```
69
 
70
  ## Notes
71
- - Converted on 2025-10-06T00:31:37.440575+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
 
27
  {
28
  "attention_probs_dropout_prob": 0.1,
29
  "hidden_dropout_prob": 0.1,
30
+ "hidden_size": 768,
31
+ "intermediate_size": 2560,
32
  "max_position_embeddings": 512,
33
  "position_bucket_size": 32,
34
+ "num_attention_heads": 12,
35
  "num_hidden_layers": 12,
36
+ "vocab_size": 16384,
37
  "layer_norm_eps": 1e-05,
38
  "force_causal_mask": true,
39
  "classifier_dropout": 0.1,
 
41
  "num_labels": 2
42
  }
43
  ```
44
+ Tokenizer file: `tokenizer_fas_vs16384.json`
45
 
46
  ## Quick Usage
47
  ```python
 
68
  ```
69
 
70
  ## Notes
71
+ - Converted on 2025-10-07T00:51:49.351833+00:00
72
  - Weights are the exact trained parameters; no new layers were initialized.
73
  - Requires `trust_remote_code=True` due to custom architecture.
config.json CHANGED
@@ -18,16 +18,16 @@
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
- "hidden_size": 384,
22
- "intermediate_size": 1280,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
- "num_attention_heads": 6,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
- "vocab_size": 8192
33
  }
 
18
  "eos_token_id": 2,
19
  "force_causal_mask": true,
20
  "hidden_dropout_prob": 0.1,
21
+ "hidden_size": 768,
22
+ "intermediate_size": 2560,
23
  "layer_norm_eps": 1e-05,
24
  "mask_token_id": 4,
25
  "max_position_embeddings": 512,
26
  "model_type": "gpt_bert",
27
+ "num_attention_heads": 12,
28
  "num_hidden_layers": 12,
29
  "num_labels": 2,
30
  "pad_token_id": 3,
31
  "position_bucket_size": 32,
32
+ "vocab_size": 16384
33
  }
fas-2gpu-500steps.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e922c32f665605d12b06913cb6c05f6ddcfd8a667d1e4f0ff63b6f74ea1926a
3
  size 503042738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2344aad24c7ca5d361ce2143d2d12d8b5c993fa8838534b145fdd3b5e2e6c6e
3
  size 503042738
fas-2gpu-500steps_ema.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dccddb5e2398a8a953c390eeb3dcf1878d1798a774801ff7e418ac77320272ba
3
  size 503043438
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b531f5e01fcbcae3ae3aeb14fc9f1f30e8cd232de580a9c580d51476977387bd
3
  size 503043438
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5dc3b5552de3aca100bcc961519367e8926752d32f14221b05b91e20f238c27
3
  size 553332392
model_ema.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab5b76b5985c0f512922b608f154c4fca95ee75cddd095559a9776a2a55ef947
3
  size 553332392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5dc3b5552de3aca100bcc961519367e8926752d32f14221b05b91e20f238c27
3
  size 553332392
original_project_config.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
- "hidden_size": 384,
5
- "intermediate_size": 1280,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
- "num_attention_heads": 6,
9
  "num_hidden_layers": 12,
10
- "vocab_size": 8192,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
 
1
  {
2
  "attention_probs_dropout_prob": 0.1,
3
  "hidden_dropout_prob": 0.1,
4
+ "hidden_size": 768,
5
+ "intermediate_size": 2560,
6
  "max_position_embeddings": 512,
7
  "position_bucket_size": 32,
8
+ "num_attention_heads": 12,
9
  "num_hidden_layers": 12,
10
+ "vocab_size": 16384,
11
  "layer_norm_eps": 1e-05,
12
  "force_causal_mask": true,
13
  "classifier_dropout": 0.1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4462e964cf00ec32e745e3f89f60d755476453e010a2033bd91aaa5a2f178df
3
  size 503029622
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0dea2a8eb8d1b7f730f52a79fa0fac9d2518590862b8c8b1c6f6be93dc20a6c
3
  size 503029622
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff