gbyuvd committed (verified)
Commit 5fa06f8 · Parent(s): 0bcda87

Upload 7 files

Files changed (3)
  1. config.json +51 -48
  2. model.safetensors +2 -2
  3. training_args.bin +1 -1
config.json CHANGED
@@ -1,48 +1,51 @@
-{
-  "model_type": "chemq3_mtp",
-  "architectures": [
-    "ChemQ3MTPForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.1,
-  "bos_token_id": 0,
-  "dtype": "float32",
-  "eos_token_id": 1,
-  "head_dim": 64,
-  "hidden_act": "silu",
-  "hidden_size": 320,
-  "initializer_range": 0.02,
-  "intermediate_size": 1280,
-  "layer_types": [
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention",
-    "full_attention"
-  ],
-  "max_position_embeddings": 128,
-  "max_window_layers": 28,
-  "num_attention_heads": 4,
-  "num_hidden_layers": 6,
-  "num_key_value_heads": 2,
-  "pad_token_id": 2,
-  "rms_norm_eps": 1e-06,
-  "rope_scaling": null,
-  "rope_theta": 10000.0,
-  "sliding_window": null,
-  "tie_word_embeddings": true,
-  "transformers_version": "4.56.1",
-  "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 782,
-  "num_future_tokens": 3,
-  "use_mtp_training": true,
-  "horizon_weights": [1.0, 0.9, 0.81],
-  "entropy_controller_config": {
-    "min_entropy": 0.5,
-    "max_entropy": 3.0,
-    "target_entropy": 1.5,
-    "adaptation_rate": 0.01
-  }
-}
+{
+  "architectures": [
+    "ChemQ3MTPForCausalLM"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 0,
+  "dtype": "float32",
+  "entropy_controller_config": {
+    "adaptation_rate": 0.01,
+    "max_entropy": 3.0,
+    "min_entropy": 0.5,
+    "target_entropy": 1.5
+  },
+  "eos_token_id": 1,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 320,
+  "horizon_weights": [
+    1.0,
+    0.9,
+    0.81
+  ],
+  "initializer_range": 0.02,
+  "intermediate_size": 1280,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 128,
+  "max_window_layers": 28,
+  "model_type": "chemq3_mtp",
+  "num_attention_heads": 4,
+  "num_future_tokens": 3,
+  "num_hidden_layers": 6,
+  "num_key_value_heads": 2,
+  "pad_token_id": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.56.1",
+  "use_cache": false,
+  "use_mtp_training": true,
+  "use_sliding_window": false,
+  "vocab_size": 782
+}
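
The rewritten config.json re-sorts its keys alphabetically and drops the explicit "attention_bias": false entry; the substantive values are unchanged. The alphabetical ordering matches what transformers' save_pretrained emits (configs are serialized with sort_keys=True), which suggests the file was regenerated by the library rather than edited by hand. The multi-token-prediction fields sit on top of an otherwise Qwen3-style causal-LM config: "num_future_tokens": 3, "horizon_weights" decaying geometrically as 0.9^k, and an entropy controller with bounds and an adaptation rate. A minimal sketch of reading these fields back, assuming the repo ships the custom chemq3_mtp configuration class for remote-code loading (the repo id below is a placeholder, not confirmed by this commit):

from transformers import AutoConfig

# "chemq3_mtp" is not a built-in model_type, so loading relies on the
# configuration code shipped in the repo (trust_remote_code executes it).
config = AutoConfig.from_pretrained(
    "gbyuvd/chemq3-mtp",   # placeholder repo id, for illustration only
    trust_remote_code=True,
)

# MTP-specific fields added on top of the base causal-LM settings:
print(config.num_future_tokens)          # 3 future tokens per position
print(config.horizon_weights)            # [1.0, 0.9, 0.81], i.e. 0.9**k
print(config.entropy_controller_config)  # entropy bounds, target, rate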
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:35079ed16162d885c701f9e05234933a1fed32f4a2ff5a1218d80de95838523d
-size 39427428
+oid sha256:efc47ed13d40f0ae6949d286a9da8de10d788c7868c55af11f50c2ff4eae8e18
+size 39437252
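
model.safetensors is tracked with Git LFS, so the diff only touches the pointer file: oid is the SHA-256 of the actual payload and size its length in bytes (39,427,428 → 39,437,252 here, consistent with a slightly larger checkpoint). A short sketch, assuming the real weights have been downloaded locally, for verifying a file against the pointer's oid:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream-hash a file so large checkpoints need not fit in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            h.update(chunk)
    return h.hexdigest()

# Compare against the oid recorded in the LFS pointer above.
expected = "efc47ed13d40f0ae6949d286a9da8de10d788c7868c55af11f50c2ff4eae8e18"
assert sha256_of("model.safetensors") == expected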
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:426475db7e68e7905ebadd505f46b81c0da444386a36fba5e9404dc848b3bf00
+oid sha256:1c61f63685fe07f214a969ac255f140d860b1bc3b7d8b23a075a494fdc4c4d63
 size 5368
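
training_args.bin changes hash while keeping the same 5,368-byte size, so only the content of the serialized training arguments differs. transformers' Trainer writes this file with torch.save, i.e. it is a pickle of a TrainingArguments object; a hedged sketch for inspecting it offline (unpickling executes code, so only load files from a source you trust):

import torch
import transformers  # the pickle references transformers.TrainingArguments

# Recent torch versions default to weights_only=True, which rejects
# arbitrary pickled objects; the explicit opt-out is required here and
# is only appropriate because the file comes from a trusted repo.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # expected: TrainingArguments
print(args.learning_rate, args.num_train_epochs)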