Training in progress, step 2500

Files changed (3) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./checkpoint-10000",
   "architectures": [
     "MambaForCausalLM"
   ],
@@ -24,8 +24,8 @@
   "time_step_min": 0.001,
   "time_step_rank": 100,
   "time_step_scale": 1.0,
-  "torch_dtype": "float32",
-  "transformers_version": "4.41.1",
   "use_bias": false,
   "use_cache": true,
   "use_conv_bias": true,

 {
+  "_name_or_path": "neeleshg23/draft-mamba-4-1600",
   "architectures": [
     "MambaForCausalLM"
   ],
   "time_step_min": 0.001,
   "time_step_rank": 100,
   "time_step_scale": 1.0,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.1",
   "use_bias": false,
   "use_cache": true,
   "use_conv_bias": true,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae6fe05d47f04f59f51b2da9ab04b460f5df52f41200c78383c923f094bde840
-size 463658944

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c934472c3a06753d5254ae3b649afd095d7b7bfa3cd746e3c2cb0db8d783a43
+size 232241360

training_args.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:248fbdad713297f54d9cb5336d3293ff25ab843df1f3ddac9ca486b51c34795c
+size 5304