Model save

Files changed (8)

README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: HuggingFaceTB/SmolLM-360M
 library_name: transformers
 model_name: smollm-360M-instruct-new
 tags:
@@ -11,7 +11,7 @@ licence: license
 # Model Card for smollm-360M-instruct-new
-This model is a fine-tuned version of [HuggingFaceTB/SmolLM-360M](https://huggingface.co/HuggingFaceTB/SmolLM-360M).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/ji2zjdjv)
 This model was trained with SFT.

 ---
+base_model: HuggingFaceTB/SmolLM-135M
 library_name: transformers
 model_name: smollm-360M-instruct-new
 tags:
 # Model Card for smollm-360M-instruct-new
+This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M](https://huggingface.co/HuggingFaceTB/SmolLM-135M).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 ## Quick start
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/7e64o2gq)
 This model was trained with SFT.

all_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 0.9999235532451648,
-    "total_flos": 3.034423594613473e+17,
-    "train_loss": 0.9440924714646938,
-    "train_runtime": 19382.7999,
     "train_samples": 321330,
-    "train_samples_per_second": 5.399,
-    "train_steps_per_second": 0.337
 }

 {
     "epoch": 0.9999235532451648,
+    "total_flos": 1.024299027881001e+17,
+    "train_loss": 1.1466178722155569,
+    "train_runtime": 11966.5261,
     "train_samples": 321330,
+    "train_samples_per_second": 8.745,
+    "train_steps_per_second": 0.547
 }

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "HuggingFaceTB/SmolLM-360M",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -9,15 +9,15 @@
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
-  "hidden_size": 960,
   "initializer_range": 0.02,
-  "intermediate_size": 2560,
   "max_position_embeddings": 2048,
   "mlp_bias": false,
   "model_type": "llama",
-  "num_attention_heads": 15,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 5,
   "pad_token_id": 2,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,

 {
+  "_name_or_path": "HuggingFaceTB/SmolLM-135M",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "eos_token_id": 2,
   "head_dim": 64,
   "hidden_act": "silu",
+  "hidden_size": 576,
   "initializer_range": 0.02,
+  "intermediate_size": 1536,
   "max_position_embeddings": 2048,
   "mlp_bias": false,
   "model_type": "llama",
+  "num_attention_heads": 9,
+  "num_hidden_layers": 30,
+  "num_key_value_heads": 3,
   "pad_token_id": 2,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9381021883733195e8b22dc44fcd7e8c80ec8c1aa2ae02480976424179d76112
-size 723674912

 version https://git-lfs.github.com/spec/v1
+oid sha256:22f8564cd6476d6de51763addb9989e6bf6d3ca18bc585f60f942c6d5b112dcd
+size 269060552

runs/Apr04_06-13-04_73046ea093b0/events.out.tfevents.1743747206.73046ea093b0.3984.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a76106fe1e7c9e488ea0d47e3a16904a5ed57d15ec469beabf4a3855f6c020b
+size 282355

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
     "epoch": 0.9999235532451648,
-    "total_flos": 3.034423594613473e+17,
-    "train_loss": 0.9440924714646938,
-    "train_runtime": 19382.7999,
     "train_samples": 321330,
-    "train_samples_per_second": 5.399,
-    "train_steps_per_second": 0.337
 }

 {
     "epoch": 0.9999235532451648,
+    "total_flos": 1.024299027881001e+17,
+    "train_loss": 1.1466178722155569,
+    "train_runtime": 11966.5261,
     "train_samples": 321330,
+    "train_samples_per_second": 8.745,
+    "train_steps_per_second": 0.547
 }

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See the raw diff.

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:660130340752a85755b183ad2fee1bcf609335970acd7d280ccb869f0799861f
 size 7032

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d0753b93833f9b20b3f3f27dc9e29aa04c0bdbc87262daff1d48a2ac02fcb21
 size 7032