adeo committed on
Commit
1328e35
·
verified ·
1 Parent(s): d0dec4d

Model save

Browse files
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- base_model: HuggingFaceTB/SmolLM-360M
3
  library_name: transformers
4
  model_name: smollm-360M-instruct-new
5
  tags:
@@ -11,7 +11,7 @@ licence: license
11
 
12
  # Model Card for smollm-360M-instruct-new
13
 
14
- This model is a fine-tuned version of [HuggingFaceTB/SmolLM-360M](https://huggingface.co/HuggingFaceTB/SmolLM-360M).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/ji2zjdjv)
31
 
32
  This model was trained with SFT.
33
 
 
1
  ---
2
+ base_model: HuggingFaceTB/SmolLM-135M
3
  library_name: transformers
4
  model_name: smollm-360M-instruct-new
5
  tags:
 
11
 
12
  # Model Card for smollm-360M-instruct-new
13
 
14
+ This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M](https://huggingface.co/HuggingFaceTB/SmolLM-135M).
15
  It has been trained using [TRL](https://github.com/huggingface/trl).
16
 
17
  ## Quick start
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/bootpin/huggingface/runs/7e64o2gq)
31
 
32
  This model was trained with SFT.
33
 
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9999235532451648,
3
- "total_flos": 3.034423594613473e+17,
4
- "train_loss": 0.9440924714646938,
5
- "train_runtime": 19382.7999,
6
  "train_samples": 321330,
7
- "train_samples_per_second": 5.399,
8
- "train_steps_per_second": 0.337
9
  }
 
1
  {
2
  "epoch": 0.9999235532451648,
3
+ "total_flos": 1.024299027881001e+17,
4
+ "train_loss": 1.1466178722155569,
5
+ "train_runtime": 11966.5261,
6
  "train_samples": 321330,
7
+ "train_samples_per_second": 8.745,
8
+ "train_steps_per_second": 0.547
9
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "HuggingFaceTB/SmolLM-360M",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -9,15 +9,15 @@
9
  "eos_token_id": 2,
10
  "head_dim": 64,
11
  "hidden_act": "silu",
12
- "hidden_size": 960,
13
  "initializer_range": 0.02,
14
- "intermediate_size": 2560,
15
  "max_position_embeddings": 2048,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
- "num_attention_heads": 15,
19
- "num_hidden_layers": 32,
20
- "num_key_value_heads": 5,
21
  "pad_token_id": 2,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
 
1
  {
2
+ "_name_or_path": "HuggingFaceTB/SmolLM-135M",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
9
  "eos_token_id": 2,
10
  "head_dim": 64,
11
  "hidden_act": "silu",
12
+ "hidden_size": 576,
13
  "initializer_range": 0.02,
14
+ "intermediate_size": 1536,
15
  "max_position_embeddings": 2048,
16
  "mlp_bias": false,
17
  "model_type": "llama",
18
+ "num_attention_heads": 9,
19
+ "num_hidden_layers": 30,
20
+ "num_key_value_heads": 3,
21
  "pad_token_id": 2,
22
  "pretraining_tp": 1,
23
  "rms_norm_eps": 1e-05,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9381021883733195e8b22dc44fcd7e8c80ec8c1aa2ae02480976424179d76112
3
- size 723674912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f8564cd6476d6de51763addb9989e6bf6d3ca18bc585f60f942c6d5b112dcd
3
+ size 269060552
runs/Apr04_06-13-04_73046ea093b0/events.out.tfevents.1743747206.73046ea093b0.3984.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a76106fe1e7c9e488ea0d47e3a16904a5ed57d15ec469beabf4a3855f6c020b
3
+ size 282355
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 0.9999235532451648,
3
- "total_flos": 3.034423594613473e+17,
4
- "train_loss": 0.9440924714646938,
5
- "train_runtime": 19382.7999,
6
  "train_samples": 321330,
7
- "train_samples_per_second": 5.399,
8
- "train_steps_per_second": 0.337
9
  }
 
1
  {
2
  "epoch": 0.9999235532451648,
3
+ "total_flos": 1.024299027881001e+17,
4
+ "train_loss": 1.1466178722155569,
5
+ "train_runtime": 11966.5261,
6
  "train_samples": 321330,
7
+ "train_samples_per_second": 8.745,
8
+ "train_steps_per_second": 0.547
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:660130340752a85755b183ad2fee1bcf609335970acd7d280ccb869f0799861f
3
  size 7032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0753b93833f9b20b3f3f27dc9e29aa04c0bdbc87262daff1d48a2ac02fcb21
3
  size 7032