furproxy committed on
Commit
ecdaafc
·
verified ·
1 Parent(s): 1aebfff

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/README-checkpoint.md CHANGED
@@ -37,12 +37,12 @@ More information needed
37
  The following hyperparameters were used during training:
38
  - family_to_muon_lr = {
39
  "language": _fallback(getattr(training_args, "language_muon_lr", 2e-5), language_lr),
40
- "vision": _fallback(getattr(training_args, "vision_muon_lr", 4e-5), vision_lr),
41
- "merger": _fallback(getattr(training_args, "merger_muon_lr", 2e-5), merger_lr),
42
  }
43
 
44
  family_to_adamw_lr = {
45
- "language": _fallback(getattr(training_args, "language_adamw_lr", 1e-5), language_lr),
46
  "vision": _fallback(getattr(training_args, "vision_adamw_lr", 5e-6), vision_lr),
47
  "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
  }
 
37
  The following hyperparameters were used during training:
38
  - family_to_muon_lr = {
39
  "language": _fallback(getattr(training_args, "language_muon_lr", 2e-5), language_lr),
40
+ "vision": _fallback(getattr(training_args, "vision_muon_lr", 2e-5), vision_lr),
41
+ "merger": _fallback(getattr(training_args, "merger_muon_lr", 4e-5), merger_lr),
42
  }
43
 
44
  family_to_adamw_lr = {
45
+ "language": _fallback(getattr(training_args, "language_adamw_lr", 5e-6), language_lr),
46
  "vision": _fallback(getattr(training_args, "vision_adamw_lr", 5e-6), vision_lr),
47
  "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
  }
README.md CHANGED
@@ -37,12 +37,12 @@ More information needed
37
  The following hyperparameters were used during training:
38
  - family_to_muon_lr = {
39
  "language": _fallback(getattr(training_args, "language_muon_lr", 2e-5), language_lr),
40
- "vision": _fallback(getattr(training_args, "vision_muon_lr", 4e-5), vision_lr),
41
- "merger": _fallback(getattr(training_args, "merger_muon_lr", 2e-5), merger_lr),
42
  }
43
 
44
  family_to_adamw_lr = {
45
- "language": _fallback(getattr(training_args, "language_adamw_lr", 1e-5), language_lr),
46
  "vision": _fallback(getattr(training_args, "vision_adamw_lr", 5e-6), vision_lr),
47
  "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
  }
 
37
  The following hyperparameters were used during training:
38
  - family_to_muon_lr = {
39
  "language": _fallback(getattr(training_args, "language_muon_lr", 2e-5), language_lr),
40
+ "vision": _fallback(getattr(training_args, "vision_muon_lr", 2e-5), vision_lr),
41
+ "merger": _fallback(getattr(training_args, "merger_muon_lr", 4e-5), merger_lr),
42
  }
43
 
44
  family_to_adamw_lr = {
45
+ "language": _fallback(getattr(training_args, "language_adamw_lr", 5e-6), language_lr),
46
  "vision": _fallback(getattr(training_args, "vision_adamw_lr", 5e-6), vision_lr),
47
  "merger": _fallback(getattr(training_args, "merger_adamw_lr", 1e-5), merger_lr),
48
  }
all_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "effective_tokens_per_sec": 7286.899729026163,
3
  "epoch": 4.0,
4
  "total_flos": 3.4175049861232067e+18,
5
- "train_loss": 0.7562091423920304,
6
- "train_runtime": 9085.3052,
7
- "train_samples_per_second": 5.957,
8
- "train_steps_per_second": 0.199
9
  }
 
1
  {
2
+ "effective_tokens_per_sec": 8369.940564234135,
3
  "epoch": 4.0,
4
  "total_flos": 3.4175049861232067e+18,
5
+ "train_loss": 0.8386258969696222,
6
+ "train_runtime": 7909.6987,
7
+ "train_samples_per_second": 6.842,
8
+ "train_steps_per_second": 0.228
9
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f61c55ce28d865474bb9f937dbfd9678293a9ff5a565896f364808ef09aa8e31
3
  size 20859273368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29a3829346071e30b1ef236360853099f0ee06d5c8dea2b9e2d7eaa0a4b2a8e6
3
  size 20859273368
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "effective_tokens_per_sec": 7286.899729026163,
3
  "epoch": 4.0,
4
  "total_flos": 3.4175049861232067e+18,
5
- "train_loss": 0.7562091423920304,
6
- "train_runtime": 9085.3052,
7
- "train_samples_per_second": 5.957,
8
- "train_steps_per_second": 0.199
9
  }
 
1
  {
2
+ "effective_tokens_per_sec": 8369.940564234135,
3
  "epoch": 4.0,
4
  "total_flos": 3.4175049861232067e+18,
5
+ "train_loss": 0.8386258969696222,
6
+ "train_runtime": 7909.6987,
7
+ "train_samples_per_second": 6.842,
8
+ "train_steps_per_second": 0.228
9
  }
trainer_log.jsonl CHANGED
The diff for this file is too large to render. See raw diff
 
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_loss.png CHANGED