Trouter-Library committed on
Commit
e62d89f
·
verified ·
1 Parent(s): f6f63c0

Create training_args.json

Browse files
Files changed (1) hide show
  1. training_args.json +50 -0
training_args.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "./helion-v1.5-checkpoints",
3
+ "overwrite_output_dir": true,
4
+ "do_train": true,
5
+ "do_eval": true,
6
+ "evaluation_strategy": "steps",
7
+ "eval_steps": 500,
8
+ "per_device_train_batch_size": 4,
9
+ "per_device_eval_batch_size": 4,
10
+ "gradient_accumulation_steps": 8,
11
+ "learning_rate": 2e-05,
12
+ "weight_decay": 0.01,
13
+ "num_train_epochs": 3,
14
+ "lr_scheduler_type": "cosine",
15
+ "warmup_steps": 100,
16
+ "logging_dir": "./logs",
17
+ "logging_steps": 10,
18
+ "save_strategy": "steps",
19
+ "save_steps": 500,
20
+ "save_total_limit": 3,
21
+ "fp16": false,
22
+ "bf16": true,
23
+ "max_grad_norm": 1.0,
24
+ "optim": "adamw_torch",
25
+ "group_by_length": true,
26
+ "length_column_name": "length",
27
+ "report_to": [
28
+ "tensorboard",
29
+ "wandb"
30
+ ],
31
+ "load_best_model_at_end": true,
32
+ "metric_for_best_model": "eval_loss",
33
+ "greater_is_better": false,
34
+ "gradient_checkpointing": true,
35
+ "gradient_checkpointing_kwargs": {
36
+ "use_reentrant": false
37
+ },
38
+ "dataloader_num_workers": 4,
39
+ "dataloader_pin_memory": true,
40
+ "ddp_find_unused_parameters": false,
41
+ "torch_compile": false,
42
+ "max_steps": -1,
43
+ "save_safetensors": true,
44
+ "push_to_hub": false,
45
+ "hub_model_id": "DeepXR/Helion-V1.5",
46
+ "hub_strategy": "every_save",
47
+ "hub_token": null,
48
+ "auto_find_batch_size": false,
49
+ "include_inputs_for_metrics": false
50
+ }