# Training configuration dump (Hugging Face TrainingArguments) for the
# EleutherAI/pythia-14m distillation run.
# Source blob metadata: commit f22c128, file size 3,871 bytes.
output_dir: checkpoints/EleutherAI/pythia-14m
overwrite_output_dir: 'False'
do_train: 'False'
do_eval: 'False'
do_predict: 'False'
eval_strategy: IntervalStrategy.NO
prediction_loss_only: 'False'
per_device_train_batch_size: '8'
per_device_eval_batch_size: '8'
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: '8'
eval_accumulation_steps: None
eval_delay: '0'
torch_empty_cache_steps: None
learning_rate: '0.001'
weight_decay: '0.0'
adam_beta1: '0.9'
adam_beta2: '0.999'
adam_epsilon: 1e-08
max_grad_norm: '1.0'
num_train_epochs: '3.0'
max_steps: '-1'
lr_scheduler_type: SchedulerType.LINEAR
lr_scheduler_kwargs: '{}'
warmup_ratio: '0.0'
warmup_steps: '0'
log_level: warning
log_level_replica: warning
log_on_each_node: 'True'
logging_dir: checkpoints/EleutherAI/pythia-14m/runs/Jul16_19-19-58_0082549b2b6f
logging_strategy: IntervalStrategy.STEPS
logging_first_step: 'True'
logging_steps: '250'
logging_nan_inf_filter: 'True'
save_strategy: IntervalStrategy.STEPS
save_steps: '300'
save_total_limit: None
save_safetensors: 'True'
save_on_each_node: 'False'
save_only_model: 'False'
restore_callback_states_from_checkpoint: 'False'
no_cuda: 'False'
use_cpu: 'False'
use_mps_device: 'False'
seed: '42'
data_seed: None
jit_mode_eval: 'False'
use_ipex: 'False'
bf16: 'False'
fp16: 'False'
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: 'False'
fp16_full_eval: 'False'
tf32: None
local_rank: '0'
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: 'False'
debug: '[]'
dataloader_drop_last: 'False'
eval_steps: None
dataloader_num_workers: '0'
dataloader_prefetch_factor: None
past_index: '-1'
run_name: EleutherAI/pythia-14m_distilled_from_pythia-14m
disable_tqdm: 'False'
remove_unused_columns: 'False'
label_names: '[''input_ids'']'
load_best_model_at_end: 'False'
metric_for_best_model: None
greater_is_better: None
ignore_data_skip: 'False'
fsdp: '[]'
fsdp_min_num_params: '0'
fsdp_config: '{''min_num_params'': 0, ''xla'': False, ''xla_fsdp_v2'': False, ''xla_fsdp_grad_ckpt'':
  False}'
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: '{''split_batches'': False, ''dispatch_batches'': None, ''even_batches'':
  True, ''use_seedable_sampler'': True, ''non_blocking'': False, ''gradient_accumulation_kwargs'':
  None, ''use_configured_state'': False}'
deepspeed: None
label_smoothing_factor: '0.0'
optim: OptimizerNames.ADAMW_TORCH
optim_args: None
adafactor: 'False'
group_by_length: 'False'
length_column_name: length
report_to: '[''wandb'']'
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: None
dataloader_pin_memory: 'True'
dataloader_persistent_workers: 'False'
skip_memory_metrics: 'True'
use_legacy_prediction_loop: 'False'
push_to_hub: 'True'
resume_from_checkpoint: None
hub_model_id: test-distillation
hub_strategy: HubStrategy.EVERY_SAVE
hub_token: None
hub_private_repo: 'False'
hub_always_push: 'False'
gradient_checkpointing: 'False'
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: 'False'
eval_do_concat_batches: 'True'
fp16_backend: auto
evaluation_strategy: None
push_to_hub_model_id: None
push_to_hub_organization: None
push_to_hub_token: None
_n_gpu: '1'
mp_parameters: ''
auto_find_batch_size: 'False'
full_determinism: 'False'
torchdynamo: None
ray_scope: last
ddp_timeout: '1800'
torch_compile: 'False'
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: 'False'
include_num_input_tokens_seen: 'False'
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: 'False'
eval_on_start: 'False'
use_liger_kernel: 'False'
eval_use_gather_object: 'False'
checkpoints_dir: .//checkpoints/
init_step: '0'
save_log_steps: '250'
bucket_name: devinterp-language
s3_folder: checkpoints/tetrahedron-3m
delete_after_upload: 'False'
push_to_aws: 'False'