File size: 3,871 Bytes
f22c128 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# Run output location, train/eval/predict switches, batch sizes, optimizer
# hyperparameters and LR schedule.
#
# NOTE(review): booleans and numbers in this file are stored as quoted strings
# ('False', '8', '0.001'), so a YAML loader returns str for them, not
# bool/int/float. Any consumer has to convert them explicitly.
# The unquoted word None is likewise a plain string in YAML (a real null is
# spelled `null` or `~`), and enum values such as IntervalStrategy.NO are the
# enum's printed name, not its underlying value.
output_dir: checkpoints/EleutherAI/pythia-14m
overwrite_output_dir: 'False'
do_train: 'False'
do_eval: 'False'
do_predict: 'False'
eval_strategy: IntervalStrategy.NO
prediction_loss_only: 'False'
# 8 samples per device with 8 accumulation steps: presumably 64 samples per
# optimizer step per device. TODO confirm against the trainer that reads this.
per_device_train_batch_size: '8'
per_device_eval_batch_size: '8'
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: '8'
eval_accumulation_steps: None
eval_delay: '0'
torch_empty_cache_steps: None
learning_rate: '0.001'
weight_decay: '0.0'
adam_beta1: '0.9'
adam_beta2: '0.999'
# NOTE(review): unlike the other numbers this value is unquoted. YAML 1.1
# loaders such as PyYAML read 1e-08 as a string (no decimal point), while
# YAML 1.2 core-schema loaders read it as a float. Confirm which loader
# consumes this file.
adam_epsilon: 1e-08
max_grad_norm: '1.0'
num_train_epochs: '3.0'
max_steps: '-1'
lr_scheduler_type: SchedulerType.LINEAR
# A string containing the text {}, not an empty YAML mapping.
lr_scheduler_kwargs: '{}'
warmup_ratio: '0.0'
warmup_steps: '0'
# Logging and checkpoint-saving settings.
log_level: warning
log_level_replica: warning
log_on_each_node: 'True'
# The final path component (Jul16_19-19-58_0082549b2b6f) looks like a
# timestamp plus host id, so this path appears to be specific to one run.
logging_dir: checkpoints/EleutherAI/pythia-14m/runs/Jul16_19-19-58_0082549b2b6f
logging_strategy: IntervalStrategy.STEPS
logging_first_step: 'True'
logging_steps: '250'
logging_nan_inf_filter: 'True'
# NOTE(review): saving uses a 300-step interval while logging uses 250, so
# checkpoints and log points presumably do not line up. Confirm this is intended.
save_strategy: IntervalStrategy.STEPS
save_steps: '300'
save_total_limit: None
save_safetensors: 'True'
save_on_each_node: 'False'
save_only_model: 'False'
restore_callback_states_from_checkpoint: 'False'
# Device selection, random seed, numeric precision, distributed and dataloader
# settings. The fp16/bf16 flags below are all 'False' in this dump.
no_cuda: 'False'
use_cpu: 'False'
use_mps_device: 'False'
seed: '42'
data_seed: None
jit_mode_eval: 'False'
use_ipex: 'False'
bf16: 'False'
fp16: 'False'
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: 'False'
fp16_full_eval: 'False'
tf32: None
local_rank: '0'
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: 'False'
# A string containing the text [], not an empty YAML sequence.
debug: '[]'
dataloader_drop_last: 'False'
eval_steps: None
dataloader_num_workers: '0'
dataloader_prefetch_factor: None
past_index: '-1'
# Run naming, label/column handling, FSDP and accelerate configuration,
# optimizer choice and experiment reporting.
#
# NOTE(review): run_name has the same model name (pythia-14m) on both sides of
# "distilled_from". Confirm that is the intended pairing.
run_name: EleutherAI/pythia-14m_distilled_from_pythia-14m
disable_tqdm: 'False'
remove_unused_columns: 'False'
# The list- and dict-looking values in this section are single-quoted strings
# holding a Python-style repr ('' is the YAML escape for one single quote).
# They are not YAML sequences or mappings and need separate parsing.
label_names: '[''input_ids'']'
load_best_model_at_end: 'False'
metric_for_best_model: None
greater_is_better: None
ignore_data_skip: 'False'
fsdp: '[]'
fsdp_min_num_params: '0'
# Single-quoted scalar continued on the next line; do not insert anything
# between the two lines.
fsdp_config: '{''min_num_params'': 0, ''xla'': False, ''xla_fsdp_v2'': False, ''xla_fsdp_grad_ckpt'':
False}'
fsdp_transformer_layer_cls_to_wrap: None
# Single-quoted scalar spanning three lines; do not insert anything between them.
accelerator_config: '{''split_batches'': False, ''dispatch_batches'': None, ''even_batches'':
True, ''use_seedable_sampler'': True, ''non_blocking'': False, ''gradient_accumulation_kwargs'':
None, ''use_configured_state'': False}'
deepspeed: None
label_smoothing_factor: '0.0'
optim: OptimizerNames.ADAMW_TORCH
optim_args: None
adafactor: 'False'
group_by_length: 'False'
length_column_name: length
report_to: '[''wandb'']'
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: None
dataloader_pin_memory: 'True'
dataloader_persistent_workers: 'False'
skip_memory_metrics: 'True'
use_legacy_prediction_loop: 'False'
# Model-hub upload settings. Pushing is enabled ('True') to the repository id
# test-distillation with hub_strategy HubStrategy.EVERY_SAVE.
# NOTE(review): hub_token is None here, so credentials presumably come from
# the environment or a cached login. Confirm before relying on uploads.
push_to_hub: 'True'
resume_from_checkpoint: None
hub_model_id: test-distillation
hub_strategy: HubStrategy.EVERY_SAVE
hub_token: None
hub_private_repo: 'False'
hub_always_push: 'False'
gradient_checkpointing: 'False'
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: 'False'
eval_do_concat_batches: 'True'
fp16_backend: auto
# evaluation_strategy and the push_to_hub_* keys sit alongside eval_strategy
# and hub_* keys elsewhere in this file and are all None. They are presumably
# legacy aliases. TODO confirm.
evaluation_strategy: None
push_to_hub_model_id: None
push_to_hub_organization: None
push_to_hub_token: None
# Miscellaneous runtime flags: GPU count, determinism, compilation, token
# accounting and evaluation extras. As in the rest of the file, values are
# strings; unquoted None is the literal text None, not a YAML null.
_n_gpu: '1'
# Empty string, not a missing value.
mp_parameters: ''
auto_find_batch_size: 'False'
full_determinism: 'False'
torchdynamo: None
ray_scope: last
ddp_timeout: '1800'
torch_compile: 'False'
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: 'False'
include_num_input_tokens_seen: 'False'
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: 'False'
eval_on_start: 'False'
use_liger_kernel: 'False'
eval_use_gather_object: 'False'
# Checkpoint directory and S3 upload settings. These keys appear to be
# project-specific additions to the trainer arguments. TODO confirm which
# component reads them.
#
# NOTE(review): checkpoints_dir contains a doubled slash (.//checkpoints/).
# Verify the consumer normalizes the path.
checkpoints_dir: .//checkpoints/
init_step: '0'
# Same value as logging_steps ('250') elsewhere in this file.
save_log_steps: '250'
bucket_name: devinterp-language
# NOTE(review): this folder names tetrahedron-3m while output_dir and run_name
# name pythia-14m. It may be a stale default; confirm before enabling uploads.
s3_folder: checkpoints/tetrahedron-3m
delete_after_upload: 'False'
# push_to_aws is 'False', so the bucket/folder settings above are presumably
# unused in this run.
push_to_aws: 'False'
|