# NOTE(review): the three lines below were non-YAML extraction artifacts
# (a file-size banner, git object hashes, and viewer column numbers).
# They are preserved here as comments so the file parses as valid YAML.
# File size: 3,842 Bytes
# 524eab3 87a2ece 793b152 87a2ece 524eab3 87a2ece 524eab3 87a2ece |
# 1 2 3 4 5 6 7 8 9 10 ... 141 (viewer line-number gutter, truncated)
output_dir: checkpoints/triangle-100k-og
overwrite_output_dir: 'False'
do_train: 'False'
do_eval: 'False'
do_predict: 'False'
eval_strategy: IntervalStrategy.NO
prediction_loss_only: 'False'
per_device_train_batch_size: '25'
per_device_eval_batch_size: '8'
per_gpu_train_batch_size: None
per_gpu_eval_batch_size: None
gradient_accumulation_steps: '16'
eval_accumulation_steps: None
eval_delay: '0'
torch_empty_cache_steps: None
learning_rate: '0.001'
weight_decay: '0.05'
adam_beta1: '0.9'
adam_beta2: '0.999'
adam_epsilon: 1e-08
max_grad_norm: '1.0'
num_train_epochs: '3.0'
max_steps: '100'
lr_scheduler_type: SchedulerType.CONSTANT
lr_scheduler_kwargs: '{}'
warmup_ratio: '0.0'
warmup_steps: '0'
log_level: warning
log_level_replica: warning
log_on_each_node: 'True'
logging_dir: checkpoints/triangle-100k-og/runs/Jul09_16-55-42_7be3271c880a
logging_strategy: IntervalStrategy.STEPS
logging_first_step: 'True'
logging_steps: '250'
logging_nan_inf_filter: 'True'
save_strategy: IntervalStrategy.STEPS
save_steps: '0.25'
save_total_limit: None
save_safetensors: 'True'
save_on_each_node: 'False'
save_only_model: 'False'
restore_callback_states_from_checkpoint: 'False'
no_cuda: 'False'
use_cpu: 'False'
use_mps_device: 'False'
seed: '42'
data_seed: None
jit_mode_eval: 'False'
use_ipex: 'False'
bf16: 'False'
fp16: 'False'
fp16_opt_level: O1
half_precision_backend: auto
bf16_full_eval: 'False'
fp16_full_eval: 'False'
tf32: None
local_rank: '0'
ddp_backend: None
tpu_num_cores: None
tpu_metrics_debug: 'False'
debug: '[]'
dataloader_drop_last: 'False'
eval_steps: None
dataloader_num_workers: '0'
dataloader_prefetch_factor: None
past_index: '-1'
run_name: triangle-100k-og
disable_tqdm: 'False'
remove_unused_columns: 'False'
label_names: '[''input_ids'']'
load_best_model_at_end: 'False'
metric_for_best_model: None
greater_is_better: None
ignore_data_skip: 'False'
fsdp: '[]'
fsdp_min_num_params: '0'
fsdp_config: '{''min_num_params'': 0, ''xla'': False, ''xla_fsdp_v2'': False, ''xla_fsdp_grad_ckpt'':
False}'
fsdp_transformer_layer_cls_to_wrap: None
accelerator_config: '{''split_batches'': False, ''dispatch_batches'': None, ''even_batches'':
True, ''use_seedable_sampler'': True, ''non_blocking'': False, ''gradient_accumulation_kwargs'':
None, ''use_configured_state'': False}'
deepspeed: None
label_smoothing_factor: '0.0'
optim: OptimizerNames.ADAMW_TORCH
optim_args: None
adafactor: 'False'
group_by_length: 'False'
length_column_name: length
report_to: '[''wandb'']'
ddp_find_unused_parameters: None
ddp_bucket_cap_mb: None
ddp_broadcast_buffers: None
dataloader_pin_memory: 'True'
dataloader_persistent_workers: 'False'
skip_memory_metrics: 'True'
use_legacy_prediction_loop: 'False'
push_to_hub: 'False'
resume_from_checkpoint: None
hub_model_id: timaeus/triangle-100k-og
hub_strategy: HubStrategy.EVERY_SAVE
hub_token: None
hub_private_repo: 'False'
hub_always_push: 'False'
gradient_checkpointing: 'False'
gradient_checkpointing_kwargs: None
include_inputs_for_metrics: 'False'
eval_do_concat_batches: 'True'
fp16_backend: auto
evaluation_strategy: None
push_to_hub_model_id: None
push_to_hub_organization: None
push_to_hub_token: None
_n_gpu: '1'
mp_parameters: ''
auto_find_batch_size: 'False'
full_determinism: 'False'
torchdynamo: None
ray_scope: last
ddp_timeout: '1800'
torch_compile: 'False'
torch_compile_backend: None
torch_compile_mode: None
dispatch_batches: None
split_batches: None
include_tokens_per_second: 'False'
include_num_input_tokens_seen: 'False'
neftune_noise_alpha: None
optim_target_modules: None
batch_eval_metrics: 'False'
eval_on_start: 'False'
use_liger_kernel: 'False'
eval_use_gather_object: 'False'
checkpoints_dir: .//checkpoints/
init_step: '0'
save_log_steps: '0'
bucket_name: devinterp-language
s3_folder: checkpoints/tetrahedron-3m
delete_after_upload: 'False'
push_to_aws: 'True'
# | (trailing extraction artifact, commented out to keep the file valid YAML)