{
    "adafactor_beta1": null,
    "adafactor_clip_threshold": 1.0,
    "adafactor_decay_rate": -0.8,
    "adafactor_eps": [
        1e-30,
        0.001
    ],
    "adafactor_relative_step": true,
    "adafactor_scale_parameter": true,
    "adafactor_warmup_init": true,
    "adam_epsilon": 1e-08,
    "best_model_dir": "outputs/best_model",
    "cache_dir": "cache_dir/",
    "config": {},
    "cosine_schedule_num_cycles": 0.5,
    "custom_layer_parameters": [],
    "custom_parameter_groups": [],
    "dataloader_num_workers": 0,
    "do_lower_case": false,
    "dynamic_quantize": false,
    "early_stopping_consider_epochs": false,
    "early_stopping_delta": 0,
    "early_stopping_metric": "eval_loss",
    "early_stopping_metric_minimize": true,
    "early_stopping_patience": 3,
    "encoding": null,
    "eval_batch_size": 8,
    "evaluate_during_training": false,
    "evaluate_during_training_silent": true,
    "evaluate_during_training_steps": 2000,
    "evaluate_during_training_verbose": false,
    "evaluate_each_epoch": true,
    "fp16": true,
    "gradient_accumulation_steps": 1,
    "learning_rate": 5e-05,
    "local_rank": -1,
    "logging_steps": 50,
    "loss_type": null,
    "loss_args": {},
    "manual_seed": null,
    "max_grad_norm": 1.0,
    "max_seq_length": 128,
    "model_name": "model/",
    "model_type": "bart",
    "multiprocessing_chunksize": -1,
    "n_gpu": 1,
    "no_cache": false,
    "no_save": false,
    "not_saved_args": [],
    "num_train_epochs": 10,
    "optimizer": "AdamW",
    "output_dir": "outputs/",
    "overwrite_output_dir": true,
    "polynomial_decay_schedule_lr_end": 1e-07,
    "polynomial_decay_schedule_power": 1.0,
    "process_count": 6,
    "quantized_model": false,
    "reprocess_input_data": true,
    "save_best_model": true,
    "save_eval_checkpoints": true,
    "save_model_every_epoch": true,
    "save_optimizer_and_scheduler": true,
    "save_steps": -1,
    "scheduler": "linear_schedule_with_warmup",
    "silent": false,
    "skip_special_tokens": true,
    "tensorboard_dir": null,
    "thread_count": null,
    "tokenizer_name": null,
    "tokenizer_type": null,
    "train_batch_size": 8,
    "train_custom_parameters_only": false,
    "use_cached_eval_features": false,
    "use_early_stopping": false,
    "use_hf_datasets": false,
    "use_multiprocessing": false,
    "use_multiprocessing_for_evaluation": false,
    "wandb_kwargs": {},
    "wandb_project": "Paraphrasing with BART",
    "warmup_ratio": 0.06,
    "warmup_steps": 5609,
    "weight_decay": 0.0,
    "model_class": "Seq2SeqModel",
    "base_marian_model_name": "model/",
    "dataset_class": null,
    "dataset_cache_dir": null,
    "do_sample": false,
    "early_stopping": true,
    "evaluate_generated_text": false,
    "faiss_d": 768,
    "faiss_m": 128,
    "include_title_in_knowledge_dataset": true,
    "length_penalty": 2.0,
    "max_length": 128,
    "max_steps": -1,
    "num_beams": 4,
    "num_return_sequences": 1,
    "rag_embed_batch_size": 16,
    "repetition_penalty": 1.0,
    "save_knowledge_dataset": true,
    "save_knowledge_dataset_with_checkpoints": false,
    "split_text_character": " ",
    "split_text_n": 100,
    "src_lang": "en_XX",
    "tgt_lang": "ro_RO",
    "top_k": 100,
    "top_p": 0.95,
    "use_multiprocessed_decoding": false
}