{ "train_loop_config": { "a2g_norm": false, "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "beta_coef": "0.0", "block_size": 1024, "buffer_max_size": 20000, "causal_model": true, "checkpoint_interval": "100", "clamp_update": false, "config_name": null, "constraint_type": "kl", "cpu_per_worker": 7, "dataset_config_name": null, "dataset_name": "nbtpj/summ_ds_train", "ent_coef": 0.0001, "epsilon": 0.2, "eval_interval": "100", "freeze_role2": false, "gamma": 0.95, "gpu_per_worker": 1, "grad_norm": 0.5, "gradient_accumulation_steps": 1, "gradient_checkpoint": false, "group_relative_norm": false, "inference_config": { "do_sample": true, "max_new_tokens": 135, "min_new_tokens": 32, "temperature": 0.0 }, "keep_eval_size": false, "label_col": "summary", "learning_rate": 3e-07, "lm_fraction": -1.0, "lm_w": 0.0, "log_interval": "5m", "log_rollout_txt": true, "lora": true, "lr_scheduler_type": "constant", "max_train_rollouts": 100000, "max_train_steps": 40000, "mini_epoch": 1, "mixed_precision": "bf16", "model_name_or_path": "gpt2", "model_type": null, "n_augment": 0, "n_generate": 4, "need_attn_mask": true, "num_warmup_steps": 200, "only_train_role1": false, "per_device_eval_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_train_batch_size": 4, "pretrained_role2_name_or_path": "none", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "push_to_hub": null, "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "rl_algo": "off_policy", "rl_w": 1.0, "rollout_config": { "accuracy_w": 1.96282248134428, "accuracy_w2": 19.549524484259873, "len_pen": 1.0, "len_pen2": 1.0, "max_ctx_len": 860, "sampling_params_1": { "logprobs": 5, "max_tokens": 135, "min_tokens": 32, "n": 1, "temperature": 1.0 }, "sampling_params_2": { "logprobs": 5, "max_tokens": 860, "min_tokens": 5, "n": 4, "temperature": 1.0 }, "similarity_fn": "rouge", "threshold": 0.01605191133358762 }, "rollout_game": "baseline3v2", "sample_config": { "do_sample": true, "min_new_tokens": 1, "temperature": 1.0 }, "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "seed": 0, "test_clm": false, "test_gen": true, "test_glue": false, "text_template": "{text}\nTL;DR: {summary}", "tokenizer_name": null, "train_from_raw": true, "train_split_name": "sim_with_one_golden__cnndm_train", "trunc_eval": 256, "trunc_evals": [ "cnndm___10" ], "trust_remote_code": true, "tune_metrics": [ "cnndm/rouge1___1.0", "cnndm/rouge2___2.0", "cnndm/bertscore_f1___0.25" ], "use_deepspeed": false, "use_slow_tokenizer": false, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "vllm_sleep": true, "vllm_vram_ratio": 0.3, "weight_decay": 1e-05, "world_size": 1, "zero_config": 2 } }