{ "dataset_name": "nbtpj/summ_ds_train", "dataset_config_name": null, "train_split_name": "merge36_cnndmsamsumxsum", "text_template": "{text}\nTL;DR: {summary}", "label_col": "summary", "freeze_role2": false, "only_train_role1": false, "model_name_or_path": "gpt2", "ref_role1_name_or_path": "gpt2", "ref_role2_name_or_path": "gpt2", "pretrained_role2_name_or_path": "none", "config_name": null, "vectorizer_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/vectorizer/wikitext103_tfidf_full.joblib", "tokenizer_name": null, "use_slow_tokenizer": false, "per_device_train_batch_size": 4, "per_device_query_rollout_batch_size": 32, "per_device_eval_batch_size": 4, "vllm_vram_ratio": 0.3, "learning_rate": 3e-07, "grad_norm": 0.5, "weight_decay": 1e-05, "max_train_steps": 40000, "max_train_rollouts": 100000, "gradient_accumulation_steps": 1, "lr_scheduler_type": "constant", "num_warmup_steps": 200, "seed": 0, "model_type": null, "block_size": 1024, "mini_epoch": 1, "rollout_game": "baseline3v2", "rl_algo": "on_policy", "constraint_type": "kl", "clamp_update": false, "rl_w": 1.0, "lm_w": 1.0, "n_generate": 4, "n_augment": 0, "gradient_checkpoint": false, "group_relative_norm": true, "sample_config": { "do_sample": true, "min_new_tokens": 20, "temperature": 1.0 }, "inference_config": { "do_sample": true, "temperature": 0.0, "min_new_tokens": 5, "max_new_tokens": 250 }, "rollout_config": { "accuracy_w": 12.960656597279739, "len_pen": 1.0, "accuracy_w2": 30.20289640158668, "len_pen2": 1.0, "threshold": 0.09062263471261954, "similarity_fn": "rouge" }, "ent_coef": 0.0001, "beta_coef": "0.0", "prompt_0": "{text}", "prompt_1": "{text}\nTL;DR: ", "prompt_2": "Given the text: {role1_output}\nReconstruct the summarized text to the detailed:", "prompt_eval": "{text}\nTL;DR:", "epsilon": 0.2, "a2g_norm": true, "vllm_sleep": true, "lora": false, "need_attn_mask": true, "gamma": 0.95, "trust_remote_code": true, "test_glue": false, "test_clm": false, "causal_model": true, "test_gen": true, "log_rollout_txt": true, "trunc_eval": 256, "trunc_evals": [ "cnndm___12", "samsum___12", "xsum___12", "gigaword___200", "duc___50" ], "use_deepspeed": false, "zero_config": 2, "log_interval": "5m", "eval_interval": "100", "checkpoint_interval": "100", "lm_fraction": -1.0, "push_to_hub": null, "keep_eval_size": false, "mixed_precision": "bf16", "tune_metrics": [ "cnndm/rouge1___1.0", "cnndm/bertscore_f1___0.25", "samsum/rouge1___1.0", "samsum/bertscore_f1___0.25", "xsum/rouge1___1.0", "xsum/bertscore_f1___0.25" ], "base_path": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2", "script": "/common/home/users/m/mq.nguyen.2023/testcode/SAC_LM/module9_clmv2/execute/utune/debug.py", "train_from_raw": true, "world_size": 1, "cpu_per_worker": 7, "gpu_per_worker": 1 }