buseskorkmaz committed on
Commit
27ae7f2
·
1 Parent(s): dee152c

Upload 4 files

Browse files

Hackernews checkpoints used in evaluation in the paper

Files changed (4) hide show
  1. config.json +1 -0
  2. model.pkl +3 -0
  3. model_32767_copy.pkl +3 -0
  4. optim.pkl +3 -0
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model": {"gpt2": {"name": "gpt2", "gpt2_type": "gpt2", "from_pretrained": true, "lm_head": true}, "name": "per_token_iql", "dataset": {"name": "hackernews_list_dataset", "cache_id": "d_train"}, "alpha": 0.005, "gamma": 0.99, "beta": 0.0, "transition_weight": 0.0, "clip_weight": null, "value_max": null, "value_min": null, "detach_v": false, "detach_q": false, "detach_pi": false, "double_q": true, "seperate_policy": true, "seperate_target": true, "tau": 0.6, "exp_weights": true, "dm_margin": 0.0, "load": {"name": "per_token_iql", "checkpoint_path": "outputs/hackernews/conditional_hackernews_official_bc_test_fix_q/model_converted.pkl", "strict_load": false}, "advanced_mlp": false, "cql_temp": 1.0}, "train_dataset": {"data": {"name": "hackernews_rl_dataset", "path": "data/hackernews_rl_dataset/", "cache_path": null, "reward_shift": 0.0, "reward_scale": 1.0, "reward_f": {"name": "score_human_reward", "job_descriptions_path": "data/hackernews_rl_dataset/", "index_path": "data/hackernews_rl_dataset/train_idxs.json"}, "index_path": "data/hackernews_rl_dataset/train_idxs.json", "cache_id": "train_raw_data"}, "token_reward": {"name": "constant_token_reward", "c": 0.0}, "name": "hackernews_list_dataset", "max_len": 1024, "cuttoff": null, "resample_timeout": 0.0, "cache_id": "d_train", "include_parent": true}, "eval_dataset": {"data": {"name": "hackernews_rl_dataset", "path": "data/hackernews_rl_dataset/", "cache_path": null, "reward_shift": 0.0, "reward_scale": 1.0, "reward_f": {"name": "score_human_reward", "job_descriptions_path": "data/hackernews_rl_dataset/", "index_path": "data/hackernews_rl_dataset/test_idxs.json"}, "index_path": "data/hackernews_rl_dataset/test_idxs.json", "cache_id": "test_raw_data"}, "token_reward": {"name": "constant_token_reward", "c": 0.0}, "name": "hackernews_list_dataset", "max_len": 1024, "cuttoff": null, "resample_timeout": 0.0, "cache_id": "d_test", "include_parent": true}, "evaluator": {"env": {"name": "hackernews_env", "reward_shift": 
0.0, "reward_scale": 1.0, "reward_f": {"name": "hackernews_reward"}, "data": {"name": "hackernews_rl_dataset", "cache_id": "test_raw_data"}, "include_parent": true}, "name": "iql_evaluator", "verbose": true, "kind": "sample", "generation_kwargs": {"max_generation_len": 256, "temp": 1.0, "top_k": null, "top_p": null, "exp_adv": true, "adv_weight": 8.0, "adv_clip": null, "include_logits": true, "include_adv": true, "num_generations": 1, "rerank_log_prob_weight": 0.0, "rerank_advantage_weight": 1.0}}, "train": {"save_checkpoint_dir": "/dccstor/autofair/bias_llm/Bias-ILQL/src/utils/../../outputs/hackernews/conditional_hackernews_official_iql_bc_fix/", "optim_state_path": "/dccstor/autofair/bias_llm/Bias-ILQL/src/utils/../../outputs/hackernews/optim_state/", "epochs": 50, "dataloader_workers": 0, "bsize": 1, "grad_accum_steps": 64, "log_every": 256, "eval_every": 2048, "save_every": 8192, "max_checkpoints": 1, "eval_bsize": 1, "eval_batches": 8, "lr": 0.001, "weight_decay": 0.0, "hard_update_every": null, "max_steps": null, "loss": {"v_loss_weight": 1.0, "q_loss_weight": 1.0, "awac_weight": 0.0, "cql_loss_weight": 0.25, "dm_loss_weight": 0.0, "mc_returns": false}}, "wandb": {"use_wandb": false, "wandb_project": "hackernews_iql"}, "system": {"device": "cuda:0", "num_processes": 2, "use_fp16": false}}
model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341c944026d3847593eb622748ef44cf3f3f5b2a890486117ac1cc1f2c16bae4
3
+ size 2790950877
model_32767_copy.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4be4c213ffce861c81bee794cdb8d87a8c77dfa0d4c4cc898dd44a468a5e0bb
3
+ size 2790985785
optim.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f1dee2e81e1c9101fd17ee9823f7f2a3de553b248600c1a8dcd14eb488ec54
3
+ size 2260095677