# bloomRL / trpo / config.yaml
# maxymoo2's picture
# Upload pedagogical policy baseline checkpoints
# 6f81d73 verified
# Run metadata stored under the `_wandb` key: CLI version, an environment
# snapshot (`e`), the `m` list, the Python version, and the `t` table.
# NOTE(review): the nesting below was reconstructed from text whose
# indentation had been flattened; confirm it against the original file.
_wandb:
  value:
    cli_version: "0.24.2"
    e:
      # The snapshot is keyed by the same id that appears as `writerId` below.
      xfkab93qb69gjyqobi3o9jz9p2xs3ohp:
        # Command-line arguments of the run; quoted because each starts
        # with `-`, which is also the YAML sequence indicator.
        args:
          - "--critic_model=value_kc_critic"
          - "--use_wandb"
          - "--wandb_project_name=task1_all_kc"
          - "--kc_to_que_path=data/XES3G5M/metadata/kc_questions_map.json"
          - "--kc_emb_path=data/XES3G5M_embeddings/kc_emb.json"
          - "--cluster_to_kc_path=data/XES3G5M/metadata/kc_clusters.json"
          - "--cluster_to_que_path=data/XES3G5M/metadata/cluster_to_que_ids_map.json"
          - "--pretrained_model_path=data/pretrained_kt_model.ckpt"
          - "--dataloader_num_workers=8"
          - "--seed=4"
        cpu_count: 252
        cpu_count_logical: 252
        cudaVersion: "12.8"
        disk:
          /:
            # Quoted in the source, so these stay strings.
            # Presumably byte counts — TODO confirm.
            total: "1247017926656"
            used: "1026639593472"
        # NOTE(review): a personal e-mail address is stored with this run
        # metadata; confirm that publishing it is intended.
        email: maxonorris@gmail.com
        executable: /mnt/pvc/Baselines/ExRec/.venv_task1/bin/python
        gpu: NVIDIA A100-SXM4-40GB
        gpu_count: 1
        gpu_nvidia:
          - architecture: Ampere
            cudaCores: 6912
            memoryTotal: "42949672960"
            name: NVIDIA A100-SXM4-40GB
            uuid: GPU-9ae80bdd-e009-754a-7706-b530e2654492
        host: alphamcts-run-models-trpo-default-hr7ws-w7phw
        memory:
          total: "962415210496"
        os: Linux-6.8.0-55-generic-x86_64-with-glibc2.31
        program: "-m train_test.all_kc_train_trpo"
        python: CPython 3.10.13
        root: /mnt/pvc/Baselines/ExRec
        startedAt: "2026-02-11T03:44:02.169555Z"
        writerId: xfkab93qb69gjyqobi3o9jz9p2xs3ohp
    m: []
    python_version: "3.10.13"
    # NOTE(review): the meaning of the numeric keys and integer codes in `t`
    # is not visible in this file; values are preserved as recorded.
    t:
      "1":
        - 1
        - 5
        - 53
      "2":
        - 1
        - 5
        - 53
      "3":
        - 2
        - 13
        - 15
        - 16
      "4": "3.10.13"
      "5": "0.24.2"
      "12": "0.24.2"
      "13": linux-x86_64
# Run configuration: one top-level key per setting, each wrapping its
# setting in a single `value` entry. Keys are kept in their original order.
# Several entries repeat values passed on the recorded command line
# (critic_model, use_wandb, wandb_project_name, the data paths,
# dataloader_num_workers, seed).
action_size:
  value: 768
action_type:
  value: continuous
actor_lr:
  # Written with an explicit decimal point so that YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float instead of the string "5e-05".
  value: 5.0e-05
actor_step_size:
  value: 0.5
actor_up_projection_size:
  value: 1200
advantage_normalization:
  value: false
backtrack_coeff:
  value: 0.8
batch_size:
  value: 512
checkpoint_path:
  value: ./trpo_saved_models/65041ec3-88ea-4943-8e27-e7f97620c529
cluster_to_kc_path:
  value: data/XES3G5M/metadata/kc_clusters.json
cluster_to_que_path:
  value: data/XES3G5M/metadata/cluster_to_que_ids_map.json
critic_hidden_size:
  value: 300
critic_model:
  value: value_kc_critic
critic_up_projection_size:
  value: 1200
dataloader_num_workers:
  value: 8
deterministic_eval:
  value: false
discount_factor:
  value: 0.99
gae_lambda:
  value: 0.95
hidden_size:
  value: 300
kc_emb_path:
  value: data/XES3G5M_embeddings/kc_emb.json
kc_emb_size:
  value: 768
kc_to_que_path:
  value: data/XES3G5M/metadata/kc_questions_map.json
log_dir:
  value: ./train_trpo_logs
log_path:
  value: ./train_trpo_logs/65041ec3-88ea-4943-8e27-e7f97620c529
max_backtracks:
  value: 10
max_batchsize:
  value: 512
max_kl:
  value: 0.01
n_epoch:
  value: 100
optim_critic_iters:
  value: 5
pretrained_model_path:
  value: data/pretrained_kt_model.ckpt
repeat_per_update:
  value: 1
reward_normalization:
  value: false
save_dir:
  value: ./trpo_saved_models
seed:
  value: 4
student_state_size:
  value: 300
test_batch_size:
  value: 2048
test_init_seq_size:
  value: 100
test_last_n_steps:
  value: 10
test_log_wandb:
  value: false
test_max_steps:
  value: 10
test_n_episode:
  value: 2048
test_reward_func:
  value: step_by_step
test_reward_scale:
  value: 1000
train_batch_size:
  value: 512
train_folds:
  # Quoted so the dash-separated value is unambiguously a string.
  value: "2-3-4"
train_init_seq_size:
  value: 100
train_last_n_steps:
  value: 10
train_max_steps:
  value: 10
train_max_steps_until_student_change:
  value: 10
train_n_episode:
  value: 512
train_replay_buffer_size:
  value: 12
train_reward_func:
  value: step_by_step
train_reward_scale:
  value: 1000
use_wandb:
  value: true
valid_question_bank_que_emb_path:
  value: data/XES3G5M_embeddings/qid2content_sol_avg_emb.json
wandb_project_name:
  value: task1_all_kc
wandb_run_name:
  value: trpo