amburger66 committed on
Commit
f160d1d
·
verified ·
1 Parent(s): 8171912

LoRA fine-tune on RobotSmith task03 tool 12_long_2

Browse files
README.md CHANGED
@@ -17,8 +17,8 @@ library_name: transformers
17
 
18
  ## Training Run
19
 
20
- - **Wandb Run**: [lora_task03_data_fixed](https://wandb.ai/r-pad/rbm-finetune-robotsmith/runs/6ihsmc6l)
21
- - **Wandb ID**: `6ihsmc6l`
22
  - **Project**: rbm-finetune-robotsmith
23
  - **Notes**: fine-tuning Robometer on RobotSmith
24
 
 
17
 
18
  ## Training Run
19
 
20
+ - **Wandb Run**: [lora_task03_12_long_2](https://wandb.ai/r-pad/rbm-finetune-robotsmith/runs/c18gv1ii)
21
+ - **Wandb ID**: `c18gv1ii`
22
  - **Project**: rbm-finetune-robotsmith
23
  - **Notes**: fine-tuning Robometer on RobotSmith
24
 
metrics.json CHANGED
@@ -1,11 +1,59 @@
1
  {
2
- "step": 450,
3
  "metrics": {
4
- "eval_rew_align/success_auprc_robotsmith": 0.5288600712338849,
5
- "eval_rew_align/positive_success_acc_robotsmith": 0.8125,
6
- "eval_rew_align/negative_success_acc_robotsmith": 0.9348591549295775,
7
- "eval_rew_align/loss_robotsmith": 4.137787556648254,
8
- "eval_rew_align/pearson_robotsmith": 0.9885291064855337,
9
- "time/custom_evaluations": 83.76057602092624
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  }
11
  }
 
1
  {
2
+ "step": 1000,
3
  "metrics": {
4
+ "eval_rew_align/success_auprc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6195081082466317,
5
+ "eval_rew_align/positive_success_acc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
6
+ "eval_rew_align/negative_success_acc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.952161913523459,
7
+ "eval_rew_align/loss_amburger66_robotsmith_rbm_12_long_2_robotsmith": 3.8970178365707397,
8
+ "eval_rew_align/pearson_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.980936971524903,
9
+ "eval_p_rank/kendall_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
10
+ "eval_p_rank/kendall_rewind_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
11
+ "eval_p_rank/avg_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
12
+ "eval_p_rank/min_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
13
+ "eval_p_rank/max_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
14
+ "eval_p_rank/avg_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
15
+ "eval_p_rank/min_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
16
+ "eval_p_rank/max_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
17
+ "eval_p_rank/avg_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
18
+ "eval_p_rank/min_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
19
+ "eval_p_rank/max_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
20
+ "eval_p_rank/ranking_acc_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
21
+ "eval_p_rank/ranking_acc_all_pairs_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
22
+ "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
23
+ "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
24
+ "eval_p_rank/ranking_acc_failure_vs_successful_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
25
+ "eval_p_rank/kendall_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
26
+ "eval_p_rank/kendall_rewind_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
27
+ "eval_p_rank/avg_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
28
+ "eval_p_rank/min_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
29
+ "eval_p_rank/max_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
30
+ "eval_p_rank/avg_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
31
+ "eval_p_rank/min_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
32
+ "eval_p_rank/max_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
33
+ "eval_p_rank/avg_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
34
+ "eval_p_rank/min_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
35
+ "eval_p_rank/max_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
36
+ "eval_p_rank/ranking_acc_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
37
+ "eval_p_rank/ranking_acc_all_pairs_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
38
+ "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
39
+ "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
40
+ "eval_p_rank/ranking_acc_failure_vs_successful_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
41
+ "eval_p_rank/kendall_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
42
+ "eval_p_rank/kendall_rewind_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
43
+ "eval_p_rank/avg_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
44
+ "eval_p_rank/min_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
45
+ "eval_p_rank/max_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
46
+ "eval_p_rank/avg_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
47
+ "eval_p_rank/min_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
48
+ "eval_p_rank/max_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
49
+ "eval_p_rank/avg_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
50
+ "eval_p_rank/min_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
51
+ "eval_p_rank/max_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
52
+ "eval_p_rank/ranking_acc_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
53
+ "eval_p_rank/ranking_acc_all_pairs_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
54
+ "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
55
+ "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
56
+ "eval_p_rank/ranking_acc_failure_vs_successful_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
57
+ "time/custom_evaluations": 42.60612651426345
58
  }
59
  }
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e0e6693e838703cc847e2ba66fcae114c3ca7ba1dbc16d751cd78a06c2c15ee
3
  size 4996131352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987c1571a4b4794295eaea14a93eb19639639818363f500c0891b1ad70256bb8
3
  size 4996131352
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc0035aebba87eeda73b20ead9bc83508b7b9d9e42dd633796c350d11bfc4fd
3
  size 4162297688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e49884673370f656f3bddd80b824a1d7f5511754db47dd3245782da572b6e51b
3
  size 4162297688
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df32aad17cbfc2aa4ad3381b46f28c1a9815a7ed93ec0e8534171090ad2c781b
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613f61c1856f1d10e0b45e06e24d92845d755b9f935778b8c022c275cde9f777
3
  size 5841