LoRA fine-tune on RobotSmith task03 tool 12_long_2

Browse files

Files changed (6) hide show

README.md +2 -2
metrics.json +55 -7
model-00001-of-00002.safetensors +1 -1
model-00002-of-00002.safetensors +1 -1
trainer_state.json +0 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,8 +17,8 @@ library_name: transformers
 ## Training Run
-- **Wandb Run**: [lora_task03_data_fixed](https://wandb.ai/r-pad/rbm-finetune-robotsmith/runs/6ihsmc6l)
-- **Wandb ID**: `6ihsmc6l`
 - **Project**: rbm-finetune-robotsmith
 - **Notes**: fine-tuning Robometer on RobotSmith

 ## Training Run
+- **Wandb Run**: [lora_task03_12_long_2](https://wandb.ai/r-pad/rbm-finetune-robotsmith/runs/c18gv1ii)
+- **Wandb ID**: `c18gv1ii`
 - **Project**: rbm-finetune-robotsmith
 - **Notes**: fine-tuning Robometer on RobotSmith

metrics.json CHANGED Viewed

@@ -1,11 +1,59 @@
 {
-  "step": 450,
   "metrics": {
-    "eval_rew_align/success_auprc_robotsmith": 0.5288600712338849,
-    "eval_rew_align/positive_success_acc_robotsmith": 0.8125,
-    "eval_rew_align/negative_success_acc_robotsmith": 0.9348591549295775,
-    "eval_rew_align/loss_robotsmith": 4.137787556648254,
-    "eval_rew_align/pearson_robotsmith": 0.9885291064855337,
-    "time/custom_evaluations": 83.76057602092624
   }
 }

 {
+  "step": 1000,
   "metrics": {
+    "eval_rew_align/success_auprc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6195081082466317,
+    "eval_rew_align/positive_success_acc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_rew_align/negative_success_acc_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.952161913523459,
+    "eval_rew_align/loss_amburger66_robotsmith_rbm_12_long_2_robotsmith": 3.8970178365707397,
+    "eval_rew_align/pearson_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.980936971524903,
+    "eval_p_rank/kendall_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/kendall_rewind_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/avg_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/min_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/max_succ_subopt_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/avg_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/min_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/max_subopt_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/avg_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/min_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/max_succ_fail_diff_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/ranking_acc_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_all_pairs_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_suboptimal_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_suboptimal_vs_successful_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_successful_last_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/kendall_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/kendall_rewind_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/avg_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/min_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/max_succ_subopt_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/avg_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/min_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/max_subopt_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/avg_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/min_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/max_succ_fail_diff_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/ranking_acc_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_all_pairs_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_suboptimal_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_suboptimal_vs_successful_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_successful_avg_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/kendall_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/kendall_rewind_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/avg_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/min_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/max_succ_subopt_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.22012983560562138,
+    "eval_p_rank/avg_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/min_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/max_subopt_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.6151127217337489,
+    "eval_p_rank/avg_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/min_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/max_succ_fail_diff_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 0.8352425573393703,
+    "eval_p_rank/ranking_acc_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_all_pairs_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_suboptimal_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_suboptimal_vs_successful_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "eval_p_rank/ranking_acc_failure_vs_successful_sum_amburger66_robotsmith_rbm_12_long_2_robotsmith": 1.0,
+    "time/custom_evaluations": 42.60612651426345
   }
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e0e6693e838703cc847e2ba66fcae114c3ca7ba1dbc16d751cd78a06c2c15ee
 size 4996131352

 version https://git-lfs.github.com/spec/v1
+oid sha256:987c1571a4b4794295eaea14a93eb19639639818363f500c0891b1ad70256bb8
 size 4996131352

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fc0035aebba87eeda73b20ead9bc83508b7b9d9e42dd633796c350d11bfc4fd
 size 4162297688

 version https://git-lfs.github.com/spec/v1
+oid sha256:e49884673370f656f3bddd80b824a1d7f5511754db47dd3245782da572b6e51b
 size 4162297688

trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df32aad17cbfc2aa4ad3381b46f28c1a9815a7ed93ec0e8534171090ad2c781b
 size 5841

 version https://git-lfs.github.com/spec/v1
+oid sha256:613f61c1856f1d10e0b45e06e24d92845d755b9f935778b8c022c275cde9f777
 size 5841