explcre commited on
Commit
12d18ad
·
verified ·
1 Parent(s): 94cfd1a

Upload exp_phase8_t3_rl_edit_tight_gspo_FIXED_s1_20260506_123025/log.jsonl with huggingface_hub

Browse files
exp_phase8_t3_rl_edit_tight_gspo_FIXED_s1_20260506_123025/log.jsonl CHANGED
@@ -19,3 +19,23 @@
19
  {"rl_step": 90, "loss": -8.940696716308594e-08, "mean_return": 37.36669921875, "max_return": 47.160789489746094, "mean_motif_chained": 37.36670026318524, "gnorm": 0.0003566741943359375, "elapsed_s": 649.3024468421936, "cell_type": "Mic", "edit_type": "activity_boost", "target_tf": "SPI1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 8.940696716308594e-08, "adv_std": 0.9999998807907104, "adv_min": -1.413169503211975, "adv_max": 0.7373439073562622, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 37.36670026318524, "comp_kd_mean": 0.0, "comp_total_mean": 37.36670026318524, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
20
  {"rl_step": 95, "loss": 6.705522537231445e-08, "mean_return": 39.73131561279297, "max_return": 55.03954315185547, "mean_motif_chained": 39.73131465290243, "gnorm": 0.0002899169921875, "elapsed_s": 686.887937784195, "cell_type": "OPC", "edit_type": "cell_type_transfer", "target_tf": "DLX2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -6.705522537231445e-08, "adv_std": 0.9999998807907104, "adv_min": -1.3257139921188354, "adv_max": 1.1026084423065186, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 39.73131465290243, "comp_kd_mean": 0.0, "comp_total_mean": 39.73131465290243, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
21
  {"rl_step": 100, "loss": -5.960464477539063e-08, "mean_return": 64.7710952758789, "max_return": 97.39196014404297, "mean_motif_chained": 64.77109626684509, "gnorm": 0.000354766845703125, "elapsed_s": 723.3714847564697, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "NKX2-2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 5.960464477539063e-08, "adv_std": 0.9999998807907104, "adv_min": -0.7337556481361389, "adv_max": 1.469343662261963, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 64.77109626684509, "comp_kd_mean": 0.0, "comp_total_mean": 64.77109626684509, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  {"rl_step": 90, "loss": -8.940696716308594e-08, "mean_return": 37.36669921875, "max_return": 47.160789489746094, "mean_motif_chained": 37.36670026318524, "gnorm": 0.0003566741943359375, "elapsed_s": 649.3024468421936, "cell_type": "Mic", "edit_type": "activity_boost", "target_tf": "SPI1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 8.940696716308594e-08, "adv_std": 0.9999998807907104, "adv_min": -1.413169503211975, "adv_max": 0.7373439073562622, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 37.36670026318524, "comp_kd_mean": 0.0, "comp_total_mean": 37.36670026318524, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
20
  {"rl_step": 95, "loss": 6.705522537231445e-08, "mean_return": 39.73131561279297, "max_return": 55.03954315185547, "mean_motif_chained": 39.73131465290243, "gnorm": 0.0002899169921875, "elapsed_s": 686.887937784195, "cell_type": "OPC", "edit_type": "cell_type_transfer", "target_tf": "DLX2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -6.705522537231445e-08, "adv_std": 0.9999998807907104, "adv_min": -1.3257139921188354, "adv_max": 1.1026084423065186, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 39.73131465290243, "comp_kd_mean": 0.0, "comp_total_mean": 39.73131465290243, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
21
  {"rl_step": 100, "loss": -5.960464477539063e-08, "mean_return": 64.7710952758789, "max_return": 97.39196014404297, "mean_motif_chained": 64.77109626684509, "gnorm": 0.000354766845703125, "elapsed_s": 723.3714847564697, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "NKX2-2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 5.960464477539063e-08, "adv_std": 0.9999998807907104, "adv_min": -0.7337556481361389, "adv_max": 1.469343662261963, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 64.77109626684509, "comp_kd_mean": 0.0, "comp_total_mean": 64.77109626684509, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
22
+ {"rl_step": 105, "loss": 1.3783574104309082e-07, "mean_return": 38.559959411621094, "max_return": 55.57597351074219, "mean_motif_chained": 38.55995630555346, "gnorm": 0.00021648406982421875, "elapsed_s": 759.8911151885986, "cell_type": "Ex", "edit_type": "cell_type_transfer", "target_tf": "DLX2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.3783574104309082e-07, "adv_std": 0.9999998807907104, "adv_min": -1.0895953178405762, "adv_max": 1.3203606605529785, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 38.55995630555346, "comp_kd_mean": 0.0, "comp_total_mean": 38.55995630555346, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
23
+ {"rl_step": 110, "loss": 2.682209014892578e-07, "mean_return": 36.37060546875, "max_return": 48.29936981201172, "mean_motif_chained": 36.37060265805821, "gnorm": 0.00032806396484375, "elapsed_s": 796.1773946285248, "cell_type": "Ast", "edit_type": "cell_type_transfer", "target_tf": "RUNX1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -2.682209014892578e-07, "adv_std": 0.9999998211860657, "adv_min": -1.346685767173767, "adv_max": 1.0708808898925781, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 36.37060265805821, "comp_kd_mean": 0.0, "comp_total_mean": 36.37060265805821, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
24
+ {"rl_step": 115, "loss": 1.4901161193847656e-07, "mean_return": 58.196258544921875, "max_return": 72.57315826416016, "mean_motif_chained": 58.196255893227956, "gnorm": 0.000362396240234375, "elapsed_s": 832.8803012371063, "cell_type": "Mic", "edit_type": "cell_type_transfer", "target_tf": "STAT3", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.4901161193847656e-07, "adv_std": 1.0, "adv_min": -0.9651296734809875, "adv_max": 1.076028823852539, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 58.196255893227956, "comp_kd_mean": 0.0, "comp_total_mean": 58.196255893227956, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
25
+ {"rl_step": 120, "loss": -2.9802322387695312e-08, "mean_return": 40.78408432006836, "max_return": 65.15682220458984, "mean_motif_chained": 40.78408453604997, "gnorm": 0.0004024505615234375, "elapsed_s": 869.6244504451752, "cell_type": "Ex", "edit_type": "activity_boost", "target_tf": "NEUROD2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 2.9802322387695312e-08, "adv_std": 0.9999998211860657, "adv_min": -0.8494985699653625, "adv_max": 1.3790552616119385, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 40.78408453604997, "comp_kd_mean": 0.0, "comp_total_mean": 40.78408453604997, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
26
+ {"rl_step": 125, "loss": 1.4901161193847656e-07, "mean_return": 64.59075927734375, "max_return": 87.43537139892578, "mean_motif_chained": 64.59075611025686, "gnorm": 0.0002765655517578125, "elapsed_s": 907.257563829422, "cell_type": "Ast", "edit_type": "cell_type_transfer", "target_tf": "SOX10", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.4901161193847656e-07, "adv_std": 1.0000001192092896, "adv_min": -1.016721248626709, "adv_max": 1.069595217704773, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 64.59075611025686, "comp_kd_mean": 0.0, "comp_total_mean": 64.59075611025686, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
27
+ {"rl_step": 130, "loss": 1.7136335372924805e-07, "mean_return": 42.0273551940918, "max_return": 53.92708969116211, "mean_motif_chained": 42.027353421481706, "gnorm": 0.0002803802490234375, "elapsed_s": 945.5225644111633, "cell_type": "Mic", "edit_type": "cell_type_transfer", "target_tf": "KLF2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.7136335372924805e-07, "adv_std": 1.0, "adv_min": -1.1650569438934326, "adv_max": 0.9340017437934875, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 42.027353421481706, "comp_kd_mean": 0.0, "comp_total_mean": 42.027353421481706, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
28
+ {"rl_step": 135, "loss": 1.7881393432617188e-07, "mean_return": 58.7022705078125, "max_return": 69.17412567138672, "mean_motif_chained": 58.702268985890655, "gnorm": 0.0005950927734375, "elapsed_s": 982.7725389003754, "cell_type": "Mic", "edit_type": "activity_boost", "target_tf": "IRF8", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.7881393432617188e-07, "adv_std": 0.9999998807907104, "adv_min": -1.402094841003418, "adv_max": 0.9688408374786377, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 58.702268985890655, "comp_kd_mean": 0.0, "comp_total_mean": 58.702268985890655, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
29
+ {"rl_step": 140, "loss": 0.0, "mean_return": 30.887792587280273, "max_return": 49.0815544128418, "mean_motif_chained": 30.88779293928121, "gnorm": 0.0002956390380859375, "elapsed_s": 1019.9133749008179, "cell_type": "OPC", "edit_type": "cell_type_transfer", "target_tf": "TBR1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 0.0, "adv_std": 0.9999997615814209, "adv_min": -0.7493395209312439, "adv_max": 1.4538414478302002, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 30.88779293928121, "comp_kd_mean": 0.0, "comp_total_mean": 30.88779293928121, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
30
+ {"rl_step": 145, "loss": 5.066394805908203e-07, "mean_return": 44.733558654785156, "max_return": 48.80963897705078, "mean_motif_chained": 44.733555149543406, "gnorm": 0.0003795623779296875, "elapsed_s": 1056.7740361690521, "cell_type": "Ex", "edit_type": "promoter_retarget", "target_tf": "MEF2C", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -5.066394805908203e-07, "adv_std": 0.9999998807907104, "adv_min": -1.4619220495224, "adv_max": 0.7345978617668152, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 44.733555149543406, "comp_kd_mean": 0.0, "comp_total_mean": 44.733555149543406, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
31
+ {"rl_step": 150, "loss": 0.0, "mean_return": 52.2446403503418, "max_return": 107.71989440917969, "mean_motif_chained": 52.24464096008643, "gnorm": 0.0002956390380859375, "elapsed_s": 1093.3430759906769, "cell_type": "Ex", "edit_type": "promoter_retarget", "target_tf": "NEUROD2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 0.0, "adv_std": 1.0, "adv_min": -0.6921064257621765, "adv_max": 1.4684449434280396, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 52.24464096008643, "comp_kd_mean": 0.0, "comp_total_mean": 52.24464096008643, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
32
+ {"rl_step": 155, "loss": -1.4901161193847656e-07, "mean_return": 27.574190139770508, "max_return": 36.148929595947266, "mean_motif_chained": 27.57419125729772, "gnorm": 0.00031280517578125, "elapsed_s": 1129.7867543697357, "cell_type": "OPC", "edit_type": "cell_type_transfer", "target_tf": "NEUROD2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 1.4901161193847656e-07, "adv_std": 0.9999998211860657, "adv_min": -1.1038105487823486, "adv_max": 1.183632493019104, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 27.57419125729772, "comp_kd_mean": 0.0, "comp_total_mean": 27.57419125729772, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
33
+ {"rl_step": 160, "loss": 8.940696716308594e-08, "mean_return": 58.268028259277344, "max_return": 69.70316314697266, "mean_motif_chained": 58.26802711489587, "gnorm": 0.0003223419189453125, "elapsed_s": 1166.7138979434967, "cell_type": "Mic", "edit_type": "promoter_retarget", "target_tf": "IRF8", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -8.940696716308594e-08, "adv_std": 0.9999999403953552, "adv_min": -1.2297286987304688, "adv_max": 1.0134198665618896, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 58.26802711489587, "comp_kd_mean": 0.0, "comp_total_mean": 58.26802711489587, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
34
+ {"rl_step": 165, "loss": -3.203749656677246e-07, "mean_return": 42.21644592285156, "max_return": 52.945106506347656, "mean_motif_chained": 42.21644951115068, "gnorm": 0.00037384033203125, "elapsed_s": 1204.082677602768, "cell_type": "Ast", "edit_type": "activity_boost", "target_tf": "SOX9", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 3.203749656677246e-07, "adv_std": 0.9999998807907104, "adv_min": -1.0169082880020142, "adv_max": 1.189740538597107, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 42.21644951115068, "comp_kd_mean": 0.0, "comp_total_mean": 42.21644951115068, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
35
+ {"rl_step": 170, "loss": 4.470348358154297e-08, "mean_return": 53.39247512817383, "max_return": 76.33849334716797, "mean_motif_chained": 53.3924732942537, "gnorm": 0.000308990478515625, "elapsed_s": 1242.1385493278503, "cell_type": "OPC", "edit_type": "promoter_retarget", "target_tf": "ASCL1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -4.470348358154297e-08, "adv_std": 0.9999999403953552, "adv_min": -1.3365594148635864, "adv_max": 1.0822937488555908, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 53.3924732942537, "comp_kd_mean": 0.0, "comp_total_mean": 53.3924732942537, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
36
+ {"rl_step": 175, "loss": 0.0, "mean_return": 35.6060676574707, "max_return": 42.338130950927734, "mean_motif_chained": 35.606067804041224, "gnorm": 0.0002593994140625, "elapsed_s": 1279.57825756073, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "SOX10", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": 0.0, "adv_std": 0.9999998807907104, "adv_min": -0.9474294185638428, "adv_max": 0.9642488956451416, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 35.606067804041224, "comp_kd_mean": 0.0, "comp_total_mean": 35.606067804041224, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
37
+ {"rl_step": 180, "loss": 1.1920928955078125e-07, "mean_return": 52.435306549072266, "max_return": 73.87956237792969, "mean_motif_chained": 52.435304763498934, "gnorm": 0.0003414154052734375, "elapsed_s": 1316.1867308616638, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "SOX10", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.1920928955078125e-07, "adv_std": 0.9999998211860657, "adv_min": -1.0825536251068115, "adv_max": 1.3067914247512817, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 52.435304763498934, "comp_kd_mean": 0.0, "comp_total_mean": 52.435304763498934, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
38
+ {"rl_step": 185, "loss": 1.4901161193847656e-08, "mean_return": 57.61628723144531, "max_return": 78.1861801147461, "mean_motif_chained": 57.61628706292872, "gnorm": 0.0003948211669921875, "elapsed_s": 1352.7921385765076, "cell_type": "Oli", "edit_type": "activity_boost", "target_tf": "NKX2-2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -1.4901161193847656e-08, "adv_std": 1.0, "adv_min": -0.6146942973136902, "adv_max": 1.4943897724151611, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 57.61628706292872, "comp_kd_mean": 0.0, "comp_total_mean": 57.61628706292872, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
39
+ {"rl_step": 190, "loss": 4.76837158203125e-07, "mean_return": 42.397117614746094, "max_return": 47.195247650146484, "mean_motif_chained": 42.39711452937594, "gnorm": 0.000385284423828125, "elapsed_s": 1389.1303074359894, "cell_type": "Mic", "edit_type": "activity_boost", "target_tf": "SPI1", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -4.76837158203125e-07, "adv_std": 0.9999997019767761, "adv_min": -1.4553025960922241, "adv_max": 0.8203241229057312, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 42.39711452937594, "comp_kd_mean": 0.0, "comp_total_mean": 42.39711452937594, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
40
+ {"rl_step": 195, "loss": 2.9802322387695312e-08, "mean_return": 43.280967712402344, "max_return": 76.53419494628906, "mean_motif_chained": 43.28096622513898, "gnorm": 0.0002193450927734375, "elapsed_s": 1425.389592885971, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "NKX2-2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -2.9802322387695312e-08, "adv_std": 0.9999998807907104, "adv_min": -1.0912439823150635, "adv_max": 1.227744698524475, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 43.28096622513898, "comp_kd_mean": 0.0, "comp_total_mean": 43.28096622513898, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}
41
+ {"rl_step": 200, "loss": 7.450580596923828e-08, "mean_return": 30.549165725708008, "max_return": 63.81435012817383, "mean_motif_chained": 30.549165005386737, "gnorm": 0.000316619873046875, "elapsed_s": 1461.7718422412872, "cell_type": "Oli", "edit_type": "promoter_retarget", "target_tf": "OLIG2", "kd_active": false, "kd_tf": "", "kd_score_mean": 0.0, "ratio_mean": 1.0, "ratio_min": 1.0, "ratio_max": 1.0, "log_ratio_mean": 0.0, "adv_mean": -7.450580596923828e-08, "adv_std": 1.0, "adv_min": -0.8483825922012329, "adv_max": 1.4284799098968506, "comp_oracle_mean": 0.0, "comp_moods_chained_mean": 30.549165005386737, "comp_kd_mean": 0.0, "comp_total_mean": 30.549165005386737, "_algo": "gspo", "_lora_r": 16, "_peak_lr": 1e-05, "_temperature": 1.0, "_seq_len_mean": 360.0}