fxmarty-amd committed on
Commit
e158d9e
·
verified ·
1 Parent(s): 01f0879

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/results.log +3 -0
  3. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/results_2025-12-29T13-36-47.083952.json +0 -0
  4. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_gsm8k_platinum_2025-12-29T13-36-47.083952.jsonl +3 -0
  5. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_abstract_algebra_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  6. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_anatomy_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  7. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_astronomy_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  8. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_business_ethics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  9. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_clinical_knowledge_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  10. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_biology_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  11. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_chemistry_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  12. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_computer_science_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  13. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_mathematics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  14. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_medicine_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  15. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_physics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  16. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_computer_security_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  17. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_conceptual_physics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  18. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_econometrics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  19. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_electrical_engineering_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  20. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_elementary_mathematics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  21. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_formal_logic_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  22. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_global_facts_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  23. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_biology_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  24. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_chemistry_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  25. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_computer_science_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  26. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_european_history_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  27. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_geography_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  28. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_government_and_politics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  29. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_macroeconomics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  30. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_mathematics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  31. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_microeconomics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  32. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_physics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  33. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_psychology_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  34. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_statistics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  35. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_us_history_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  36. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_world_history_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  37. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_human_aging_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  38. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_human_sexuality_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  39. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_international_law_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  40. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_jurisprudence_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  41. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_logical_fallacies_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  42. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_machine_learning_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  43. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_management_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  44. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_marketing_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  45. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_medical_genetics_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  46. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_miscellaneous_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  47. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_moral_disputes_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  48. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_moral_scenarios_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  49. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_nutrition_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
  50. 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_philosophy_generative_2025-12-29T13-36-47.083952.jsonl +0 -0
.gitattributes CHANGED
@@ -43,3 +43,8 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
43
  2025-12-29_11-18-24_rtn_qwen3_14b/2025-12-29_17-05-21_lm_eval_all_NOSHOT/results.log filter=lfs diff=lfs merge=lfs -text
44
  2025-12-29_11-18-24_rtn_qwen3_14b/2025-12-29_17-05-21_lm_eval_all_NOSHOT/samples_gsm8k_platinum_2025-12-29T17-18-51.649879.jsonl filter=lfs diff=lfs merge=lfs -text
45
  2025-12-29_11-18-24_rtn_qwen3_14b/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
43
  2025-12-29_11-18-24_rtn_qwen3_14b/2025-12-29_17-05-21_lm_eval_all_NOSHOT/results.log filter=lfs diff=lfs merge=lfs -text
44
  2025-12-29_11-18-24_rtn_qwen3_14b/2025-12-29_17-05-21_lm_eval_all_NOSHOT/samples_gsm8k_platinum_2025-12-29T17-18-51.649879.jsonl filter=lfs diff=lfs merge=lfs -text
45
  2025-12-29_11-18-24_rtn_qwen3_14b/tokenizer.json filter=lfs diff=lfs merge=lfs -text
46
+ 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/results.log filter=lfs diff=lfs merge=lfs -text
47
+ 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_gsm8k_platinum_2025-12-29T13-36-47.083952.jsonl filter=lfs diff=lfs merge=lfs -text
48
+ 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_17-04-46_lm_eval_all_NOSHOT/results.log filter=lfs diff=lfs merge=lfs -text
49
+ 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_17-04-46_lm_eval_all_NOSHOT/samples_gsm8k_platinum_2025-12-29T17-20-24.545237.jsonl filter=lfs diff=lfs merge=lfs -text
50
+ 2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/tokenizer.json filter=lfs diff=lfs merge=lfs -text
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/results.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e38528836a3b826f324dc9a2b24d3c16fb6ef4d482f314531eaec032dae300d
3
+ size 17431949
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/results_2025-12-29T13-36-47.083952.json ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_gsm8k_platinum_2025-12-29T13-36-47.083952.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4195bc579e96f953efb23da0c687be1046a515bc2ca768e3ed970019f2a03e3c
3
+ size 12924910
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_abstract_algebra_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_anatomy_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_astronomy_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_business_ethics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_clinical_knowledge_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_biology_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_chemistry_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_computer_science_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_mathematics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_medicine_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_college_physics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_computer_security_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_conceptual_physics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_econometrics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_electrical_engineering_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_elementary_mathematics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_formal_logic_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_global_facts_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_biology_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_chemistry_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_computer_science_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_european_history_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_geography_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_government_and_politics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_macroeconomics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_mathematics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_microeconomics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_physics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_psychology_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_statistics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_us_history_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_high_school_world_history_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_human_aging_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_human_sexuality_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_international_law_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_jurisprudence_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_logical_fallacies_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_machine_learning_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_management_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_marketing_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_medical_genetics_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_miscellaneous_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_moral_disputes_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_moral_scenarios_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_nutrition_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
2025-12-18_17-41-03_train_r1_128_online_r2_norm12_shared_parallel_kl_top_1000_steps_700_bs8_lr1.5_qwen3_14b/2025-12-29_13-17-42_lm_eval_all/samples_mmlu_redux_philosophy_generative_2025-12-29T13-36-47.083952.jsonl ADDED
The diff for this file is too large to render. See raw diff