swapnil7777/grpo-grpo-qwen-3b-iso-hendrycks-math-seed42-20260412-145509-checkpoint-272 Updated Apr 13
swapnil7777/grpo-grpo-qwen-3b-iso-hendrycks-math-seed42-20260412-145509-best-checkpoint Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-10-shutoff-trajectory-aware-hendrycks-math-seed42-20260412-025004-c-bd2bde43 Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-10-shutoff-trajectory-aware-hendrycks-math-seed42-20260412-025004-c-e6e7661f Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-10-shutoff-trajectory-aware-hendrycks-math-seed42-20260412-025004-c-5e541317 Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-10-shutoff-trajectory-aware-hendrycks-math-seed42-20260412-025004-b-52f6fd45 Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-10-shutoff-trajectory-aware-hendrycks-math-seed42-20260412-025004-b-1c30da4f Updated Apr 13
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-ch-33191273 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-ch-87c4901b Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-ch-1a0ad791 Updated Apr 12 • 1
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-bp-2bfcd1bb Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-bp-3a0c7752 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-bp-2c933d0a Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-5-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114637-be-52f7c6e5 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-3-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114108-ch-9c4ebc27 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-3-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114108-ch-2393e580 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-3-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114108-ch-8f7a9687 Updated Apr 12
swapnil7777/gxpo-gxpo-qwen-3b-k-3-shutoff-trajectory-aware-hendrycks-math-seed42-20260411-114108-be-a73dba72 Updated Apr 12
swapnil7777/sfpo-sfpo-qwen-3b-k-5-hendrycks-math-seed42-20260410-100741-checkpoint-394 Updated Apr 11
swapnil7777/sfpo-sfpo-qwen-3b-k-5-hendrycks-math-seed42-20260410-100741-checkpoint-392 Updated Apr 11 • 1
swapnil7777/sfpo-sfpo-qwen-3b-k-5-hendrycks-math-seed42-20260410-100741-best-checkpoint Updated Apr 11
swapnil7777/sfpo-sfpo-qwen-3b-k-3-hendrycks-math-seed42-20260410-100614-checkpoint-396 Updated Apr 11 • 1
swapnil7777/sfpo-sfpo-qwen-3b-k-3-hendrycks-math-seed42-20260410-100614-checkpoint-394 Updated Apr 11