| license: mit | |
| base_model: | |
| - Qwen/Qwen3-4B | |
| update_game: trained-game-env | |
| pipeline_tag: text-generation | |
| library_name: transformers | |
| tags: | |
| - reasoning | |
| - qwen3 | |
| - information-seeking | |
| - long-horizon | |
| - PRM | |
| license: mit | |
| base_model: | |
| - Qwen/Qwen3-4B | |
| update_game: trained-game-env | |
| pipeline_tag: text-generation | |
| library_name: transformers | |
| tags: | |
| - reasoning | |
| - qwen3 | |
| - information-seeking | |
| - long-horizon | |
| - PRM | |