---
# metadata
# Model metadata mapping (appears to be model-card front matter — confirm
# against the consuming tool). The document-start marker above replaces a
# bare `metadata` word, which a YAML parser cannot combine with the mapping
# that follows.

# License identifier for the released model.
license: mit

# Upstream model(s) listed as the base of this one.
base_model:
- Qwen/Qwen3-4B

# NOTE(review): `update_game` is not a field whose meaning is visible from
# this file — confirm that a consumer actually reads it before relying on it.
update_game: trained-game-env

# Task and library identifiers.
pipeline_tag: text-generation
library_name: transformers

# Free-form tags.
tags:
- reasoning
- qwen3
- information-seeking
- long-horizon
- PRM