| base_model: DavidAU/Qwen2.5-7B-Instruct-1M-Thinking-Claude-Gemini-GPT5.2-DISTILL | |
| gate_mode: hidden | |
| dtype: bfloat16 | |
| architecture: qwen | |
| merge_method: moe | |
| num_experts_per_tok: 1 | |
| num_local_experts: 2 | |
| experts: | |
| - source_model: alibidaran/QWEN7B-Instruction_python | |
| positive_prompts: [] | |
| - source_model: skzxjus/Qwen2.5-7B-1m-Open-R1-Distill | |
| positive_prompts: [] | |
| tokenizer_source: DavidAU/Qwen2.5-7B-Instruct-1M-Thinking-Claude-Gemini-GPT5.2-DISTILL | |