| | ============================================================ |
| | Training Flow Bisection |
| | ============================================================ |
| |
|
| | [Step 1] load_engine(max_seq_len=4096)... |
| | [Auto-detect] Qwen3-Omni MoE thinker (30.5B total, ~3.3B active) |
| | [FireEcho] Loading /run/media/echo/Echo/ECHO/training/Prototype Fireecho/model/Qwen3-Omni-30B-A3B-Instruct... |
| | [FireEcho] AutoConfig failed ('Qwen3OmniMoeTalkerCodePredictorConfig' object has no attribute 'use_sliding_window'), loading config.json directly |
| | Qwen3-Omni: will stream-load from 15 shards |
| | [Qwen3 Streaming] Loaded shard index: 28010 keys across 15 shards |
| | [Qwen3 Streaming] Building engine skeleton... |
| | [Qwen3 Streaming] Global params on GPU: 1.2 GB |
| | Layer 4/48: 393 weights, VRAM 2.8 GB, CPU 1.4 GB |
| | Layer 8/48: 393 weights, VRAM 4.3 GB, CPU 1.6 GB |
| | Layer 12/48: 393 weights, VRAM 5.8 GB, CPU 1.7 GB |
| | Layer 16/48: 393 weights, VRAM 7.4 GB, CPU 1.9 GB |
| | Layer 20/48: 393 weights, VRAM 8.9 GB, CPU 2.0 GB |
| | Layer 24/48: 393 weights, VRAM 10.4 GB, CPU 2.2 GB |
| | Layer 28/48: 393 weights, VRAM 11.9 GB, CPU 2.3 GB |
| | Layer 32/48: 393 weights, VRAM 13.5 GB, CPU 2.5 GB |
| | Layer 36/48: 393 weights, VRAM 15.0 GB, CPU 2.6 GB |
| | Layer 40/48: 393 weights, VRAM 16.5 GB, CPU 2.8 GB |
| | Layer 44/48: 393 weights, VRAM 18.0 GB, CPU 2.9 GB |
| | Layer 48/48: 393 weights, VRAM 19.6 GB, CPU 3.1 GB |
| | [Qwen3 Streaming] Final VRAM: 19.6 GB (FP4 quantized) |
| | [Qwen3 Streaming] Done: 1571.8M params, 18867 weights loaded |
| | Total params: 1.57B |
| | Frozen params: 1.54B (base model, FP4) |
| | Trainable params: 30.2M (Hebbian only) |
| | Traceback (most recent call last): |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/debug_bisect.py", line 43, in <module> |
| | check(engine, tokenizer, "after load") |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/utils/_contextlib.py", line 124, in decorate_context |
| | return func(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/debug_bisect.py", line 23, in check |
| | logits = engine.forward(ids, use_cache=True, position=0) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/fireecho_kernel.py", line 9964, in forward |
| | x = layer(x, self.kv_cache, self._current_seq_id, position, use_cache) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl |
| | return self._call_impl(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl |
| | return forward_call(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/fireecho_kernel.py", line 8820, in forward |
| | x = x + self.ffn(self.norm2(x)) |
| | ^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl |
| | return self._call_impl(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl |
| | return forward_call(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/fireecho_kernel.py", line 8710, in forward |
| | expert_out = self.experts[expert_idx](selected) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl |
| | return self._call_impl(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl |
| | return forward_call(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/fireecho_kernel.py", line 7565, in forward |
| | gate_up = self.gate_up_proj(x) |
| | ^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1779, in _wrapped_call_impl |
| | return self._call_impl(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/.venv_infer312/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1790, in _call_impl |
| | return forward_call(*args, **kwargs) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | File "/run/media/echo/Echo/ECHO/training/Prototype Fireecho/tool/kernel/FireEcho Engine/fireecho_kernel.py", line 7339, in forward |
| | return F.linear(x, self.weight, self.bias) |
| | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
| | RuntimeError: size mismatch, got input (5), mat (5x2048), vec (0) |
| |
|