When there are two or more speakers, a RuntimeError is raised: "The expanded size of the tensor (1) must match the existing size (31) at non-singleton dimension 2"

#24
by yue666zhen - opened

RuntimeError during agent pushpop
Traceback (most recent call last):
File "/root/v2/seamless-streaming/seamless_server/src/simuleval_transcoder.py", line 342, in process_pipeline_impl
self.agent.pushpop(input_segment, self.states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 304, in pushpop
self.push(segment, states, upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 321, in push
self.push_impl(self.source_module, segment, states, upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 296, in push_impl
self.push_impl(child, segment, states, upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 296, in push_impl
self.push_impl(child, segment, states, upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 296, in push_impl
self.push_impl(child, segment, states, upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/pipeline.py", line 287, in push_impl
segment = module.pushpop(segment, states[module], upstream_states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/agent.py", line 170, in pushpop
return self.pop(states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/simuleval/agents/agent.py", line 134, in pop
action = self.policy(states)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/streaming/agents/online_text_decoder.py", line 329, in policy
index, prob, decoder_features = self.run_decoder(states, pred_indices)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/streaming/agents/online_text_decoder.py", line 221, in run_decoder
decoder_output, _, p_choose = self.model.decode(
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/models/monotonic_decoder/model.py", line 54, in decode
return self.text_decoder( # type: ignore[no-any-return]
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/models/monotonic_decoder/monotonic_decoder.py", line 82, in forward
seqs, padding_mask, p_choose = layer(
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/models/monotonic_decoder/monotonic_decoder_layer.py", line 118, in forward
seqs = self._forward_self_attn(seqs, padding_mask, self_attn_mask, state_bag)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/seamless_communication/models/monotonic_decoder/monotonic_decoder_layer.py", line 139, in _forward_self_attn
seqs = self.self_attn(
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
return forward_call(*args, **kwargs)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/fairseq2/nn/transformer/multihead_attention.py", line 415, in forward
state.append(k, v)
File "/root/miniconda3/envs/smlss_server_v2/lib/python3.10/site-packages/fairseq2/nn/transformer/multihead_attention.py", line 631, in append
self.k[:, :, pos : pos + 1] = k
RuntimeError: The expanded size of the tensor (1) must match the existing size (31) at non-singleton dimension 2. Target sizes: [1, 16, 1, 64]. Tensor sizes: [16, 31, 64]

Sign up or log in to comment