Dionyssos committed on
Commit
8a9a2fe
·
1 Parent(s): 376e6a0
Files changed (1) hide show
  1. audiocraft/lm.py +1 -1
audiocraft/lm.py CHANGED
@@ -143,7 +143,7 @@ class LMModel(nn.Module):
143
  next_token = self.forward(out_codes[:, 0, [0, 1, 2, 3], torch.tensor([3, 2, 1, 0]) + offset][:, :, None], # index the diagonal & expand to [bs, n_q, dur=1]
144
  #gen_sequence[:, 0, :, offset-1:offset], # DIAG-INDEXING for setting the prediction of the lm into gen_sequence. The gen_sequence has to be un-delayed in the end [because it has to be de-delayed for the vocoder; it is actually only the lm input that needs to see the delay, thus we could just feed it by a diagonal gather] so that it matches gen_codes -1, e.g. a[[0, 1, 2, 3], torch.tensor([0, 1, 2, 3]) + 5]. The gen_sequence is indexed by vertical column and fed to the lm; however, the prediction of the lm is placed diagonally, with delay, into the gen_sequence
145
  condition_tensors=text_condition, # utilisation of the attention mask of txt condition ?
146
- token_count=offset-1) # [bs, 4, 1, 2048]
147
 
148
 
149
 
 
143
  next_token = self.forward(out_codes[:, 0, [0, 1, 2, 3], torch.tensor([3, 2, 1, 0]) + offset][:, :, None], # index the diagonal & expand to [bs, n_q, dur=1]
144
  #gen_sequence[:, 0, :, offset-1:offset], # DIAG-INDEXING for setting the prediction of the lm into gen_sequence. The gen_sequence has to be un-delayed in the end [because it has to be de-delayed for the vocoder; it is actually only the lm input that needs to see the delay, thus we could just feed it by a diagonal gather] so that it matches gen_codes -1, e.g. a[[0, 1, 2, 3], torch.tensor([0, 1, 2, 3]) + 5]. The gen_sequence is indexed by vertical column and fed to the lm; however, the prediction of the lm is placed diagonally, with delay, into the gen_sequence
145
  condition_tensors=text_condition, # utilisation of the attention mask of txt condition ?
146
+ token_count=offset) # [bs, 4, 1, 2048]
147
 
148
 
149