|
@@ -3608,7 +3608,7 @@
|
|
|
" with torch.no_grad():\n",
|
|
|
" logits = model(mel.unsqueeze(0), tokens.unsqueeze(0))\n",
|
|
|
"\n",
|
|
|
- " weights = torch.concatenate(QKs) # layers * heads * tokens * frames \n",
|
|
|
+ " weights = torch.cat(QKs) # layers * heads * tokens * frames \n",
|
|
|
" weights = weights[:, :, :, : duration // AUDIO_SAMPLES_PER_TOKEN].cpu()\n",
|
|
|
" weights = medfilt(weights, (1, 1, 1, medfilt_width))\n",
|
|
|
" weights = torch.tensor(weights * qk_scale).softmax(dim=-1)\n",
|