Upload model
Browse files
- config.json +1 -1
- modeling_moonshine.py +4 -26
config.json
CHANGED
|
@@ -16,5 +16,5 @@
|
|
| 16 |
"model_type": "moonshine",
|
| 17 |
"n_head": 8,
|
| 18 |
"torch_dtype": "float32",
|
| 19 |
-
"transformers_version": "4.
|
| 20 |
}
|
|
|
|
| 16 |
"model_type": "moonshine",
|
| 17 |
"n_head": 8,
|
| 18 |
"torch_dtype": "float32",
|
| 19 |
+
"transformers_version": "4.46.1"
|
| 20 |
}
|
modeling_moonshine.py
CHANGED
|
@@ -1,9 +1,13 @@
|
|
| 1 |
from einops import rearrange
|
| 2 |
from einops.layers.torch import Rearrange
|
| 3 |
from torch import nn
|
|
|
|
|
|
|
| 4 |
import math
|
| 5 |
import torch
|
| 6 |
|
|
|
|
|
|
|
| 7 |
|
| 8 |
class RotaryEmbedding(nn.Module):
|
| 9 |
def __init__(self, dim, base=10000):
|
|
@@ -426,11 +430,8 @@ class MoonshineModelTorch(nn.Module):
|
|
| 426 |
self.d_head = inner_dim // n_head
|
| 427 |
|
| 428 |
def generate(self, src):
|
| 429 |
-
start = time.time()
|
| 430 |
preprocessed = self.preprocessor(src)
|
| 431 |
-
start = time.time()
|
| 432 |
enc = self.encoder(preprocessed)
|
| 433 |
-
start = time.time()
|
| 434 |
sot_token = 1
|
| 435 |
eot_token = 2
|
| 436 |
|
|
@@ -443,8 +444,6 @@ class MoonshineModelTorch(nn.Module):
|
|
| 443 |
for i in range(1, 1 + self.dec_depth * 4, self.dec_depth)
|
| 444 |
]
|
| 445 |
|
| 446 |
-
start = time.time()
|
| 447 |
-
|
| 448 |
sample = logits[:, -1].argmax(dim=-1, keepdim=True)
|
| 449 |
seq = torch.cat((seq, sample), dim=-1)
|
| 450 |
|
|
@@ -466,8 +465,6 @@ class MoonshineModelTorch(nn.Module):
|
|
| 466 |
|
| 467 |
return seq
|
| 468 |
|
| 469 |
-
from transformers import PreTrainedModel
|
| 470 |
-
from .configuration_moonshine import MoonshineConfig
|
| 471 |
|
| 472 |
class MoonshineModel(PreTrainedModel):
|
| 473 |
config_class = MoonshineConfig
|
|
@@ -487,22 +484,3 @@ class MoonshineModel(PreTrainedModel):
|
|
| 487 |
|
| 488 |
def forward(self, tensor):
|
| 489 |
return self.model.generate(tensor)
|
| 490 |
-
|
| 491 |
-
class MoonshineForConditionalGeneration(PreTrainedModel):
|
| 492 |
-
config_class = MoonshineConfig
|
| 493 |
-
|
| 494 |
-
def __init__(self, config):
|
| 495 |
-
super().__init__(config)
|
| 496 |
-
self.model = MoonshineModelTorch(
|
| 497 |
-
dim = config.dim,
|
| 498 |
-
inner_dim = config.inner_dim,
|
| 499 |
-
enc_depth = config.enc_depth,
|
| 500 |
-
dec_depth = config.dec_depth,
|
| 501 |
-
n_head = config.n_head,
|
| 502 |
-
dec_voc_size = config.dec_voc_size,
|
| 503 |
-
enc_ff_swiglu = config.enc_ff_swiglu,
|
| 504 |
-
dec_ff_swiglu = config.dec_ff_swiglu,
|
| 505 |
-
)
|
| 506 |
-
|
| 507 |
-
def forward(self, tensor):
|
| 508 |
-
return self.model.generate(tensor)
|
|
|
|
| 1 |
from einops import rearrange
|
| 2 |
from einops.layers.torch import Rearrange
|
| 3 |
from torch import nn
|
| 4 |
+
from transformers import PreTrainedModel
|
| 5 |
+
|
| 6 |
import math
|
| 7 |
import torch
|
| 8 |
|
| 9 |
+
from .configuration_moonshine import MoonshineConfig
|
| 10 |
+
|
| 11 |
|
| 12 |
class RotaryEmbedding(nn.Module):
|
| 13 |
def __init__(self, dim, base=10000):
|
|
|
|
| 430 |
self.d_head = inner_dim // n_head
|
| 431 |
|
| 432 |
def generate(self, src):
|
|
|
|
| 433 |
preprocessed = self.preprocessor(src)
|
|
|
|
| 434 |
enc = self.encoder(preprocessed)
|
|
|
|
| 435 |
sot_token = 1
|
| 436 |
eot_token = 2
|
| 437 |
|
|
|
|
| 444 |
for i in range(1, 1 + self.dec_depth * 4, self.dec_depth)
|
| 445 |
]
|
| 446 |
|
|
|
|
|
|
|
| 447 |
sample = logits[:, -1].argmax(dim=-1, keepdim=True)
|
| 448 |
seq = torch.cat((seq, sample), dim=-1)
|
| 449 |
|
|
|
|
| 465 |
|
| 466 |
return seq
|
| 467 |
|
|
|
|
|
|
|
| 468 |
|
| 469 |
class MoonshineModel(PreTrainedModel):
|
| 470 |
config_class = MoonshineConfig
|
|
|
|
| 484 |
|
| 485 |
def forward(self, tensor):
|
| 486 |
return self.model.generate(tensor)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|