Spaces:
Paused
Paused
Harry Coultas Blum committed on
Commit ·
c976192
1
Parent(s): dbb5fad
Trying to fix kvcache
Browse files — vui/inference.py +9 −15
vui/inference.py
CHANGED
|
@@ -10,7 +10,6 @@ from torch.nn.attention import SDPBackend, sdpa_kernel
|
|
| 10 |
|
| 11 |
from vui.model import Vui
|
| 12 |
from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
|
| 13 |
-
from vui.utils import timer
|
| 14 |
from vui.vad import detect_voice_activity as vad
|
| 15 |
|
| 16 |
|
|
@@ -155,7 +154,6 @@ def generate(
|
|
| 155 |
with (
|
| 156 |
torch.autocast("cuda", torch.bfloat16, True),
|
| 157 |
sdpa_kernel([SDPBackend.MATH]),
|
| 158 |
-
timer("generate"),
|
| 159 |
):
|
| 160 |
t1 = time.perf_counter()
|
| 161 |
batch_size = 1
|
|
@@ -362,19 +360,15 @@ def render(
|
|
| 362 |
|
| 363 |
try:
|
| 364 |
print("rendering", current_text)
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
-
|
| 373 |
-
|
| 374 |
-
top_k=top_k,
|
| 375 |
-
top_p=top_p,
|
| 376 |
-
max_gen_len=maxlen,
|
| 377 |
-
)
|
| 378 |
|
| 379 |
codes = codes[..., :-10]
|
| 380 |
audio = self.codec.from_indices(codes)
|
|
|
|
| 10 |
|
| 11 |
from vui.model import Vui
|
| 12 |
from vui.sampling import multinomial, sample_top_k, sample_top_p, sample_top_p_top_k
|
|
|
|
| 13 |
from vui.vad import detect_voice_activity as vad
|
| 14 |
|
| 15 |
|
|
|
|
| 154 |
with (
|
| 155 |
torch.autocast("cuda", torch.bfloat16, True),
|
| 156 |
sdpa_kernel([SDPBackend.MATH]),
|
|
|
|
| 157 |
):
|
| 158 |
t1 = time.perf_counter()
|
| 159 |
batch_size = 1
|
|
|
|
| 360 |
|
| 361 |
try:
|
| 362 |
print("rendering", current_text)
|
| 363 |
+
codes = generate(
|
| 364 |
+
self,
|
| 365 |
+
current_text,
|
| 366 |
+
prompt_codes=prev_codes,
|
| 367 |
+
temperature=temperature,
|
| 368 |
+
top_k=top_k,
|
| 369 |
+
top_p=top_p,
|
| 370 |
+
max_gen_len=maxlen,
|
| 371 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 372 |
|
| 373 |
codes = codes[..., :-10]
|
| 374 |
audio = self.codec.from_indices(codes)
|