Spaces:
Sleeping
Sleeping
Update inference.py
Browse files - inference.py +12 -4
inference.py
CHANGED
|
@@ -402,19 +402,27 @@ class StyleTTS2(torch.nn.Module):
|
|
| 402 |
)
|
| 403 |
print("[DBG] wav shape:", wav.shape)
|
| 404 |
print("[DBG] wav min/max:", wav.min().item(), wav.max().item())
|
| 405 |
-
print("[DBG] wav mean abs:",
|
| 406 |
|
| 407 |
# trim an toàn
|
| 408 |
-
|
| 409 |
-
|
|
|
|
|
|
|
|
|
|
| 410 |
wav = wav[trim:-trim]
|
| 411 |
|
| 412 |
if wav.size > 0:
|
| 413 |
list_wav.append(wav)
|
| 414 |
|
|
|
|
|
|
|
| 415 |
if len(list_wav) == 0:
|
| 416 |
return np.zeros((2400,), dtype=np.float32) # 0.1s silence để không crash
|
| 417 |
|
| 418 |
final_wav = np.concatenate(list_wav)
|
| 419 |
-
final_wav = np.concatenate([np.zeros((4000,), dtype=np.float32), final_wav, np.zeros((4000,), dtype=np.float32)])
|
|
|
|
|
|
|
|
|
|
| 420 |
return final_wav
|
|
|
|
| 402 |
)
|
| 403 |
print("[DBG] wav shape:", wav.shape)
|
| 404 |
print("[DBG] wav min/max:", wav.min().item(), wav.max().item())
|
| 405 |
+
print("[DBG] wav mean abs:", np.abs(wav).mean())
|
| 406 |
|
| 407 |
# trim an toàn
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
# trim only if long enough
|
| 411 |
+
trim = int(0.05 * 24000) # 50ms instead of 4000
|
| 412 |
+
if wav.shape[0] > 4 * trim:
|
| 413 |
wav = wav[trim:-trim]
|
| 414 |
|
| 415 |
if wav.size > 0:
|
| 416 |
list_wav.append(wav)
|
| 417 |
|
| 418 |
+
|
| 419 |
+
|
| 420 |
if len(list_wav) == 0:
|
| 421 |
return np.zeros((2400,), dtype=np.float32) # 0.1s silence để không crash
|
| 422 |
|
| 423 |
final_wav = np.concatenate(list_wav)
|
| 424 |
+
# final_wav = np.concatenate([np.zeros((4000,), dtype=np.float32), final_wav, np.zeros((4000,), dtype=np.float32)])
|
| 425 |
+
pad = int(0.05 * 24000) # 50ms
|
| 426 |
+
final_wav = np.concatenate([np.zeros((pad,), dtype=np.float32), final_wav, np.zeros((pad,), dtype=np.float32)])
|
| 427 |
+
|
| 428 |
return final_wav
|