Commit ·
bc343c9
1
Parent(s): 08ef38f
对齐了输入长度
Browse files
api.py
CHANGED
|
@@ -215,6 +215,15 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
|
|
| 215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
| 216 |
input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
|
| 217 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
# Model inference
|
| 219 |
text = model.generate(
|
| 220 |
input=input_wav,
|
|
@@ -222,7 +231,7 @@ async def process_audio(audio_data: bytes, language: str = "auto") -> str:
|
|
| 222 |
language=language,
|
| 223 |
use_itn=True,
|
| 224 |
batch_size_s=500,
|
| 225 |
-
|
| 226 |
)
|
| 227 |
|
| 228 |
# Format result
|
|
|
|
| 215 |
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
|
| 216 |
input_wav = resampler(torch.from_numpy(input_wav)[None, :])[0, :].numpy()
|
| 217 |
|
| 218 |
+
target_length = 90 * sample_rate
|
| 219 |
+
current_length = input_wav.shape[0]
|
| 220 |
+
if current_length < target_length:
|
| 221 |
+
padding_length = target_length - current_length
|
| 222 |
+
padding = np.zeros(padding_length, dtype=np.float32)
|
| 223 |
+
input_wav = np.concatenate((input_wav, padding))
|
| 224 |
+
elif current_length > target_length:
|
| 225 |
+
input_wav = input_wav[:target_length]
|
| 226 |
+
|
| 227 |
# Model inference
|
| 228 |
text = model.generate(
|
| 229 |
input=input_wav,
|
|
|
|
| 231 |
language=language,
|
| 232 |
use_itn=True,
|
| 233 |
batch_size_s=500,
|
| 234 |
+
merge_vad=True
|
| 235 |
)
|
| 236 |
|
| 237 |
# Format result
|