update
Browse files
main.py
CHANGED
|
@@ -10,12 +10,15 @@ import shutil
|
|
| 10 |
import tempfile
|
| 11 |
import time
|
| 12 |
from typing import Dict, Tuple
|
|
|
|
| 13 |
import zipfile
|
| 14 |
|
| 15 |
import gradio as gr
|
|
|
|
| 16 |
from huggingface_hub import snapshot_download
|
| 17 |
import matplotlib.pyplot as plt
|
| 18 |
import numpy as np
|
|
|
|
| 19 |
|
| 20 |
import log
|
| 21 |
from project_settings import environment, project_path, log_directory, time_zone_info
|
|
@@ -63,6 +66,28 @@ def get_args():
|
|
| 63 |
return args
|
| 64 |
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
def shell(cmd: str):
    """
    Hand ``cmd`` to ``Command.popen`` and return whatever that call returns.

    :param cmd: command string forwarded unchanged to ``Command.popen``.
    :return: the result of ``Command.popen(cmd)``.
    """
    result = Command.popen(cmd)
    return result
|
| 68 |
|
|
@@ -113,6 +138,10 @@ def when_click_vad_button(audio_file_t = None, audio_microphone_t = None,
|
|
| 113 |
audio_t: Tuple = audio_file_t or audio_microphone_t
|
| 114 |
|
| 115 |
sample_rate, signal = audio_t
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
audio_duration = signal.shape[-1] // sample_rate
|
| 117 |
audio = np.array(signal / (1 << 15), dtype=np.float32)
|
| 118 |
|
|
|
|
| 10 |
import tempfile
|
| 11 |
import time
|
| 12 |
from typing import Dict, Tuple
|
| 13 |
+
import uuid
|
| 14 |
import zipfile
|
| 15 |
|
| 16 |
import gradio as gr
|
| 17 |
+
import librosa
|
| 18 |
from huggingface_hub import snapshot_download
|
| 19 |
import matplotlib.pyplot as plt
|
| 20 |
import numpy as np
|
| 21 |
+
from scipy.io import wavfile
|
| 22 |
|
| 23 |
import log
|
| 24 |
from project_settings import environment, project_path, log_directory, time_zone_info
|
|
|
|
| 66 |
return args
|
| 67 |
|
| 68 |
|
| 69 |
+
def save_input_audio(sample_rate: int, signal: np.ndarray) -> str:
    """
    Write an int16 signal to a uniquely named wav file and return its path.

    The file lands in an ``input_audio`` folder under the system temp
    directory; the folder is created on demand.

    :param sample_rate: sample rate passed to ``wavfile.write``.
    :param signal: samples to store; its dtype must be ``np.int16``.
    :return: POSIX-style path of the wav file that was written.
    :raises AssertionError: when ``signal.dtype`` is not ``np.int16``.
    """
    dtype_ok = signal.dtype == np.int16
    if not dtype_ok:
        raise AssertionError(f"only support dtype np.int16, however: {signal.dtype}")

    output_dir = Path(tempfile.gettempdir(), "input_audio")
    output_dir.mkdir(parents=True, exist_ok=True)

    # A random uuid keeps concurrent uploads from overwriting each other.
    wav_path = (output_dir / f"{uuid.uuid4()}.wav").as_posix()
    wavfile.write(wav_path, sample_rate, signal)
    return wav_path
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def convert_sample_rate(signal: np.ndarray, sample_rate: int, target_sample_rate: int):
    """
    Resample an int16 signal to ``target_sample_rate`` and return it as int16.

    The samples are written to a temporary wav file through
    ``save_input_audio`` and read back with ``librosa.load`` at the target
    rate. The temporary file is removed once it has been read.

    :param signal: samples recorded at ``sample_rate``; ``save_input_audio``
        rejects anything that is not ``np.int16``.
    :param sample_rate: sample rate of ``signal``.
    :param target_sample_rate: sample rate requested from ``librosa.load``.
    :return: the resampled samples as an ``np.int16`` array.
    :raises AssertionError: propagated from ``save_input_audio`` when
        ``signal`` is not ``np.int16``.
    """
    filename = save_input_audio(sample_rate, signal)
    try:
        resampled, _ = librosa.load(filename, sr=target_sample_rate)
    finally:
        # The wav file exists only to feed librosa; remove it so repeated
        # calls do not pile up files in the temp directory. Cleanup is
        # best-effort and must not mask an error raised by the load itself.
        try:
            Path(filename).unlink()
        except OSError:
            pass

    # Resampling can overshoot the nominal [-1, 1) float range, and a sample
    # of exactly 1.0 already scales to 32768; clip before the cast so such
    # samples saturate instead of wrapping around in int16.
    scaled = np.clip(resampled * (1 << 15), -(1 << 15), (1 << 15) - 1)
    return np.array(scaled, dtype=np.int16)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
def shell(cmd: str):
    """
    Hand ``cmd`` to ``Command.popen`` and return whatever that call returns.

    :param cmd: command string forwarded unchanged to ``Command.popen``.
    :return: the result of ``Command.popen(cmd)``.
    """
    result = Command.popen(cmd)
    return result
|
| 93 |
|
|
|
|
| 138 |
audio_t: Tuple = audio_file_t or audio_microphone_t
|
| 139 |
|
| 140 |
sample_rate, signal = audio_t
|
| 141 |
+
if sample_rate != 8000:
|
| 142 |
+
signal = convert_sample_rate(signal, sample_rate, 8000)
|
| 143 |
+
sample_rate = 8000
|
| 144 |
+
|
| 145 |
audio_duration = signal.shape[-1] // sample_rate
|
| 146 |
audio = np.array(signal / (1 << 15), dtype=np.float32)
|
| 147 |
|