Spaces:
Runtime error
Runtime error
feat: add preloader for efficient runtime
Browse files- app.py +6 -3
- src/infer.py +13 -5
- src/utils.py +4 -4
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
from src import infer, utils
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
-
infer.
|
| 5 |
|
| 6 |
-
|
| 7 |
[None, "assets/audio/male-indonesian.wav", None],
|
| 8 |
[None, "assets/audio/female-indonesian.wav", None],
|
| 9 |
[None, "assets/audio/male-english.wav", None],
|
|
@@ -19,7 +19,10 @@ demo = gr.Interface(
|
|
| 19 |
inputs=[
|
| 20 |
gr.Dropdown(
|
| 21 |
label="Model",
|
| 22 |
-
choices=[
|
|
|
|
|
|
|
|
|
|
| 23 |
value="base"),
|
| 24 |
gr.Radio(label="Language",
|
| 25 |
choices=["indonesian","english"],
|
|
|
|
| 1 |
from src import infer, utils
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
+
infer.model_preloader_downloader()
|
| 5 |
|
| 6 |
+
AUDIO_EXAMPLE = [
|
| 7 |
[None, "assets/audio/male-indonesian.wav", None],
|
| 8 |
[None, "assets/audio/female-indonesian.wav", None],
|
| 9 |
[None, "assets/audio/male-english.wav", None],
|
|
|
|
| 19 |
inputs=[
|
| 20 |
gr.Dropdown(
|
| 21 |
label="Model",
|
| 22 |
+
choices=[
|
| 23 |
+
"tiny", "base", "small", "medium",
|
| 24 |
+
"large", "large-v1", "large-v2"
|
| 25 |
+
],
|
| 26 |
value="base"),
|
| 27 |
gr.Radio(label="Language",
|
| 28 |
choices=["indonesian","english"],
|
src/infer.py
CHANGED
|
@@ -2,15 +2,23 @@
|
|
| 2 |
from typing import *
|
| 3 |
from src import utils
|
| 4 |
import whisper
|
|
|
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
def
|
| 8 |
-
|
| 9 |
-
for mname in list_names:
|
| 10 |
mdl = whisper.load_model(mname)
|
| 11 |
del mdl
|
| 12 |
|
| 13 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
if mic_audio is not None:
|
| 15 |
voice = mic_audio
|
| 16 |
elif audio is not None:
|
|
@@ -20,6 +28,6 @@ def predict(model_name, language, mic_audio=None, audio=None):
|
|
| 20 |
|
| 21 |
voice = utils.preprocess_audio(voice)
|
| 22 |
|
| 23 |
-
model =
|
| 24 |
result = model.transcribe(voice, language=language)
|
| 25 |
return result["text"]
|
|
|
|
| 2 |
from typing import *
|
| 3 |
from src import utils
|
| 4 |
import whisper
|
| 5 |
+
|
| 6 |
|
| 7 |
+
MODEL_NAMES = ["tiny", "base", "small", "medium", "large", "large-v1","large-v2"]
|
| 8 |
+
MODEL_BASE = whisper.load_model("base")
|
| 9 |
|
| 10 |
+
def model_preloader_downloader():
|
| 11 |
+
for mname in MODEL_NAMES:
|
|
|
|
| 12 |
mdl = whisper.load_model(mname)
|
| 13 |
del mdl
|
| 14 |
|
| 15 |
+
def model_loader(name: str):
|
| 16 |
+
if name=="base":
|
| 17 |
+
return MODEL_BASE
|
| 18 |
+
else:
|
| 19 |
+
return whisper.load_model(name)
|
| 20 |
+
|
| 21 |
+
def predict(model_name: str, language: str, mic_audio=None, audio=None):
|
| 22 |
if mic_audio is not None:
|
| 23 |
voice = mic_audio
|
| 24 |
elif audio is not None:
|
|
|
|
| 28 |
|
| 29 |
voice = utils.preprocess_audio(voice)
|
| 30 |
|
| 31 |
+
model = model_loader(model_name)
|
| 32 |
result = model.transcribe(voice, language=language)
|
| 33 |
return result["text"]
|
src/utils.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import whisper
|
|
|
|
| 3 |
|
| 4 |
-
sample_rate: int = 16000
|
| 5 |
-
float_factor: float = 32678.0
|
| 6 |
|
| 7 |
|
| 8 |
-
def preprocess_audio(filepath: str):
|
| 9 |
# load audio and pad/trim it to fit 30 seconds
|
| 10 |
audio = whisper.load_audio(filepath)
|
| 11 |
audio = whisper.pad_or_trim(audio)
|
|
@@ -18,4 +17,5 @@ def parsing_text(filepath: str):
|
|
| 18 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
| 19 |
|
| 20 |
return path.read_text()
|
| 21 |
-
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import whisper
|
| 3 |
+
from typing import Any
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
+
def preprocess_audio(filepath: str) -> Any:
|
| 8 |
# load audio and pad/trim it to fit 30 seconds
|
| 9 |
audio = whisper.load_audio(filepath)
|
| 10 |
audio = whisper.pad_or_trim(audio)
|
|
|
|
| 17 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
| 18 |
|
| 19 |
return path.read_text()
|
| 20 |
+
|
| 21 |
+
|