Spaces:
Runtime error
Runtime error
feat: add preloader for efficient runtime
Browse files- app.py +6 -3
- src/infer.py +13 -5
- src/utils.py +4 -4
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
from src import infer, utils
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
-
infer.
|
| 5 |
|
| 6 |
-
|
| 7 |
[None, "assets/audio/male-indonesian.wav", None],
|
| 8 |
[None, "assets/audio/female-indonesian.wav", None],
|
| 9 |
[None, "assets/audio/male-english.wav", None],
|
|
@@ -19,7 +19,10 @@ demo = gr.Interface(
|
|
| 19 |
inputs=[
|
| 20 |
gr.Dropdown(
|
| 21 |
label="Model",
|
| 22 |
-
choices=[
|
|
|
|
|
|
|
|
|
|
| 23 |
value="base"),
|
| 24 |
gr.Radio(label="Language",
|
| 25 |
choices=["indonesian","english"],
|
|
|
|
| 1 |
from src import infer, utils
|
| 2 |
import gradio as gr
|
| 3 |
|
| 4 |
+
infer.model_preloader_downloader()
|
| 5 |
|
| 6 |
+
AUDIO_EXAMPLE = [
|
| 7 |
[None, "assets/audio/male-indonesian.wav", None],
|
| 8 |
[None, "assets/audio/female-indonesian.wav", None],
|
| 9 |
[None, "assets/audio/male-english.wav", None],
|
|
|
|
| 19 |
inputs=[
|
| 20 |
gr.Dropdown(
|
| 21 |
label="Model",
|
| 22 |
+
choices=[
|
| 23 |
+
"tiny", "base", "small", "medium",
|
| 24 |
+
"large", "large-v1", "large-v2"
|
| 25 |
+
],
|
| 26 |
value="base"),
|
| 27 |
gr.Radio(label="Language",
|
| 28 |
choices=["indonesian","english"],
|
src/infer.py
CHANGED
|
@@ -2,15 +2,23 @@
|
|
| 2 |
from typing import *
|
| 3 |
from src import utils
|
| 4 |
import whisper
|
|
|
|
| 5 |
|
|
|
|
|
|
|
| 6 |
|
| 7 |
-
def
|
| 8 |
-
|
| 9 |
-
for mname in list_names:
|
| 10 |
mdl = whisper.load_model(mname)
|
| 11 |
del mdl
|
| 12 |
|
| 13 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
if mic_audio is not None:
|
| 15 |
voice = mic_audio
|
| 16 |
elif audio is not None:
|
|
@@ -20,6 +28,6 @@ def predict(model_name, language, mic_audio=None, audio=None):
|
|
| 20 |
|
| 21 |
voice = utils.preprocess_audio(voice)
|
| 22 |
|
| 23 |
-
model =
|
| 24 |
result = model.transcribe(voice, language=language)
|
| 25 |
return result["text"]
|
|
|
|
| 2 |
from typing import *
|
| 3 |
from src import utils
|
| 4 |
import whisper
|
| 5 |
+
|
| 6 |
|
| 7 |
+
MODEL_NAMES = ["tiny", "base", "small", "medium", "large", "large-v1","large-v2"]
|
| 8 |
+
MODEL_BASE = whisper.load_model("base")
|
| 9 |
|
| 10 |
+
def model_preloader_downloader():
|
| 11 |
+
for mname in MODEL_NAMES:
|
|
|
|
| 12 |
mdl = whisper.load_model(mname)
|
| 13 |
del mdl
|
| 14 |
|
| 15 |
+
def model_loader(name: str):
|
| 16 |
+
if name=="base":
|
| 17 |
+
return MODEL_BASE
|
| 18 |
+
else:
|
| 19 |
+
return whisper.load_model(name)
|
| 20 |
+
|
| 21 |
+
def predict(model_name: str, language: str, mic_audio=None, audio=None):
|
| 22 |
if mic_audio is not None:
|
| 23 |
voice = mic_audio
|
| 24 |
elif audio is not None:
|
|
|
|
| 28 |
|
| 29 |
voice = utils.preprocess_audio(voice)
|
| 30 |
|
| 31 |
+
model = model_loader(model_name)
|
| 32 |
result = model.transcribe(voice, language=language)
|
| 33 |
return result["text"]
|
src/utils.py
CHANGED
|
@@ -1,11 +1,10 @@
|
|
| 1 |
from pathlib import Path
|
| 2 |
import whisper
|
|
|
|
| 3 |
|
| 4 |
-
sample_rate: int = 16000
|
| 5 |
-
float_factor: float = 32678.0
|
| 6 |
|
| 7 |
|
| 8 |
-
def preprocess_audio(filepath: str):
|
| 9 |
# load audio and pad/trim it to fit 30 seconds
|
| 10 |
audio = whisper.load_audio(filepath)
|
| 11 |
audio = whisper.pad_or_trim(audio)
|
|
@@ -18,4 +17,5 @@ def parsing_text(filepath: str):
|
|
| 18 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
| 19 |
|
| 20 |
return path.read_text()
|
| 21 |
-
|
|
|
|
|
|
| 1 |
from pathlib import Path
|
| 2 |
import whisper
|
| 3 |
+
from typing import Any
|
| 4 |
|
|
|
|
|
|
|
| 5 |
|
| 6 |
|
| 7 |
+
def preprocess_audio(filepath: str) -> Any:
|
| 8 |
# load audio and pad/trim it to fit 30 seconds
|
| 9 |
audio = whisper.load_audio(filepath)
|
| 10 |
audio = whisper.pad_or_trim(audio)
|
|
|
|
| 17 |
raise ValueError("Invalid file type. Only '.txt' and '.md' files are supported.")
|
| 18 |
|
| 19 |
return path.read_text()
|
| 20 |
+
|
| 21 |
+
|