Spaces:

redhairedshanks1
/

VoiceScript

Sleeping

App Files Files Community

Antigravity Agent committed on Apr 24

Commit

81bd23a

1 Parent(s): c00f45b

Optimize for CPU execution and add CPU models

Browse files

Files changed (2) hide show

app.py +18 -18
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -3,19 +3,20 @@ import tempfile
 import gradio as gr
 from faster_whisper import WhisperModel
 import torch
-import spaces
-# Global cache for the model so we don't reload it if not necessary
-# But on ZeroGPU, it's safer to load it per request or rely on the container state.
 _cached_model = None
-def get_model():
-    global _cached_model
-    if _cached_model is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         compute_type = "float16" if torch.cuda.is_available() else "int8"
-        print(f"Loading Whisper Large V3 on {device} ({compute_type})...")
-        _cached_model = WhisperModel("large-v3", device=device, compute_type=compute_type)
     return _cached_model
 def format_timestamp(seconds):
@@ -36,20 +37,18 @@ def segments_to_srt(segments):
         lines.append("")
     return "\n".join(lines)
-@spaces.GPU
-def transcribe(audio_path, task="transcribe", language=None):
     if audio_path is None:
         return "Please upload an audio file.", None
-    # Get model inside the ZeroGPU context
-    model = get_model()
-    print(f"Transcribing {audio_path}...")
     options = {
         "task": task,
-        "beam_size": 2, # Reduced for speed, still high accuracy
-        "best_of": 2,
         "vad_filter": True, # Filter out non-speech/silence to speed up
     }
@@ -75,12 +74,13 @@ def transcribe(audio_path, task="transcribe", language=None):
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🎙️ VoiceScript - Whisper Large V3")
-    gr.Markdown("Fast and accurate transcription powered by Faster-Whisper Large V3.")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
             task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
             lang_input = gr.Dropdown(["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"], label="Language (optional)", value="auto")
             transcribe_btn = gr.Button("Transcribe", variant="primary")
@@ -91,7 +91,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     transcribe_btn.click(
         fn=transcribe,
-        inputs=[audio_input, task_input, lang_input],
         outputs=[text_output, file_output]
     )

 import gradio as gr
 from faster_whisper import WhisperModel
 import torch
+# Global cache for the model
+_cached_model_name = None
 _cached_model = None
+def get_model(model_name):
+    global _cached_model_name, _cached_model
+    if _cached_model_name != model_name or _cached_model is None:
         device = "cuda" if torch.cuda.is_available() else "cpu"
         compute_type = "float16" if torch.cuda.is_available() else "int8"
+        print(f"Loading {model_name} on {device} ({compute_type})...")
+        # Using 4 CPU threads to maximize performance on free tier
+        _cached_model = WhisperModel(model_name, device=device, compute_type=compute_type, cpu_threads=4)
+        _cached_model_name = model_name
     return _cached_model
 def format_timestamp(seconds):
         lines.append("")
     return "\n".join(lines)
+def transcribe(audio_path, model_name, task="transcribe", language=None):
     if audio_path is None:
         return "Please upload an audio file.", None
+    model = get_model(model_name)
+    print(f"Transcribing {audio_path} using {model_name}...")
     options = {
         "task": task,
+        "beam_size": 1, # Set to 1 for absolute maximum speed on CPU
+        "best_of": 1,
         "vad_filter": True, # Filter out non-speech/silence to speed up
     }
 # Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎙️ VoiceScript - CPU Optimized")
+    gr.Markdown("Fast transcription using models optimized for CPU performance (`large-v3-turbo` and `distil-large-v3`).")
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
+            model_input = gr.Radio(["large-v3-turbo", "distil-large-v3"], label="Model", value="large-v3-turbo")
             task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
             lang_input = gr.Dropdown(["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"], label="Language (optional)", value="auto")
             transcribe_btn = gr.Button("Transcribe", variant="primary")
     transcribe_btn.click(
         fn=transcribe,
+        inputs=[audio_input, model_input, task_input, lang_input],
         outputs=[text_output, file_output]
     )

requirements.txt CHANGED Viewed

@@ -3,4 +3,4 @@ gradio
 torch
 torchaudio
 ffmpeg-python
-spaces

 torch
 torchaudio
 ffmpeg-python