Antigravity Agent committed on
Commit
81bd23a
·
1 Parent(s): c00f45b

Optimize for CPU execution and add CPU models

Browse files
Files changed (2) hide show
  1. app.py +18 -18
  2. requirements.txt +1 -1
app.py CHANGED
@@ -3,19 +3,20 @@ import tempfile
3
  import gradio as gr
4
  from faster_whisper import WhisperModel
5
  import torch
6
- import spaces
7
 
8
- # Global cache for the model so we don't reload it if not necessary
9
- # But on ZeroGPU, it's safer to load it per request or rely on the container state.
10
  _cached_model = None
11
 
12
- def get_model():
13
- global _cached_model
14
- if _cached_model is None:
15
  device = "cuda" if torch.cuda.is_available() else "cpu"
16
  compute_type = "float16" if torch.cuda.is_available() else "int8"
17
- print(f"Loading Whisper Large V3 on {device} ({compute_type})...")
18
- _cached_model = WhisperModel("large-v3", device=device, compute_type=compute_type)
 
 
19
  return _cached_model
20
 
21
  def format_timestamp(seconds):
@@ -36,20 +37,18 @@ def segments_to_srt(segments):
36
  lines.append("")
37
  return "\n".join(lines)
38
 
39
- @spaces.GPU
40
- def transcribe(audio_path, task="transcribe", language=None):
41
  if audio_path is None:
42
  return "Please upload an audio file.", None
43
 
44
- # Get model inside the ZeroGPU context
45
- model = get_model()
46
 
47
- print(f"Transcribing {audio_path}...")
48
 
49
  options = {
50
  "task": task,
51
- "beam_size": 2, # Reduced for speed, still high accuracy
52
- "best_of": 2,
53
  "vad_filter": True, # Filter out non-speech/silence to speed up
54
  }
55
 
@@ -75,12 +74,13 @@ def transcribe(audio_path, task="transcribe", language=None):
75
 
76
  # Gradio UI
77
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
78
- gr.Markdown("# 🎙️ VoiceScript - Whisper Large V3")
79
- gr.Markdown("Fast and accurate transcription powered by Faster-Whisper Large V3.")
80
 
81
  with gr.Row():
82
  with gr.Column():
83
  audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
 
84
  task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
85
  lang_input = gr.Dropdown(["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"], label="Language (optional)", value="auto")
86
  transcribe_btn = gr.Button("Transcribe", variant="primary")
@@ -91,7 +91,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
91
 
92
  transcribe_btn.click(
93
  fn=transcribe,
94
- inputs=[audio_input, task_input, lang_input],
95
  outputs=[text_output, file_output]
96
  )
97
 
 
3
  import gradio as gr
4
  from faster_whisper import WhisperModel
5
  import torch
 
6
 
7
+ # Global cache for the model
8
+ _cached_model_name = None
9
  _cached_model = None
10
 
11
+ def get_model(model_name):
12
+ global _cached_model_name, _cached_model
13
+ if _cached_model_name != model_name or _cached_model is None:
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
  compute_type = "float16" if torch.cuda.is_available() else "int8"
16
+ print(f"Loading {model_name} on {device} ({compute_type})...")
17
+ # Using 4 CPU threads to maximize performance on free tier
18
+ _cached_model = WhisperModel(model_name, device=device, compute_type=compute_type, cpu_threads=4)
19
+ _cached_model_name = model_name
20
  return _cached_model
21
 
22
  def format_timestamp(seconds):
 
37
  lines.append("")
38
  return "\n".join(lines)
39
 
40
+ def transcribe(audio_path, model_name, task="transcribe", language=None):
 
41
  if audio_path is None:
42
  return "Please upload an audio file.", None
43
 
44
+ model = get_model(model_name)
 
45
 
46
+ print(f"Transcribing {audio_path} using {model_name}...")
47
 
48
  options = {
49
  "task": task,
50
+ "beam_size": 1, # Set to 1 for absolute maximum speed on CPU
51
+ "best_of": 1,
52
  "vad_filter": True, # Filter out non-speech/silence to speed up
53
  }
54
 
 
74
 
75
  # Gradio UI
76
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
77
+ gr.Markdown("# 🎙️ VoiceScript - CPU Optimized")
78
+ gr.Markdown("Fast transcription using models optimized for CPU performance (`large-v3-turbo` and `distil-large-v3`).")
79
 
80
  with gr.Row():
81
  with gr.Column():
82
  audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
83
+ model_input = gr.Radio(["large-v3-turbo", "distil-large-v3"], label="Model", value="large-v3-turbo")
84
  task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
85
  lang_input = gr.Dropdown(["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"], label="Language (optional)", value="auto")
86
  transcribe_btn = gr.Button("Transcribe", variant="primary")
 
91
 
92
  transcribe_btn.click(
93
  fn=transcribe,
94
+ inputs=[audio_input, model_input, task_input, lang_input],
95
  outputs=[text_output, file_output]
96
  )
97
 
requirements.txt CHANGED
@@ -3,4 +3,4 @@ gradio
3
  torch
4
  torchaudio
5
  ffmpeg-python
6
- spaces
 
3
  torch
4
  torchaudio
5
  ffmpeg-python
6
+