Antigravity Agent committed on
Commit
6dd81c2
·
1 Parent(s): 33708d5

Add ZeroGPU support and performance optimizations

Browse files
Files changed (2) hide show
  1. app.py +22 -4
  2. requirements.txt +1 -0
app.py CHANGED
@@ -3,12 +3,17 @@ import tempfile
3
  import gradio as gr
4
  from faster_whisper import WhisperModel
5
  import torch
 
6
 
7
  # Initialize model
 
8
  device = "cuda" if torch.cuda.is_available() else "cpu"
9
  compute_type = "float16" if torch.cuda.is_available() else "int8"
10
 
11
- print(f"Loading Whisper Large V3 on {device} ({compute_type})...")
 
 
 
12
  model = WhisperModel("large-v3", device=device, compute_type=compute_type)
13
 
14
  def format_timestamp(seconds):
@@ -29,17 +34,30 @@ def segments_to_srt(segments):
29
  lines.append("")
30
  return "\n".join(lines)
31
 
 
32
  def transcribe(audio_path, task="transcribe", language=None):
33
  if audio_path is None:
34
  return "Please upload an audio file.", None
35
 
36
- options = {"task": task}
 
 
 
 
 
 
 
 
37
  if language and language != "auto":
38
  options["language"] = language
39
 
40
- segments, info = model.transcribe(audio_path, beam_size=5, **options)
 
 
 
 
 
41
 
42
- segments_list = list(segments)
43
  full_text = " ".join([s.text.strip() for s in segments_list])
44
  srt_content = segments_to_srt(segments_list)
45
 
 
3
  import gradio as gr
4
  from faster_whisper import WhisperModel
5
  import torch
6
+ import spaces
7
 
8
  # Initialize model
9
+ # Note: On ZeroGPU, we initialize on CPU or wait for the GPU function
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
  compute_type = "float16" if torch.cuda.is_available() else "int8"
12
 
13
+ print(f"Initial check - CUDA available: {torch.cuda.is_available()}")
14
+ print(f"Loading Whisper Large V3...")
15
+
16
+ # Global model variable
17
  model = WhisperModel("large-v3", device=device, compute_type=compute_type)
18
 
19
  def format_timestamp(seconds):
 
34
  lines.append("")
35
  return "\n".join(lines)
36
 
37
+ @spaces.GPU
38
  def transcribe(audio_path, task="transcribe", language=None):
39
  if audio_path is None:
40
  return "Please upload an audio file.", None
41
 
42
+ print(f"Transcribing {audio_path} on {device}...")
43
+
44
+ options = {
45
+ "task": task,
46
+ "beam_size": 2, # Reduced for speed, still high accuracy
47
+ "best_of": 2,
48
+ "vad_filter": True, # Filter out non-speech/silence to speed up
49
+ }
50
+
51
  if language and language != "auto":
52
  options["language"] = language
53
 
54
+ segments, info = model.transcribe(audio_path, **options)
55
+
56
+ segments_list = []
57
+ for segment in segments:
58
+ segments_list.append(segment)
59
+ print(f"[{format_timestamp(segment.start)}] {segment.text}")
60
 
 
61
  full_text = " ".join([s.text.strip() for s in segments_list])
62
  srt_content = segments_to_srt(segments_list)
63
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ gradio
3
  torch
4
  torchaudio
5
  ffmpeg-python
 
 
3
  torch
4
  torchaudio
5
  ffmpeg-python
6
+ spaces