Spaces:
Running
Running
Commit
·
f334b99
1
Parent(s):
4efbce4
refactor model loading and reintroduce GPU decorator for transcription function
Browse files
app.py
CHANGED
|
@@ -4,27 +4,18 @@ import torch
|
|
| 4 |
import nemo.collections.asr as nemo_asr
|
| 5 |
from omegaconf import OmegaConf
|
| 6 |
import time
|
|
|
|
| 7 |
|
| 8 |
# Report detected hardware in the startup logs so it is easy to confirm
# whether the Space actually got a GPU.
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
|
| 12 |
|
| 13 |
-
|
| 14 |
-
def load_model():
    """Fetch the NVIDIA Parakeet TDT 0.6B v2 ASR model and return it.

    The model is pulled via NeMo's ``from_pretrained`` (downloads on first
    run) and moved to CUDA when a GPU is present; otherwise it stays on CPU.
    """
    print("Loading ASR model...")
    asr_model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
    if torch.cuda.is_available():
        # Echo the device name again at load time for the logs.
        print(f"CUDA device: {torch.cuda.get_device_name(0)}")
        asr_model = asr_model.cuda()
    print(f"Model loaded on device: {asr_model.device}")
    return asr_model


# Module-level singleton used by the transcription handler below.
model = load_model()
|
| 27 |
|
|
|
|
| 28 |
def transcribe(audio, state=""):
|
| 29 |
"""
|
| 30 |
Transcribe audio in real-time
|
|
@@ -33,6 +24,11 @@ def transcribe(audio, state=""):
|
|
| 33 |
if audio is None:
|
| 34 |
return state, state
|
| 35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
# Get the sample rate from the audio
|
| 37 |
sample_rate = 16000 # Default to 16kHz if not specified
|
| 38 |
|
|
@@ -45,7 +41,7 @@ def transcribe(audio, state=""):
|
|
| 45 |
new_state = transcription
|
| 46 |
else:
|
| 47 |
new_state = state + " " + transcription
|
| 48 |
-
|
| 49 |
return new_state, new_state
|
| 50 |
|
| 51 |
# Define the Gradio interface
|
|
|
|
| 4 |
import nemo.collections.asr as nemo_asr
|
| 5 |
from omegaconf import OmegaConf
|
| 6 |
import time
|
| 7 |
+
import spaces
|
| 8 |
|
| 9 |
# Check if CUDA is available
|
| 10 |
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 11 |
if torch.cuda.is_available():
|
| 12 |
print(f"CUDA device: {torch.cuda.get_device_name(0)}")
|
| 13 |
|
| 14 |
+
model = nemo_asr.models.EncDecRNNTBPEModel.from_pretrained("nvidia/parakeet-tdt-0.6b-v2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
print(f"Model loaded on device: {model.device}")
|
|
|
|
| 17 |
|
| 18 |
+
@spaces.GPU(duration=120) # Increase duration if inference takes >60s
|
| 19 |
def transcribe(audio, state=""):
|
| 20 |
"""
|
| 21 |
Transcribe audio in real-time
|
|
|
|
| 24 |
if audio is None:
|
| 25 |
return state, state
|
| 26 |
|
| 27 |
+
# Move model to GPU if available
|
| 28 |
+
if torch.cuda.is_available():
|
| 29 |
+
print(f"CUDA device: {torch.cuda.get_device_name(0)}")
|
| 30 |
+
model = model.cuda()
|
| 31 |
+
|
| 32 |
# Get the sample rate from the audio
|
| 33 |
sample_rate = 16000 # Default to 16kHz if not specified
|
| 34 |
|
|
|
|
| 41 |
new_state = transcription
|
| 42 |
else:
|
| 43 |
new_state = state + " " + transcription
|
| 44 |
+
model.cpu()
|
| 45 |
return new_state, new_state
|
| 46 |
|
| 47 |
# Define the Gradio interface
|