Commit: Update handler.py — Browse files — handler.py (+2, −2)
handler.py
CHANGED
|
@@ -8,7 +8,7 @@ SAMPLE_RATE=16000
|
|
| 8 |
class EndpointHandler():
|
| 9 |
def __init__(self, path=""):
|
| 10 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 11 |
-
pipe = pipeline(
|
| 12 |
"automatic-speech-recognition",
|
| 13 |
model="openai/whisper-large",
|
| 14 |
chunk_length_s=30,
|
|
@@ -22,7 +22,7 @@ class EndpointHandler():
|
|
| 22 |
audio_nparray = ffmpeg_read(inputs, 16000)
|
| 23 |
audio_tensor = torch.from_numpy(audio_nparray)
|
| 24 |
|
| 25 |
-
prediction = pipe(audio_nparray, return_timestamps=True)
|
| 26 |
return {"text": prediction[0]}
|
| 27 |
|
| 28 |
# we can also return timestamps for the predictions
|
|
|
|
| 8 |
class EndpointHandler():
|
| 9 |
def __init__(self, path=""):
|
| 10 |
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
| 11 |
+
self.pipe = pipeline(
|
| 12 |
"automatic-speech-recognition",
|
| 13 |
model="openai/whisper-large",
|
| 14 |
chunk_length_s=30,
|
|
|
|
| 22 |
audio_nparray = ffmpeg_read(inputs, 16000)
|
| 23 |
audio_tensor = torch.from_numpy(audio_nparray)
|
| 24 |
|
| 25 |
+
prediction = self.pipe(audio_nparray, return_timestamps=True)
|
| 26 |
return {"text": prediction[0]}
|
| 27 |
|
| 28 |
# we can also return timestamps for the predictions
|