Spaces:

Forward-Operators
/

whisper-api

Paused

mkozak committed on Jan 15, 2024

Commit

7ce298f

verified ·

1 Parent(s): 8aba9e4

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -13,15 +13,15 @@ app = FastAPI()
 def process_audio(url: str):
     response = requests.get(url)
-    with open("/tmp/audio.mp3", mode="wb") as file:
         file.write(response.content)
-    device = "cpu"
     model_id = "openai/whisper-large-v3"
     model = AutoModelForSpeechSeq2Seq.from_pretrained(
-            model_id, torch_dtype=torch.float32, low_cpu_mem_usage=True, use_safetensors=True
             )
     model.to(device)
@@ -35,11 +35,11 @@ def process_audio(url: str):
             chunk_length_s=30,
             batch_size=16,
             return_timestamps=True,
-            torch_dtype=torch.float32,
             device=device
     )
     dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
-    whisper_result = pipe("/tmp/audio.mp3")
     return whisper_result

 def process_audio(url: str):
     response = requests.get(url)
+    with open("/data/audio.mp3", mode="wb") as file:
         file.write(response.content)
+    device = "cuda"
     model_id = "openai/whisper-large-v3"
     model = AutoModelForSpeechSeq2Seq.from_pretrained(
+            model_id, torch_dtype=torch.float16, low_cpu_mem_usage=True, use_safetensors=True
             )
     model.to(device)
             chunk_length_s=30,
             batch_size=16,
             return_timestamps=True,
+            torch_dtype=torch.float16,
             device=device
     )
     dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
+    whisper_result = pipe("/data/audio.mp3")
     return whisper_result