rafaaa2105 committed on
Commit
e41bca8
·
verified ·
1 Parent(s): 503773b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -1,18 +1,41 @@
1
  import gradio as gr
2
- from huggingface_hub import hf_hub_download
3
- from datasets import load_dataset
4
- from whisper import load_model, transcribe
5
  import os
6
  import zipfile
7
  import tempfile
8
  import spaces
9
-
10
- # Download and load the Whisper model for Portuguese
11
- model_path = hf_hub_download(repo_id="distil-whisper/distil-large-v3-openai", filename="model.bin")
12
 
13
 
14
  @spaces.GPU
15
  def transcribe_audio(zip_file):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  model = load_model(model_path)
17
 
18
  # Create a temporary directory to extract the ZIP file
 
1
  import gradio as gr
 
 
 
2
  import os
3
  import zipfile
4
  import tempfile
5
  import spaces
6
+ import torch
7
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
8
+ from datasets import load_dataset
9
 
10
 
11
  @spaces.GPU
12
  def transcribe_audio(zip_file):
13
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
14
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
15
+
16
+ model_id = "distil-whisper/distil-large-v3"
17
+
18
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(
19
+ model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
20
+ )
21
+ model.to(device)
22
+
23
+ processor = AutoProcessor.from_pretrained(model_id)
24
+
25
+ transcribe = pipeline(
26
+ "automatic-speech-recognition",
27
+ model=model,
28
+ tokenizer=processor.tokenizer,
29
+ feature_extractor=processor.feature_extractor,
30
+ max_new_tokens=128,
31
+ torch_dtype=torch_dtype,
32
+ device=device,
33
+ )
34
+
35
+ dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
36
+
37
+
38
+
39
  model = load_model(model_path)
40
 
41
  # Create a temporary directory to extract the ZIP file