arjunanand13 committed on
Commit
f054749
·
verified ·
1 Parent(s): bab6b65

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ from datasets import load_dataset
4
+ import gradio as gr
5
+ import librosa
6
+
7
# Probe CUDA once: run on the first GPU in half precision when it is
# available, otherwise fall back to the CPU in full precision.
use_cuda = torch.cuda.is_available()
device = "cuda:0" if use_cuda else "cpu"
torch_dtype = torch.float16 if use_cuda else torch.float32

model_id = "openai/whisper-large-v3"

# Load the speech-to-text checkpoint and move it to the chosen device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor.
processor = AutoProcessor.from_pretrained(model_id)

# Speech-recognition pipeline shared by every transcription request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)
27
+
28
+ # dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
29
+ # sample = dataset[0]["audio"]
30
def transcribe(audio):
    """Transcribe audio supplied by the Gradio widget with the Whisper pipeline.

    Args:
        audio: ``None`` when nothing was recorded or uploaded, a file path
            string (what ``gr.Audio(type="filepath")`` passes), or a
            ``(sample_rate, samples)`` tuple (what ``gr.Audio`` passes with
            its default ``type="numpy"``).

    Returns:
        The transcribed text, or a prompt asking the user to provide audio.
    """
    print(audio)  # Debug aid: log what Gradio handed over.
    if audio is None:
        return "Please record or upload audio"

    # A bare array given to the pipeline is assumed to already be at the
    # feature extractor's sampling rate, so always resample to that rate
    # (librosa.load would otherwise default to 22050 Hz).
    target_sr = pipe.feature_extractor.sampling_rate

    if isinstance(audio, str):
        speech, _ = librosa.load(audio, sr=target_sr)
    else:
        import numpy as np

        source_sr, samples = audio
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            # Integer PCM -> float32 in [-1, 1].
            scale = float(np.iinfo(samples.dtype).max)
            samples = samples.astype(np.float32) / scale
        else:
            samples = samples.astype(np.float32)
        if samples.ndim > 1:
            # Assumes Gradio's (num_samples, num_channels) layout; down-mix
            # to mono by averaging the channels.
            samples = samples.mean(axis=1)
        if source_sr == target_sr:
            speech = samples
        else:
            speech = librosa.resample(
                samples, orig_sr=source_sr, target_sr=target_sr
            )

    result = pipe(speech, return_timestamps=True)
    return result["text"]
37
+
38
+
39
# Build the web UI: one audio input (microphone or upload) mapped to one
# text output produced by transcribe().
demo = gr.Interface(
    fn=transcribe,
    # type="filepath" hands the callback a path string that librosa.load can
    # open; gr.Audio's default type="numpy" would pass a
    # (sample_rate, samples) tuple instead.
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Whisper transcription App",
    description="Record or upload audio and get transcription",
)
demo.launch(debug=True)
44
+