mikelalda committed on
Commit
36a83db
·
verified ·
1 Parent(s): 12229a4

Upload gradio_app_transcribe.py

Browse files
Files changed (1) hide show
  1. gradio_app_transcribe.py +54 -0
gradio_app_transcribe.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ import torch
4
+ import requests
5
+ import config
6
+ import os
7
+
8
def fetch_access_token():
    """Request an app access token from Twitch using the client-credentials grant.

    Reads the module-level ``client_id`` and ``client_secret`` at call time,
    so both must be defined before this function is invoked.

    Returns:
        The value of the ``"access_token"`` field of the JSON response.

    Raises:
        requests.HTTPError: If the token endpoint answers with an error status.
        requests.RequestException: On connection failure or the 15 s timeout.
        KeyError: If the response JSON has no ``"access_token"`` field.
    """
    # Credentials travel in the form-encoded body rather than the query
    # string: the request URL is embedded in the HTTPError message raised by
    # raise_for_status() and is commonly written to logs, which would expose
    # the client secret.
    token_response = requests.post(
        "https://id.twitch.tv/oauth2/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "client_credentials",
        },
        timeout=15,
    )
    token_response.raise_for_status()
    return token_response.json()["access_token"]
13
+
14
# Twitch application credentials, supplied by the project-local ``config`` module.
client_id, client_secret = config.client_id, config.client_secret

# Client-credentials token request URL for the Twitch OAuth endpoint.
token_url = "".join(
    (
        "https://id.twitch.tv/oauth2/token?client_id=",
        client_id,
        "&client_secret=",
        client_secret,
        "&grant_type=client_credentials",
    )
)

# Hugging Face Hub identifier of the speech-recognition checkpoint to load.
model_id = "distil-whisper/distil-large-v2"

# Fetched once, when the module is executed.
# NOTE(review): no use of access_token is visible in this file - confirm it is needed.
access_token = fetch_access_token()
20
+
21
# Use the first CUDA GPU in half precision when one is available; otherwise
# fall back to the CPU in full precision. Computed once and reused below.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the speech-to-text checkpoint named by model_id and move it to the
# selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor bundles the tokenizer and feature extractor the pipeline needs.
processor = AutoProcessor.from_pretrained(model_id)

# Module-level ASR pipeline used by the transcription handler.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    # Enable chunked long-form transcription so uploads longer than a single
    # model window are transcribed in full; 15 s is the chunk length the
    # Distil-Whisper model card recommends for this checkpoint.
    chunk_length_s=15,
    torch_dtype=torch_dtype,
    device=device,
)
41
+
42
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the module-level ``pipe``.

    Args:
        audio_file: Location of the audio to transcribe. May be a path string
            (the input component is declared with ``type="filepath"``), an
            object exposing the path as a ``.name`` attribute, or ``None``
            when nothing was uploaded.

    Returns:
        The ``"text"`` field of the pipeline result, or the message
        ``"Error: No audio file uploaded."`` when no existing file was given.
    """
    # A plain path string has no ``.name`` attribute, so fall back to the
    # value itself; file-like wrappers still work through ``.name``.
    recorded_filename = getattr(audio_file, "name", audio_file)

    # Guard against None, empty and non-path values before touching the
    # filesystem, then make sure the file is really there.
    if (
        not recorded_filename
        or not isinstance(recorded_filename, (str, os.PathLike))
        or not os.path.exists(recorded_filename)
    ):
        return "Error: No audio file uploaded."

    results = pipe(recorded_filename)
    return results["text"]
49
+
50
# UI components: an upload-only audio input that hands the handler a file
# path, and a text box that shows the returned transcription.
inputs = gr.Audio(sources="upload", type="filepath")
outputs = gr.Textbox()

# Wire the components to the transcription handler.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=inputs,
    outputs=outputs,
    title="Audio Transcription App",
)

# Start the Gradio app as soon as this module is executed.
interface.launch()