mikelalda committed on
Commit
9232ed6
·
verified ·
1 Parent(s): 57da57f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -53
app.py CHANGED
@@ -1,54 +1,53 @@
1
- import gradio as gr
2
- from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
- import torch
4
- import requests
5
- import config
6
- import os
7
-
8
- def fetch_access_token():
9
- token_response = requests.post(token_url, timeout=15)
10
- token_response.raise_for_status()
11
- token = token_response.json()
12
- return token["access_token"]
13
-
14
- client_id = config.client_id
15
- client_secret = config.client_secret
16
- token_url = "https://id.twitch.tv/oauth2/token?client_id=" + client_id + "&client_secret=" + client_secret + "&grant_type=client_credentials"
17
- model_id = "distil-whisper/distil-large-v2"
18
-
19
- access_token = fetch_access_token()
20
-
21
- device = "cuda:0" if torch.cuda.is_available() else "cpu"
22
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
23
-
24
- model = AutoModelForSpeechSeq2Seq.from_pretrained(
25
- model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
26
- )
27
- model.to(device)
28
-
29
- torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
30
- processor = AutoProcessor.from_pretrained(model_id)
31
-
32
- pipe = pipeline(
33
- "automatic-speech-recognition",
34
- model=model,
35
- tokenizer=processor.tokenizer,
36
- feature_extractor=processor.feature_extractor,
37
- max_new_tokens=128,
38
- torch_dtype=torch_dtype,
39
- device=device,
40
- )
41
-
42
- def transcribe_audio(audio_file):
43
- recorded_filename = audio_file.name
44
- if os.path.exists(recorded_filename):
45
- results = pipe(recorded_filename)
46
- return results["text"]
47
- else:
48
- return "Error: No audio file uploaded."
49
-
50
- inputs = gr.Audio(sources="upload", type="filepath")
51
- outputs = gr.Textbox()
52
-
53
- interface = gr.Interface(fn=transcribe_audio, inputs=inputs, outputs=outputs, title="Audio Transcription App")
54
  interface.launch()
 
1
+ import gradio as gr
2
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
3
+ import torch
4
+ import requests
5
+ import os
6
+
7
def fetch_access_token():
    """Request an OAuth token and return its ``access_token`` field.

    Sends a POST to the module-level ``token_url`` with a 15 second timeout.

    Returns:
        The value stored under ``"access_token"`` in the JSON response.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON payload has no ``"access_token"`` entry.
    """
    response = requests.post(token_url, timeout=15)
    # Surface HTTP errors before trying to parse the body.
    response.raise_for_status()
    return response.json()["access_token"]
12
+
13
# The credentials are read from the project-local ``config`` module, so it has
# to be imported here; without the import the attribute lookups below fail
# with NameError as soon as the app starts.
import config
from urllib.parse import urlencode

client_id = config.client_id
client_secret = config.client_secret

# urlencode() percent-escapes the values, so a credential containing
# characters such as "&" or "+" cannot corrupt the query string.
token_url = "https://id.twitch.tv/oauth2/token?" + urlencode(
    {
        "client_id": client_id,
        "client_secret": client_secret,
        "grant_type": "client_credentials",
    }
)
model_id = "distil-whisper/distil-large-v2"

# Fetched once at import time.
# NOTE(review): nothing else in the visible code reads ``access_token`` —
# confirm whether the Twitch token is still needed by this app.
access_token = fetch_access_token()
19
+
20
# Choose the device and the matching tensor dtype together: half precision
# on the first CUDA device when one is available, full precision on CPU.
if torch.cuda.is_available():
    device, torch_dtype = "cuda:0", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the speech-to-text checkpoint named by ``model_id`` and move it to
# the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor that
# the pipeline below needs.
processor = AutoProcessor.from_pretrained(model_id)

# Module-level ASR pipeline used by ``transcribe_audio``.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    max_new_tokens=128,
    torch_dtype=torch_dtype,
    device=device,
)
40
+
41
def transcribe_audio(audio_file):
    """Transcribe an uploaded audio file with the module-level ``pipe``.

    Args:
        audio_file: What the Gradio audio input hands over. With
            ``type="filepath"`` this is a ``str`` path, or ``None`` when
            nothing was uploaded. Objects exposing a ``name`` attribute
            holding the path are accepted as well.

    Returns:
        The ``"text"`` entry of the pipeline result, or the string
        ``"Error: No audio file uploaded."`` when no usable file was given.
    """
    no_file_message = "Error: No audio file uploaded."

    if audio_file is None:
        return no_file_message

    # A filepath-typed audio input passes a plain path, which has no
    # ``.name`` attribute; only fall back to ``.name`` for file-like objects.
    if isinstance(audio_file, (str, os.PathLike)):
        recorded_filename = os.fspath(audio_file)
    else:
        recorded_filename = getattr(audio_file, "name", None)

    if not recorded_filename or not os.path.exists(recorded_filename):
        return no_file_message

    results = pipe(recorded_filename)
    return results["text"]
48
+
49
# Upload-only audio input; the handler receives the uploaded file's path.
inputs = gr.Audio(
    sources="upload",
    type="filepath",
)
# Plain text box that shows the transcription (or the error message).
outputs = gr.Textbox()

# Wire the transcription function to the input and output components.
interface = gr.Interface(
    fn=transcribe_audio,
    inputs=inputs,
    outputs=outputs,
    title="Audio Transcription App",
)

# Start the Gradio server.
interface.launch()