AiCoderv2 committed on
Commit
494e59f
·
verified ·
1 Parent(s): ec0d1b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -52
app.py CHANGED
@@ -1,5 +1,6 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
3
 
4
  # Updated model options with 2 new models
5
  MODEL_OPTIONS = {
@@ -29,7 +30,7 @@ LANGUAGE_CODES = {
29
  "Dutch": "nl"
30
  }
31
 
32
- def transcribe_audio(audio_file, model_choice, task_choice, language_choice):
33
  # Initialize the pipeline with selected model
34
  model_name = MODEL_OPTIONS[model_choice]
35
  task = "translate" if task_choice == "Translate to English" else "transcribe"
@@ -44,18 +45,32 @@ def transcribe_audio(audio_file, model_choice, task_choice, language_choice):
44
  )
45
 
46
  # Generate kwargs for the pipeline
47
- generate_kwargs = {"task": task}
 
 
 
48
  if language and task == "transcribe":
49
  generate_kwargs["language"] = language
50
 
51
  # Process audio file
52
- result = pipe(
53
- audio_file,
54
- generate_kwargs=generate_kwargs,
55
- return_timestamps=False
56
- )
57
-
58
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
59
 
60
  with gr.Blocks() as demo:
61
  gr.Markdown("# 🎵 Audio Transcription & Translation")
@@ -65,8 +80,7 @@ with gr.Blocks() as demo:
65
  with gr.Column():
66
  audio_input = gr.Audio(
67
  label="Audio Input",
68
- type="filepath",
69
- source="upload"
70
  )
71
 
72
  # Updated model selection with new models
@@ -120,48 +134,8 @@ with gr.Blocks() as demo:
120
 
121
  transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
122
 
123
- # Updated function to handle new features
124
- def process_audio(audio_file, model_choice, task_choice, language_choice, timestamp_choice, beam_size):
125
- model_name = MODEL_OPTIONS[model_choice]
126
- task = "translate" if task_choice == "Translate to English" else "transcribe"
127
- language = LANGUAGE_CODES[language_choice]
128
-
129
- pipe = pipeline(
130
- "automatic-speech-recognition",
131
- model=model_name,
132
- chunk_length_s=30,
133
- device=0 if torch.cuda.is_available() else -1
134
- )
135
-
136
- generate_kwargs = {
137
- "task": task,
138
- "num_beams": beam_size
139
- }
140
- if language and task == "transcribe":
141
- generate_kwargs["language"] = language
142
-
143
- # Process with or without timestamps
144
- if timestamp_choice:
145
- result = pipe(
146
- audio_file,
147
- generate_kwargs=generate_kwargs,
148
- return_timestamps=True
149
- )
150
- timestamp_text = "\n".join([
151
- f"[{chunk['timestamp'][0]:.2f}s -> {chunk['timestamp'][1]:.2f}s] {chunk['text']}"
152
- for chunk in result.get("chunks", [])
153
- ])
154
- return result["text"], timestamp_text, gr.update(visible=True)
155
- else:
156
- result = pipe(
157
- audio_file,
158
- generate_kwargs=generate_kwargs,
159
- return_timestamps=False
160
- )
161
- return result["text"], "", gr.update(visible=False)
162
-
163
  transcribe_btn.click(
164
- process_audio,
165
  inputs=[audio_input, model_choice, task_choice, language_choice, timestamp_choice, beam_size],
166
  outputs=[text_output, timestamp_output, timestamp_output]
167
  )
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import torch
4
 
5
  # Updated model options with 2 new models
6
  MODEL_OPTIONS = {
 
30
  "Dutch": "nl"
31
  }
32
 
33
+ def transcribe_audio(audio_file, model_choice, task_choice, language_choice, timestamp_choice, beam_size):
34
  # Initialize the pipeline with selected model
35
  model_name = MODEL_OPTIONS[model_choice]
36
  task = "translate" if task_choice == "Translate to English" else "transcribe"
 
45
  )
46
 
47
  # Generate kwargs for the pipeline
48
+ generate_kwargs = {
49
+ "task": task,
50
+ "num_beams": beam_size
51
+ }
52
  if language and task == "transcribe":
53
  generate_kwargs["language"] = language
54
 
55
  # Process audio file
56
+ if timestamp_choice:
57
+ result = pipe(
58
+ audio_file,
59
+ generate_kwargs=generate_kwargs,
60
+ return_timestamps=True
61
+ )
62
+ timestamp_text = "\n".join([
63
+ f"[{chunk['timestamp'][0]:.2f}s -> {chunk['timestamp'][1]:.2f}s] {chunk['text']}"
64
+ for chunk in result.get("chunks", [])
65
+ ])
66
+ return result["text"], timestamp_text, gr.update(visible=True)
67
+ else:
68
+ result = pipe(
69
+ audio_file,
70
+ generate_kwargs=generate_kwargs,
71
+ return_timestamps=False
72
+ )
73
+ return result["text"], "", gr.update(visible=False)
74
 
75
  with gr.Blocks() as demo:
76
  gr.Markdown("# 🎵 Audio Transcription & Translation")
 
80
  with gr.Column():
81
  audio_input = gr.Audio(
82
  label="Audio Input",
83
+ type="filepath"
 
84
  )
85
 
86
  # Updated model selection with new models
 
134
 
135
  transcribe_btn = gr.Button("Transcribe Audio", variant="primary")
136
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  transcribe_btn.click(
138
+ transcribe_audio,
139
  inputs=[audio_input, model_choice, task_choice, language_choice, timestamp_choice, beam_size],
140
  outputs=[text_output, timestamp_output, timestamp_output]
141
  )