Loren committed on
Commit
2491436
·
verified ·
1 Parent(s): 5d5510d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -26
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import torch
3
  from transformers import AutoProcessor, VoxtralForConditionalGeneration
4
  import spaces
 
5
 
6
  #### Functions
7
 
@@ -83,39 +84,47 @@ dict_languages = {"English": "en",
83
 
84
 
85
  #### Gradio interface
86
- with gr.Blocks(title="Transcription") as audio:
87
  gr.Markdown("# Voxtral Mini Evaluation")
88
- gr.Markdown("#### Choose the language of the audio and set an audio file to process it.")
89
- gr.Markdown("##### *(Voxtral handles audios up to 30 minutes for transcription)*")
 
 
 
 
 
 
 
 
 
 
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  with gr.Row():
92
  with gr.Column():
 
93
  sel_language = gr.Dropdown(
94
  choices=list(dict_languages.keys()),
95
  value="English",
96
  label="Select the language of the audio file:"
97
  )
98
-
99
- with gr.Column():
100
- sel_audio = gr.Audio(sources=["upload", "microphone"], type="filepath",
101
- label="Upload an audio file, record via microphone, or select a demo file:")
102
-
103
- example = [["mapo_tofu.mp3"]]
104
- gr.Examples(
105
- examples=example,
106
- inputs=sel_audio,
107
- outputs=None,
108
- fn=None,
109
- cache_examples=False,
110
- run_on_click=False
111
- )
112
-
113
- with gr.Row():
114
- with gr.Column():
115
  submit_transcript = gr.Button("Extract transcription", variant="primary")
116
- text_transcript = gr.Textbox(label="Generated transcription", lines=10)
117
 
118
  with gr.Column():
 
119
  sel_translate_language = gr.Dropdown(
120
  choices=list(dict_languages.keys()),
121
  value="English",
@@ -123,11 +132,13 @@ with gr.Blocks(title="Transcription") as audio:
123
  )
124
 
125
  submit_translate = gr.Button("Translate audio file", variant="primary")
126
- text_translate = gr.Textbox(label="Generated translation", lines=10)
127
 
128
  with gr.Column():
129
- submit_chat = gr.Button("Ask audio file", variant="primary")
130
- text_chat = gr.Textbox(label="Model answer", lines=10)
 
 
131
 
132
  ### Processing
133
 
@@ -146,7 +157,7 @@ with gr.Blocks(title="Transcription") as audio:
146
  )
147
 
148
  # Translation
149
- submit_transcript.click(
150
  disable_buttons,
151
  outputs=[submit_transcript, submit_translate, submit_chat],
152
  trigger_mode="once",
@@ -163,4 +174,4 @@ with gr.Blocks(title="Transcription") as audio:
163
  ### Launch the app
164
 
165
  if __name__ == "__main__":
166
- audio.launch()
 
2
  import torch
3
  from transformers import AutoProcessor, VoxtralForConditionalGeneration
4
  import spaces
5
+ from gradio_modal import Modal
6
 
7
  #### Functions
8
 
 
84
 
85
 
86
  #### Gradio interface
87
+ with gr.Blocks(title="Voxtral") as voxtral:
88
  gr.Markdown("# Voxtral Mini Evaluation")
89
+ gr.Markdown("Voxtral Mini is an enhancement of Ministral 3B, incorporating state-of-the-art audio input \
90
+ capabilities while retaining best-in-class text performance. It excels at speech transcription, \
91
+ translation and audio understanding.")
92
+ btn = gr.Button("🔎 More on Voxtral", variant="huggingface")
93
+ with Modal(visible=False, allow_user_close=True) as modal:
94
+ gr.Markdown("## Key features:")
95
+ gr.Markdown("Voici comment utiliser l’interface…")
96
+ # place additional components here as needed
97
+ btn.click(lambda: Modal(visible=True), None, modal)
98
+
99
+ gr.Markdown("## Upload an audio file, record via microphone, or select a demo file:")
100
+ gr.Markdown("### *(Voxtral handles audios up to 30 minutes for transcription)*")
101
 
102
+ with gr.Row():
103
+ sel_audio = gr.Audio(sources=["upload", "microphone"], type="filepath",
104
+ label="Set an audio file to process it:")
105
+ example = [["mapo_tofu.mp3"]]
106
+ gr.Examples(
107
+ examples=example,
108
+ inputs=sel_audio,
109
+ outputs=None,
110
+ fn=None,
111
+ cache_examples=False,
112
+ run_on_click=False
113
+ )
114
+
115
  with gr.Row():
116
  with gr.Column():
117
+ gr.Button("📝 Transcription", variant="huggingface", interactive=False)
118
  sel_language = gr.Dropdown(
119
  choices=list(dict_languages.keys()),
120
  value="English",
121
  label="Select the language of the audio file:"
122
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  submit_transcript = gr.Button("Extract transcription", variant="primary")
124
+ text_transcript = gr.Textbox(label="💬 Generated transcription", lines=10)
125
 
126
  with gr.Column():
127
+ gr.Button("🔁 Translation", variant="huggingface", interactive=False)
128
  sel_translate_language = gr.Dropdown(
129
  choices=list(dict_languages.keys()),
130
  value="English",
 
132
  )
133
 
134
  submit_translate = gr.Button("Translate audio file", variant="primary")
135
+ text_translate = gr.Textbox(label="💬 Generated translation", lines=10)
136
 
137
  with gr.Column():
138
+ gr.Button("🤖 Ask audio file", variant="huggingface", interactive=False)
139
+ question = gr.Textbox(label="Ask audio file", placeholder="Enter your question about audio file")
140
+ submit_chat = gr.Button("Ask audio file:", variant="primary")
141
+ text_chat = gr.Textbox(label="💬 Model answer", lines=10)
142
 
143
  ### Processing
144
 
 
157
  )
158
 
159
  # Translation
160
+ submit_translate.click(
161
  disable_buttons,
162
  outputs=[submit_transcript, submit_translate, submit_chat],
163
  trigger_mode="once",
 
174
  ### Launch the app
175
 
176
  if __name__ == "__main__":
177
+ voxtral.queue().launch()