Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,19 +15,18 @@ TITLE = """
|
|
| 15 |
<h1 style="font-weight: 900; margin-bottom: 7px;">
|
| 16 |
Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
|
| 17 |
</h1>
|
| 18 |
-
<h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
|
| 19 |
-
[<a href="https://arxiv.org/abs/2303.14307" style="color:blue;">arXiv</a>]
|
| 20 |
-
[<a href="https://github.com/mpc001/auto_avsr" style="color:blue;">Code</a>]
|
| 21 |
-
</h3>
|
| 22 |
</div>
|
| 23 |
<p style="margin-bottom: 10px; font-size: 94%">
|
| 24 |
-
Want to
|
| 25 |
</p>
|
| 26 |
</div>
|
| 27 |
"""
|
| 28 |
|
| 29 |
ARTICLE = """
|
| 30 |
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
|
|
|
|
|
|
|
|
|
|
| 31 |
<p>
|
| 32 |
Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
|
| 33 |
</p>
|
|
@@ -73,32 +72,20 @@ pipelines = {
|
|
| 73 |
print("Step 0. Model has been loaded.")
|
| 74 |
|
| 75 |
def fn(pipeline_type, filename):
|
| 76 |
-
print("Step 0. Video has been uploaded.")
|
| 77 |
selected_pipeline_instance = pipelines[pipeline_type]
|
| 78 |
-
print("Step 1. Video has been converted.")
|
| 79 |
landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
|
| 80 |
-
print("Step 2. Landmarks have been detected.")
|
| 81 |
data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
|
| 82 |
-
print("Step 3. Data has been preprocessed.")
|
| 83 |
transcript = selected_pipeline_instance.model.infer(data)
|
| 84 |
-
print("Step 4. Inference has been done.")
|
| 85 |
-
print(f"transcript: {transcript}")
|
| 86 |
return transcript
|
| 87 |
|
| 88 |
demo = gr.Blocks(css=CSS)
|
| 89 |
|
| 90 |
with demo:
|
| 91 |
-
|
| 92 |
gr.HTML(TITLE)
|
| 93 |
-
|
| 94 |
-
|
| 95 |
dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
|
| 96 |
video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
|
| 97 |
text = gr.Textbox(label="PREDICTION")
|
| 98 |
btn = gr.Button("Submit").style(full_width=True)
|
| 99 |
-
|
| 100 |
btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
|
| 101 |
-
|
| 102 |
gr.HTML(ARTICLE)
|
| 103 |
-
|
| 104 |
demo.launch()
|
|
|
|
| 15 |
<h1 style="font-weight: 900; margin-bottom: 7px;">
|
| 16 |
Auto-AVSR: Audio-Visual Speech Recognition with Automatic Labels
|
| 17 |
</h1>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
</div>
|
| 19 |
<p style="margin-bottom: 10px; font-size: 94%">
|
| 20 |
+
Want to recognize content in a noisy environment?<br>Our Auto-AVSR models are here to transcribe your answers from audio or visual information!
|
| 21 |
</p>
|
| 22 |
</div>
|
| 23 |
"""
|
| 24 |
|
| 25 |
ARTICLE = """
|
| 26 |
<div style="text-align: center; max-width: 650px; margin: 0 auto;">
|
| 27 |
+
<p>
|
| 28 |
+
Want to look into models? You can find our [<a href="https://github.com/mpc001/auto_avsr">training code</a>] and [<a href="https://arxiv.org/abs/2303.14307">paper</a>].
|
| 29 |
+
</p>
|
| 30 |
<p>
|
| 31 |
Server busy? You can also run on <a href="https://colab.research.google.com/drive/1jfb6e4xxhXHbmQf-nncdLno1u0b4j614?usp=sharing">Google Colab</a>
|
| 32 |
</p>
|
|
|
|
| 72 |
print("Step 0. Model has been loaded.")
|
| 73 |
|
| 74 |
def fn(pipeline_type, filename):
|
|
|
|
| 75 |
selected_pipeline_instance = pipelines[pipeline_type]
|
|
|
|
| 76 |
landmarks = selected_pipeline_instance.process_landmarks(filename, landmarks_filename=None)
|
|
|
|
| 77 |
data = selected_pipeline_instance.dataloader.load_data(filename, landmarks)
|
|
|
|
| 78 |
transcript = selected_pipeline_instance.model.infer(data)
|
|
|
|
|
|
|
| 79 |
return transcript
|
| 80 |
|
| 81 |
demo = gr.Blocks(css=CSS)
|
| 82 |
|
| 83 |
with demo:
|
|
|
|
| 84 |
gr.HTML(TITLE)
|
|
|
|
|
|
|
| 85 |
dropdown_list = gr.inputs.Dropdown(["ASR", "VSR(mediapipe)", "AVSR(mediapipe)"], label="model")
|
| 86 |
video_file = gr.Video(label="INPUT VIDEO", include_audio=True)
|
| 87 |
text = gr.Textbox(label="PREDICTION")
|
| 88 |
btn = gr.Button("Submit").style(full_width=True)
|
|
|
|
| 89 |
btn.click(fn, inputs=[dropdown_list, video_file], outputs=text)
|
|
|
|
| 90 |
gr.HTML(ARTICLE)
|
|
|
|
| 91 |
demo.launch()
|