Spaces:

TaliDror
/

AAS2F

Running on Zero

App Files Community

TaliDror committed on 11 days ago

Commit

0362643

1 Parent(s): 5139313

improved UI

Browse files

Files changed (1) hide show

app.py +11 -3

app.py CHANGED Viewed

@@ -516,8 +516,10 @@ def build_demo():
         gr.Markdown("# AAS2F: Ambiguity-Aware Speech-to-Face Synthesis with Speaker-Conditioned Diffusion Models")
         gr.Markdown(
             "**Steps to use the demo:**\n\n"
-            "1. Upload or record a speech audio clip to generate face images conditioned on the speaker's voice. **Please provide at least 5 seconds of speech.** Note that it works best with **English** as the model is trained on English speech, but should work with other languages as well.\n"
-            "2. Click the 'Generate' button to start the generation process."
         )
         DEFAULT_NUM_DISPLAY = 3
@@ -538,12 +540,18 @@ def build_demo():
                         type="filepath",
                         label="Record Audio",
                     )
-                generate_btn = gr.Button("Generate", variant="primary")
             with gr.Column():
                 gallery = gr.Gallery(label="Generated Images")
                 status = gr.Markdown(visible=False)
         def _generate(upload, mic):
             audio = upload if upload is not None else mic
             imgs, msg = generate(audio, DEFAULT_NUM_DISPLAY, DEFAULT_GUIDANCE_SCALE, DEFAULT_NUM_STEPS, DEFAULT_BASE_SEED)

         gr.Markdown("# AAS2F: Ambiguity-Aware Speech-to-Face Synthesis with Speaker-Conditioned Diffusion Models")
         gr.Markdown(
             "**Steps to use the demo:**\n\n"
+            "1. Upload or record a speech audio clip. **Please provide at least 5 seconds of speech.**\n"
+            "2. Note that it works best with **English**, but should work with other languages as well.\n"
+            "3. After you are done recording/uploading the audio, click the 'Generate' button to start the generation process.\n"
+            "4. After a few seconds, the generated images will be displayed on the right."
         )
         DEFAULT_NUM_DISPLAY = 3
                         type="filepath",
                         label="Record Audio",
                     )
+                generate_btn = gr.Button("Generate", variant="primary", interactive=False)
             with gr.Column():
                 gallery = gr.Gallery(label="Generated Images")
                 status = gr.Markdown(visible=False)
+        def _update_btn(upload, mic):
+            return gr.update(interactive=(upload is not None or mic is not None))
+        audio_upload.change(fn=_update_btn, inputs=[audio_upload, audio_mic], outputs=generate_btn)
+        audio_mic.change(fn=_update_btn, inputs=[audio_upload, audio_mic], outputs=generate_btn)
         def _generate(upload, mic):
             audio = upload if upload is not None else mic
             imgs, msg = generate(audio, DEFAULT_NUM_DISPLAY, DEFAULT_GUIDANCE_SCALE, DEFAULT_NUM_STEPS, DEFAULT_BASE_SEED)