Spaces:
Running
on
Zero
Running
on
Zero
Adjust tabs
Browse files
app.py
CHANGED
|
@@ -2,6 +2,10 @@
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
import spaces
|
| 7 |
|
|
@@ -167,6 +171,37 @@ def speech_enhancement(
|
|
| 167 |
return None, f"Error: {str(e)}"
|
| 168 |
|
| 169 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
@spaces.GPU(duration=60)
|
| 171 |
def audio_super_resolution(
|
| 172 |
low_sr_audio,
|
|
@@ -187,9 +222,18 @@ def audio_super_resolution(
|
|
| 187 |
num_steps=num_steps,
|
| 188 |
output_path=output_path
|
| 189 |
)
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
except Exception as e:
|
| 192 |
-
return None, f"Error: {str(e)}"
|
| 193 |
|
| 194 |
|
| 195 |
@spaces.GPU(duration=60)
|
|
@@ -217,9 +261,24 @@ def video_to_audio(
|
|
| 217 |
return None, f"Error: {str(e)}"
|
| 218 |
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
# Create Gradio Interface
|
| 221 |
with gr.Blocks(
|
| 222 |
-
title="UniFlow-Audio Inference Demo",
|
|
|
|
|
|
|
| 223 |
) as demo:
|
| 224 |
gr.Markdown("# π UniFlow-Audio Inference Demo")
|
| 225 |
gr.Markdown(
|
|
@@ -228,7 +287,7 @@ with gr.Blocks(
|
|
| 228 |
|
| 229 |
with gr.Tabs():
|
| 230 |
# Tab 1: Text to Audio
|
| 231 |
-
with gr.Tab("π’ Text to Audio
|
| 232 |
with gr.Row():
|
| 233 |
with gr.Column():
|
| 234 |
t2a_caption = gr.Textbox(
|
|
@@ -279,7 +338,7 @@ with gr.Blocks(
|
|
| 279 |
)
|
| 280 |
|
| 281 |
# Tab 2: Text to Music
|
| 282 |
-
with gr.Tab("πΌ Text to Music
|
| 283 |
with gr.Row():
|
| 284 |
with gr.Column():
|
| 285 |
t2m_caption = gr.Textbox(
|
|
@@ -330,7 +389,7 @@ with gr.Blocks(
|
|
| 330 |
)
|
| 331 |
|
| 332 |
# Tab 3: Text to Speech
|
| 333 |
-
with gr.Tab("π£οΈ Text to Speech
|
| 334 |
with gr.Row():
|
| 335 |
with gr.Column():
|
| 336 |
tts_transcript = gr.Textbox(
|
|
@@ -393,7 +452,7 @@ with gr.Blocks(
|
|
| 393 |
)
|
| 394 |
|
| 395 |
# Tab 4: Singing Voice Synthesis
|
| 396 |
-
with gr.Tab("π€ Singing Voice Synthesis
|
| 397 |
with gr.Row():
|
| 398 |
with gr.Column():
|
| 399 |
svs_singer = gr.Dropdown(
|
|
@@ -487,7 +546,7 @@ with gr.Blocks(
|
|
| 487 |
)
|
| 488 |
|
| 489 |
# Tab 5: Speech Enhancement
|
| 490 |
-
with gr.Tab("π Speech Enhancement
|
| 491 |
with gr.Row():
|
| 492 |
with gr.Column():
|
| 493 |
se_input = gr.Audio(label="Noisy Speech", type="filepath")
|
|
@@ -533,7 +592,7 @@ with gr.Blocks(
|
|
| 533 |
)
|
| 534 |
|
| 535 |
# Tab 6: Audio Super Resolution
|
| 536 |
-
with gr.Tab("β¬οΈ Audio
|
| 537 |
with gr.Row():
|
| 538 |
with gr.Column():
|
| 539 |
sr_input = gr.Audio(
|
|
@@ -569,10 +628,21 @@ with gr.Blocks(
|
|
| 569 |
)
|
| 570 |
sr_status = gr.Textbox(label="Status")
|
| 571 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 572 |
sr_button.click(
|
| 573 |
fn=audio_super_resolution,
|
| 574 |
inputs=[sr_input, sr_model, sr_guidance, sr_steps],
|
| 575 |
-
outputs=[sr_output, sr_status]
|
| 576 |
)
|
| 577 |
|
| 578 |
gr.Examples(
|
|
@@ -583,7 +653,7 @@ with gr.Blocks(
|
|
| 583 |
)
|
| 584 |
|
| 585 |
# Tab 7: Video to Audio
|
| 586 |
-
with gr.Tab("π¬ Video to Audio
|
| 587 |
with gr.Row():
|
| 588 |
with gr.Column():
|
| 589 |
v2a_input = gr.Video(label="Input Video")
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import gradio as gr
|
| 5 |
+
import numpy as np
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
import librosa
|
| 8 |
+
import librosa.display
|
| 9 |
|
| 10 |
import spaces
|
| 11 |
|
|
|
|
| 171 |
return None, f"Error: {str(e)}"
|
| 172 |
|
| 173 |
|
| 174 |
+
def generate_spectrogram(audio_path, title="Spectrogram"):
    """Render a dB-scaled STFT spectrogram of an audio file to a PNG.

    Loads the audio at its native sample rate, plots a linear-frequency
    spectrogram of the STFT magnitude (in dB, referenced to the peak),
    and saves the figure next to the input file.

    Args:
        audio_path: Path to the input audio file.
        title: Plot title prefix; the detected sample rate is appended.

    Returns:
        Path to the saved spectrogram image, or None if anything failed
        (best-effort: errors are printed, not raised, so callers in the
        UI flow degrade gracefully).
    """
    try:
        # sr=None keeps the file's native sample rate (no resampling).
        y, sr = librosa.load(audio_path, sr=None)

        # Create figure
        fig, ax = plt.subplots(figsize=(10, 4))

        # Linear-frequency STFT magnitude in dB, referenced to the peak.
        # (Not a mel spectrogram: specshow below uses y_axis='hz'.)
        D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

        # Display spectrogram
        img = librosa.display.specshow(
            D, y_axis='hz', x_axis='time', sr=sr, ax=ax
        )
        ax.set_title(f'{title} (Sample Rate: {sr} Hz)')
        fig.colorbar(img, ax=ax, format='%+2.0f dB')

        # Build the output path from the input's stem. The previous
        # str.replace('.wav', ...) was a no-op for non-.wav inputs
        # (e.g. .mp3/.flac uploads), which made spec_path equal to
        # audio_path and OVERWROTE the audio file with the PNG.
        base, _ext = os.path.splitext(audio_path)
        spec_path = f"{base}_spec.png"
        plt.tight_layout()
        fig.savefig(spec_path, dpi=100, bbox_inches='tight')
        # Close explicitly so long-running Space processes don't leak figures.
        plt.close(fig)

        return spec_path
    except Exception as e:
        print(f"Error generating spectrogram: {str(e)}")
        return None
|
| 203 |
+
|
| 204 |
+
|
| 205 |
@spaces.GPU(duration=60)
|
| 206 |
def audio_super_resolution(
|
| 207 |
low_sr_audio,
|
|
|
|
| 222 |
num_steps=num_steps,
|
| 223 |
output_path=output_path
|
| 224 |
)
|
| 225 |
+
|
| 226 |
+
# Generate spectrograms for input and output
|
| 227 |
+
input_spec = generate_spectrogram(
|
| 228 |
+
low_sr_audio, "Input Audio Spectrogram"
|
| 229 |
+
)
|
| 230 |
+
output_spec = generate_spectrogram(
|
| 231 |
+
output_path, "Output Audio Spectrogram"
|
| 232 |
+
)
|
| 233 |
+
|
| 234 |
+
return output_path, "Super-resolution successful!", input_spec, output_spec
|
| 235 |
except Exception as e:
|
| 236 |
+
return None, f"Error: {str(e)}", None, None
|
| 237 |
|
| 238 |
|
| 239 |
@spaces.GPU(duration=60)
|
|
|
|
| 261 |
return None, f"Error: {str(e)}"
|
| 262 |
|
| 263 |
|
| 264 |
+
# Custom CSS for better tab display
|
| 265 |
+
custom_css = """
|
| 266 |
+
.tab-nav button {
|
| 267 |
+
font-size: 14px !important;
|
| 268 |
+
padding: 8px 12px !important;
|
| 269 |
+
min-width: fit-content !important;
|
| 270 |
+
}
|
| 271 |
+
.tab-nav {
|
| 272 |
+
overflow-x: auto !important;
|
| 273 |
+
flex-wrap: nowrap !important;
|
| 274 |
+
}
|
| 275 |
+
"""
|
| 276 |
+
|
| 277 |
# Create Gradio Interface
|
| 278 |
with gr.Blocks(
|
| 279 |
+
title="UniFlow-Audio Inference Demo",
|
| 280 |
+
theme=gr.themes.Soft(),
|
| 281 |
+
css=custom_css
|
| 282 |
) as demo:
|
| 283 |
gr.Markdown("# π UniFlow-Audio Inference Demo")
|
| 284 |
gr.Markdown(
|
|
|
|
| 287 |
|
| 288 |
with gr.Tabs():
|
| 289 |
# Tab 1: Text to Audio
|
| 290 |
+
with gr.Tab("π’ Text to Audio"):
|
| 291 |
with gr.Row():
|
| 292 |
with gr.Column():
|
| 293 |
t2a_caption = gr.Textbox(
|
|
|
|
| 338 |
)
|
| 339 |
|
| 340 |
# Tab 2: Text to Music
|
| 341 |
+
with gr.Tab("πΌ Text to Music"):
|
| 342 |
with gr.Row():
|
| 343 |
with gr.Column():
|
| 344 |
t2m_caption = gr.Textbox(
|
|
|
|
| 389 |
)
|
| 390 |
|
| 391 |
# Tab 3: Text to Speech
|
| 392 |
+
with gr.Tab("π£οΈ Text to Speech"):
|
| 393 |
with gr.Row():
|
| 394 |
with gr.Column():
|
| 395 |
tts_transcript = gr.Textbox(
|
|
|
|
| 452 |
)
|
| 453 |
|
| 454 |
# Tab 4: Singing Voice Synthesis
|
| 455 |
+
with gr.Tab("π€ Singing Voice Synthesis"):
|
| 456 |
with gr.Row():
|
| 457 |
with gr.Column():
|
| 458 |
svs_singer = gr.Dropdown(
|
|
|
|
| 546 |
)
|
| 547 |
|
| 548 |
# Tab 5: Speech Enhancement
|
| 549 |
+
with gr.Tab("π Speech Enhancement"):
|
| 550 |
with gr.Row():
|
| 551 |
with gr.Column():
|
| 552 |
se_input = gr.Audio(label="Noisy Speech", type="filepath")
|
|
|
|
| 592 |
)
|
| 593 |
|
| 594 |
# Tab 6: Audio Super Resolution
|
| 595 |
+
with gr.Tab("β¬οΈ Audio SR"):
|
| 596 |
with gr.Row():
|
| 597 |
with gr.Column():
|
| 598 |
sr_input = gr.Audio(
|
|
|
|
| 628 |
)
|
| 629 |
sr_status = gr.Textbox(label="Status")
|
| 630 |
|
| 631 |
+
# Spectrograms display
|
| 632 |
+
with gr.Row():
|
| 633 |
+
with gr.Column():
|
| 634 |
+
sr_input_spec = gr.Image(
|
| 635 |
+
label="Input Spectrogram", type="filepath"
|
| 636 |
+
)
|
| 637 |
+
with gr.Column():
|
| 638 |
+
sr_output_spec = gr.Image(
|
| 639 |
+
label="Output Spectrogram", type="filepath"
|
| 640 |
+
)
|
| 641 |
+
|
| 642 |
sr_button.click(
|
| 643 |
fn=audio_super_resolution,
|
| 644 |
inputs=[sr_input, sr_model, sr_guidance, sr_steps],
|
| 645 |
+
outputs=[sr_output, sr_status, sr_input_spec, sr_output_spec]
|
| 646 |
)
|
| 647 |
|
| 648 |
gr.Examples(
|
|
|
|
| 653 |
)
|
| 654 |
|
| 655 |
# Tab 7: Video to Audio
|
| 656 |
+
with gr.Tab("π¬ Video to Audio"):
|
| 657 |
with gr.Row():
|
| 658 |
with gr.Column():
|
| 659 |
v2a_input = gr.Video(label="Input Video")
|