SingA

Sleeping

App Files Community

latterworks committed on May 25

Commit

4d9af98

verified ·

1 Parent(s): 1902030

Update app.py

Browse files

Files changed (1) hide show

app.py +22 -16

app.py CHANGED Viewed

@@ -71,7 +71,7 @@ class AudioAnalyzer:
             return None, f"Download failed: {e.stderr}"
         except Exception as e:
             logger.error(f"Unexpected error during download: {str(e)}")
-            return None, f"Unexpected error: {str(e)}"
     def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
                              progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
@@ -93,14 +93,14 @@ class AudioAnalyzer:
                 'duration': duration,
                 'sample_rate': sr,
                 'samples': len(y),
-                'tempo': librosa.beat.beat_track(y=y, sr=sr)[0],
                 'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
                 'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
                 'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
                 'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
             }
-            progress(0.5, desc="Computing mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
@@ -108,7 +108,7 @@ class AudioAnalyzer:
             progress(0.8, desc="Creating visualizations...")
             fig, axes = plt.subplots(2, 2, figsize=(15, 10))
-            time_axis = librosa.frames_to_time(range(len(y)), sr=sr)
             axes[0, 0].plot(time_axis, y)
             axes[0, 0].set_title('Waveform')
             axes[0, 0].set_xlabel('Time (s)')
@@ -130,15 +130,21 @@ class AudioAnalyzer:
             plt.tight_layout()
             plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
-            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
             summary = f"""
 **Audio Summary:**
 - Duration: {duration:.2f} seconds
 - Sample Rate: {sr} Hz
 - Estimated Tempo: {features['tempo']:.1f} BPM
-- Number of Samples: {len(y):,}
 **Feature Shapes:**
 - MFCC: {features['mfcc'].shape}
@@ -171,9 +177,9 @@ class AudioAnalyzer:
             y_harm = librosa.effects.harmonic(y=y, margin=8)
             chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
             chroma_filter = np.minimum(chroma_harm,
-                                     librosa.decompose.nn_filter(chroma_harm,
-                                                                aggregate=np.median,
-                                                                metric='cosine'))
             chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
             chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
             chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
@@ -195,7 +201,7 @@ class AudioAnalyzer:
             plt.tight_layout()
             plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
-            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
             summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
@@ -216,7 +222,7 @@ class AudioAnalyzer:
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
-            progress(0.3, desc="Computing mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
@@ -233,7 +239,7 @@ class AudioAnalyzer:
             for i in range(num_patches_to_show):
                 librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
-                                       ax=axes[i], sr=sr, hop_length=hop_length)
                 axes[i].set_title(f'Patch {i+1}')
             for i in range(num_patches_to_show, len(axes)):
@@ -241,14 +247,14 @@ class AudioAnalyzer:
             plt.tight_layout()
             plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
-            plt.savefig(plot_path, dpi=150, bbox_inches='tight')
             plt.close()
             summary = f"""
 **Patch Generation Summary:**
 - Total patches generated: {patches.shape[-1]}
-- Patch duration: {patch_duration} seconds
-- Hop duration: {hop_duration} seconds
 - Patch shape (mels, time, patches): {patches.shape}
 - Each patch covers {patch_frames} time frames
             """
@@ -275,7 +281,7 @@ def create_gradio_interface() -> gr.Blocks:
         - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
         - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
-        **Requirements**: Install `yt-dlp` with `pip install yt-dlp`.
         """)
         with gr.Row():

             return None, f"Download failed: {e.stderr}"
         except Exception as e:
             logger.error(f"Unexpected error during download: {str(e)}")
+            return None, f"Error: {str(e)}"
     def extract_basic_features(self, audio_path: str, sr: int = 16000, max_duration: float = 60.0,
                              progress=gr.Progress()) -> Tuple[Optional[str], Optional[str], Optional[str]]:
                 'duration': duration,
                 'sample_rate': sr,
                 'samples': len(y),
+                'tempo': float(librosa.beat.beat_track(y=y, sr=sr)[0]),  # Convert to float
                 'mfcc': librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13),
                 'spectral_centroid': librosa.feature.spectral_centroid(y=y, sr=sr)[0],
                 'spectral_rolloff': librosa.feature.spectral_rolloff(y=y, sr=sr)[0],
                 'zero_crossing_rate': librosa.feature.zero_crossing_rate(y)[0]
             }
+            progress(0.5, desc="Computing Mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
             progress(0.8, desc="Creating visualizations...")
             fig, axes = plt.subplots(2, 2, figsize=(15, 10))
+            time_axis = np.linspace(0, duration, len(y))
             axes[0, 0].plot(time_axis, y)
             axes[0, 0].set_title('Waveform')
             axes[0, 0].set_xlabel('Time (s)')
             plt.tight_layout()
             plot_path = self.temp_dir / f"basic_features_{np.random.randint(10000)}.png"
+            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
             plt.close()
+            # Validate feature shapes
+            for key in ['mfcc', 'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate']:
+                if not isinstance(features[key].shape, tuple):
+                    logger.error(f"Invalid shape for {key}: {features[key].shape}")
+                    return None, None, f"Invalid feature shape for {key}"
             summary = f"""
 **Audio Summary:**
 - Duration: {duration:.2f} seconds
 - Sample Rate: {sr} Hz
 - Estimated Tempo: {features['tempo']:.1f} BPM
+- Number of Samples: {features['samples']:,}
 **Feature Shapes:**
 - MFCC: {features['mfcc'].shape}
             y_harm = librosa.effects.harmonic(y=y, margin=8)
             chroma_harm = librosa.feature.chroma_cqt(y=y_harm, sr=sr)
             chroma_filter = np.minimum(chroma_harm,
+                                    librosa.decompose.nn_filter(chroma_harm,
+                                                            aggregate=np.median,
+                                                            metric='cosine'))
             chroma_smooth = scipy.ndimage.median_filter(chroma_filter, size=(1, 9))
             chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
             chroma_cens = librosa.feature.chroma_cens(y=y, sr=sr)
             plt.tight_layout()
             plot_path = self.temp_dir / f"chroma_features_{np.random.randint(10000)}.png"
+            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
             plt.close()
             summary = "Chroma feature analysis complete! Visualizations show different chroma extraction methods for harmonic analysis."
             progress(0.1, desc="Loading audio...")
             y, sr = librosa.load(audio_path, sr=sr)
+            progress(0.3, desc="Computing Mel spectrogram...")
             hop_length = 512
             S_mel = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=hop_length, n_mels=80)
             S_dB = librosa.power_to_db(S_mel, ref=np.max)
             for i in range(num_patches_to_show):
                 librosa.display.specshow(patches[..., i], y_axis='mel', x_axis='time',
+                                        ax=axes[i], sr=sr, hop_length=hop_length)
                 axes[i].set_title(f'Patch {i+1}')
             for i in range(num_patches_to_show, len(axes)):
             plt.tight_layout()
             plot_path = self.temp_dir / f"patches_{np.random.randint(10000)}.png"
+            plt.savefig(plot_path, dpi=300, bbox_inches='tight')
             plt.close()
             summary = f"""
 **Patch Generation Summary:**
 - Total patches generated: {patches.shape[-1]}
+- Patch duration: {patch_duration:.1f} seconds
+- Hop duration: {hop_duration:.1f} seconds
 - Patch shape (mels, time, patches): {patches.shape}
 - Each patch covers {patch_frames} time frames
             """
         - 🎼 **Chroma Features**: Harmonic content analysis with multiple extraction methods
         - 🧩 **Transformer Patches**: Fixed-duration patches for deep learning
+        **Requirements**: Dependencies are automatically installed in Hugging Face Spaces via `requirements.txt`.
         """)
         with gr.Row():