| | import torch |
| | import numpy as np |
| | import librosa |
| |
|
def load_audio(audio_file, sr=22050):
    """Load an audio file as a mono signal.

    librosa is tried first. If it fails for any reason, the file is read
    with soundfile instead, down-mixed to mono by averaging its channels
    and, when possible, resampled to ``sr`` so that both paths honour the
    requested sample rate.

    Args:
        audio_file: Audio source; passed unchanged to ``librosa.load`` and
            ``soundfile.read``.
        sr: Target sample rate in Hz. ``None`` keeps the file's own rate.

    Returns:
        A ``(y, sr)`` tuple: the mono samples and the sample rate they are
        actually expressed in.

    Raises:
        ValueError: If neither backend can load the file (including the
            case where soundfile is not installed). The soundfile-side
            error is attached as ``__cause__``.
    """
    try:
        y, loaded_sr = librosa.load(audio_file, sr=sr, mono=True)
        return y, loaded_sr
    except Exception as e:
        # Deliberately broad: any librosa failure should trigger the fallback.
        print(f"Error loading audio with librosa: {str(e)}")

    try:
        # Imported lazily, and inside the try, so that a missing soundfile
        # install surfaces as the ValueError below rather than ImportError.
        import soundfile as sf

        y, native_sr = sf.read(audio_file)
    except Exception as e2:
        print(f"Error loading audio with soundfile: {str(e2)}")
        raise ValueError(f"Could not load audio file: {audio_file}") from e2

    # Multi-channel data is averaged over axis 1 to get a mono signal.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Best-effort resample so the fallback matches what librosa.load would
    # have returned; on failure keep the native-rate audio instead of
    # discarding a successful load.
    if sr is not None and native_sr != sr:
        try:
            y = librosa.resample(y, orig_sr=native_sr, target_sr=sr)
            native_sr = sr
        except Exception as resample_error:
            print(f"Could not resample audio to {sr} Hz: {str(resample_error)}")

    return y, native_sr
| |
|
def extract_audio_duration(y, sr):
    """Return the length of the signal in seconds.

    The duration is ``len(y)`` divided by the sample rate ``sr``.
    """
    sample_count = len(y)
    seconds = sample_count / sr
    return seconds
| |
|
def extract_mfcc_features(y, sr, n_mfcc=20):
    """Compute a mean MFCC vector for an audio signal.

    The MFCC matrix produced by ``librosa.feature.mfcc`` is transposed and
    averaged along axis 0. If anything goes wrong, the error is printed
    and a zero vector of length ``n_mfcc`` is returned instead.
    """
    try:
        coefficient_matrix = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        return coefficient_matrix.T.mean(axis=0)
    except Exception as e:
        print(f"Error extracting MFCCs: {str(e)}")
        # Best-effort fallback: hand back a zero vector rather than failing.
        return np.zeros(n_mfcc)
| |
|
def format_genre_results(top_genres):
    """Render ``(genre, confidence)`` pairs as a printable report.

    The report starts with a header line, followed by one bullet per pair
    showing the confidence as a percentage with two decimal places.
    """
    header = "Top Detected Genres:\n"
    bullet_lines = (
        f"- {genre}: {confidence*100:.2f}%\n" for genre, confidence in top_genres
    )
    return header + "".join(bullet_lines)
| |
|
def ensure_cuda_availability():
    """Print whether CUDA can be used and return that flag.

    When CUDA is available, the message includes the device count and the
    name of device 0 ("Unknown" if the count is zero). Otherwise a CPU
    notice is printed. The function only reports; it changes no state.
    """
    has_cuda = torch.cuda.is_available()
    if not has_cuda:
        print("CUDA is not available. Using CPU for inference.")
        return has_cuda

    device_count = torch.cuda.device_count()
    if device_count > 0:
        device_name = torch.cuda.get_device_name(0)
    else:
        device_name = "Unknown"
    print(f"CUDA is available with {device_count} device(s). Using: {device_name}")
    return has_cuda
| |
|
| |
|