Spaces:
Sleeping
Sleeping
File size: 1,607 Bytes
e3bdc52 e2cac58 e3bdc52 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 | import sys
import json
import os
# Optional heavy dependencies: the module must stay importable without them,
# so a failed import is recorded in HAS_LIBS instead of propagating.
try:
    import librosa
    import librosa.display
    import numpy as np
    import matplotlib.pyplot as plt
except ImportError:
    HAS_LIBS = False
else:
    HAS_LIBS = True
def extract_features(audio_path, output_dir=".tmp/"):
    """Extract MFCCs and a Mel spectrogram image from an audio file.

    The audio is loaded with ``librosa.load``; a 128-band Mel spectrogram
    (converted to decibels using its maximum as the reference) and 40 MFCCs
    are computed from it. The spectrogram is rendered without axes on a
    transparent background and saved as ``<audio basename>_spec.png`` inside
    ``output_dir``.

    Args:
        audio_path: Path of the audio file to analyse.
        output_dir: Directory that receives the PNG. It is created when it
            does not exist; an empty string means the current directory.

    Returns:
        On success, a dict with the keys ``audio_info`` (holding
        ``duration`` and ``sample_rate``), ``spectrogram_path`` and
        ``mfcc_shape``. When the optional libraries could not be imported,
        a dict with a single ``error`` key is returned instead.
    """
    if not HAS_LIBS:
        return {"error": "Bibliotecas librosa/numpy não instaladas."}

    # Load the audio samples and their sample rate.
    y, sr = librosa.load(audio_path)

    # Mel spectrogram, converted from power to dB.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)

    # MFCCs (only their shape is reported back to the caller).
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40)

    # Destination of the spectrogram image used by the dashboard.
    base_name = os.path.splitext(os.path.basename(audio_path))[0]
    spec_filename = base_name + "_spec.png"
    spec_path = os.path.join(output_dir, spec_filename)

    # savefig does not create missing directories, so make sure the target
    # exists first. os.makedirs("") raises, hence the guard for "current dir".
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    fig = plt.figure(figsize=(10, 4), facecolor='none')
    try:
        ax = fig.gca()
        ax.set_axis_off()
        librosa.display.specshow(S_dB, sr=sr, cmap='magma', ax=ax)
        fig.tight_layout(pad=0)
        fig.savefig(spec_path, transparent=True, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release this specific figure, even if rendering or saving
        # failed, so repeated calls do not accumulate open figures.
        plt.close(fig)

    return {
        "audio_info": {
            "duration": librosa.get_duration(y=y, sr=sr),
            "sample_rate": sr,
        },
        "spectrogram_path": spec_path,
        "mfcc_shape": mfccs.shape,
    }
if __name__ == "__main__":
    # Command-line entry point: the first argument is the audio file path.
    # The extraction result is printed as indented JSON; without an argument
    # a usage line is printed instead.
    if len(sys.argv) >= 2:
        print(json.dumps(extract_features(sys.argv[1]), indent=2))
    else:
        print("Uso: python feature_extractor.py <audio_path>")
|