|
|
|
|
|
"""
|
|
|
Created on Mon Jun 30 17:06:08 2025
|
|
|
|
|
|
@author: User
|
|
|
"""
|
|
|
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
import numpy as np
|
|
|
import librosa
|
|
|
import joblib
|
|
|
import pickle
|
|
|
from pathlib import Path
|
|
|
from sklearn.isotonic import IsotonicRegression
|
|
|
import argparse
|
|
|
|
|
|
|
|
|
# --- Audio / feature-extraction configuration ---

SR = 22050  # sample rate (Hz) applied when loading audio

DURATION = 4.0  # analysis window length in seconds

SAMPLES = int(SR * DURATION)  # samples per analysis window

BANDS = 128  # number of mel bands in the spectrogram

HOP = 512  # STFT hop length in samples

FMIN, FMAX = 150, 4500  # band-pass / mel frequency range (Hz)

# Inference device: GPU when available, otherwise CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
|
|
|
|
|
|
|
class SEBlock(nn.Module):
    """Squeeze-and-Excitation gate.

    Produces one weight in (0, 1) per channel via global average pooling
    followed by a bottleneck MLP, and rescales the input feature map by it.
    """

    def __init__(self, channels, red=16):
        super().__init__()
        bottleneck = channels // red
        # squeeze (global pool) -> reduce -> expand -> sigmoid gate
        self.fc = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channels, bottleneck, 1),
            nn.ReLU(inplace=True),
            nn.Conv2d(bottleneck, channels, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        gate = self.fc(x)
        # Broadcasts the (B, C, 1, 1) gate across spatial dimensions.
        return x * gate
|
|
|
|
|
|
class EfficientNetSE(nn.Module):
    """EfficientNet backbone with SE channel attention and a linear classifier head.

    Args:
        backbone: a model exposing a ``.features`` feature extractor
            (e.g. torchvision ``efficientnet_b0``).
        num_classes: number of output logits.
        drop: dropout probability before the final linear layer.
        feat_dim: channel count of the backbone's final feature map.
            Defaults to 1280, which matches ``efficientnet_b0``; pass a
            different value to reuse this wrapper with other backbones.
            (Backward compatible: previous behavior hard-coded 1280.)
    """

    def __init__(self, backbone, num_classes, drop=0.3, feat_dim=1280):
        super().__init__()
        self.backbone = backbone
        # Channel-wise attention over the backbone's last feature map.
        self.se = SEBlock(feat_dim)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Sequential(
            nn.Dropout(drop),
            nn.Linear(feat_dim, num_classes)
        )

    def forward(self, x):
        """Return raw (un-sigmoided) logits of shape (batch, num_classes)."""
        x = self.backbone.features(x)    # (B, feat_dim, H', W')
        x = self.se(x)                   # SE re-weighting, same shape
        x = self.pool(x).flatten(1)      # global average pool -> (B, feat_dim)
        return self.classifier(x)
|
|
|
|
|
|
|
|
|
def load_and_normalize(path, sr=SR, target_dBFS=-20.0):
    """Load an audio file at *sr*, remove the DC offset, and scale it so the
    RMS level matches *target_dBFS* (decibels relative to full scale)."""
    signal, _ = librosa.load(path, sr=sr)
    signal = signal - np.mean(signal)  # remove DC component
    # Epsilon keeps the division safe on silent (all-zero) clips.
    rms = np.sqrt(np.mean(signal ** 2)) + 1e-9
    gain = (10 ** (target_dBFS / 20)) / rms
    return signal * gain
|
|
|
|
|
|
def bandpass(y, sr=SR, low=FMIN, high=FMAX, order=6):
    """Apply a zero-phase Butterworth band-pass filter between *low* and *high* Hz."""
    from scipy.signal import butter, filtfilt

    nyquist = sr / 2.0
    critical = (low / nyquist, high / nyquist)
    b, a = butter(order, critical, btype='band')
    # filtfilt runs the filter forward and backward, so no phase shift is introduced.
    return filtfilt(b, a, y)
|
|
|
|
|
|
def segment(y, sr=SR, win=DURATION, hop=1.0):
    """Slice *y* into overlapping windows of *win* seconds, one every *hop* seconds.

    A clip shorter than one window is zero-padded at the end and returned
    as a single segment.
    """
    win_len = int(win * sr)
    hop_len = int(hop * sr)
    if len(y) < win_len:
        padded = np.pad(y, (0, win_len - len(y)))
        return [padded]
    starts = range(0, len(y) - win_len + 1, hop_len)
    return [y[s:s + win_len] for s in starts]
|
|
|
|
|
|
def extract_log_mel(y, sr=SR, n_mels=BANDS, hop_length=HOP, fmin=FMIN, fmax=FMAX):
    """Compute a PCEN-compressed mel spectrogram.

    NOTE: despite the name, this returns PCEN (per-channel energy
    normalization), not a log-mel spectrogram. The name is kept for
    compatibility with existing callers.
    """
    # power=1.0 -> magnitude (not power) mel spectrogram.
    mel = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=n_mels, hop_length=hop_length,
        fmin=fmin, fmax=fmax, power=1.0)
    # Scale into an integer-like range before PCEN, as librosa's PCEN
    # docs recommend for float input.
    return librosa.pcen(mel * (2 ** 31))
|
|
|
|
|
|
|
|
|
def predict_segments(file_path, model):
    """Run *model* on every segment of an audio file.

    Returns an array of shape (n_segments, n_classes) with per-segment
    sigmoid probabilities.
    """
    audio = load_and_normalize(file_path)
    audio = bandpass(audio, SR)
    chunks = segment(audio, SR)

    model.eval()
    outputs = []
    with torch.no_grad():
        for chunk in chunks:
            features = extract_log_mel(chunk)
            # Add batch and channel axes: (1, 1, n_mels, frames).
            batch = torch.tensor(features[None, None], dtype=torch.float32).to(DEVICE)
            probs = torch.sigmoid(model(batch)).cpu().numpy()[0]
            outputs.append(probs)
    return np.array(outputs)
|
|
|
|
|
|
|
|
|
def predict_file_with_hybrid_strategy(file_path, model, thresholds, label_encoder, override_max=0.9):
    """Detect species in a file by combining two per-class criteria.

    A species is reported when either its mean probability across segments
    exceeds a relaxed (threshold - 0.15) cutoff, or any single segment's
    probability exceeds *override_max*.

    Returns (predicted_species, mean_probs, max_probs, per_segment_probs).
    """
    probs = predict_segments(file_path, model)
    mean_probs = probs.mean(axis=0)
    max_probs = probs.max(axis=0)
    # Relax each calibrated threshold for the mean-probability test.
    sensitive_thresh = [t - 0.15 for t in thresholds]

    preds = [
        sp
        for i, sp in enumerate(label_encoder.classes_)
        if mean_probs[i] > sensitive_thresh[i] or max_probs[i] > override_max
    ]
    return preds, mean_probs, max_probs, probs
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # CLI entry point: classify a single audio file with a trained checkpoint.
    parser = argparse.ArgumentParser()
    parser.add_argument("audio_file", type=str, help="Ruta al archivo de audio (.wav)")
    parser.add_argument("--model", default="CNN_final.pth", help="Ruta al modelo CNN .pth")
    parser.add_argument("--meta", default="label_encoder_and_thresholds.pkl", help="Pickle con encoder y thresholds")
    args = parser.parse_args()

    # Load the fitted label encoder and per-class decision thresholds.
    # NOTE(review): pickle.load can execute arbitrary code — only load trusted files.
    with open(args.meta, "rb") as f:
        meta = pickle.load(f)

    label_encoder = meta["label_encoder"]
    thresholds = meta["thresholds"]

    # Build an EfficientNet-B0 backbone with its input stem replaced by a
    # 1-channel conv (spectrogram input is mono), then load trained weights.
    from torchvision import models
    backbone = models.efficientnet_b0(weights=None)
    backbone.features[0][0] = nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1, bias=False)
    model = EfficientNetSE(backbone, num_classes=len(label_encoder.classes_))
    model.load_state_dict(torch.load(args.model, map_location=DEVICE))
    model.to(DEVICE)

    # Segment-level inference aggregated with the hybrid thresholding strategy.
    file_path = args.audio_file
    preds, mean_probs, max_probs, probs_all = predict_file_with_hybrid_strategy(
        file_path, model, thresholds, label_encoder
    )

    # Report detected species and the per-class probability summary.
    print(f"\n Archivo: {file_path}")
    print(f"Especies detectadas: {', '.join(preds)}\n")

    print("📊 Probabilidades por especie:")
    for i, sp in enumerate(label_encoder.classes_):
        print(f"  {sp:<25} → mean: {mean_probs[i]:.2f}, max: {max_probs[i]:.2f}")
|
|
|
|