import json
import base64
import io
import os
import sys
from typing import Any, Dict, List

import numpy as np
import torch

# Ensure the model directory is on sys.path so the custom modules resolve.
current_dir = os.path.dirname(os.path.abspath(__file__))
if current_dir not in sys.path:
    sys.path.insert(0, current_dir)

try:
    from modeling_emotion_av import EmotionAVModel, EmotionAVConfig
    from feature_extraction_emotion_av import EmotionAVFeatureExtractor
except ImportError as e:
    print(f"Warning: Could not import custom modules: {e}")

from transformers import AutoModel, AutoConfig, AutoFeatureExtractor

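# The handler expects request payloads shaped roughly like the sketch below.
# Illustrative only, not an official schema: "inputs" may be a base64-encoded
# audio file (e.g. base64.b64encode(open("clip.wav", "rb").read()).decode())
# or a raw float array; "parameters" is optional and sampling_rate defaults
# to 16000.
#
#   {"inputs": "<base64 WAV bytes>", "parameters": {"sampling_rate": 16000}}
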
class EndpointHandler:
    def __init__(self, model_dir: str = ""):
        """
        Initialize the handler for the emotion-av model.

        Args:
            model_dir (str): Path to the model directory.
        """
        try:
            print(f"Initializing handler with model_dir: {model_dir}")

            # Sanity-check the config file before loading any weights.
            config_path = os.path.join(model_dir, "config.json")
            if not os.path.exists(config_path):
                raise FileNotFoundError(f"Config file not found: {config_path}")

            with open(config_path, "r", encoding="utf-8") as f:
                config_content = f.read().strip()
            if not config_content:
                raise ValueError("Config file is empty")

            config_data = json.loads(config_content)
            print(f"Successfully loaded config with keys: {list(config_data.keys())}")

            # Prefer the custom model class; fall back to AutoModel with
            # trust_remote_code if the direct import or load failed.
            try:
                self.model = EmotionAVModel.from_pretrained(
                    model_dir,
                    trust_remote_code=True,
                    local_files_only=True,
                )
                print("Successfully loaded EmotionAVModel")
            except Exception as e:
                print(f"Failed to load with EmotionAVModel: {e}")
                self.model = AutoModel.from_pretrained(
                    model_dir,
                    trust_remote_code=True,
                    local_files_only=True,
                )
                print("Successfully loaded with AutoModel")

            # Same pattern for the feature extractor.
            try:
                self.feature_extractor = EmotionAVFeatureExtractor.from_pretrained(
                    model_dir,
                    trust_remote_code=True,
                    local_files_only=True,
                )
                print("Successfully loaded EmotionAVFeatureExtractor")
            except Exception as e:
                print(f"Failed to load with EmotionAVFeatureExtractor: {e}")
                self.feature_extractor = AutoFeatureExtractor.from_pretrained(
                    model_dir,
                    trust_remote_code=True,
                    local_files_only=True,
                )
                print("Successfully loaded with AutoFeatureExtractor")

            self.model.eval()
            print("Handler initialization completed successfully")

        except Exception as e:
            print(f"Error during handler initialization: {e}")
            raise

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Handle inference requests.

        Args:
            data (Dict): Input data containing an 'inputs' key with audio data
                (a base64-encoded audio file or a raw float array) and an
                optional 'parameters' dict.

        Returns:
            List[Dict]: Prediction results in the HF audio-classification
                format: a list of {"label": ..., "score": ...} dicts sorted
                by descending score.
        """
        try:
            inputs = data.get("inputs", data)
            parameters = data.get("parameters", {})

            # Accept either a base64-encoded audio file or a raw float array.
            if isinstance(inputs, str):
                try:
                    audio_bytes = base64.b64decode(inputs)
                    audio_data = self._process_audio_bytes(audio_bytes)
                except Exception as e:
                    return [{"error": f"Failed to decode base64 audio: {str(e)}"}]
            elif isinstance(inputs, (list, np.ndarray)):
                audio_data = np.array(inputs, dtype=np.float32)
            else:
                return [{"error": "Invalid input format. Expected base64 string or audio array."}]

            features = self.feature_extractor(
                audio_data,
                sampling_rate=parameters.get("sampling_rate", 16000),
                return_tensors="pt",
            )

            with torch.no_grad():
                outputs = self.model(features["input_features"])

            emotion_logits = outputs.emotion_logits
            arousal_valence = outputs.arousal_valence

            emotion_probs = torch.softmax(emotion_logits, dim=-1)

            # The model emits arousal/valence in [0, 1]; rescale to [-1, 1].
            # These values are computed here but are not included in the HF
            # audio-classification response built below.
            arousal = (arousal_valence[0, 0].item() * 2) - 1
            valence = (arousal_valence[0, 1].item() * 2) - 1

            # Build the response: every label, sorted by descending probability.
            results = []
            probs_sorted, indices = torch.sort(emotion_probs[0], descending=True)
            for i in range(len(indices)):
                idx = indices[i].item()
                label = self.model.config.id2label[idx]
                score = probs_sorted[i].item()
                results.append({
                    "label": label,
                    "score": score,
                })

            return results

        except Exception as e:
            return [{"error": f"Inference failed: {str(e)}"}]

    def _process_audio_bytes(self, audio_bytes: bytes) -> np.ndarray:
        """
        Process raw audio bytes and convert them to a mono float32 array.

        Args:
            audio_bytes (bytes): Raw audio file bytes (e.g. WAV/FLAC).

        Returns:
            np.ndarray: Processed audio array.
        """
        try:
            import soundfile as sf

            audio_io = io.BytesIO(audio_bytes)
            audio_data, sample_rate = sf.read(audio_io)

            # Downmix multi-channel audio to mono.
            if len(audio_data.shape) > 1:
                audio_data = np.mean(audio_data, axis=1)

            return audio_data.astype(np.float32)

        except Exception as e:
            # Fall back to librosa, which can decode additional formats.
            try:
                import librosa

                audio_io = io.BytesIO(audio_bytes)
                audio_data, sample_rate = librosa.load(audio_io, sr=16000, mono=True)
                return audio_data.astype(np.float32)
            except Exception as e2:
                raise Exception(
                    f"Failed to process audio: {str(e2)} (soundfile error: {str(e)})"
                )
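

# Minimal local smoke test: a sketch of how the handler might be exercised
# outside the endpoint. The one-second 440 Hz sine wave and the "." model
# directory are assumptions for illustration, not part of the deployed setup.
if __name__ == "__main__":
    # Build a synthetic one-second mono signal at 16 kHz.
    t = np.linspace(0, 1, 16000, dtype=np.float32)
    waveform = 0.1 * np.sin(2 * np.pi * 440 * t)

    # Assumes the model files sit alongside this script.
    handler = EndpointHandler(model_dir=".")
    predictions = handler(
        {
            "inputs": waveform.tolist(),
            "parameters": {"sampling_rate": 16000},
        }
    )
    print(predictions)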