""" Visual agent for EmotionMirror application. Handles image processing and facial analysis. """ import cv2 import numpy as np import logging from typing import Dict, Any, List, Tuple from agent_framework.base_agent import BaseAgent from services.model_service import ModelService from services import get_emotion_service class VisualAgent(BaseAgent): """Agent for visual processing and emotion analysis""" def __init__(self): """Initialize the visual agent""" super().__init__(name="VisualAgent", description="Processes images to detect faces and emotions") self.model_service = ModelService() self.emotion_service = get_emotion_service() self.detection_model = None self.pose_model = None def _ensure_models_loaded(self) -> bool: """ Ensure that required models are loaded. Returns: True if models are loaded successfully, False otherwise """ try: if self.detection_model is None: self.log_activity("Loading detection model") self.detection_model = self.model_service.load_model('detection') if self.pose_model is None: self.log_activity("Loading pose model") self.pose_model = self.model_service.load_model('pose') return self.detection_model is not None and self.pose_model is not None except Exception as e: self.log_activity(f"Error loading models: {str(e)}", "error") return False def process(self, data: Dict[str, Any]) -> Dict[str, Any]: """ Process an image to detect faces and basic expressions. Args: data: Dictionary with: - 'image_path': Path to the image - 'image': (Optional) numpy array of the image - 'confidence': Detection confidence threshold - 'use_preprocessed_image': (Optional) Whether to use preprocessed image - 'preprocessed_image_path': (Optional) Path to preprocessed image Returns: Dictionary with visual analysis results """ # Ensure models are loaded if not self._ensure_models_loaded(): return {"error": "Failed to load required models"} # Get image data image_path = data.get('image_path') image = data.get('image') confidence = data.get('confidence', 0.25) detection_confidence = data.get('detection_confidence', confidence) # Support new parameter name # STEP 4: Handle preprocessed image use_preprocessed = data.get('use_preprocessed_image', False) preprocessed_path = data.get('preprocessed_image_path', None) if image_path is None and image is None: return {'error': 'Image or image path is required'} # Load image if path is provided if image_path is not None: try: # STEP 4: Choose between original and preprocessed image if use_preprocessed and preprocessed_path: self.log_activity(f"Using preprocessed image from: {preprocessed_path}") image = cv2.imread(preprocessed_path) if image is None: self.log_activity("Preprocessed image not found, falling back to original", "warning") image = cv2.imread(image_path) else: image = cv2.imread(image_path) if image is None: return {'error': 'Failed to read image from provided path'} except Exception as e: self.log_activity(f"Error reading image: {e}", 'error') return {'error': f'Error reading image: {str(e)}'} # Process with detection model (for faces) self.log_activity("Running detection model") detection_results = self.detection_model(image, conf=detection_confidence) # Process with pose model (for body language) self.log_activity("Running pose model") pose_results = self.pose_model(image, conf=detection_confidence) # Extract faces (person detections) faces = self._extract_faces(detection_results, image) # Extract poses poses = self._extract_poses(pose_results) # Return combined results return { 'faces': faces, 'poses': poses, 'face_count': len(faces), 'timestamp': data.get('timestamp'), 'used_preprocessed_image': use_preprocessed and preprocessed_path is not None } def _extract_faces(self, results: List, image: np.ndarray) -> List[Dict[str, Any]]: """ Extract face information from detection results. Args: results: Detection model results image: Original image Returns: List of face data dictionaries """ faces = [] for r in results: boxes = r.boxes for box in boxes: # Filter only for persons (class 0 in COCO) if int(box.cls[0]) == 0: # 'person' in COCO dataset x1, y1, x2, y2 = map(int, box.xyxy[0]) # Extract face region face_img = image[y1:y2, x1:x2] # Analyze emotion of the face emotion_data = self.emotion_service.analyze_emotion(face_img) # Check if advanced service was used using_advanced = False if hasattr(self.emotion_service, 'is_advanced_service_active'): using_advanced = self.emotion_service.is_advanced_service_active() # Basic face data face_data = { 'bbox': [x1, y1, x2, y2], 'confidence': float(box.conf[0]), 'emotion': emotion_data['emotion'], 'emotion_confidence': emotion_data['confidence'], 'emotions': emotion_data['emotions'], 'features': emotion_data['features'], # Usar el mismo nombre de clave que en emotion_service 'emotion_features': emotion_data['features'], # Mantener para compatibilidad 'using_advanced': using_advanced # Indicador de si se utilizó el servicio avanzado } # Add advanced data if available (from DeepFace) if 'age' in emotion_data['features']: face_data['age'] = emotion_data['features']['age'] if 'gender' in emotion_data['features']: face_data['gender'] = emotion_data['features']['gender'] faces.append(face_data) return faces def _extract_poses(self, results: List) -> List[Dict[str, Any]]: """ Extract pose information from pose model results. Args: results: Pose model results Returns: List of pose data dictionaries """ poses = [] for r in results: if hasattr(r, 'keypoints') and r.keypoints is not None: for i, keypoints in enumerate(r.keypoints.data): pose_data = { 'keypoints': keypoints.tolist(), 'person_idx': i } poses.append(pose_data) return poses