# finalproject/agent_framework/visual_agent.py
"""
Visual agent for EmotionMirror application.
Handles image processing and facial analysis.
"""
import cv2
import numpy as np
import logging
from typing import Dict, Any, List, Tuple
from agent_framework.base_agent import BaseAgent
from services.model_service import ModelService
from services import get_emotion_service


class VisualAgent(BaseAgent):
    """Agent for visual processing and emotion analysis."""

    def __init__(self):
        """Initialize the visual agent."""
        super().__init__(
            name="VisualAgent",
            description="Processes images to detect faces and emotions",
        )
        self.model_service = ModelService()
        self.emotion_service = get_emotion_service()
        # Models are loaded lazily on first use; see _ensure_models_loaded()
        self.detection_model = None
        self.pose_model = None

    def _ensure_models_loaded(self) -> bool:
        """
        Ensure that the required models are loaded, loading them on demand.

        Returns:
            True if both models are loaded successfully, False otherwise
        """
        try:
            if self.detection_model is None:
                self.log_activity("Loading detection model")
                self.detection_model = self.model_service.load_model('detection')
            if self.pose_model is None:
                self.log_activity("Loading pose model")
                self.pose_model = self.model_service.load_model('pose')
            return self.detection_model is not None and self.pose_model is not None
        except Exception as e:
            self.log_activity(f"Error loading models: {str(e)}", "error")
            return False
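
    # Note: because the loaded models are cached on the instance, repeated
    # process() calls reuse the detection and pose models rather than
    # reloading them from disk on every request.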

    def process(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process an image to detect faces, emotions, and body poses.

        Args:
            data: Dictionary with:
                - 'image_path': Path to the image
                - 'image': (Optional) numpy array of the image
                - 'confidence': (Optional) Detection confidence threshold, defaults to 0.25
                - 'detection_confidence': (Optional) Newer name for the same threshold;
                  takes precedence over 'confidence' when present
                - 'use_preprocessed_image': (Optional) Whether to use a preprocessed image
                - 'preprocessed_image_path': (Optional) Path to the preprocessed image
                - 'timestamp': (Optional) Passed through to the result

        Returns:
            Dictionary with visual analysis results, or an 'error' key on failure
        """
        # Ensure models are loaded
        if not self._ensure_models_loaded():
            return {'error': 'Failed to load required models'}

        # Get image data
        image_path = data.get('image_path')
        image = data.get('image')
        confidence = data.get('confidence', 0.25)
        # Support the newer parameter name, falling back to 'confidence'
        detection_confidence = data.get('detection_confidence', confidence)

        # STEP 4: Handle preprocessed image
        use_preprocessed = data.get('use_preprocessed_image', False)
        preprocessed_path = data.get('preprocessed_image_path', None)
        used_preprocessed = False

        if image_path is None and image is None:
            return {'error': 'Image or image path is required'}

        # Load image if a path is provided
        if image_path is not None:
            try:
                # STEP 4: Choose between the original and preprocessed image
                if use_preprocessed and preprocessed_path:
                    self.log_activity(f"Using preprocessed image from: {preprocessed_path}")
                    image = cv2.imread(preprocessed_path)
                    if image is None:
                        self.log_activity("Preprocessed image not found, falling back to original", "warning")
                        image = cv2.imread(image_path)
                    else:
                        used_preprocessed = True
                else:
                    image = cv2.imread(image_path)
                if image is None:
                    return {'error': 'Failed to read image from provided path'}
            except Exception as e:
                self.log_activity(f"Error reading image: {e}", 'error')
                return {'error': f'Error reading image: {str(e)}'}

        # Process with detection model (for faces)
        self.log_activity("Running detection model")
        detection_results = self.detection_model(image, conf=detection_confidence)

        # Process with pose model (for body language)
        self.log_activity("Running pose model")
        pose_results = self.pose_model(image, conf=detection_confidence)

        # Extract faces (person detections)
        faces = self._extract_faces(detection_results, image)

        # Extract poses
        poses = self._extract_poses(pose_results)

        # Return combined results
        return {
            'faces': faces,
            'poses': poses,
            'face_count': len(faces),
            'timestamp': data.get('timestamp'),
            # Report True only when the preprocessed image was actually read
            'used_preprocessed_image': used_preprocessed,
        }
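
    # A minimal sketch of the process() contract, assuming a hypothetical
    # image path; the actual emotion labels depend on the configured
    # emotion service:
    #
    #     agent = VisualAgent()
    #     result = agent.process({
    #         'image_path': 'captures/frame.jpg',   # hypothetical path
    #         'detection_confidence': 0.3,
    #     })
    #     result['face_count']            # e.g. 1
    #     result['faces'][0]['emotion']   # e.g. 'happy'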

    def _extract_faces(self, results: List, image: np.ndarray) -> List[Dict[str, Any]]:
        """
        Extract face information from detection results.

        Person detections (COCO class 0) are used as face regions, and each
        cropped region is passed to the emotion service for analysis.

        Args:
            results: Detection model results
            image: Original image

        Returns:
            List of face data dictionaries
        """
        faces = []
        for r in results:
            boxes = r.boxes
            for box in boxes:
                # Filter only for persons (class 0 in the COCO dataset)
                if int(box.cls[0]) == 0:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    # Extract the face region
                    face_img = image[y1:y2, x1:x2]
                    if face_img.size == 0:
                        # Skip degenerate boxes that produce an empty crop
                        continue
                    # Analyze the emotion of the face
                    emotion_data = self.emotion_service.analyze_emotion(face_img)
                    # Check whether the advanced service was used
                    using_advanced = False
                    if hasattr(self.emotion_service, 'is_advanced_service_active'):
                        using_advanced = self.emotion_service.is_advanced_service_active()
                    # Basic face data
                    face_data = {
                        'bbox': [x1, y1, x2, y2],
                        'confidence': float(box.conf[0]),
                        'emotion': emotion_data['emotion'],
                        'emotion_confidence': emotion_data['confidence'],
                        'emotions': emotion_data['emotions'],
                        'features': emotion_data['features'],  # Same key name as in emotion_service
                        'emotion_features': emotion_data['features'],  # Kept for backwards compatibility
                        'using_advanced': using_advanced  # Whether the advanced service was used
                    }
                    # Add advanced data if available (from DeepFace)
                    if 'age' in emotion_data['features']:
                        face_data['age'] = emotion_data['features']['age']
                    if 'gender' in emotion_data['features']:
                        face_data['gender'] = emotion_data['features']['gender']
                    faces.append(face_data)
        return faces

    def _extract_poses(self, results: List) -> List[Dict[str, Any]]:
        """
        Extract pose information from pose model results.

        Args:
            results: Pose model results

        Returns:
            List of pose data dictionaries
        """
        poses = []
        for r in results:
            if hasattr(r, 'keypoints') and r.keypoints is not None:
                for i, keypoints in enumerate(r.keypoints.data):
                    pose_data = {
                        'keypoints': keypoints.tolist(),
                        'person_idx': i
                    }
                    poses.append(pose_data)
        return poses
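

# A minimal usage sketch, assuming the agent_framework and services packages
# are importable and that 'sample.jpg' (a hypothetical path) exists on disk;
# this illustrates the API and is not part of the application entry point.
if __name__ == "__main__":
    agent = VisualAgent()
    output = agent.process({'image_path': 'sample.jpg', 'confidence': 0.3})
    if 'error' in output:
        print(f"Analysis failed: {output['error']}")
    else:
        print(f"Detected {output['face_count']} face(s) and {len(output['poses'])} pose(s)")
        for face in output['faces']:
            print(f"  bbox={face['bbox']}  emotion={face['emotion']} "
                  f"({face['emotion_confidence']:.2f})")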