import os
import base64
import logging
from typing import List, Dict, Any, Optional, Union
from pathlib import Path
import json
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_anthropic import ChatAnthropic
from langchain_groq import ChatGroq
# Handle langchain_google_genai import error
# Gemini support is optional: the langchain_google_genai package may not be
# installed, so import it defensively and record availability in a flag.
try:
    from langchain_google_genai import ChatGoogleGenerativeAI
except ImportError:
    # Gemini model is not available; keep a None placeholder so later
    # provider checks can still reference the name safely.
    ChatGoogleGenerativeAI = None
    GEMINI_AVAILABLE = False
else:
    GEMINI_AVAILABLE = True
from langchain_core.messages.base import BaseMessage
# Fix import paths
# Resolve project-local utilities. When the app package layout is not on the
# path (e.g. running from the repository root), fall back to the
# behavior_backend-qualified import and a minimal mock settings object.
try:
    from app.utils.logging_utils import time_it, setup_logger
    from app.core.config import settings
except ImportError:
    # Try relative imports for running from project root
    from behavior_backend.app.utils.logging_utils import time_it, setup_logger

    class Settings:
        """Minimal stand-in for app.core.config.settings, used for testing."""

        def __init__(self):
            # Each provider key is read straight from the environment,
            # defaulting to an empty string when unset.
            self.OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
            self.ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
            self.GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
            self.GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")

    settings = Settings()

# Module-level logger shared by the analyzer service.
logger = setup_logger(__name__)
class AIFaceAnalyzer:
    """Service for analyzing candidate profile pictures using Visual LLMs.

    Wraps a LangChain chat model (OpenAI, Anthropic, Groq or Gemini), sends
    base64-encoded images together with an analysis prompt, and expects a
    structured JSON assessment back.
    """

    # Hard cap on how many pictures are sent to the model in one request.
    MAX_IMAGES = 3

    def __init__(self, provider: str = "openai"):
        """
        Initialize the AI face analyzer service.

        Args:
            provider: The LLM provider to use ('openai', 'anthropic', 'groq',
                or 'gemini'). Matching is case-insensitive.

        Raises:
            ValueError: If the provider is unsupported or its API key is
                missing from both the environment and settings.
        """
        # If Gemini is requested but its optional dependency is missing,
        # degrade gracefully to OpenAI instead of failing at model creation.
        if provider.lower() == "gemini" and not GEMINI_AVAILABLE:
            logger.warning("Gemini provider requested but langchain_google_genai is not installed. Falling back to OpenAI.")
            provider = "openai"
        self.provider = provider.lower()
        self._init_model()

    @staticmethod
    def _get_api_key(env_var: str) -> str:
        """Return the API key named *env_var* from the environment or settings.

        Args:
            env_var: Name of the environment variable / settings attribute.

        Returns:
            The non-empty API key string.

        Raises:
            ValueError: If the key is absent from both places.
        """
        api_key = os.environ.get(env_var) or getattr(settings, env_var, "")
        if not api_key:
            raise ValueError(f"{env_var} not found in environment or settings")
        return api_key

    def _init_model(self):
        """Initialize the LLM model based on the selected provider.

        Raises:
            ValueError: For an unsupported provider or a missing API key.
        """
        if self.provider == "openai":
            self.model = ChatOpenAI(
                model="gpt-4o-mini",
                max_tokens=4096,
                temperature=0.2,
                api_key=self._get_api_key("OPENAI_API_KEY"),
            )
        elif self.provider == "anthropic":
            self.model = ChatAnthropic(
                model="claude-3-sonnet-20240229",
                temperature=0.2,
                max_tokens=4096,
                api_key=self._get_api_key("ANTHROPIC_API_KEY"),
            )
        elif self.provider == "groq":
            self.model = ChatGroq(
                model="qwen-2.5-32b",
                temperature=0.2,
                api_key=self._get_api_key("GROQ_API_KEY"),
            )
            logger.warning("Groq doesn't currently support vision models. Falling back to text-only analysis.")
        elif self.provider == "gemini":
            # Defensive re-check: __init__ already falls back, but guard here
            # too in case _init_model is called with a mutated provider.
            if not GEMINI_AVAILABLE:
                logger.error("Gemini provider selected but langchain_google_genai is not installed!")
                logger.info("Falling back to OpenAI provider")
                self.provider = "openai"
                return self._init_model()
            self.model = ChatGoogleGenerativeAI(
                model="gemini-1.5-pro",
                temperature=0.2,
                max_tokens=4096,
                timeout=None,
                max_retries=2,
                api_key=self._get_api_key("GOOGLE_API_KEY"),
                # convert_system_message_to_human=True  # Gemini requires converting system messages to human
            )
        else:
            raise ValueError(f"Unsupported provider: {self.provider}. Use 'openai', 'anthropic', 'groq', or 'gemini'.")

    def _encode_image_to_base64(self, image_path: Union[str, Path]) -> str:
        """
        Encode an image to base64.

        Args:
            image_path: Path to the image file

        Returns:
            Base64 encoded image as a UTF-8 string.

        Raises:
            FileNotFoundError: If the image file does not exist.
        """
        image_path = Path(image_path)
        if not image_path.exists():
            raise FileNotFoundError(f"Image file not found: {image_path}")
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def _prepare_messages(self, image_paths: List[Union[str, Path]], job_title: Optional[str] = None) -> List[BaseMessage]:
        """
        Prepare messages for the LLM with images.

        Args:
            image_paths: List of paths to the images
            job_title: Optional job title for context

        Returns:
            List of messages for the LLM: [system message, human message].
        """
        system_prompt = """You are an expert in professional appearance analysis for job interviews.
Analyze the candidate's picture(s) randomly selected from a video (interview/self-introduction/etc) and provide an assessment of their professional appearance,
focusing on:
1. Overall professional impression
2. Attire and dress code appropriateness
3. Facial expressions and perceived attitude
4. Background and setting appropriateness
5. Visual cues that might indicate personality traits relevant for professional settings
6. Areas of improvement for professional presentation
Output: it must be a valid and structured JSON object.
Provide your analysis in a structured JSON format with these keys:
- professionalImpression: Overall analysis of how professional the candidate appears
- attireAssessment: Analysis of clothing and accessories
- facialExpressionAnalysis: Analysis of facial expressions, perceived emotions and attitude
- backgroundAssessment: Analysis of the photo background and setting
- personalityIndicators: Potential personality traits inferred from visual cues
- recommendationsForImprovement: Specific recommendations for improving professional appearance
- overallScore: A score from 1-10 on professional appearance suitability"""
        system_message = SystemMessage(content=system_prompt)
        # Create the content for the human message: text first, then images.
        content = []
        text_content = "Please analyze this candidate's profile picture"
        if job_title:
            text_content += f" for a {job_title} position"
        text_content += "."
        content.append(text_content)
        # Each provider expects a different multimodal payload shape.
        # NOTE(review): the media type is hard-coded to JPEG regardless of the
        # actual file format — confirm callers only pass JPEG frames.
        for image_path in image_paths:
            try:
                base64_image = self._encode_image_to_base64(image_path)
                if self.provider == "openai":
                    content.append({
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
                    })
                elif self.provider == "anthropic":
                    content.append({
                        "type": "image",
                        "source": {
                            "type": "base64",
                            "media_type": "image/jpeg",
                            "data": base64_image
                        }
                    })
                elif self.provider == "gemini" and GEMINI_AVAILABLE:
                    content.append({
                        "type": "image_url",
                        "image_url": f"data:image/jpeg;base64,{base64_image}"
                    })
                # Groq gets no image content (text-only; see _init_model warning).
            except Exception as e:
                # Skip unreadable images rather than failing the whole analysis.
                logger.error(f"Error encoding image {image_path}: {e}")
        human_message = HumanMessage(content=content)
        # All providers (including Gemini) take the same [system, human]
        # ordering here — the previous if/else had identical branches.
        return [system_message, human_message]

    @staticmethod
    def _parse_response_json(raw: str) -> Dict[str, Any]:
        """Parse an LLM response into a dict, handling markdown code fences.

        The fenced-block extraction runs on the *raw* text — previously the
        fences were stripped first, which made the extraction branches
        unreachable dead code.

        Args:
            raw: The raw response text from the model.

        Returns:
            The parsed JSON object.

        Raises:
            json.JSONDecodeError: If no valid JSON can be extracted.
        """
        if "```json" in raw:
            candidate = raw.split("```json", 1)[1].split("```", 1)[0]
        elif "```" in raw:
            candidate = raw.split("```", 1)[1].split("```", 1)[0]
        else:
            candidate = raw
        return json.loads(candidate.strip())

    @time_it
    def analyze_profile_pictures(
        self,
        image_paths: List[Union[str, Path]],
        job_title: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Analyze candidate profile pictures using the configured LLM.

        Args:
            image_paths: List of paths to the profile pictures (1-3 images)
            job_title: Optional job title for context

        Returns:
            Dictionary with analysis results. On failure (no images, or any
            exception during the LLM call) an empty-analysis placeholder is
            returned instead of raising.
        """
        if not image_paths:
            logger.warning("No images provided for analysis")
            return self._generate_empty_analysis()
        # Limit to the configured maximum number of images.
        if len(image_paths) > self.MAX_IMAGES:
            logger.warning(f"Too many images provided ({len(image_paths)}). Using only the first {self.MAX_IMAGES}.")
            image_paths = image_paths[:self.MAX_IMAGES]
        try:
            logger.info(f"Analyzing {len(image_paths)} profile pictures with {self.provider}")
            messages = self._prepare_messages(image_paths, job_title)
            response = self.model.invoke(messages)
            response_content = response.content
            try:
                result = self._parse_response_json(response_content)
            except json.JSONDecodeError:
                logger.warning(f"Failed to parse JSON from response: {response_content}")
                # Fall back to a minimal dict carrying the unparsed text.
                result = {
                    "professionalImpression": "Could not parse structured analysis",
                    "rawResponse": response_content
                }
            # Attach metadata describing how the analysis was produced.
            result["provider"] = self.provider
            result["imageCount"] = len(image_paths)
            return result
        except Exception as e:
            # Best-effort service: never propagate provider/network errors.
            logger.error(f"Error analyzing profile pictures: {e}")
            return self._generate_empty_analysis()

    def _generate_empty_analysis(self) -> Dict[str, Any]:
        """
        Generate an empty analysis result when analysis fails.

        Returns:
            Empty analysis dictionary with every expected key present, a zero
            score, and an error marker.
        """
        return {
            "professionalImpression": "No analysis available",
            "attireAssessment": "No analysis available",
            "facialExpressionAnalysis": "No analysis available",
            "backgroundAssessment": "No analysis available",
            "personalityIndicators": "No analysis available",
            "recommendationsForImprovement": "No analysis available",
            "overallScore": 0,
            "error": "Failed to analyze profile pictures",
            "provider": self.provider,
            "imageCount": 0
        }