# Face Emotion + Age Detection service — FastAPI REST API with a mounted Gradio UI.
import gradio as gr
from transformers import AutoImageProcessor, SiglipForImageClassification
from PIL import Image
import torch
import numpy as np
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import io
import logging
import tensorflow as tf
from tensorflow import keras
import cv2
# ----------------- LOGGER SETUP -----------------
# Single module-wide logger; every component below logs through it.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger("face-analysis")
# ----------------- LOAD MODELS -----------------
# Emotion model (H5 format): Keras model loaded from a local file.
# preprocess_image_for_emotion feeds it (1, 48, 48, 1) grayscale batches.
H5_MODEL_PATH = "my_model3.h5"
INPUT_SIZE = (48, 48)  # target size passed to PIL.Image.resize
emotion_model = keras.models.load_model(H5_MODEL_PATH)
logger.info("Emotion model loaded successfully")
logger.info(f"Model input shape: {emotion_model.input_shape}")
logger.info(f"Model output shape: {emotion_model.output_shape}")
# Age model: SigLIP image classifier fetched from the Hugging Face hub.
age_model_name = "prithivMLmods/facial-age-detection"
age_model = SiglipForImageClassification.from_pretrained(age_model_name)
age_processor = AutoImageProcessor.from_pretrained(age_model_name)
# Face detection cascade: try a local XML first; the fallback below uses
# the copy bundled with the OpenCV installation.
HAAR_CASCADE_PATH = 'haarcascade_frontalface_default.xml'
face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)
# Verify cascade loaded successfully
if face_cascade.empty():
    logger.error(f"Failed to load Haar Cascade from {HAAR_CASCADE_PATH}")
    logger.warning("Attempting to load from OpenCV data directory...")
    # Try loading from OpenCV's data directory
    HAAR_CASCADE_PATH = cv2.data.haarcascades + 'haarcascade_frontalface_default.xml'
    face_cascade = cv2.CascadeClassifier(HAAR_CASCADE_PATH)
    if face_cascade.empty():
        # Service still starts; detect_and_crop_face will fall back to full images.
        logger.error("Still failed to load Haar Cascade. Face detection will not work.")
    else:
        logger.info(f"Haar Cascade loaded from OpenCV data: {HAAR_CASCADE_PATH}")
else:
    logger.info(f"Haar Cascade loaded successfully from {HAAR_CASCADE_PATH}")
# Emotion classes — index order presumed to match the H5 model's output
# vector (7 classes); TODO confirm against the training code.
emotions = ["Angry", "Disgust", "Fear", "Happy", "Sad", "Surprise", "Neutral"]
# Age labels keyed by stringified class index, as used by predict_age.
id2label = {
    "0": "age 01-10",
    "1": "age 11-20",
    "2": "age 21-30",
    "3": "age 31-40",
    "4": "age 41-55",
    "5": "age 56-65",
    "6": "age 66-80",
    "7": "age 80+"
}
# ----------------- FACE DETECTION -----------------
def detect_and_crop_face(image: Image.Image):
    """
    Detect the most prominent face in *image* and crop it.

    Args:
        image: input PIL image, any mode (RGB, RGBA, L, P, ...).

    Returns:
        (cropped_face, message, success) — ``success`` is always True because
        the function degrades gracefully: when no face is found, or detection
        errors out, the original image is returned with a warning message.
    """
    try:
        # Fix: normalize to 3-channel RGB before touching OpenCV. Previously,
        # RGBA/palette/LA uploads produced arrays that cv2.cvtColor(BGR2GRAY)
        # cannot handle, so valid images silently hit the error fallback.
        rgb_array = np.asarray(image.convert("RGB"))
        img_array = cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR)
        logger.debug(f"Image shape: {img_array.shape}, dtype: {img_array.dtype}")
        # Haar cascades operate on single-channel images.
        gray = cv2.cvtColor(img_array, cv2.COLOR_BGR2GRAY)
        logger.debug(f"Grayscale shape: {gray.shape}")
        # Lenient parameters so small or slightly off-angle faces still match.
        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.1,   # more sensitive (was 1.3)
            minNeighbors=3,    # more lenient (was 5)
            minSize=(30, 30),  # minimum face size
            flags=cv2.CASCADE_SCALE_IMAGE
        )
        logger.info(f"Face detection result: {len(faces)} face(s) detected")
        if len(faces) == 0:
            logger.warning("No face detected in image - returning original image")
            # Fallback: analyze the full image rather than failing the request.
            return image, "⚠️ No face detected - using full image", True
        if len(faces) == 1:
            x, y, w, h = faces[0]
            crop_img = image.crop((x, y, x + w, y + h))
            logger.info(f"✓ Face detected and cropped: position ({x},{y}), size {w}x{h}")
            return crop_img, f"✓ Face detected at ({x},{y}), size {w}x{h}", True
        # Multiple faces: keep the one with the largest bounding-box area.
        logger.warning(f"Multiple faces detected ({len(faces)}), using largest face")
        x, y, w, h = max(faces, key=lambda face: face[2] * face[3])
        crop_img = image.crop((x, y, x + w, y + h))
        return crop_img, f"⚠️ {len(faces)} faces detected, using largest one", True
    except Exception as e:
        logger.error(f"Face detection error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        # Degrade gracefully rather than propagate the exception.
        return image, "⚠️ Face detection error - using full image", True
# ----------------- PREDICT FUNCTIONS -----------------
def preprocess_image_for_emotion(image: Image.Image):
    """
    Convert a PIL image into the tensor layout the H5 emotion model expects:
    (1, 48, 48, 1) — one normalized 48x48 grayscale frame.
    """
    gray = image.convert("L").resize(INPUT_SIZE)
    # Scale to [0, 1] and add channel + batch axes in a single indexing step.
    frame = np.asarray(gray, dtype=np.float32) / 255.0
    batch = frame[np.newaxis, :, :, np.newaxis]
    logger.debug(f"Preprocessed shape: {batch.shape}, dtype: {batch.dtype}")
    return batch
def predict_emotion(image: Image.Image):
    """
    Classify the emotion shown in *image* with the Keras model.

    Returns:
        dict with predicted_emotion / confidence / all_confidences,
        or {"error": str} when prediction fails.
    """
    try:
        batch = preprocess_image_for_emotion(image)
        probs = emotion_model.predict(batch, verbose=0)[0]
        top = np.argmax(probs)
        result = {
            "predicted_emotion": emotions[top],
            "confidence": round(float(probs[top]), 4),
            "all_confidences": {label: float(probs[i]) for i, label in enumerate(emotions)}
        }
        logger.info(f"Predicted Emotion: {result['predicted_emotion']} (Confidence: {result['confidence']})")
        return result
    except Exception as e:
        logger.error(f"Emotion prediction error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return {"error": str(e)}
def predict_age(image: Image.Image):
    """
    Estimate the age bracket of the face in *image* with the SigLIP classifier.

    Returns:
        dict with predicted_age / confidence / all_confidences,
        or {"error": str} when prediction fails.
    """
    try:
        inputs = age_processor(images=image.convert("RGB"), return_tensors="pt")
        with torch.no_grad():
            outputs = age_model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=1).squeeze().tolist()
        prediction = {id2label[str(i)]: round(p, 3) for i, p in enumerate(probs)}
        # probs is already a plain Python list — a direct argmax avoids the
        # pointless list -> tensor -> argmax -> int round trip.
        idx = max(range(len(probs)), key=probs.__getitem__)
        result = {
            "predicted_age": id2label[str(idx)],
            "confidence": round(probs[idx], 4),
            "all_confidences": prediction
        }
        logger.info(f"Predicted Age Group: {result['predicted_age']} (Confidence: {result['confidence']})")
        return result
    except Exception as e:
        logger.error(f"Age prediction error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return {"error": str(e)}
# ----------------- FASTAPI APP -----------------
app = FastAPI()
# Fully open CORS so browser frontends on any origin can call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# very permissive — tighten for production deployments.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get("/")
async def root():
    """Service descriptor: name, status, and a map of available endpoints."""
    endpoints = {
        "GET /": "API information",
        "GET /health": "Health check",
        "POST /predict": "Upload image for emotion and age prediction",
        "GET /gradio": "Gradio web interface"
    }
    return {
        "message": "Face Emotion + Age Detection API",
        "status": "running",
        "endpoints": endpoints
    }
@app.get("/health")
async def health():
    """Liveness probe reporting per-component load status and model shapes."""
    cascade_ok = not face_cascade.empty()
    return {
        "status": "ok",
        "emotion_model": "loaded",
        "age_model": "loaded",
        "face_cascade": "loaded" if cascade_ok else "failed",
        "emotion_input_shape": str(emotion_model.input_shape),
        "emotion_output_shape": str(emotion_model.output_shape)
    }
@app.post("/predict")
async def predict(file: UploadFile = File(...)):
    """
    REST entry point: read the uploaded image, crop the face (with full-image
    fallback inside detect_and_crop_face), and return emotion + age as JSON.
    """
    try:
        payload = await file.read()
        image = Image.open(io.BytesIO(payload))
        cropped_face, face_msg, _ = detect_and_crop_face(image)
        # Both models run on the cropped face (or the full image on fallback).
        emotion_result = predict_emotion(cropped_face)
        age_result = predict_age(cropped_face)
        logger.info(f"API Response -> Emotion: {emotion_result.get('predicted_emotion')} | Age: {age_result.get('predicted_age')}")
        body = {
            "face_detection": face_msg,
            "emotion": emotion_result,
            "age": age_result
        }
        return JSONResponse(content=body)
    except Exception as e:
        logger.error(f"API Error: {e}")
        import traceback
        logger.error(traceback.format_exc())
        return JSONResponse(content={"error": str(e)}, status_code=500)
# ----------------- GRADIO DEMO -----------------
def gradio_wrapper(image):
    """
    Gradio callback: run the full pipeline on one uploaded image.

    Returns the 7 outputs the interface expects, in order: emotion label,
    emotion probabilities, age label, age probabilities, cropped face,
    48x48 grayscale preview, and the face-detection status message.
    """
    if image is None:
        return "No image provided", {}, "No image provided", {}, None, None, "No image uploaded"
    cropped_face, face_msg, _ = detect_and_crop_face(image)
    # Reuse the model preprocessing to build the 48x48 grayscale preview image.
    batch = preprocess_image_for_emotion(cropped_face)
    preview = Image.fromarray((batch[0, :, :, 0] * 255).astype(np.uint8), mode='L')
    emotion_result = predict_emotion(cropped_face)
    age_result = predict_age(cropped_face)
    if "error" in emotion_result or "error" in age_result:
        error_msg = emotion_result.get("error", "") or age_result.get("error", "")
        return f"Error: {error_msg}", {}, f"Error: {error_msg}", {}, cropped_face, None, face_msg
    emotion_label = f"{emotion_result['predicted_emotion']} ({emotion_result['confidence']:.2f})"
    age_label = f"{age_result['predicted_age']} ({age_result['confidence']:.2f})"
    return (
        emotion_label,
        emotion_result["all_confidences"],
        age_label,
        age_result["all_confidences"],
        cropped_face,   # cropped face, or the full image on fallback
        preview,        # what the emotion model actually saw
        face_msg        # face-detection status
    )
# Gradio interface: one image input, seven outputs matching the tuple
# returned by gradio_wrapper (positional — order matters).
demo = gr.Interface(
    fn=gradio_wrapper,
    inputs=gr.Image(type="pil", label="Upload Face Image"),
    outputs=[
        gr.Label(num_top_classes=1, label="Top Emotion"),
        gr.Label(label="Emotion Probabilities"),
        gr.Label(num_top_classes=1, label="Top Age Group"),
        gr.Label(label="Age Probabilities"),
        gr.Image(type="pil", label="Detected & Cropped Face"),
        gr.Image(type="pil", label="Processed Image (48x48 Grayscale)"),
        gr.Textbox(label="Face Detection Status")
    ],
    title="Face Emotion + Age Detection with Face Cropping",
    description="Upload an image with a face. The system will:\n1. Detect and crop the face (or use full image if no face found)\n2. Analyze emotion (Angry, Happy, etc.)\n3. Estimate age group (01-10, 11-20, ... 80+)\n4. Show the processing steps",
    examples=None
)
# Mount Gradio at /gradio
app = gr.mount_gradio_app(app, demo, path="/gradio")
# ----------------- RUN -----------------
if __name__ == "__main__":
    # Emit a startup banner with the key model facts, then launch uvicorn.
    banner = "=" * 70
    logger.info(banner)
    logger.info("Starting Face Emotion + Age Detection Server")
    logger.info(banner)
    logger.info(f"Emotion Model Input Shape: {emotion_model.input_shape}")
    logger.info(f"Emotion Model Output Shape: {emotion_model.output_shape}")
    logger.info(f"Number of emotion classes: {len(emotions)}")
    logger.info("")
    logger.info("Server will be available at:")
    logger.info(" - Main API: http://0.0.0.0:7860")
    logger.info(" - Gradio UI: http://0.0.0.0:7860/gradio")
    logger.info(banner)
    uvicorn.run(app, host="0.0.0.0", port=7860)