"""Video classification helpers: YOLOv8 face detection + fastai classifier.

Pipeline: YOLOv8 detects faces per frame, the exported fastai learner
classifies each face crop, and per-frame predictions are aggregated into a
single video-level label. `predict_video_with_cam` additionally renders a
Grad-CAM heatmap overlay for the classified face of each frame.
"""

from collections import Counter, defaultdict

import cv2
import numpy as np
import torch
from fastai.vision.all import load_learner
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from ultralytics import YOLO

# Inference is CPU-only: the learner was exported for CPU.
device = torch.device("cpu")
learner = load_learner("best_model.pkl", cpu=True)
yolo_model = YOLO("yolov8n.pt")


def _get_last_conv_layer(model):
    """Return the last ``Conv2d`` module in *model* (Grad-CAM target layer).

    Raises:
        Exception: if the model contains no ``Conv2d`` layer.
    """
    for _, module in reversed(list(model.named_modules())):
        if isinstance(module, torch.nn.Conv2d):
            return module
    raise Exception("No Conv2d layer found")


def predict_video(video_path, max_frames=10):
    """Classify a video by majority vote over per-face predictions.

    Every detected face in each processed frame casts one vote.

    Args:
        video_path: Path to a video file readable by OpenCV.
        max_frames: Maximum number of frames to process.

    Returns:
        ``(label, confidence)``: the winning class name from the learner's
        vocab and the mean confidence of the votes cast for it, or
        ``("unknown", 0.0)`` when no face was detected in any frame.
    """
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    preds_list = []
    conf_list = []
    try:
        while cap.isOpened() and frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            results = yolo_model.predict(frame, conf=0.5, verbose=False)
            boxes = results[0].boxes.xyxy
            for box in boxes:
                x1, y1, x2, y2 = map(int, box.tolist())
                face = frame[y1:y2, x1:x2]
                if face.size == 0:
                    # Degenerate / out-of-frame box: skip this detection.
                    continue
                # OpenCV frames are BGR; the learner expects an RGB PIL image.
                img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                _, _, probs = learner.predict(img)
                pred_class = probs.argmax().item()
                preds_list.append(pred_class)
                conf_list.append(probs[pred_class].item())
            frame_count += 1
    finally:
        # Always release the capture, even if inference raised mid-loop.
        cap.release()

    if not preds_list:
        return "unknown", 0.0

    # Majority vote across all detected faces; confidence is the mean
    # confidence of the votes cast for the winning class.
    final_pred = Counter(preds_list).most_common(1)[0][0]
    final_conf = float(
        np.mean([c for p, c in zip(preds_list, conf_list) if p == final_pred])
    )
    return learner.dls.vocab[final_pred], final_conf


def predict_video_with_cam(video_path, max_frames=5):
    """Classify a video and produce one Grad-CAM overlay per processed frame.

    Only the first detected face of each frame is classified.

    Args:
        video_path: Path to a video file readable by OpenCV.
        max_frames: Maximum number of frames to process.

    Returns:
        ``(results, final_label, final_conf, frames_with_faces, max_frames)``
        where *results* is a list of ``(label, confidence, cam_image)``
        triples (``cam_image`` is an RGB uint8 heatmap overlay),
        *final_label*/*final_conf* identify the class with the highest mean
        per-frame confidence, and *frames_with_faces* counts frames with at
        least one detection. Returns ``([], "unknown", 0.0,
        frames_with_faces, max_frames)`` when no face crop was usable.
    """
    cap = cv2.VideoCapture(video_path)
    learner.model.eval()
    results = []
    preds_list = []
    confs_list = []
    frame_index = 0
    frames_with_faces = 0

    # BUGFIX: the original rebuilt GradCAM (and re-scanned the model for its
    # last conv layer) inside the frame loop. Each GradCAM instance registers
    # forward/backward hooks on the model that were never released, leaking
    # hooks across frames. Build both exactly once per call instead.
    target_layer = _get_last_conv_layer(learner.model)
    cam = GradCAM(model=learner.model, target_layers=[target_layer])

    try:
        while cap.isOpened() and frame_index < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            detections = yolo_model.predict(frame, conf=0.5, verbose=False)
            boxes = detections[0].boxes.xyxy
            if len(boxes) > 0:
                frames_with_faces += 1
                x1, y1, x2, y2 = map(int, boxes[0].tolist())
                face = frame[y1:y2, x1:x2]
                if face.size == 0:
                    # Degenerate box: read the next frame without counting
                    # this one toward the frame budget (matches original flow).
                    continue
                # OpenCV frames are BGR; the learner expects an RGB PIL image.
                img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                _, _, probs = learner.predict(img)
                pred_class = probs.argmax().item()
                conf = probs[pred_class].item()

                # Grad-CAM on the learner-preprocessed tensor; with no explicit
                # targets, GradCAM explains the model's top-scoring class.
                input_tensor = learner.dls.test_dl([img]).one_batch()[0]
                grayscale_cam = cam(input_tensor=input_tensor)[0]
                # NOTE(review): assumes the learner resizes inputs to 224x224
                # so the CAM aligns with the resized crop — confirm against
                # the learner's item transforms.
                img_array = np.array(img.resize((224, 224))) / 255.0
                cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)

                label = learner.dls.vocab[pred_class]
                results.append((label, conf, cam_image))
                preds_list.append(pred_class)
                confs_list.append(conf)
            frame_index += 1
    finally:
        # Always release the capture, even if inference raised mid-loop.
        cap.release()

    # BUGFIX: guard on preds_list, not frames_with_faces — a frame can be
    # counted as having a face yet contribute no prediction (empty crop),
    # which previously made max() below raise on an empty dict.
    if not preds_list:
        return [], "unknown", 0.0, frames_with_faces, max_frames

    # Pick the class with the highest MEAN per-frame confidence.
    class_conf_sum = defaultdict(float)
    class_count = defaultdict(float)
    for pred, conf in zip(preds_list, confs_list):
        class_conf_sum[pred] += conf
        class_count[pred] += 1
    mean_conf = {c: class_conf_sum[c] / class_count[c] for c in class_conf_sum}
    final_pred = max(mean_conf, key=mean_conf.get)
    final_conf = mean_conf[final_pred]
    return results, learner.dls.vocab[final_pred], final_conf, frames_with_faces, max_frames