Spaces:
Sleeping
Sleeping
| import torch | |
| from fastai.vision.all import load_learner | |
| from ultralytics import YOLO | |
| import cv2 | |
| from PIL import Image | |
| import numpy as np | |
| from collections import Counter | |
| from pytorch_grad_cam import GradCAM | |
| from pytorch_grad_cam.utils.image import show_cam_on_image | |
| import numpy as np | |
| from collections import defaultdict | |
# All inference in this module is pinned to the CPU.
device = torch.device("cpu")

# Exported fastai classifier. NOTE: load_learner unpickles the file, so only
# load model files that come from a trusted source.
learner = load_learner(fname="best_model.pkl", cpu=True)

# Ultralytics YOLO detector used to locate the regions that get classified.
yolo_model = YOLO(model="yolov8n.pt")
def predict_video(video_path, max_frames=10):
    """Classify the detections in the first frames of a video by majority vote.

    Up to ``max_frames`` frames are read from ``video_path``. Every box that
    the module-level ``yolo_model`` returns (confidence threshold 0.5) is
    cropped out of the frame and classified with the module-level ``learner``.
    The class predicted most often across all crops wins.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        max_frames: Maximum number of frames read from the start of the video.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is the ``learner.dls.vocab``
        entry of the winning class and ``confidence`` is the mean probability
        of the crops that voted for it. ``("unknown", 0.0)`` is returned when
        no crop could be classified (including when the video cannot be read).
    """
    votes = []  # one (class index, probability) pair per classified crop
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = 0
        while cap.isOpened() and frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1

            results = yolo_model.predict(frame, conf=0.5, verbose=False)
            for box in results[0].boxes.xyxy:
                x1, y1, x2, y2 = map(int, box.tolist())
                face = frame[y1:y2, x1:x2]
                if face.size == 0:
                    # Degenerate box after integer truncation; nothing to classify.
                    continue
                # OpenCV frames are BGR; the classifier is fed an RGB PIL image.
                img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
                _, _, probs = learner.predict(img)
                pred_class = probs.argmax().item()
                votes.append((pred_class, probs[pred_class].item()))
    finally:
        # Release the capture handle even if detection/classification raised.
        cap.release()

    if not votes:
        return "unknown", 0.0

    # Majority vote; ties go to the class that was seen first.
    final_pred = Counter(pred for pred, _ in votes).most_common(1)[0][0]
    final_conf = float(
        np.mean([conf for pred, conf in votes if pred == final_pred])
    )
    return learner.dls.vocab[final_pred], final_conf
def predict_video_with_cam(video_path, max_frames=5):
    """Classify the first detection of each frame and build Grad-CAM overlays.

    Up to ``max_frames`` frames are read from ``video_path``. For each frame
    in which the module-level ``yolo_model`` returns at least one box
    (confidence threshold 0.5), the first box is cropped, classified with the
    module-level ``learner`` and explained with Grad-CAM on the last
    ``Conv2d`` layer of ``learner.model``.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        max_frames: Maximum number of frames read from the start of the video.

    Returns:
        A 5-tuple ``(results, final_label, final_conf, frames_with_faces,
        max_frames)``:

        * ``results`` - list of ``(label, confidence, cam_image)`` per
          classified crop.
        * ``final_label`` / ``final_conf`` - the class with the highest mean
          confidence over its own predictions, and that mean. Note this is
          not a majority vote.
        * ``frames_with_faces`` - number of frames with at least one box.
        * ``max_frames`` - echoed back unchanged.

        When no crop could be classified the result is
        ``([], "unknown", 0.0, frames_with_faces, max_frames)``.

    Raises:
        Exception: If a crop is classified but ``learner.model`` contains no
            ``torch.nn.Conv2d`` layer.
    """

    def get_last_conv_layer(m):
        """Return the last ``torch.nn.Conv2d`` in ``m.named_modules()`` order."""
        for _, module in reversed(list(m.named_modules())):
            if isinstance(module, torch.nn.Conv2d):
                return module
        raise Exception("No Conv2d layer found")

    learner.model.eval()
    results = []
    preds_list = []
    confs_list = []
    frame_index = 0
    frames_with_faces = 0
    cam = None  # built on first use so a video without detections never needs it

    cap = cv2.VideoCapture(video_path)
    try:
        while cap.isOpened() and frame_index < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # Count the frame as soon as it is read so that skipped frames
            # still count towards max_frames.
            frame_index += 1

            detections = yolo_model.predict(frame, conf=0.5, verbose=False)
            boxes = detections[0].boxes.xyxy
            if len(boxes) == 0:
                continue
            frames_with_faces += 1

            # Only the first box of each frame is used.
            x1, y1, x2, y2 = map(int, boxes[0].tolist())
            face = frame[y1:y2, x1:x2]
            if face.size == 0:
                continue

            # OpenCV frames are BGR; the classifier is fed an RGB PIL image.
            img = Image.fromarray(cv2.cvtColor(face, cv2.COLOR_BGR2RGB))
            _, _, probs = learner.predict(img)
            pred_class = probs.argmax().item()
            conf = probs[pred_class].item()

            if cam is None:
                # The model and target layer do not change between frames,
                # so one GradCAM instance is reused for the whole video.
                cam = GradCAM(
                    model=learner.model,
                    target_layers=[get_last_conv_layer(learner.model)],
                )
            input_tensor = learner.dls.test_dl([img]).one_batch()[0]
            grayscale_cam = cam(input_tensor=input_tensor)[0]

            # Size the overlay image from the CAM itself instead of assuming
            # the learner resizes to 224x224 (PIL expects (width, height)).
            cam_height, cam_width = grayscale_cam.shape
            img_array = np.array(img.resize((cam_width, cam_height))) / 255.0
            cam_image = show_cam_on_image(img_array, grayscale_cam, use_rgb=True)

            results.append((learner.dls.vocab[pred_class], conf, cam_image))
            preds_list.append(pred_class)
            confs_list.append(conf)
    finally:
        # Free the capture handle and the Grad-CAM hooks even on errors.
        cap.release()
        if cam is not None:
            cam.activations_and_grads.release()

    if not preds_list:
        # Covers both "no detections" and "only empty crops"; the latter used
        # to reach max() with an empty mapping.
        return [], "unknown", 0.0, frames_with_faces, max_frames

    # Mean confidence per predicted class.
    class_scores = defaultdict(float)
    total_weight = defaultdict(float)
    for pred, conf in zip(preds_list, confs_list):
        class_scores[pred] += conf
        total_weight[pred] += 1
    weighted_avg = {c: class_scores[c] / total_weight[c] for c in class_scores}

    final_pred = max(weighted_avg, key=weighted_avg.get)
    final_conf = weighted_avg[final_pred]
    final_label = learner.dls.vocab[final_pred]
    return results, final_label, final_conf, frames_with_faces, max_frames