#%%
# Cell 1: build the nose regressor, load the trained weights, and switch the
# network to inference mode.
import torch

from model import *  # star import supplies ResNetNoseRegressor used below

model_path = "best_model.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = NosePointRegressor(input_channels=3)
model = ResNetNoseRegressor(pretrained=False)  # Set pretrained=False to load custom weights
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# %%
# Cell 2: open the source video.
import os

import cv2
import numpy as np

video_path = "/fs/scratch/PAS2099/danielf/medical/Animal_Behavior_Test/videos/WIN_20250529_15_19_13_Pro.mp4"
cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a bad path; fail here instead of at the
# first read so the error points at the real cause.
if not cap.isOpened():
    raise IOError(f"Could not open video: {video_path}")
# NOTE: `cap` is intentionally left open so the next cell can be re-run with
# a different frame index; call cap.release() once you are done with it.

#%%
# Cell 3: grab one frame, crop it, run the model, and plot the prediction.
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

random_frame = 1000
cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame)
ret, frame = cap.read()
# read() reports failure through `ret` (e.g. index past the end of the
# video); without this check the slicing below fails with a TypeError.
if not ret or frame is None:
    raise RuntimeError(f"Could not read frame {random_frame} from {video_path}")

# Crop rectangle as (x_min, y_min, x_max, y_max) in pixels: rows are sliced
# with the y values and columns with the x values.
crop = (500, 550, 800, 620)
frame = frame[crop[1]:crop[3], crop[0]:crop[2]]  # Crop the frame to the region of interest
if frame.size == 0:
    raise ValueError(f"Crop {crop} lies outside frame {random_frame}")

# OpenCV delivers BGR; PIL and matplotlib expect RGB.
image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
orig_w, orig_h = image.size

transform = transforms.Compose([
    # presumably the resolution the model was trained on -- TODO confirm
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])

image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
image_tensor = image_tensor.to(device)

# === Inference ===
# A single forward pass; `pred` is taken from the same output instead of
# running the model a second time.
with torch.no_grad():
    output = model(image_tensor)
pred = output[0].cpu().numpy()  # first (only) item of the batch
print(pred)

# === Map back to original resolution ===
# The two outputs are treated as fractions of the crop width and height.
x_pred = int(pred[0] * orig_w)
y_pred = int(pred[1] * orig_h)

plt.figure(figsize=(6, 4))
plt.imshow(image)
plt.scatter([x_pred], [y_pred], c='red', s=40, label='Predicted Nose')
plt.title(f'Prediction: ({x_pred}, {y_pred})')
plt.legend()
plt.tight_layout()
plt.show()