#%%
import torch

from model import *

# Checkpoint file whose state dict is loaded into the regressor below.
model_path = "best_model.pth"

# Use the GPU when one is available, otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Alternative architecture, kept for reference:
# model = NosePointRegressor(input_channels=3)
# pretrained=False because the weights are taken from `model_path` right after.
model = ResNetNoseRegressor(pretrained=False)
checkpoint_weights = torch.load(model_path, map_location=device)
model.load_state_dict(checkpoint_weights)
del checkpoint_weights  # the values now live in the model; drop the extra copy

model.to(device)
model.eval()  # inference mode

# %%
import os
import numpy as np
import cv2

# Recording from which a frame is sampled for the inference check.
video_path = "/fs/scratch/PAS2099/danielf/medical/Animal_Behavior_Test/videos/WIN_20250529_15_19_13_Pro.mp4"

cap = cv2.VideoCapture(video_path)
# VideoCapture does not raise on a missing or undecodable file; it only
# reports the problem through isOpened(). Fail here with a clear message
# instead of on the first read.
if not cap.isOpened():
    raise OSError(f"Could not open video: {video_path}")

#%%
# Index of the frame to pull from the video for this sanity check.
random_frame = 1000
cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame)
ret, frame = cap.read()
# cap.read() reports failure through its return values rather than raising
# (e.g. when the index is past the end of the video), so check explicitly
# instead of crashing on the slice below with an unrelated TypeError.
if not ret or frame is None:
    raise RuntimeError(f"Could not read frame {random_frame} from the video")

# Region of interest as (x_min, y_min, x_max, y_max) in pixels.
crop = (500, 550, 800, 620)
frame = frame[crop[1]:crop[3], crop[0]:crop[2]]  # rows are y, columns are x
# Slicing outside the frame bounds silently yields an empty array.
if frame.size == 0:
    raise ValueError(f"Crop {crop} lies outside frame {random_frame}")

from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
# OpenCV delivers BGR; convert to RGB before handing the pixels to PIL.
image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
orig_w, orig_h = image.size

# Resize to the 64x64 input fed to the model and convert to a float tensor.
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
image_tensor = transform(image).unsqueeze(0)  # Add batch dimension
image_tensor = image_tensor.to(device)

# === Inference ===
# Single forward pass; `output` keeps the batch dimension, `pred` is the
# first (only) sample as a NumPy array.
with torch.no_grad():
    output = model(image_tensor)
pred = output[0].cpu().numpy()  # presumably (x, y) normalized to [0, 1] -- confirm against training code
print(pred)

# === Map back to original resolution ===
# Scale the prediction by the cropped image size to get pixel coordinates.
x_pred = int(pred[0] * orig_w)
y_pred = int(pred[1] * orig_h)

plt.figure(figsize=(6, 4))
plt.imshow(image)
plt.scatter([x_pred], [y_pred], c='red', s=40, label='Predicted Nose')
plt.title(f'Prediction: ({x_pred}, {y_pred})')
plt.legend()
plt.tight_layout()
plt.show()