import os
import glob
import cv2
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models

# 1. Dataset Definition
class CatLandmarkDataset(Dataset):
    """Dataset of cat images paired with landmark annotation files.

    Every ``*.jpg`` directly inside one of ``root_dirs`` is used as a sample
    if a sibling file named ``<image path>.cat`` exists. The annotation file
    is whitespace-separated: the first token is skipped (presumably the point
    count -- verify against the dataset description) and the remaining tokens
    are read as ``x y`` pairs in pixel coordinates of the original image.

    Args:
        root_dirs: Iterable of folder paths to scan. Folders that do not
            exist are silently skipped.
        img_size: Side length, in pixels, of the square image returned by
            ``__getitem__``.
    """

    def __init__(self, root_dirs, img_size=224):
        self.img_size = img_size
        self.image_paths = []
        self.label_paths = []

        for folder in root_dirs:
            if not os.path.isdir(folder):
                continue
            # Escape the folder so glob metacharacters in its name are taken
            # literally, and sort because glob order depends on the filesystem;
            # a stable order keeps sample indices reproducible across runs.
            jpg_pattern = os.path.join(glob.escape(folder), "*.jpg")
            for img_path in sorted(glob.glob(jpg_pattern)):
                cat_path = img_path + ".cat"
                if os.path.exists(cat_path):
                    self.image_paths.append(img_path)
                    self.label_paths.append(cat_path)

        print(f"[DATA] Total matching cat images: {len(self.image_paths)}")

    def __len__(self):
        """Return the number of image/annotation pairs found."""
        return len(self.image_paths)

    @staticmethod
    def _read_landmarks(label_path):
        """Parse an annotation file into a float32 array of shape (N, 2).

        Raises:
            ValueError: If the file holds no coordinates, an odd number of
                coordinates, or a token that is not a number.
        """
        with open(label_path, 'r') as f:
            tokens = f.read().split()

        # tokens[0] is not a coordinate and is skipped.
        coords = np.array([float(t) for t in tokens[1:]], dtype=np.float32)
        if coords.size == 0 or coords.size % 2 != 0:
            raise ValueError(
                f"Malformed landmark file {label_path!r}: expected a non-empty, "
                f"even number of coordinates, got {coords.size}"
            )
        return coords.reshape(-1, 2)

    def __getitem__(self, idx):
        """Return ``(image, landmarks)`` for sample ``idx``.

        Returns:
            image: float32 tensor of shape (3, img_size, img_size), RGB,
                values scaled to [0, 1].
            landmarks: flat float32 tensor ``[x0, y0, x1, y1, ...]`` with each
                coordinate divided by the original image width/height.

        Raises:
            OSError: If the image file cannot be decoded.
            ValueError: If the annotation file is malformed.
        """
        img_path = self.image_paths[idx]

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_path)
        if img is None:
            raise OSError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        orig_h, orig_w = img.shape[:2]

        landmarks = self._read_landmarks(self.label_paths[idx])

        # Resize image to img_size x img_size (aspect ratio is not preserved)
        img_resized = cv2.resize(img, (self.img_size, self.img_size))

        # Normalize coordinates to 0-1. Scaling to the resized image and then
        # dividing by img_size cancels out to a division by the original size.
        landmarks[:, 0] /= orig_w
        landmarks[:, 1] /= orig_h

        # Convert to PyTorch format (C, H, W)
        img_tensor = torch.tensor(img_resized, dtype=torch.float32).permute(2, 0, 1) / 255.0
        landmarks_tensor = torch.tensor(landmarks.flatten(), dtype=torch.float32)

        return img_tensor, landmarks_tensor

# 2. Model Architecture (MobileNetV3 Small)
def get_model(num_outputs=18, pretrained=True):
    """Build a MobileNetV3-Small network with a regression head.

    Args:
        num_outputs: Size of the final linear layer. The default of 18
            corresponds to 9 landmark points x 2 coordinates.
        pretrained: When True, start from ``MobileNet_V3_Small_Weights.DEFAULT``
            (torchvision may download them); when False, use random
            initialization.

    Returns:
        The torchvision MobileNetV3-Small model whose last classifier layer
        has been replaced by ``nn.Linear(in_features, num_outputs)``.
    """
    # Lightest and optimized architecture for low-end devices
    weights = models.MobileNet_V3_Small_Weights.DEFAULT if pretrained else None
    model = models.mobilenet_v3_small(weights=weights)

    # Swap the final classification layer (index 3 of the classifier) for a
    # regression layer that predicts coordinate values instead of class scores.
    in_features = model.classifier[3].in_features
    model.classifier[3] = nn.Linear(in_features, num_outputs)

    return model

# 3. Training Function
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the mean per-sample loss.

    When ``optimizer`` is given the model is put in training mode and updated
    after every batch; otherwise it is put in eval mode and run without
    gradient tracking. Returns NaN if the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    total_loss = 0.0
    samples_seen = 0
    with torch.set_grad_enabled(training):
        for images, landmarks in loader:
            images = images.to(device)
            landmarks = landmarks.to(device)

            if training:
                optimizer.zero_grad()
            loss = criterion(model(images), landmarks)
            if training:
                loss.backward()
                optimizer.step()

            # criterion averages over the batch; weight by batch size so the
            # final figure is a true per-sample mean even with a short batch.
            batch_size = images.size(0)
            total_loss += loss.item() * batch_size
            samples_seen += batch_size

    # Divide by what was actually processed rather than len(loader.dataset):
    # the two differ with drop_last or a sub-sampling sampler, and an empty
    # loader must not raise ZeroDivisionError.
    return total_loss / samples_seen if samples_seen else float("nan")


def train_model(model, train_loader, val_loader, epochs=10, lr=0.001, device="cpu"):
    """Train ``model`` with Adam and MSE loss, validating after every epoch.

    Args:
        model: Network mapping an image batch to a batch of landmark vectors.
        train_loader: Iterable yielding ``(images, landmarks)`` training batches.
        val_loader: Iterable yielding ``(images, landmarks)`` validation
            batches. May be empty, in which case the validation loss is
            reported as NaN.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        device: Device the model and every batch are moved to.

    Returns:
        The model as returned by ``model.to(device)``, left in eval mode
        after the last validation pass (when ``epochs`` > 0).
    """
    model = model.to(device)
    criterion = nn.MSELoss()  # Mean Squared Error is used for coordinate predictions
    optimizer = optim.Adam(model.parameters(), lr=lr)

    print(f"\n[TRAINING] Starting... Device: {device}")

    for epoch in range(epochs):
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)
        val_loss = _run_epoch(model, val_loader, criterion, device)

        print(f"Epoch [{epoch+1}/{epochs}] -> Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f}")

    return model

# 4. Export to ONNX Format
def export_to_onnx(model, save_path="cat_landmark_model.onnx", img_size=224):
    """Export ``model`` to an ONNX file by tracing it with a dummy image batch.

    Args:
        model: Trained network; it is switched to eval mode before export.
        save_path: Destination path of the ``.onnx`` file.
        img_size: Height and width of the dummy input. Should match the
            ``img_size`` the model was trained with.
    """
    model.eval()

    # Put the dummy input on the model's device; a model without parameters
    # has no device to report, so fall back to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Dummy input to pass through the model (Batch_size=1, Channel=3, H=W=img_size)
    dummy_input = torch.randn(1, 3, img_size, img_size, device=device)

    print("\n[ONNX] Converting model to ONNX format...")
    torch.onnx.export(
        model,
        dummy_input,
        save_path,
        export_params=True,
        opset_version=11,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
    )
    print(f"[ONNX] Successfully saved: {save_path}")

# Main Execution
if __name__ == "__main__":
    # Dataset folders CAT_00 .. CAT_06 under /content (adjust to your layout)
    data_dirs = [f'/content/CAT_{i:02d}' for i in range(7)]

    # Prefer the GPU when CUDA is available, otherwise fall back to the CPU
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Step 1: collect every image that has a matching annotation file
    full_dataset = CatLandmarkDataset(root_dirs=data_dirs, img_size=224)
    n_total = len(full_dataset)

    if n_total != 0:
        # 90% of the samples go to training, the remainder to validation
        train_size = int(0.9 * n_total)
        val_size = n_total - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(
            full_dataset, [train_size, val_size]
        )

        # Only the training batches are shuffled
        train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

        # Step 2: build the network
        cat_model = get_model()

        # Step 3: train (5 epochs keeps a Colab run short; raise for better results)
        trained_model = train_model(
            cat_model,
            train_loader,
            val_loader,
            epochs=5,
            lr=0.001,
            device=device,
        )

        # Step 4: keep the raw PyTorch weights as a backup
        torch.save(trained_model.state_dict(), "cat_landmark_model.pth")
        print("\n[SAVE] PyTorch weights saved (cat_landmark_model.pth)")

        # Step 5: produce the ONNX file intended for low-end devices
        export_to_onnx(trained_model, save_path="cat_landmark_model.onnx")
    else:
        print("[ERROR] No data found in the specified folders! Please check file paths.")