Spaces:
Sleeping
Sleeping
add train folder
Browse files- train/dataset.py +40 -0
- train/eval.py +62 -0
- train/model.py +52 -0
- train/train.py +91 -0
train/dataset.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import torch
|
| 4 |
+
from torch.utils.data import Dataset
|
| 5 |
+
from PIL import Image
|
| 6 |
+
import torchvision.transforms as T
|
| 7 |
+
|
| 8 |
+
class NosePointDataset(Dataset):
    """Nose-click regression dataset.

    Each sample is a PNG image paired with a same-named ``.txt`` file holding
    a single "x,y" pixel coordinate (the clicked nose position).

    Per item:
        image: FloatTensor [C, H, W] in [0, 1], resized to ``image_size``.
        coord: FloatTensor [2], (x, y) normalized by the ORIGINAL image size.
    """

    def __init__(self, root = "/fs/scratch/PAS2099/danielf/medical/nose_clicks_lazy", image_size = (64, 64), device='cpu'):
        self.root = root
        # Keep only the PNGs, in deterministic (sorted) order.
        self.files = sorted(name for name in os.listdir(root) if name.endswith('.png'))
        self.device = device

        self.base_transform = T.Compose([
            T.Resize(image_size),
            T.ToTensor(),  # scales to [0, 1], shape (C, H, W)
        ])

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        png_name = self.files[idx]
        image = Image.open(os.path.join(self.root, png_name)).convert('RGB')
        orig_w, orig_h = image.size

        # Matching label file: "<name>.txt" containing "x,y" in pixel units.
        with open(os.path.join(self.root, png_name.replace('.png', '.txt')), 'r') as f:
            parts = f.read().strip().split(',')
        px, py = float(parts[0]), float(parts[1])

        tensor_image = self.base_transform(image).to(self.device)  # [C, H, W], [0, 1]
        # Normalize the click by the ORIGINAL resolution so it is resize-invariant.
        coord = torch.tensor([px / orig_w, py / orig_h], dtype=torch.float32).to(self.device)

        return tensor_image, coord
|
train/eval.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
# --- Load the trained nose-point regressor from its checkpoint ---
import torch

from model import *

model_path = "best_model.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# model = NosePointRegressor(input_channels=3)
model = ResNetNoseRegressor(pretrained=False)  # pretrained=False: weights come from the checkpoint below
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# %%
# --- Grab one frame from the test video ---
import os
import numpy as np
import cv2

video_path = "/fs/scratch/PAS2099/danielf/medical/Animal_Behavior_Test/videos/WIN_20250529_15_19_13_Pro.mp4"

cap = cv2.VideoCapture(video_path)

#%%
random_frame = 1000
cap.set(cv2.CAP_PROP_POS_FRAMES, random_frame)
ret, frame = cap.read()
if not ret:
    # Fail loudly here instead of crashing later on `frame[...]` with frame=None.
    raise RuntimeError(f"Failed to read frame {random_frame} from {video_path}")
crop = (500, 550, 800, 620)  # (x1, y1, x2, y2) region of interest
frame = frame[crop[1]:crop[3], crop[0]:crop[2]]  # Crop the frame to the region of interest

from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

# OpenCV decodes BGR; the model was trained on RGB PIL images.
image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
orig_w, orig_h = image.size

# Must match the training-time preprocessing (Resize -> ToTensor in [0, 1]).
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
])
image_tensor = transform(image).unsqueeze(0).to(device)  # [1, C, 64, 64]

# === Inference ===
# Single forward pass (the original ran the model twice and discarded the
# first result).
with torch.no_grad():
    pred = model(image_tensor)[0].cpu().numpy()  # shape: (2,), normalized (x, y) in [0, 1]
print(pred)

# === Map back to original resolution ===
x_pred = int(pred[0] * orig_w)
y_pred = int(pred[1] * orig_h)

plt.figure(figsize=(6, 4))
plt.imshow(image)
plt.scatter([x_pred], [y_pred], c='red', s=40, label='Predicted Nose')
plt.title(f'Prediction: ({x_pred}, {y_pred})')
plt.legend()
plt.tight_layout()
plt.show()
|
train/model.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
class NosePointRegressor(nn.Module):
    """Small CNN that regresses one normalized (x, y) point from an image.

    The encoder halves the spatial resolution three times, global-average
    pools to a 64-dim feature, and a tiny MLP head squashes the prediction
    into [0, 1] with a sigmoid.
    """

    def __init__(self, input_channels=1):
        super().__init__()

        # Three stride-2 conv stages: [B, C, H, W] -> [B, 64, H/8, W/8],
        # then global average pool to [B, 64, 1, 1].
        stages = []
        for in_ch, out_ch in ((input_channels, 16), (16, 32), (32, 64)):
            stages.append(nn.Conv2d(in_ch, out_ch, kernel_size=3, stride=2, padding=1))
            stages.append(nn.ReLU())
        stages.append(nn.AdaptiveAvgPool2d((1, 1)))
        self.encoder = nn.Sequential(*stages)

        # MLP head -> (x, y), each constrained to [0, 1] by the sigmoid.
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 2),
            nn.Sigmoid(),
        )

    def forward(self, x):
        # Returns shape [B, 2] with values in [0, 1].
        return self.fc(self.encoder(x))
|
| 31 |
+
|
| 32 |
+
import torchvision.models as models
|
| 33 |
+
import torch.nn as nn
|
| 34 |
+
|
| 35 |
+
class ResNetNoseRegressor(nn.Module):
    """ResNet-18 feature extractor with a small MLP head for nose-point regression.

    Outputs shape [B, 2]: a normalized (x, y) in [0, 1] via a final sigmoid.
    Attribute names (backbone / pool / head) are part of the checkpoint
    format and must not change.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        resnet = models.resnet18(pretrained=pretrained)
        # Drop the final avgpool + fc; keep only the convolutional trunk.
        trunk = list(resnet.children())[:-2]
        self.backbone = nn.Sequential(*trunk)
        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        # 512-dim pooled feature -> 128 -> (x, y) in [0, 1].
        head_layers = [
            nn.Flatten(),
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 2),
            nn.Sigmoid(),
        ]
        self.head = nn.Sequential(*head_layers)

    def forward(self, x):
        feats = self.backbone(x)
        pooled = self.pool(feats)
        return self.head(pooled)
|
train/train.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#%%
# Training script for the nose-point regressor.
import torch
import torch.nn as nn
import torch.optim as optim

from model import *
from dataset import NosePointDataset

# ---- Hyperparameters ----
image_size = (64, 64)
batch_size = 32
num_epochs = 1000
lr = 1e-3
val_split = 0.2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

dataset = NosePointDataset(image_size=image_size)

# 80/20 random split (unseeded, so it differs between runs).
n_train = int(len(dataset) * (1 - val_split))
train, val = torch.utils.data.random_split(dataset, [n_train, len(dataset) - n_train])
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=batch_size, shuffle=False)

# model = NosePointRegressor(input_channels=3).to(device)
model = ResNetNoseRegressor(pretrained=True).to(device)
# criterion = nn.MSELoss()
criterion = nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr=lr)

# %%
import matplotlib.pyplot as plt
from tqdm import tqdm

save_path = "best_model.pth"
plot_path = "loss_plot.png"

train_losses = []
val_losses = []
best_val_loss = float('inf')

# ===== Training Loop =====
for epoch in range(num_epochs):
    # --- training pass ---
    model.train()
    running = 0.0
    for batch_imgs, batch_coords in tqdm(train_loader):
        batch_imgs, batch_coords = batch_imgs.to(device), batch_coords.to(device)

        optimizer.zero_grad()
        preds = model(batch_imgs)
        batch_loss = criterion(preds, batch_coords)
        batch_loss.backward()
        optimizer.step()

        # Weight by batch size so the epoch mean is per-sample.
        running += batch_loss.item() * batch_imgs.size(0)
    train_loss = running / len(train_loader.dataset)

    # --- validation pass ---
    model.eval()
    running = 0.0
    with torch.no_grad():
        for batch_imgs, batch_coords in val_loader:
            batch_imgs, batch_coords = batch_imgs.to(device), batch_coords.to(device)
            preds = model(batch_imgs)
            running += criterion(preds, batch_coords).item() * batch_imgs.size(0)
    val_loss = running / len(val_loader.dataset)

    # Logging
    train_losses.append(train_loss)
    val_losses.append(val_loss)
    print(f"[Epoch {epoch+1}/{num_epochs}] Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")

    # Checkpoint whenever validation improves.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), save_path)
        print("✅ Saved best model.")

# Save the loss curves once training finishes.
epochs_axis = range(1, len(train_losses) + 1)
plt.figure(figsize=(6, 4))
plt.plot(epochs_axis, train_losses, label="Train Loss")
plt.plot(epochs_axis, val_losses, label="Val Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training vs Validation Loss")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.savefig(plot_path)
plt.close()

print("✅ Training complete.")
# %%
|