Merge pull request #1 from bawolf/blog_dataset
Browse files- .gitignore +2 -0
- script/hyperparameter_tuning.py +204 -38
- script/train.py +278 -211
- script/visualization/visualize.py +22 -13
- src/dataset/dataset.py +3 -1
.gitignore
CHANGED
|
@@ -35,6 +35,8 @@ ENV/
|
|
| 35 |
|
| 36 |
# Project specific
|
| 37 |
runs/
|
|
|
|
|
|
|
| 38 |
checkpoints/
|
| 39 |
*.pth
|
| 40 |
*.ckpt
|
|
|
|
| 35 |
|
| 36 |
# Project specific
|
| 37 |
runs/
|
| 38 |
+
outputs/
|
| 39 |
+
runs_hyperparam/
|
| 40 |
checkpoints/
|
| 41 |
*.pth
|
| 42 |
*.ckpt
|
script/hyperparameter_tuning.py
CHANGED
|
@@ -1,24 +1,39 @@
|
|
| 1 |
import optuna
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
import os
|
| 5 |
import sys
|
| 6 |
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
| 7 |
from script.train import train_and_evaluate
|
| 8 |
from src.utils.utils import create_run_directory
|
| 9 |
|
| 10 |
-
def
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
config = {
|
| 12 |
-
"clip_model":
|
| 13 |
-
"
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
| 17 |
-
"gradient_clip_max_norm": trial.
|
| 18 |
"augmentation_strength": trial.suggest_float("augmentation_strength", 0.0, 1.0),
|
| 19 |
"crop_scale_min": trial.suggest_float("crop_scale_min", 0.6, 0.9),
|
| 20 |
"max_frames": trial.suggest_int("max_frames", 5, 15),
|
| 21 |
-
"sigma": trial.
|
| 22 |
}
|
| 23 |
|
| 24 |
class_labels = ["windmill", "halo", "swipe", "baby_mill"][:3]
|
|
@@ -27,9 +42,9 @@ def objective(trial, hyperparam_run_dir):
|
|
| 27 |
config.update({
|
| 28 |
"class_labels": class_labels,
|
| 29 |
"num_classes": len(class_labels),
|
| 30 |
-
"data_path":
|
| 31 |
-
"num_epochs": 50,
|
| 32 |
-
"patience": 10,
|
| 33 |
"image_size": 224,
|
| 34 |
"crop_scale_max": 1.0,
|
| 35 |
"normalization_mean": [0.485, 0.456, 0.406],
|
|
@@ -37,7 +52,7 @@ def objective(trial, hyperparam_run_dir):
|
|
| 37 |
"overfitting_threshold": 10,
|
| 38 |
})
|
| 39 |
|
| 40 |
-
# Derive augmentation parameters
|
| 41 |
config.update({
|
| 42 |
"flip_probability": 0.5 * config["augmentation_strength"],
|
| 43 |
"rotation_degrees": int(15 * config["augmentation_strength"]),
|
|
@@ -47,33 +62,184 @@ def objective(trial, hyperparam_run_dir):
|
|
| 47 |
"hue_jitter": 0.1 * config["augmentation_strength"],
|
| 48 |
})
|
| 49 |
|
| 50 |
-
# Create
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
def main():
|
| 58 |
-
# Set up the study and optimize
|
| 59 |
-
hyperparam_run_dir = create_run_directory(suffix='_hyperparam')
|
| 60 |
-
study = optuna.create_study(direction="maximize")
|
| 61 |
-
study.optimize(lambda trial: objective(trial, hyperparam_run_dir), n_trials=100) # Adjust the number of trials as needed
|
| 62 |
|
| 63 |
-
# Save the study results
|
| 64 |
-
study.trials_dataframe().to_csv(os.path.join(hyperparam_run_dir, 'study_results.csv'))
|
| 65 |
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
|
| 78 |
if __name__ == "__main__":
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import optuna
|
| 2 |
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
import json
|
| 7 |
+
import math
|
| 8 |
|
|
|
|
| 9 |
import sys
|
| 10 |
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
| 11 |
from script.train import train_and_evaluate
|
| 12 |
from src.utils.utils import create_run_directory
|
| 13 |
|
| 14 |
+
def create_hyperparam_directory(base_dir="runs_hyperparam"):
    """Create a timestamped parent directory for one hyperparameter-search session.

    Args:
        base_dir: Directory under which the session directory is created.
            Defaults to ``"runs_hyperparam"`` (resolved against the current
            working directory), the location that was previously hard-coded.

    Returns:
        str: Path of the created directory, of the form
        ``<base_dir>/hyperparam_<YYYYmmdd_HHMMSS>``.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    hyperparam_dir = os.path.join(base_dir, f"hyperparam_{timestamp}")
    # exist_ok=True: two calls within the same second reuse one directory
    # instead of raising.
    os.makedirs(hyperparam_dir, exist_ok=True)
    return hyperparam_dir
|
| 21 |
+
|
| 22 |
+
def objective(trial, hyperparam_run_dir, data_path):
|
| 23 |
+
"""Objective function for a single dataset"""
|
| 24 |
+
|
| 25 |
+
# Then suggest parameters using the model-specific ranges
|
| 26 |
config = {
|
| 27 |
+
"clip_model": trial.suggest_categorical("clip_model", ["openai/clip-vit-base-patch32", "openai/clip-vit-large-patch14"]),
|
| 28 |
+
"batch_size": trial.suggest_categorical("batch_size", [8,16,32]),
|
| 29 |
+
"unfreeze_layers": trial.suggest_int("unfreeze_layers", 1, 4),
|
| 30 |
+
"learning_rate": trial.suggest_float("learning_rate", 1e-6, 1e-4, log=True),
|
| 31 |
+
"weight_decay": trial.suggest_float("weight_decay", 1e-8, 1e-1, log=True),
|
| 32 |
+
"gradient_clip_max_norm": trial.suggest_float("gradient_clip_max_norm", 0.1, 1.0),
|
| 33 |
"augmentation_strength": trial.suggest_float("augmentation_strength", 0.0, 1.0),
|
| 34 |
"crop_scale_min": trial.suggest_float("crop_scale_min", 0.6, 0.9),
|
| 35 |
"max_frames": trial.suggest_int("max_frames", 5, 15),
|
| 36 |
+
"sigma": trial.suggest_float("sigma", 0.1, 0.5),
|
| 37 |
}
|
| 38 |
|
| 39 |
class_labels = ["windmill", "halo", "swipe", "baby_mill"][:3]
|
|
|
|
| 42 |
config.update({
|
| 43 |
"class_labels": class_labels,
|
| 44 |
"num_classes": len(class_labels),
|
| 45 |
+
"data_path": data_path,
|
| 46 |
+
"num_epochs": 50,
|
| 47 |
+
"patience": 10,
|
| 48 |
"image_size": 224,
|
| 49 |
"crop_scale_max": 1.0,
|
| 50 |
"normalization_mean": [0.485, 0.456, 0.406],
|
|
|
|
| 52 |
"overfitting_threshold": 10,
|
| 53 |
})
|
| 54 |
|
| 55 |
+
# Derive augmentation parameters
|
| 56 |
config.update({
|
| 57 |
"flip_probability": 0.5 * config["augmentation_strength"],
|
| 58 |
"rotation_degrees": int(15 * config["augmentation_strength"]),
|
|
|
|
| 62 |
"hue_jitter": 0.1 * config["augmentation_strength"],
|
| 63 |
})
|
| 64 |
|
| 65 |
+
# Create dataset-specific run directory
|
| 66 |
+
dataset_label = '_'.join(Path(data_path).parts[-2:]) # Get last two parts of path
|
| 67 |
+
trial_dir = create_run_directory(
|
| 68 |
+
prefix=f"trial_{dataset_label}",
|
| 69 |
+
parent_dir=hyperparam_run_dir
|
| 70 |
+
)
|
| 71 |
+
config["run_dir"] = trial_dir
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
|
|
|
|
|
|
| 73 |
|
| 74 |
+
# Run training and evaluation with device cleanup
|
| 75 |
+
try:
|
| 76 |
+
val_accuracy, vis_dir = train_and_evaluate(config)
|
| 77 |
+
|
| 78 |
+
if val_accuracy is None or math.isnan(val_accuracy) or math.isinf(val_accuracy):
|
| 79 |
+
raise ValueError(f"Invalid accuracy value: {val_accuracy}")
|
| 80 |
+
|
| 81 |
+
# Save trial info
|
| 82 |
+
trial_info = {
|
| 83 |
+
'dataset': data_path,
|
| 84 |
+
'dataset_label': dataset_label,
|
| 85 |
+
'trial_number': trial.number,
|
| 86 |
+
'parameters': trial.params,
|
| 87 |
+
'value': val_accuracy,
|
| 88 |
+
'visualization_dir': vis_dir
|
| 89 |
+
}
|
| 90 |
+
|
| 91 |
+
with open(os.path.join(trial_dir, 'trial_info.json'), 'w') as f:
|
| 92 |
+
json.dump(trial_info, f, indent=4)
|
| 93 |
+
|
| 94 |
+
return val_accuracy
|
| 95 |
+
|
| 96 |
+
except Exception as e:
|
| 97 |
+
print(f"Error in trial for {data_path}: {str(e)}")
|
| 98 |
+
# Log detailed error information
|
| 99 |
+
error_log_path = os.path.join(hyperparam_run_dir, 'error_log.txt')
|
| 100 |
+
with open(error_log_path, 'a') as f:
|
| 101 |
+
f.write(f"\nError in trial at {datetime.now()}:\n")
|
| 102 |
+
f.write(f"Dataset: {data_path}\n")
|
| 103 |
+
f.write(f"Error: {str(e)}\n")
|
| 104 |
+
f.write(f"Trial params: {trial.params}\n")
|
| 105 |
+
f.write("Stack trace:\n")
|
| 106 |
+
import traceback
|
| 107 |
+
f.write(traceback.format_exc())
|
| 108 |
+
f.write("\n" + "="*50 + "\n")
|
| 109 |
+
|
| 110 |
+
return float('-inf')
|
| 111 |
|
| 112 |
+
def run_hyperparameter_search(data_paths, n_trials=100):
    """Run an independent Optuna hyperparameter search for each dataset.

    For every entry of ``data_paths`` a fresh study (direction "maximize") is
    created and ``objective`` is run one trial at a time until ``n_trials``
    trials have produced a finite value or ``2 * n_trials`` attempts have
    been made. Per-dataset artifacts are written to
    ``<parent>/search_<dataset_label>/``: ``study_results.csv``,
    ``trial_statistics.json``, ``best_params.txt`` and, when any attempt
    failed, ``failed_trials.json``. An ``overall_summary.csv`` covering all
    datasets is written to the parent directory.

    Args:
        data_paths: Iterable of dataset directory paths. The last two path
            components are joined with ``_`` to label each dataset.
        n_trials: Number of successful trials wanted per dataset.

    Returns:
        tuple: ``(parent_hyperparam_dir, all_results)`` where ``all_results``
        maps each data path to a dict with keys ``best_value``,
        ``best_params``, ``study``, ``results_df``, ``failed_trials`` and
        ``trial_stats``. When no trial of a dataset succeeded,
        ``best_value`` is ``None`` and ``best_params`` is an empty dict.
    """
    # Create parent directory for all searches
    parent_hyperparam_dir = create_hyperparam_directory()

    # Store results for all datasets
    all_results = {}

    for data_path in data_paths:
        print(f"\nStarting hyperparameter search for dataset: {data_path}")

        # Create dataset-specific directory
        dataset_label = '_'.join(Path(data_path).parts[-2:])
        dataset_dir = os.path.join(parent_hyperparam_dir, f"search_{dataset_label}")
        os.makedirs(dataset_dir, exist_ok=True)
        failed_trials_path = os.path.join(dataset_dir, 'failed_trials.json')

        # Create and run study with explicit trial count tracking
        study = optuna.create_study(direction="maximize")
        completed_trials = 0
        failed_trials = []
        total_attempts = 0
        max_attempts = n_trials * 2

        while completed_trials < n_trials and total_attempts < max_attempts:
            total_attempts += 1
            trials_before = len(study.trials)
            failure_reason = None

            try:
                study.optimize(
                    lambda trial: objective(trial, dataset_dir, data_path),
                    n_trials=1
                )
            except Exception as e:
                failure_reason = str(e)
                print(f"Error in trial for {dataset_label}: {str(e)}")
            else:
                # Inspect only a trial recorded by this attempt; its value is
                # None when Optuna marked the trial as failed.
                trials_now = study.trials
                value = trials_now[-1].value if len(trials_now) > trials_before else None

                if value is not None and math.isfinite(value):
                    # Only count trials that produced a usable score
                    completed_trials += 1
                    print(f"Completed trial {completed_trials}/{n_trials} for {dataset_label}")
                elif value == float('-inf'):
                    # -inf is the sentinel objective() returns after an error
                    failure_reason = "Trial returned -inf"
                    print(f"Failed trial for {dataset_label}: returned -inf")
                else:
                    failure_reason = f"Trial produced no usable value: {value}"
                    print(f"Failed trial for {dataset_label}: {failure_reason}")

            if failure_reason is not None:
                failed_trials.append({
                    'trial_number': total_attempts,
                    'error': failure_reason,
                    'timestamp': datetime.now().isoformat()
                })
                # Persist after every failure so the log survives a crash
                with open(failed_trials_path, 'w') as f:
                    json.dump(failed_trials, f, indent=4)

        # The loop can only end short of n_trials by exhausting its attempts
        if completed_trials < n_trials:
            print(f"Warning: Reached maximum attempts ({max_attempts}) for {dataset_label}")

        # Save study results
        results_df = study.trials_dataframe()
        results_df.to_csv(os.path.join(dataset_dir, 'study_results.csv'))

        # Save trial statistics
        trial_stats = {
            'completed_trials': completed_trials,
            'failed_trials': len(failed_trials),
            'total_attempts': total_attempts
        }
        with open(os.path.join(dataset_dir, 'trial_statistics.json'), 'w') as f:
            json.dump(trial_stats, f, indent=4)

        # study.best_trial raises ValueError when no trial completed; a
        # non-finite "best" means every recorded trial was a failure sentinel.
        try:
            best_trial = study.best_trial
        except ValueError:
            best_trial = None
        if best_trial is not None and (best_trial.value is None or not math.isfinite(best_trial.value)):
            best_trial = None

        best_value = best_trial.value if best_trial is not None else None
        best_params = dict(best_trial.params) if best_trial is not None else {}

        # Save best trial info
        best_params_path = os.path.join(dataset_dir, 'best_params.txt')
        with open(best_params_path, 'w') as f:
            if best_trial is None:
                f.write("No successful trials.\n")
            else:
                f.write(f"Best trial value: {best_value}\n\n")
                f.write("Best parameters:\n")
                for key, value in best_params.items():
                    f.write(f"{key}: {value}\n")

        # Store results
        all_results[data_path] = {
            'best_value': best_value,
            'best_params': best_params,
            'study': study,
            'results_df': results_df,
            'failed_trials': failed_trials,
            'trial_stats': trial_stats
        }

        print(f"\nResults for {data_path}:")
        print(f"Completed trials: {completed_trials}")
        print(f"Failed trials: {len(failed_trials)}")
        if best_trial is None:
            print("No successful trials; no best parameters available.")
        else:
            print(f"Best trial value: {best_value}")
            print("Best parameters:")
            for key, value in best_params.items():
                print(f"  {key}: {value}")

    # Create overall summary with additional statistics
    summary_data = [
        {
            'dataset': data_path,
            'best_accuracy': result['best_value'],
            'completed_trials': result['trial_stats']['completed_trials'],
            'failed_trials': result['trial_stats']['failed_trials'],
            **result['best_params']
        }
        for data_path, result in all_results.items()
    ]

    summary_df = pd.DataFrame(summary_data)
    summary_df.to_csv(os.path.join(parent_hyperparam_dir, 'overall_summary.csv'), index=False)

    return parent_hyperparam_dir, all_results
|
| 225 |
|
| 226 |
if __name__ == "__main__":
    # Datasets to optimize: every source/split pair below the blog root,
    # followed by the cross-source train/validation dataset.
    data_paths = [
        f"../finetune/blog/{source}/{split}"
        for source in ("bryant", "youtube", "combined")
        for split in ("random", "adjusted")
    ]
    data_paths.append('../finetune/blog/bryant_train_youtube_val/default')

    # Run the search; raise n_trials for a more thorough sweep.
    trials_per_dataset = 8
    hyperparam_dir, results = run_hyperparameter_search(data_paths, n_trials=trials_per_dataset)

    print("\nHyperparameter search complete!")
    print(f"Results are saved in: {hyperparam_dir}")
|
script/train.py
CHANGED
|
@@ -7,6 +7,7 @@ import logging
|
|
| 7 |
import csv
|
| 8 |
import json
|
| 9 |
from torch.optim.lr_scheduler import CosineAnnealingLR
|
|
|
|
| 10 |
|
| 11 |
import sys
|
| 12 |
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
|
@@ -15,209 +16,253 @@ from src.utils.utils import create_run_directory
|
|
| 15 |
from src.dataset.dataset import VideoDataset
|
| 16 |
from src.models.model import create_model
|
| 17 |
from src.dataset.video_utils import create_transform
|
|
|
|
|
|
|
| 18 |
|
| 19 |
def train_and_evaluate(config):
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
# Update paths based on run_dir
|
| 25 |
-
config.update({
|
| 26 |
-
"best_model_path": os.path.join(config["run_dir"], 'best_model.pth'),
|
| 27 |
-
"final_model_path": os.path.join(config["run_dir"], 'final_model.pth'),
|
| 28 |
-
"csv_path": os.path.join(config["run_dir"], 'training_log.csv'),
|
| 29 |
-
"misclassifications_dir": os.path.join(config["run_dir"], 'misclassifications'),
|
| 30 |
-
})
|
| 31 |
-
|
| 32 |
-
config_path = os.path.join(config["run_dir"], 'config.json')
|
| 33 |
-
with open(config_path, 'w') as f:
|
| 34 |
-
json.dump(config, f, indent=2)
|
| 35 |
-
|
| 36 |
-
# Set up logging
|
| 37 |
-
logging.basicConfig(level=logging.INFO,
|
| 38 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
| 39 |
-
handlers=[logging.FileHandler(os.path.join(config["run_dir"], 'training.log')),
|
| 40 |
-
logging.StreamHandler()])
|
| 41 |
-
logger = logging.getLogger(__name__)
|
| 42 |
-
|
| 43 |
-
# Set device
|
| 44 |
-
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 45 |
-
logger.info(f"Using device: {device}")
|
| 46 |
-
|
| 47 |
-
# Initialize variables
|
| 48 |
-
best_val_loss = float('inf')
|
| 49 |
-
epochs_without_improvement = 0
|
| 50 |
-
|
| 51 |
-
model = create_model(config["num_classes"], config["clip_model"])
|
| 52 |
-
|
| 53 |
-
# Unfreeze the last 2 layers of the vision encoder
|
| 54 |
-
model.unfreeze_vision_encoder(num_layers=config["unfreeze_layers"])
|
| 55 |
-
|
| 56 |
-
# Move model to device
|
| 57 |
-
model = model.to(device)
|
| 58 |
-
logger.info(f"Model architecture:\n{model}")
|
| 59 |
-
|
| 60 |
-
# Load datasets
|
| 61 |
-
train_dataset = VideoDataset(
|
| 62 |
-
os.path.join(config['data_path'], 'train.csv'),
|
| 63 |
-
config=config
|
| 64 |
-
)
|
| 65 |
-
|
| 66 |
-
# For validation, create a new config with training=False for transforms
|
| 67 |
-
val_config = config.copy()
|
| 68 |
-
val_dataset = VideoDataset(
|
| 69 |
-
os.path.join(config['data_path'], 'val.csv'),
|
| 70 |
-
config=val_config,
|
| 71 |
-
transform=create_transform(config, training=False)
|
| 72 |
-
)
|
| 73 |
-
|
| 74 |
-
# Create data loaders
|
| 75 |
-
train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
|
| 76 |
-
val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)
|
| 77 |
-
|
| 78 |
-
# Define optimizer and learning rate scheduler
|
| 79 |
-
optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
|
| 80 |
-
scheduler = CosineAnnealingLR(optimizer, T_max=config["num_epochs"])
|
| 81 |
-
|
| 82 |
-
# Open a CSV file to log training progress
|
| 83 |
-
with open(config["csv_path"], 'w', newline='') as file:
|
| 84 |
-
writer = csv.writer(file)
|
| 85 |
-
writer.writerow(["epoch", "train_loss", "train_accuracy", "val_loss", "val_accuracy"])
|
| 86 |
-
|
| 87 |
-
# Function to calculate accuracy
|
| 88 |
-
def calculate_accuracy(outputs, labels):
|
| 89 |
-
_, predicted = torch.max(outputs, 1)
|
| 90 |
-
correct = (predicted == labels).sum().item()
|
| 91 |
-
total = labels.size(0)
|
| 92 |
-
return correct / total
|
| 93 |
-
|
| 94 |
-
def log_misclassifications(outputs, labels, video_paths, dataset, misclassified_videos):
|
| 95 |
-
_, predicted = torch.max(outputs, 1)
|
| 96 |
-
for pred, label, video_path in zip(predicted, labels, video_paths):
|
| 97 |
-
if pred != label:
|
| 98 |
-
true_label = dataset.label_map[label.item()]
|
| 99 |
-
predicted_label = dataset.label_map[pred.item()]
|
| 100 |
-
misclassified_videos.append({
|
| 101 |
-
'video_path': video_path,
|
| 102 |
-
'true_label': true_label,
|
| 103 |
-
'predicted_label': predicted_label
|
| 104 |
-
})
|
| 105 |
-
|
| 106 |
-
# Create a subfolder for misclassification logs
|
| 107 |
-
os.makedirs(config["misclassifications_dir"], exist_ok=True)
|
| 108 |
-
|
| 109 |
-
# Training loop
|
| 110 |
-
for epoch in range(config["num_epochs"]):
|
| 111 |
-
model.train()
|
| 112 |
-
total_loss = 0
|
| 113 |
-
total_accuracy = 0
|
| 114 |
-
for frames, labels, video_paths in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config['num_epochs']}"):
|
| 115 |
-
frames = frames.to(device)
|
| 116 |
-
labels = labels.to(device)
|
| 117 |
-
|
| 118 |
-
logits = model(frames)
|
| 119 |
-
|
| 120 |
-
loss = torch.nn.functional.cross_entropy(logits, labels)
|
| 121 |
-
accuracy = calculate_accuracy(logits, labels)
|
| 122 |
-
|
| 123 |
-
optimizer.zero_grad()
|
| 124 |
-
loss.backward()
|
| 125 |
-
clip_grad_norm_(model.parameters(), max_norm=config["gradient_clip_max_norm"])
|
| 126 |
-
optimizer.step()
|
| 127 |
-
|
| 128 |
-
total_loss += loss.item()
|
| 129 |
-
total_accuracy += accuracy
|
| 130 |
|
| 131 |
-
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
|
| 134 |
-
#
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
frames = frames.to(device)
|
| 142 |
labels = labels.to(device)
|
| 143 |
|
| 144 |
logits = model(frames)
|
| 145 |
|
| 146 |
-
loss =
|
| 147 |
accuracy = calculate_accuracy(logits, labels)
|
| 148 |
|
| 149 |
-
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
| 220 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
|
| 222 |
def main():
|
| 223 |
# Create run directory
|
|
@@ -228,35 +273,57 @@ def main():
|
|
| 228 |
config = {
|
| 229 |
"class_labels": class_labels,
|
| 230 |
"num_classes": len(class_labels),
|
| 231 |
-
"
|
| 232 |
"batch_size": 32,
|
| 233 |
-
"
|
| 234 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
"num_epochs": 50,
|
| 236 |
-
"patience": 10,
|
| 237 |
-
"max_frames": 10,
|
| 238 |
-
"sigma": 0.3,
|
| 239 |
"image_size": 224,
|
| 240 |
-
"flip_probability": 0.5,
|
| 241 |
-
"rotation_degrees": 15,
|
| 242 |
-
"brightness_jitter": 0.2,
|
| 243 |
-
"contrast_jitter": 0.2,
|
| 244 |
-
"saturation_jitter": 0.2,
|
| 245 |
-
"hue_jitter": 0.1,
|
| 246 |
-
"crop_scale_min": 0.8,
|
| 247 |
"crop_scale_max": 1.0,
|
| 248 |
-
"normalization_mean": [
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
"overfitting_threshold": 10,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 255 |
"run_dir": run_dir,
|
| 256 |
-
"best_model_path": os.path.join(run_dir, 'best_model.pth'),
|
| 257 |
-
"final_model_path": os.path.join(run_dir, 'final_model.pth'),
|
| 258 |
-
"csv_path": os.path.join(run_dir, 'training_log.csv'),
|
| 259 |
-
"misclassifications_dir": os.path.join(run_dir, 'misclassifications'),
|
| 260 |
}
|
| 261 |
train_and_evaluate(config)
|
| 262 |
|
|
|
|
| 7 |
import csv
|
| 8 |
import json
|
| 9 |
from torch.optim.lr_scheduler import CosineAnnealingLR
|
| 10 |
+
import math
|
| 11 |
|
| 12 |
import sys
|
| 13 |
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
|
|
|
| 16 |
from src.dataset.dataset import VideoDataset
|
| 17 |
from src.models.model import create_model
|
| 18 |
from src.dataset.video_utils import create_transform
|
| 19 |
+
from visualization.visualize import run_visualization
|
| 20 |
+
from visualization.miscalculations_report import analyze_misclassifications
|
| 21 |
|
| 22 |
def train_and_evaluate(config):
    """Train the classifier described by ``config``, then evaluate and report on it.

    The function writes everything it produces into ``config["run_dir"]``
    (created with ``create_run_directory()`` when the key is absent):
    ``config.json``, ``training.log``, ``training_log.csv``,
    ``best_model.pth`` (lowest validation loss), ``final_model.pth``,
    ``run_info.txt`` and per-epoch ``misclassifications/epoch_<n>.json``.
    After training it runs ``run_visualization`` and
    ``analyze_misclassifications``; failures in either are logged and do not
    abort the run.

    Args:
        config: Mutable dict of settings. Keys read here include
            ``num_classes``, ``clip_model``, ``unfreeze_layers``,
            ``data_path``, ``batch_size``, ``learning_rate``,
            ``weight_decay``, ``num_epochs``, ``gradient_clip_max_norm``,
            ``patience``, ``overfitting_threshold`` and ``class_labels``.
            The dict is updated in place with ``run_dir`` (if missing),
            ``best_model_path``, ``final_model_path``, ``csv_path`` and
            ``misclassifications_dir``.

    Returns:
        tuple: ``(avg_val_accuracy, vis_dir)``. ``avg_val_accuracy`` is the
        mean of the per-batch validation accuracies (a fraction) of the last
        epoch that ran. ``vis_dir`` is the directory returned by
        ``run_visualization``, or ``None`` when visualization failed.

    Raises:
        ValueError: If the final validation accuracy is NaN or infinite,
            which includes the case where no epoch ran.
        Exception: Any other error is logged and re-raised for the caller
            (e.g. the hyperparameter search) to handle.
    """
    # Bound before the try block so the error handler below can always log,
    # even when the failure happens before logging is configured.
    logger = logging.getLogger(__name__)
    run_log_handler = None

    try:
        # Create a run directory if it doesn't exist
        if "run_dir" not in config:
            config["run_dir"] = create_run_directory()

        # Update paths based on run_dir
        config.update({
            "best_model_path": os.path.join(config["run_dir"], 'best_model.pth'),
            "final_model_path": os.path.join(config["run_dir"], 'final_model.pth'),
            "csv_path": os.path.join(config["run_dir"], 'training_log.csv'),
            "misclassifications_dir": os.path.join(config["run_dir"], 'misclassifications'),
        })

        config_path = os.path.join(config["run_dir"], 'config.json')
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=2)

        # Set up logging. basicConfig does nothing once the root logger has
        # handlers, so the per-run file handler is attached explicitly;
        # otherwise every run after the first in one process would keep
        # writing to the first run's training.log.
        log_format = '%(asctime)s - %(levelname)s - %(message)s'
        logging.basicConfig(level=logging.INFO,
                            format=log_format,
                            handlers=[logging.StreamHandler()])
        run_log_handler = logging.FileHandler(os.path.join(config["run_dir"], 'training.log'))
        run_log_handler.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(run_log_handler)

        # Pick the GPU when one is available
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        logger.info(f"Using device: {device}")

        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            print(f"Available GPU memory: {torch.cuda.get_device_properties(0).total_memory/1e9:.2f}GB")
            print(f"Currently allocated: {torch.cuda.memory_allocated()/1e9:.2f}GB")

        # Initialize variables
        best_val_loss = float('inf')
        epochs_without_improvement = 0
        # NaN until an epoch completes, so a zero-epoch run is reported by
        # the validity check at the end instead of an unbound-name error.
        avg_val_accuracy = float('nan')
        # Stays None when visualization fails, keeping the return well-defined.
        vis_dir = None

        model = create_model(config["num_classes"], config["clip_model"])
        # Unfreeze the configured number of vision-encoder layers
        model.unfreeze_vision_encoder(num_layers=config["unfreeze_layers"])
        model = model.to(device)

        # Ensure criterion is on the same device
        criterion = torch.nn.CrossEntropyLoss().to(device)

        # logger.info(f"Model architecture:\n{model}")

        # Load datasets
        train_dataset = VideoDataset(
            os.path.join(config['data_path'], 'train.csv'),
            config=config
        )

        # Validation uses its own config copy and a non-training transform
        val_config = config.copy()
        val_dataset = VideoDataset(
            os.path.join(config['data_path'], 'val.csv'),
            config=val_config,
            transform=create_transform(config, training=False)
        )

        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=config["batch_size"], shuffle=False)

        # Define optimizer and learning rate scheduler
        optimizer = torch.optim.AdamW(model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"])
        scheduler = CosineAnnealingLR(optimizer, T_max=config["num_epochs"])

        # Start the CSV training log with its header row
        with open(config["csv_path"], 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["epoch", "train_loss", "train_accuracy", "val_loss", "val_accuracy"])

        def calculate_accuracy(outputs, labels):
            """Return the fraction of rows whose arg-max over dim 1 equals the label."""
            _, predicted = torch.max(outputs, 1)
            correct = (predicted == labels).sum().item()
            total = labels.size(0)
            return correct / total

        def log_misclassifications(outputs, labels, video_paths, dataset, misclassified_videos):
            """Append one record per wrongly predicted video to ``misclassified_videos``."""
            _, predicted = torch.max(outputs, 1)
            for pred, label, video_path in zip(predicted, labels, video_paths):
                if pred != label:
                    true_label = dataset.label_map[label.item()]
                    predicted_label = dataset.label_map[pred.item()]
                    misclassified_videos.append({
                        'video_path': video_path,
                        'true_label': true_label,
                        'predicted_label': predicted_label
                    })

        # Create a subfolder for misclassification logs
        os.makedirs(config["misclassifications_dir"], exist_ok=True)

        # Training loop
        for epoch in range(config["num_epochs"]):
            model.train()
            total_loss = 0
            total_accuracy = 0
            for frames, labels, video_paths in tqdm(train_loader, desc=f"Epoch {epoch + 1}/{config['num_epochs']}"):
                frames = frames.to(device)
                labels = labels.to(device)

                logits = model(frames)

                loss = criterion(logits, labels)
                accuracy = calculate_accuracy(logits, labels)

                optimizer.zero_grad()
                loss.backward()
                clip_grad_norm_(model.parameters(), max_norm=config["gradient_clip_max_norm"])
                optimizer.step()

                total_loss += loss.item()
                total_accuracy += accuracy

            # Averages are over batches, not over samples
            avg_train_loss = total_loss / len(train_loader)
            avg_train_accuracy = total_accuracy / len(train_loader)

            # Validation
            model.eval()
            val_loss = 0
            val_accuracy = 0
            misclassified_videos = []
            with torch.no_grad():
                for frames, labels, video_paths in val_loader:
                    frames = frames.to(device)
                    labels = labels.to(device)

                    logits = model(frames)

                    loss = criterion(logits, labels)
                    accuracy = calculate_accuracy(logits, labels)

                    val_loss += loss.item()
                    val_accuracy += accuracy

                    # Log misclassifications
                    log_misclassifications(logits, labels, video_paths, val_dataset, misclassified_videos)

            avg_val_loss = val_loss / len(val_loader)
            avg_val_accuracy = val_accuracy / len(val_loader)

            # Log misclassified videos
            if misclassified_videos:
                misclassified_log_path = os.path.join(config["misclassifications_dir"], f'epoch_{epoch+1}.json')
                with open(misclassified_log_path, 'w') as f:
                    json.dump(misclassified_videos, f, indent=2)
                logger.info(f"Logged {len(misclassified_videos)} misclassified videos to {misclassified_log_path}")

            # Log the metrics
            logger.info(f"Epoch [{epoch+1}/{config['num_epochs']}], "
                        f"Train Loss: {avg_train_loss:.4f}, Train Accuracy: {avg_train_accuracy*100:.2f}%, "
                        f"Val Loss: {avg_val_loss:.4f}, Val Accuracy: {avg_val_accuracy*100:.2f}%")

            # Write to CSV
            with open(config["csv_path"], 'a', newline='') as file:
                writer = csv.writer(file)
                writer.writerow([epoch+1, avg_train_loss, avg_train_accuracy*100, avg_val_loss, avg_val_accuracy*100])

            # Learning rate scheduling
            scheduler.step()

            # Save the best model and check for early stopping
            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(model.state_dict(), config["best_model_path"])
                logger.info(f"Saved best model to {config['best_model_path']}")
                epochs_without_improvement = 0
            else:
                epochs_without_improvement += 1

            # Early stopping check
            if epochs_without_improvement >= config["patience"]:
                logger.info(f"Early stopping triggered after {config['patience']} epochs without improvement")
                break

            # Overfitting detection. Accuracies here are fractions, so the
            # gap is scaled to percentage points before comparing; with the
            # unscaled gap a threshold such as 10 could never be exceeded.
            # NOTE(review): assumes overfitting_threshold is expressed in
            # percentage points, matching the percent values logged above.
            if (avg_train_accuracy - avg_val_accuracy) * 100 > config["overfitting_threshold"]:
                logger.warning("Possible overfitting detected")

        logger.info("Training finished!")

        # Save the final model
        torch.save(model.state_dict(), config["final_model_path"])
        logger.info(f"Saved final model to {config['final_model_path']}")

        # Save run information
        with open(os.path.join(config["run_dir"], 'run_info.txt'), 'w') as f:
            for key, value in config.items():
                f.write(f"{key}: {value}\n")
            f.write(f"Device: {device}\n")
            f.write(f"Model: {model.__class__.__name__}\n")
            f.write(f"Optimizer: {optimizer.__class__.__name__}\n")
            f.write(f"Scheduler: {scheduler.__class__.__name__}\n")
            f.write(f"Loss function: CrossEntropyLoss\n")
            f.write(f"Data augmentation: RandomHorizontalFlip, RandomRotation(5), ColorJitter\n")
            f.write(f"Mixed precision training: {'Enabled' if 'scaler' in locals() else 'Disabled'}\n")
            f.write(f"Train dataset size: {len(train_dataset)}\n")
            f.write(f"Validation dataset size: {len(val_dataset)}\n")
            f.write(f"Vision encoder frozen: {'Partially' if hasattr(model, 'unfreeze_vision_encoder') else 'Unknown'}\n")

        # Run visualization (best effort: errors are logged, not raised)
        try:
            logger.info("Running visualization...")
            vis_dir, confusion_matrix = run_visualization(config["run_dir"])
            logger.info(f"Visualization complete! Check the output directory: {vis_dir}")

            # Log confusion matrix results
            class_accuracies = confusion_matrix.diagonal() / confusion_matrix.sum(axis=1)
            overall_accuracy = confusion_matrix.diagonal().sum() / confusion_matrix.sum()

            logger.info("\nConfusion Matrix Results:")
            for label, accuracy in zip(config['class_labels'], class_accuracies):
                logger.info(f"{label}: {accuracy:.2%}")
            logger.info(f"Overall Accuracy: {overall_accuracy:.2%}")

        except Exception as e:
            logger.error(f"Error running visualization: {str(e)}")

        # Run misclassification analysis (best effort as well)
        try:
            analyze_misclassifications(config["run_dir"])
            logger.info(f"Misclassification analysis complete! Check the output directory: {config['run_dir']}")
        except Exception as e:
            logger.error(f"Error running misclassification analysis: {str(e)}")

        if math.isnan(avg_val_accuracy) or math.isinf(avg_val_accuracy):
            raise ValueError(f"Invalid validation accuracy: {avg_val_accuracy}")

        print("Script finished.")

        return avg_val_accuracy, vis_dir

    except Exception as e:
        logger.error(f"Training error: {str(e)}")
        raise  # Re-raise the exception to be caught by the hyperparameter tuning

    finally:
        # Detach this run's log file so later runs in the same process do not
        # also write into it.
        if run_log_handler is not None:
            logging.getLogger().removeHandler(run_log_handler)
            run_log_handler.close()
|
| 266 |
|
| 267 |
def main():
|
| 268 |
# Create run directory
|
|
|
|
| 273 |
config = {
|
| 274 |
"class_labels": class_labels,
|
| 275 |
"num_classes": len(class_labels),
|
| 276 |
+
"clip_model": "openai/clip-vit-large-patch14",
|
| 277 |
"batch_size": 32,
|
| 278 |
+
"unfreeze_layers": 4,
|
| 279 |
+
"learning_rate": 5.305885796107412e-06,
|
| 280 |
+
"weight_decay": 4.543630233732527e-07,
|
| 281 |
+
"gradient_clip_max_norm": 0.6446650879658523,
|
| 282 |
+
"augmentation_strength": 0.5827616006715585,
|
| 283 |
+
"crop_scale_min": 0.7872781274088598,
|
| 284 |
+
"max_frames": 15,
|
| 285 |
+
"sigma": 0.286510943464138,
|
| 286 |
+
"data_path": "../finetune/blog/bryant/random",
|
| 287 |
"num_epochs": 50,
|
| 288 |
+
"patience": 10,
|
|
|
|
|
|
|
| 289 |
"image_size": 224,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
"crop_scale_max": 1.0,
|
| 291 |
+
"normalization_mean": [
|
| 292 |
+
0.485,
|
| 293 |
+
0.456,
|
| 294 |
+
0.406
|
| 295 |
+
],
|
| 296 |
+
"normalization_std": [
|
| 297 |
+
0.229,
|
| 298 |
+
0.224,
|
| 299 |
+
0.225
|
| 300 |
+
],
|
| 301 |
"overfitting_threshold": 10,
|
| 302 |
+
# "data_path": '../finetune/blog/bryant/random',
|
| 303 |
+
# "batch_size": 8,
|
| 304 |
+
# "learning_rate": 2e-6,
|
| 305 |
+
# "weight_decay": 0.007,
|
| 306 |
+
# "num_epochs": 2,
|
| 307 |
+
# "patience": 10, # for early stopping
|
| 308 |
+
# "max_frames": 10,
|
| 309 |
+
# "sigma": 0.3,
|
| 310 |
+
# "image_size": 224,
|
| 311 |
+
# "flip_probability": 0.5,
|
| 312 |
+
# "rotation_degrees": 15,
|
| 313 |
+
# "brightness_jitter": 0.2,
|
| 314 |
+
# "contrast_jitter": 0.2,
|
| 315 |
+
# "saturation_jitter": 0.2,
|
| 316 |
+
# "hue_jitter": 0.1,
|
| 317 |
+
# "crop_scale_min": 0.8,
|
| 318 |
+
# "crop_scale_max": 1.0,
|
| 319 |
+
# "normalization_mean": [0.485, 0.456, 0.406],
|
| 320 |
+
# "normalization_std": [0.229, 0.224, 0.225],
|
| 321 |
+
# "unfreeze_layers": 3,
|
| 322 |
+
# # "clip_model": "openai/clip-vit-large-patch14",
|
| 323 |
+
# "clip_model": "openai/clip-vit-base-patch32",
|
| 324 |
+
# "gradient_clip_max_norm": 1.0,
|
| 325 |
+
# "overfitting_threshold": 10,
|
| 326 |
"run_dir": run_dir,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 327 |
}
|
| 328 |
train_and_evaluate(config)
|
| 329 |
|
script/visualization/visualize.py
CHANGED
|
@@ -110,28 +110,28 @@ def generate_evaluation_metrics(model, data_loader, device, output_dir, class_la
|
|
| 110 |
|
| 111 |
return cm
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
| 120 |
# Load configuration
|
| 121 |
config = get_config(run_dir)
|
| 122 |
|
| 123 |
class_labels = config['class_labels']
|
| 124 |
num_classes = config['num_classes']
|
| 125 |
-
data_path = config['data_path']
|
| 126 |
-
# data_path= '../finetune/3moves_otherpeopleval'
|
| 127 |
-
# data_path = '../finetune/otherpeople3moves'
|
| 128 |
|
| 129 |
# Paths
|
| 130 |
log_file = os.path.join(run_dir, 'training_log.csv')
|
| 131 |
model_path = get_latest_model_path(run_dir)
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
|
| 136 |
# Get the last directory of data_path and the file name
|
| 137 |
last_dir = os.path.basename(os.path.normpath(data_path))
|
|
@@ -160,3 +160,12 @@ if __name__ == "__main__":
|
|
| 160 |
cm = generate_evaluation_metrics(model, test_loader, device, vis_dir, class_labels, data_info)
|
| 161 |
|
| 162 |
print(f"Visualization complete! Check the output directory: {vis_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
return cm
|
| 112 |
|
| 113 |
+
def run_visualization(run_dir, data_path=None, test_csv=None):
|
| 114 |
+
"""
|
| 115 |
+
Run visualization for a specific training run
|
| 116 |
+
|
| 117 |
+
Args:
|
| 118 |
+
run_dir (str): Path to the run directory
|
| 119 |
+
data_path (str, optional): Override the data path from config
|
| 120 |
+
test_csv (str, optional): Override the test CSV path
|
| 121 |
+
"""
|
| 122 |
# Load configuration
|
| 123 |
config = get_config(run_dir)
|
| 124 |
|
| 125 |
class_labels = config['class_labels']
|
| 126 |
num_classes = config['num_classes']
|
| 127 |
+
data_path = data_path or config['data_path']
|
|
|
|
|
|
|
| 128 |
|
| 129 |
# Paths
|
| 130 |
log_file = os.path.join(run_dir, 'training_log.csv')
|
| 131 |
model_path = get_latest_model_path(run_dir)
|
| 132 |
+
|
| 133 |
+
if test_csv is None:
|
| 134 |
+
test_csv = os.path.join(data_path, 'test.csv')
|
| 135 |
|
| 136 |
# Get the last directory of data_path and the file name
|
| 137 |
last_dir = os.path.basename(os.path.normpath(data_path))
|
|
|
|
| 160 |
cm = generate_evaluation_metrics(model, test_loader, device, vis_dir, class_labels, data_info)
|
| 161 |
|
| 162 |
print(f"Visualization complete! Check the output directory: {vis_dir}")
|
| 163 |
+
return vis_dir, cm
|
| 164 |
+
|
| 165 |
+
if __name__ == "__main__":
|
| 166 |
+
# Find the most recent run directory
|
| 167 |
+
run_dir = get_latest_run_dir()
|
| 168 |
+
# run_dir = "/home/bawolf/workspace/break/clip/runs/run_20241024-150232_otherpeopleval_large_model"
|
| 169 |
+
# run_dir = "/home/bawolf/workspace/break/clip/runs/run_20241022-122939_3moves_balanced"
|
| 170 |
+
|
| 171 |
+
run_visualization(run_dir)
|
src/dataset/dataset.py
CHANGED
|
@@ -2,6 +2,7 @@ import torch
|
|
| 2 |
from torch.utils.data import Dataset
|
| 3 |
import csv
|
| 4 |
from .video_utils import create_transform, extract_frames
|
|
|
|
| 5 |
|
| 6 |
class VideoDataset(Dataset):
|
| 7 |
def __init__(self, file_path, config, transform=None):
|
|
@@ -29,7 +30,8 @@ class VideoDataset(Dataset):
|
|
| 29 |
if len(row) != 2:
|
| 30 |
print(f"Skipping invalid row: {row}")
|
| 31 |
continue
|
| 32 |
-
|
|
|
|
| 33 |
try:
|
| 34 |
label = int(label)
|
| 35 |
except ValueError:
|
|
|
|
| 2 |
from torch.utils.data import Dataset
|
| 3 |
import csv
|
| 4 |
from .video_utils import create_transform, extract_frames
|
| 5 |
+
import os
|
| 6 |
|
| 7 |
class VideoDataset(Dataset):
|
| 8 |
def __init__(self, file_path, config, transform=None):
|
|
|
|
| 30 |
if len(row) != 2:
|
| 31 |
print(f"Skipping invalid row: {row}")
|
| 32 |
continue
|
| 33 |
+
relative_video_path, label = row
|
| 34 |
+
video_path = os.path.join(config['data_path'], relative_video_path)
|
| 35 |
try:
|
| 36 |
label = int(label)
|
| 37 |
except ValueError:
|