|
|
import sys
|
|
|
from pathlib import Path
|
|
|
|
|
|
sys.path.append(str(Path(__file__).resolve().parents[1]))
|
|
|
|
|
|
import torch
|
|
|
import torch.nn as nn
|
|
|
import torch.optim as optim
|
|
|
from torch.utils.tensorboard import SummaryWriter
|
|
|
import time
|
|
|
import os
|
|
|
from datetime import datetime
|
|
|
import json
|
|
|
import matplotlib.pyplot as plt
|
|
|
import numpy as np
|
|
|
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
|
|
|
|
|
|
|
|
|
from src import config
|
|
|
from src.data_loader import create_dataloaders
|
|
|
from src.model import create_model
|
|
|
from src.engine import train_step, val_step
|
|
|
|
|
|
def setup_experiment_logging(experiment_name: str):
    """
    Prepare the output directories and TensorBoard writer for one run.

    A timestamped directory ``outputs/<experiment_name>_<YYYYmmdd_HHMMSS>`` is
    created (relative to the current working directory) together with its
    ``models`` and ``logs`` sub-directories.

    Args:
        experiment_name (str): Name of the experiment; used as directory prefix.

    Returns:
        tuple: (writer, experiment_dir, model_dir)
    """
    run_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    experiment_dir = Path("outputs") / f"{experiment_name}_{run_stamp}"
    model_dir = experiment_dir / "models"
    log_dir = experiment_dir / "logs"

    # Create the whole tree up front so later saves never hit a missing folder.
    for directory in (experiment_dir, model_dir, log_dir):
        directory.mkdir(parents=True, exist_ok=True)

    writer = SummaryWriter(log_dir=str(log_dir))

    status_lines = (
        f"[Setup] Eksperimen: {experiment_name}",
        f"[Setup] Direktori: {experiment_dir}",
        f"[Setup] Model akan disimpan di: {model_dir}",
        f"[Setup] Logs akan disimpan di: {log_dir}",
    )
    for line in status_lines:
        print(line)

    return writer, experiment_dir, model_dir
|
|
|
|
|
|
def save_training_results(experiment_dir: Path, model_name: str,
                          train_losses: list, val_losses: list,
                          train_accs: list, val_accs: list,
                          best_val_acc: float, best_epoch: int):
    """
    Save the training history as a JSON file and as a loss/accuracy plot.

    Writes ``<model_name>_results.json`` and
    ``<model_name>_training_curves.png`` into ``experiment_dir``.

    Args:
        experiment_dir (Path): Experiment directory to write into.
        model_name (str): Model name, used in file names and plot titles.
        train_losses (list): Training loss per epoch.
        val_losses (list): Validation loss per epoch.
        train_accs (list): Training accuracy per epoch.
        val_accs (list): Validation accuracy per epoch.
        best_val_acc (float): Best validation accuracy.
        best_epoch (int): Epoch at which the best accuracy was reached.
    """
    results = {
        "model_name": model_name,
        "best_val_accuracy": best_val_acc,
        "best_epoch": best_epoch,
        "total_epochs": len(train_losses),
        "train_losses": train_losses,
        "val_losses": val_losses,
        "train_accuracies": train_accs,
        "val_accuracies": val_accs,
        "config": {
            "batch_size": config.BATCH_SIZE,
            "learning_rate": config.LEARNING_RATE,
            "image_size": config.IMAGE_SIZE,
            "epochs": config.EPOCHS,
            # str() is a no-op for a plain string and keeps the payload
            # JSON-serializable should DEVICE be a torch.device object.
            "device": str(config.DEVICE),
        },
    }

    results_file = experiment_dir / f"{model_name}_results.json"
    with open(results_file, 'w', encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    plot_file = experiment_dir / f"{model_name}_training_curves.png"
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
    try:
        epochs = range(1, len(train_losses) + 1)

        # Left panel: loss curves.
        ax1.plot(epochs, train_losses, 'b-', label='Training Loss')
        ax1.plot(epochs, val_losses, 'r-', label='Validation Loss')
        ax1.set_title(f'{model_name} - Training & Validation Loss')
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.legend()
        ax1.grid(True)

        # Right panel: accuracy curves.
        ax2.plot(epochs, train_accs, 'b-', label='Training Accuracy')
        ax2.plot(epochs, val_accs, 'r-', label='Validation Accuracy')
        ax2.set_title(f'{model_name} - Training & Validation Accuracy')
        ax2.set_xlabel('Epoch')
        ax2.set_ylabel('Accuracy')
        ax2.legend()
        ax2.grid(True)

        fig.tight_layout()
        fig.savefig(plot_file, dpi=300, bbox_inches='tight')
    finally:
        # Close this specific figure even if plotting/saving fails, so
        # figures do not accumulate when several models are trained in a row.
        plt.close(fig)

    print(f"[Save] Hasil training disimpan di: {results_file}")
    print(f"[Save] Plot training disimpan di: {plot_file}")
|
|
|
|
|
|
def _save_checkpoint(path, model, optimizer, epoch, val_accuracy,
                     model_name, num_classes):
    """Write model/optimizer state plus identifying metadata to ``path``."""
    torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch': epoch,
        'val_accuracy': val_accuracy,
        'model_name': model_name,
        'num_classes': num_classes,
    }, path)


def train_model(model_name_key: str, model_name: str, num_classes: int,
                train_loader, val_loader, writer, model_dir: Path):
    """
    Train a single model and save its checkpoints.

    Runs up to ``config.EPOCHS`` epochs with Adam, halving the learning rate
    when validation accuracy plateaus, and stops early after 7 consecutive
    epochs without a new best validation accuracy. Two checkpoints are
    written to ``model_dir``: ``<model_name_key>_best.pth`` (each time the
    validation accuracy improves) and ``<model_name_key>_final.pth`` (state
    after the last epoch that actually ran).

    Args:
        model_name_key (str): Model key from config (e.g. 'vit').
        model_name (str): Model identifier passed to ``create_model``
            (e.g. 'vit_base_patch16_224').
        num_classes (int): Number of classes.
        train_loader: DataLoader for training.
        val_loader: DataLoader for validation.
        writer: TensorBoard writer used for per-epoch scalars.
        model_dir (Path): Directory in which checkpoints are stored.

    Returns:
        dict: Training results (best accuracy, best epoch, final accuracy,
        elapsed time and per-epoch histories), or ``None`` when
        ``create_model`` returns ``None``.
    """
    print(f"\n{'='*60}")
    print(f"TRAINING MODEL: {model_name_key.upper()} ({model_name})")
    print(f"{'='*60}")

    model = create_model(model_name, num_classes, pretrained=True)
    if model is None:
        print(f"[Error] Gagal membuat model {model_name}")
        return None

    model = model.to(config.DEVICE)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.LEARNING_RATE, weight_decay=1e-4)

    # mode='max' because the scheduler is stepped with validation accuracy.
    # The deprecated `verbose` argument is intentionally not passed; learning
    # rate changes are reported explicitly inside the loop instead.
    scheduler = ReduceLROnPlateau(optimizer, mode='max', factor=0.5, patience=3)

    train_losses, val_losses = [], []
    train_accs, val_accs = [], []
    best_val_acc = 0.0
    best_epoch = 0

    # Early-stopping state: give up after `patience` epochs without a new best.
    patience = 7
    epochs_no_improve = 0

    print(f"[Training] Memulai training untuk {config.EPOCHS} epochs...")
    print(f"[Training] Device: {config.DEVICE}")
    print(f"[Training] Learning Rate: {config.LEARNING_RATE}")
    print(f"[Training] Batch Size: {config.BATCH_SIZE}")

    start_time = time.time()

    for epoch in range(config.EPOCHS):
        print(f"\n[Epoch {epoch+1}/{config.EPOCHS}]")

        train_loss, train_acc = train_step(
            model=model,
            dataloader=train_loader,
            loss_fn=loss_fn,
            optimizer=optimizer,
            device=config.DEVICE
        )

        val_loss, val_acc = val_step(
            model=model,
            dataloader=val_loader,
            loss_fn=loss_fn,
            device=config.DEVICE
        )

        # Step the plateau scheduler and report any learning-rate reduction.
        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(val_acc)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after != lr_before:
            print(f"[Scheduler] Learning rate diturunkan: {lr_before:.2e} -> {lr_after:.2e}")

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        train_accs.append(train_acc)
        val_accs.append(val_acc)

        writer.add_scalar(f'{model_name_key}/Train/Loss', train_loss, epoch)
        writer.add_scalar(f'{model_name_key}/Train/Accuracy', train_acc, epoch)
        writer.add_scalar(f'{model_name_key}/Val/Loss', val_loss, epoch)
        writer.add_scalar(f'{model_name_key}/Val/Accuracy', val_acc, epoch)

        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_epoch = epoch + 1
            epochs_no_improve = 0

            # Persist the actual weights of the best epoch so the checkpoint
            # can be reloaded later (an empty payload would be useless).
            model_path = model_dir / f"{model_name_key}_best.pth"
            _save_checkpoint(model_path, model, optimizer, best_epoch,
                             val_acc, model_name, num_classes)
            print(f"[Save] Model terbaik disimpan di: {model_path}")
        else:
            epochs_no_improve += 1

        print(f"  Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}")
        print(f"  Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")
        print(f"  Best Val Acc: {best_val_acc:.4f} (Epoch {best_epoch})")

        if epochs_no_improve >= patience:
            print(f"\n[Info] Early stopping! Tidak ada kemajuan selama {patience} epoch.")
            print(f"[Info] Model terbaik ada di Epoch {best_epoch} dengan Val Acc: {best_val_acc:.4f}")
            break

    end_time = time.time()
    training_time = end_time - start_time

    print(f"\n[Training] Selesai dalam {training_time:.2f} detik")
    print(f"[Training] Best Validation Accuracy: {best_val_acc:.4f} (Epoch {best_epoch})")

    # Derive these from the recorded history: early stopping may end the run
    # before config.EPOCHS, and with zero epochs no validation ever happened.
    epochs_run = len(train_losses)
    final_val_acc = val_accs[-1] if val_accs else 0.0

    final_model_path = model_dir / f"{model_name_key}_final.pth"
    _save_checkpoint(final_model_path, model, optimizer, epochs_run,
                     final_val_acc, model_name, num_classes)

    return {
        'model_name': model_name_key,
        'best_val_acc': best_val_acc,
        'best_epoch': best_epoch,
        'final_val_acc': final_val_acc,
        'training_time': training_time,
        'train_losses': train_losses,
        'val_losses': val_losses,
        'train_accs': train_accs,
        'val_accs': val_accs
    }
|
|
|
|
|
|
def main():
    """
    Entry point: train every model listed in ``config.MODEL_LIST``.

    Sets up the experiment directory and TensorBoard writer, builds the data
    loaders, trains each recognised model in turn (a failure in one model is
    reported and the next model is tried), stores per-model results, and
    finally writes ``training_summary.json`` with all results and the best
    model. The TensorBoard writer is closed on every exit path.
    """
    print("="*80)
    print("BATIK VISION PROJECT - TRAINING SCRIPT")
    print("="*80)

    experiment_name = "batik_classification"
    writer, experiment_dir, model_dir = setup_experiment_logging(experiment_name)

    # try/finally guarantees the writer is flushed and closed even when data
    # loading fails (early return) or an unexpected error propagates.
    try:
        print("\n[Data] Membuat data loaders...")
        try:
            train_loader, val_loader, class_names = create_dataloaders()
            num_classes = len(class_names)
            print(f"[Data] Berhasil! {num_classes} kelas ditemukan.")
            print(f"[Data] Kelas: {class_names}")
        except Exception as e:
            print(f"[Error] Gagal membuat data loaders: {e}")
            return

        # Maps the short keys used in config.MODEL_LIST to the model
        # identifiers handed to create_model.
        model_mapping = {
            "vit": "vit_base_patch16_224",
            "swin_transformer": "swin_base_patch4_window7_224",
            "convnext_tiny": "convnext_tiny"
        }

        all_results = []

        for model_name_key in config.MODEL_LIST:
            if model_name_key not in model_mapping:
                print(f"[Warning] Model '{model_name_key}' tidak dikenali. Dilewati.")
                continue

            model_name = model_mapping[model_name_key]

            # Top-level boundary: one failing model must not abort the rest.
            try:
                result = train_model(
                    model_name_key=model_name_key,
                    model_name=model_name,
                    num_classes=num_classes,
                    train_loader=train_loader,
                    val_loader=val_loader,
                    writer=writer,
                    model_dir=model_dir
                )

                if result:
                    all_results.append(result)

                    save_training_results(
                        experiment_dir=experiment_dir,
                        model_name=model_name_key,
                        train_losses=result['train_losses'],
                        val_losses=result['val_losses'],
                        train_accs=result['train_accs'],
                        val_accs=result['val_accs'],
                        best_val_acc=result['best_val_acc'],
                        best_epoch=result['best_epoch']
                    )

            except Exception as e:
                print(f"[Error] Gagal training model {model_name_key}: {e}")
                continue

        if all_results:
            summary = {
                "experiment_name": experiment_name,
                "timestamp": datetime.now().isoformat(),
                "total_models": len(all_results),
                "results": all_results,
                "best_model": max(all_results, key=lambda x: x['best_val_acc'])
            }

            summary_file = experiment_dir / "training_summary.json"
            with open(summary_file, 'w') as f:
                json.dump(summary, f, indent=2)

            print(f"\n{'='*60}")
            print("RINGKASAN HASIL TRAINING")
            print(f"{'='*60}")

            for result in all_results:
                print(f"{result['model_name']:15} | Best Val Acc: {result['best_val_acc']:.4f} | "
                      f"Final Val Acc: {result['final_val_acc']:.4f} | "
                      f"Time: {result['training_time']:.1f}s")

            best_model = summary['best_model']
            print(f"\nModel terbaik: {best_model['model_name']} dengan akurasi {best_model['best_val_acc']:.4f}")
            print(f"Ringkasan lengkap disimpan di: {summary_file}")
    finally:
        writer.close()

    print(f"\n[Complete] Training selesai! Hasil disimpan di: {experiment_dir}")
|
|
|
|
|
|
# Run the full training pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|
|
|