Spaces:

CircleStar
/

Image_Classification

Sleeping

App Files Files Community

CircleStar committed 12 days ago

Commit

14b719f

verified ·

1 Parent(s): 63e305e

Update train_utils.py

Browse files

Files changed (1) hide show

train_utils.py +130 -35

train_utils.py CHANGED Viewed

@@ -8,9 +8,10 @@ import torch
 import torch.nn as nn
 import torch.optim as optim
-from config import MODEL_DIR, META_DIR
-from model import SimpleCNN
 from data_utils import make_loaders
 def model_weight_path(model_name: str) -> str:
@@ -29,6 +30,10 @@ def list_saved_models() -> List[str]:
     return sorted(names, reverse=True)
 def save_model(model: nn.Module, model_name: str, config: dict, training_summary: dict):
     cpu_state_dict = {k: v.detach().cpu() for k, v in model.state_dict().items()}
     torch.save(cpu_state_dict, model_weight_path(model_name))
@@ -49,22 +54,20 @@ def load_model(model_name: str, device: torch.device) -> Tuple[nn.Module, dict]:
     weight_file = model_weight_path(model_name)
     if not os.path.exists(meta_file):
-        raise FileNotFoundError(f"Metadata not found for model: {model_name}")
     if not os.path.exists(weight_file):
-        raise FileNotFoundError(f"Weights not found for model: {model_name}")
     with open(meta_file, "r", encoding="utf-8") as f:
         meta = json.load(f)
     cfg = meta["config"]
-    model = SimpleCNN(
         num_classes=cfg["num_classes"],
-        conv1_channels=cfg["conv1_channels"],
-        conv2_channels=cfg["conv2_channels"],
-        kernel_size=cfg["kernel_size"],
         dropout=cfg["dropout"],
         fc_dim=cfg["fc_dim"],
     )
     state_dict = torch.load(weight_file, map_location="cpu")
@@ -75,12 +78,9 @@ def load_model(model_name: str, device: torch.device) -> Tuple[nn.Module, dict]:
     return model, meta
-def get_runtime_device() -> torch.device:
-    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
-def evaluate(model, loader, criterion, device):
     model.eval()
     total_loss = 0.0
     total = 0
     correct = 0
@@ -94,21 +94,42 @@ def evaluate(model, loader, criterion, device):
             total_loss += loss.item() * images.size(0)
             preds = outputs.argmax(dim=1)
             correct += (preds == labels).sum().item()
             total += labels.size(0)
-    return total_loss / total if total else 0.0, correct / total if total else 0.0
 def train_model(
-    conv1_channels: int,
-    conv2_channels: int,
-    kernel_size: int,
     dropout: float,
     fc_dim: int,
     learning_rate: float,
     batch_size: int,
     epochs: int,
     model_tag: str,
 ):
     device = get_runtime_device()
@@ -116,24 +137,33 @@ def train_model(
     train_loader, val_loader, test_loader, class_names = make_loaders(batch_size)
     num_classes = len(class_names)
-    model = SimpleCNN(
         num_classes=num_classes,
-        conv1_channels=conv1_channels,
-        conv2_channels=conv2_channels,
-        kernel_size=kernel_size,
         dropout=dropout,
         fc_dim=fc_dim,
     ).to(device)
     criterion = nn.CrossEntropyLoss()
-    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
     history = []
     logs = []
     start_time = time.time()
     for epoch in range(1, epochs + 1):
         model.train()
         running_loss = 0.0
         total = 0
         correct = 0
@@ -143,18 +173,28 @@ def train_model(
             optimizer.zero_grad()
             outputs = model(images)
             loss = criterion(outputs, labels)
             loss.backward()
             optimizer.step()
             running_loss += loss.item() * images.size(0)
             preds = outputs.argmax(dim=1)
             correct += (preds == labels).sum().item()
             total += labels.size(0)
         train_loss = running_loss / total if total else 0.0
         train_acc = correct / total if total else 0.0
-        val_loss, val_acc = evaluate(model, val_loader, criterion, device)
         row = {
             "epoch": epoch,
@@ -163,6 +203,7 @@ def train_model(
             "val_loss": round(val_loss, 4),
             "val_acc": round(val_acc, 4),
         }
         history.append(row)
         logs.append(
@@ -171,36 +212,49 @@ def train_model(
             f"perte validation={val_loss:.4f}, précision validation={val_acc:.4f}"
         )
-    test_loss, test_acc = evaluate(model, test_loader, criterion, device)
     elapsed = time.time() - start_time
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-    safe_tag = model_tag.strip().replace(" ", "_") if model_tag.strip() else "charcoal"
     model_name = f"{safe_tag}_{timestamp}"
     config = {
-        "dataset_name": "Charbons de bois microscopiques",
         "num_classes": num_classes,
         "class_names": class_names,
-        "conv1_channels": conv1_channels,
-        "conv2_channels": conv2_channels,
-        "kernel_size": kernel_size,
         "dropout": dropout,
         "fc_dim": fc_dim,
         "learning_rate": learning_rate,
         "batch_size": batch_size,
         "epochs": epochs,
     }
     training_summary = {
         "final_train_loss": history[-1]["train_loss"] if history else None,
         "final_train_acc": history[-1]["train_acc"] if history else None,
-        "final_val_loss": history[-1]["val_loss"] if history else None,
         "final_val_acc": history[-1]["val_acc"] if history else None,
-        "test_loss": round(test_loss, 4),
-        "test_acc": round(test_acc, 4),
         "elapsed_seconds": round(elapsed, 2),
         "device": str(device),
     }
     save_model(model, model_name, config, training_summary)
@@ -209,8 +263,49 @@ def train_model(
     logs.append("Entraînement terminé.")
     logs.append(f"Modèle sauvegardé : {model_name}")
     logs.append(f"Appareil utilisé : {device}")
-    logs.append(f"Perte test : {test_loss:.4f}")
-    logs.append(f"Précision test : {test_acc:.4f}")
     logs.append(f"Temps écoulé : {elapsed:.1f}s")
-    return "\n".join(logs), history, training_summary, model_name

 import torch.nn as nn
 import torch.optim as optim
+from config import MODEL_DIR, META_DIR, DATASET_DISPLAY_NAME
 from data_utils import make_loaders
+from metrics_utils import compute_classification_metrics, save_confusion_matrix_figure
+from model import ResNet18Classifier
 def model_weight_path(model_name: str) -> str:
     return sorted(names, reverse=True)
+def get_runtime_device() -> torch.device:
+    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
 def save_model(model: nn.Module, model_name: str, config: dict, training_summary: dict):
     cpu_state_dict = {k: v.detach().cpu() for k, v in model.state_dict().items()}
     torch.save(cpu_state_dict, model_weight_path(model_name))
     weight_file = model_weight_path(model_name)
     if not os.path.exists(meta_file):
+        raise FileNotFoundError(f"Métadonnées introuvables pour le modèle : {model_name}")
     if not os.path.exists(weight_file):
+        raise FileNotFoundError(f"Poids introuvables pour le modèle : {model_name}")
     with open(meta_file, "r", encoding="utf-8") as f:
         meta = json.load(f)
     cfg = meta["config"]
+    model = ResNet18Classifier(
         num_classes=cfg["num_classes"],
         dropout=cfg["dropout"],
         fc_dim=cfg["fc_dim"],
+        freeze_backbone=cfg.get("freeze_backbone", True),
     )
     state_dict = torch.load(weight_file, map_location="cpu")
     return model, meta
+def evaluate_loss_acc(model, loader, criterion, device):
     model.eval()
     total_loss = 0.0
     total = 0
     correct = 0
             total_loss += loss.item() * images.size(0)
             preds = outputs.argmax(dim=1)
             correct += (preds == labels).sum().item()
             total += labels.size(0)
+    avg_loss = total_loss / total if total else 0.0
+    acc = correct / total if total else 0.0
+    return avg_loss, acc
+def collect_predictions(model, loader, device):
+    model.eval()
+    y_true = []
+    y_pred = []
+    with torch.no_grad():
+        for images, labels in loader:
+            images = images.to(device)
+            outputs = model(images)
+            preds = outputs.argmax(dim=1).detach().cpu().tolist()
+            y_pred.extend(preds)
+            y_true.extend(labels.tolist())
+    return y_true, y_pred
 def train_model(
     dropout: float,
     fc_dim: int,
     learning_rate: float,
+    weight_decay: float,
     batch_size: int,
     epochs: int,
+    freeze_backbone: bool,
     model_tag: str,
 ):
     device = get_runtime_device()
     train_loader, val_loader, test_loader, class_names = make_loaders(batch_size)
     num_classes = len(class_names)
+    model = ResNet18Classifier(
         num_classes=num_classes,
         dropout=dropout,
         fc_dim=fc_dim,
+        freeze_backbone=freeze_backbone,
     ).to(device)
+    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
+    total_params = sum(p.numel() for p in model.parameters())
     criterion = nn.CrossEntropyLoss()
+    optimizer = optim.AdamW(
+        filter(lambda p: p.requires_grad, model.parameters()),
+        lr=learning_rate,
+        weight_decay=weight_decay,
+    )
     history = []
     logs = []
     start_time = time.time()
+    best_val_loss = float("inf")
+    best_state_dict = None
     for epoch in range(1, epochs + 1):
         model.train()
         running_loss = 0.0
         total = 0
         correct = 0
             optimizer.zero_grad()
             outputs = model(images)
             loss = criterion(outputs, labels)
             loss.backward()
             optimizer.step()
             running_loss += loss.item() * images.size(0)
             preds = outputs.argmax(dim=1)
             correct += (preds == labels).sum().item()
             total += labels.size(0)
         train_loss = running_loss / total if total else 0.0
         train_acc = correct / total if total else 0.0
+        val_loss, val_acc = evaluate_loss_acc(model, val_loader, criterion, device)
+        if val_loss < best_val_loss:
+            best_val_loss = val_loss
+            best_state_dict = {
+                k: v.detach().cpu().clone()
+                for k, v in model.state_dict().items()
+            }
         row = {
             "epoch": epoch,
             "val_loss": round(val_loss, 4),
             "val_acc": round(val_acc, 4),
         }
         history.append(row)
         logs.append(
             f"perte validation={val_loss:.4f}, précision validation={val_acc:.4f}"
         )
+    if best_state_dict is not None:
+        model.load_state_dict(best_state_dict)
+    test_loss, test_acc = evaluate_loss_acc(model, test_loader, criterion, device)
+    y_true, y_pred = collect_predictions(model, test_loader, device)
+    metrics = compute_classification_metrics(y_true, y_pred, class_names)
     elapsed = time.time() - start_time
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+    safe_tag = model_tag.strip().replace(" ", "_") if model_tag.strip() else "charcoal_resnet18"
     model_name = f"{safe_tag}_{timestamp}"
+    cm_path = save_confusion_matrix_figure(metrics["confusion_matrix"], model_name)
     config = {
+        "dataset_name": DATASET_DISPLAY_NAME,
+        "architecture": "ResNet18 pretrained + classifier head",
         "num_classes": num_classes,
         "class_names": class_names,
         "dropout": dropout,
         "fc_dim": fc_dim,
         "learning_rate": learning_rate,
+        "weight_decay": weight_decay,
         "batch_size": batch_size,
         "epochs": epochs,
+        "freeze_backbone": freeze_backbone,
     }
     training_summary = {
         "final_train_loss": history[-1]["train_loss"] if history else None,
         "final_train_acc": history[-1]["train_acc"] if history else None,
+        "best_val_loss": round(best_val_loss, 4),
         "final_val_acc": history[-1]["val_acc"] if history else None,
+        "test_cross_entropy_loss": round(test_loss, 4),
+        "test_accuracy": round(test_acc, 4),
+        "test_f1_macro": metrics["f1_macro"],
+        "test_f1_weighted": metrics["f1_weighted"],
         "elapsed_seconds": round(elapsed, 2),
         "device": str(device),
+        "total_params": total_params,
+        "trainable_params": trainable_params,
     }
     save_model(model, model_name, config, training_summary)
     logs.append("Entraînement terminé.")
     logs.append(f"Modèle sauvegardé : {model_name}")
     logs.append(f"Appareil utilisé : {device}")
+    logs.append(f"Nombre total de paramètres : {total_params}")
+    logs.append(f"Paramètres entraînables : {trainable_params}")
+    logs.append(f"Perte test cross-entropy : {test_loss:.4f}")
+    logs.append(f"Accuracy test : {test_acc:.4f}")
+    logs.append(f"F1 macro test : {metrics['f1_macro']:.4f}")
+    logs.append(f"F1 pondéré test : {metrics['f1_weighted']:.4f}")
     logs.append(f"Temps écoulé : {elapsed:.1f}s")
+    return {
+        "logs": "\n".join(logs),
+        "history": history,
+        "summary": training_summary,
+        "model_name": model_name,
+        "classification_report": metrics["classification_report"],
+        "confusion_matrix": metrics["confusion_matrix"],
+        "confusion_matrix_path": cm_path,
+    }
+def evaluate_saved_model(model_name: str):
+    if not model_name:
+        raise ValueError("Aucun modèle sélectionné.")
+    device = get_runtime_device()
+    model, meta = load_model(model_name, device)
+    batch_size = int(meta["config"].get("batch_size", 32))
+    _, _, test_loader, class_names = make_loaders(batch_size)
+    criterion = nn.CrossEntropyLoss()
+    test_loss, test_acc = evaluate_loss_acc(model, test_loader, criterion, device)
+    y_true, y_pred = collect_predictions(model, test_loader, device)
+    metrics = compute_classification_metrics(y_true, y_pred, class_names)
+    cm_path = save_confusion_matrix_figure(metrics["confusion_matrix"], model_name)
+    summary = {
+        "test_cross_entropy_loss": round(test_loss, 4),
+        "test_accuracy": round(test_acc, 4),
+        "test_f1_macro": metrics["f1_macro"],
+        "test_f1_weighted": metrics["f1_weighted"],
+        "device": str(device),
+    }
+    return summary, metrics["classification_report"], metrics["confusion_matrix"], cm_path