Upload optuna/optuna_optimisation.py with huggingface_hub
optuna/optuna_optimisation.py
ADDED (+295, -0)
@@ -0,0 +1,295 @@
#!/usr/bin/env python3
"""
Optuna hyperparameter optimization for the main CLIP model.
This script uses Optuna to find the best hyperparameters to reduce overfitting.
"""

import os
import sys

# Add parent directory to path to import modules
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

os.environ["TOKENIZERS_PARALLELISM"] = "false"

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, random_split
from transformers import CLIPModel as CLIPModel_transformers, CLIPProcessor
import optuna
from optuna.trial import TrialState
import warnings
import config
from main_model import (
    CustomDataset,
    load_models,
    train_one_epoch_enhanced,
    valid_one_epoch,
)

warnings.filterwarnings("ignore")

# Global variables for data (to avoid reloading for each trial)
TRAIN_LOADER = None
VAL_LOADER = None
FEATURE_MODELS = None
DEVICE = None


def prepare_data(subset_size=5000, batch_size=32):
    """
    Prepare data loaders for optimization.
    Use a smaller subset for faster trials.
    """
    print("\nLoading data...")
    df = pd.read_csv(config.local_dataset_path)
    df_clean = df.dropna(subset=[config.column_local_image_path])
    print(f"  Total samples: {len(df_clean)}")

    # Create dataset
    dataset = CustomDataset(df_clean)

    # Create smaller subset for optimization
    subset_size = min(subset_size, len(dataset))
    train_size = int(0.8 * subset_size)
    val_size = subset_size - train_size

    np.random.seed(42)
    subset_indices = np.random.choice(len(dataset), subset_size, replace=False)
    subset_dataset = torch.utils.data.Subset(dataset, subset_indices)

    train_dataset, val_dataset = random_split(
        subset_dataset,
        [train_size, val_size],
        generator=torch.Generator().manual_seed(42)
    )

    # Create data loaders
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=2,
        pin_memory=torch.cuda.is_available()
    )
    val_loader = DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=2,
        pin_memory=torch.cuda.is_available()
    )

    print(f"  Train: {len(train_dataset)} samples")
    print(f"  Val: {len(val_dataset)} samples")

    return train_loader, val_loader


def objective(trial):
    """
    Objective function for Optuna optimization.
    Returns validation loss to minimize.
    """
    global TRAIN_LOADER, VAL_LOADER, FEATURE_MODELS, DEVICE

    # Suggest hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-6, 5e-5, log=True)
    temperature = trial.suggest_float("temperature", 0.05, 0.15)
    alignment_weight = trial.suggest_float("alignment_weight", 0.1, 0.6)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 5e-4, log=True)

    print(f"\n{'=' * 80}")
    print(f"Trial {trial.number}")
    print(f"  LR: {learning_rate:.2e}, Temp: {temperature:.4f}")
    print(f"  Align weight: {alignment_weight:.3f}, Weight decay: {weight_decay:.2e}")
    print("=" * 80)

    # Create a fresh model for this trial
    clip_model = CLIPModel_transformers.from_pretrained(
        'laion/CLIP-ViT-B-32-laion2B-s34B-b79K'
    ).to(DEVICE)

    # Optimizer with weight decay for regularization
    optimizer = torch.optim.AdamW(
        clip_model.parameters(),
        lr=learning_rate,
        weight_decay=weight_decay
    )

    # Create processor
    processor = CLIPProcessor.from_pretrained('laion/CLIP-ViT-B-32-laion2B-s34B-b79K')

    # Train for a few epochs (reduced for faster optimization)
    num_epochs = 5
    best_val_loss = float('inf')
    patience_counter = 0
    patience = 2

    for epoch in range(num_epochs):
        # Training
        color_model = FEATURE_MODELS[config.color_column]
        hierarchy_model = FEATURE_MODELS[config.hierarchy_column]

        train_loss, metrics = train_one_epoch_enhanced(
            clip_model, TRAIN_LOADER, optimizer, FEATURE_MODELS,
            color_model, hierarchy_model, DEVICE, processor,
            temperature=temperature, alignment_weight=alignment_weight
        )

        # Validation
        val_loss = valid_one_epoch(
            clip_model, VAL_LOADER, FEATURE_MODELS, DEVICE, processor,
            temperature=temperature, alignment_weight=alignment_weight
        )

        print(f"  Epoch {epoch + 1}/{num_epochs} - Train: {train_loss:.4f}, Val: {val_loss:.4f}")

        # Track best validation loss
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1

        # Early stopping within trial
        if patience_counter >= patience:
            print(f"  Early stopping at epoch {epoch + 1}")
            break

        # Report intermediate value for pruning
        trial.report(val_loss, epoch)

        # Handle pruning based on intermediate value
        if trial.should_prune():
            print(f"  Trial pruned at epoch {epoch + 1}")
            raise optuna.TrialPruned()

    # Clean up memory
    del clip_model, optimizer, processor
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return best_val_loss


def main():
    """
    Main function to run Optuna optimization.
    """
    global TRAIN_LOADER, VAL_LOADER, FEATURE_MODELS, DEVICE

    print("=" * 80)
    print("Optuna Hyperparameter Optimization")
    print("=" * 80)

    # Set device
    DEVICE = config.device
    print(f"\nDevice: {DEVICE}")

    # Load feature models once
    print("\nLoading feature models...")
    FEATURE_MODELS = load_models()

    # Prepare data once (use a smaller subset for faster optimization)
    TRAIN_LOADER, VAL_LOADER = prepare_data(subset_size=5000, batch_size=32)

    # Create Optuna study
    print("\nCreating Optuna study...")
    study = optuna.create_study(
        direction="minimize",
        pruner=optuna.pruners.MedianPruner(n_startup_trials=5, n_warmup_steps=2),
        study_name="clip_hyperparameter_optimization"
    )

    # Run optimization
    print("\nStarting optimization...")
    print("  Running 30 trials (this may take a while)...\n")

    study.optimize(
        objective,
        n_trials=30,
        timeout=None,
        catch=(Exception,),
        show_progress_bar=True
    )

    # Print results
    print("\n" + "=" * 80)
    print("Optimization Complete!")
    print("=" * 80)

    print("\nBest trial:")
    trial = study.best_trial
    print(f"  Value (Val Loss): {trial.value:.4f}")
    print("\n  Best hyperparameters:")
    for key, value in trial.params.items():
        if 'learning_rate' in key or 'weight_decay' in key:
            print(f"    {key}: {value:.2e}")
        else:
            print(f"    {key}: {value:.4f}")

    # Save results in the parent directory
    parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    results_file = os.path.join(parent_dir, "optuna_results.txt")
    with open(results_file, 'w') as f:
        f.write("=" * 80 + "\n")
        f.write("Optuna Hyperparameter Optimization Results\n")
        f.write("=" * 80 + "\n\n")
        f.write(f"Best trial value (validation loss): {trial.value:.4f}\n\n")
        f.write("Best hyperparameters:\n")
        for key, value in trial.params.items():
            if 'learning_rate' in key or 'weight_decay' in key:
                f.write(f"  {key}: {value:.2e}\n")
            else:
                f.write(f"  {key}: {value:.4f}\n")
        f.write("\n" + "=" * 80 + "\n")
        f.write("All trials:\n")
        f.write("=" * 80 + "\n\n")

        df_results = study.trials_dataframe()
        f.write(df_results.to_string())

    print(f"\nResults saved to: {results_file}")

    # Save the study for later analysis
    import pickle
    study_file = os.path.join(parent_dir, 'optuna_study.pkl')
    with open(study_file, 'wb') as f:
        pickle.dump(study, f)
    print(f"Study object saved to: {study_file}")

    # Print pruned-trial statistics
    pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
    complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

    print("\nStatistics:")
    print(f"  Number of finished trials: {len(study.trials)}")
    print(f"  Number of pruned trials: {len(pruned_trials)}")
    print(f"  Number of complete trials: {len(complete_trials)}")

    # Visualization (optional; requires plotly, plus kaleido for image export)
    try:
        from optuna.visualization import plot_optimization_history, plot_param_importances

        # Plot optimization history
        fig1 = plot_optimization_history(study)
        history_file = os.path.join(parent_dir, "optuna_optimization_history.png")
        fig1.write_image(history_file)
        print(f"Optimization history saved to: {history_file}")

        # Plot parameter importances
        fig2 = plot_param_importances(study)
        importance_file = os.path.join(parent_dir, "optuna_param_importances.png")
        fig2.write_image(importance_file)
        print(f"Parameter importances saved to: {importance_file}")
    except Exception as e:
        print(f"\nWarning: Visualization skipped: {e}")
        print("  Install plotly and kaleido for visualizations: pip install plotly kaleido")

    print("\n" + "=" * 80)
    print("Done! Update your config with the best hyperparameters.")
    print("=" * 80)


if __name__ == "__main__":
    main()
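
For later analysis, the pickled study can be reloaded in a separate session; an in-memory Optuna study survives a pickle round-trip. A minimal sketch follows — the file path matches what the script writes to the repository root, but the config attribute names in the trailing comments are hypothetical:

    import pickle

    # Reload the study saved by optuna_optimisation.py.
    with open("optuna_study.pkl", "rb") as f:
        study = pickle.load(f)

    print(study.best_trial.value)  # best validation loss across completed trials
    print(study.best_params)       # dict with learning_rate, temperature,
                                   # alignment_weight, weight_decay

    # The final step is manual, as the script's closing message says: copy the
    # best values into config before the next full training run, e.g.
    # (attribute names hypothetical):
    #   config.learning_rate = study.best_params["learning_rate"]
    #   config.temperature = study.best_params["temperature"]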