# pipelines/single_gpu_eval.py
import torch
import os
from pathlib import Path
from typing import Any, Dict

from rich.console import Console

console = Console()


def _prepare_model_for_single_gpu_eval(model, config: Dict[str, Any]) -> torch.nn.Module:
    """
    Prepare a model for single-GPU evaluation by removing multi-GPU wrappers
    and ensuring every component sits on one device (cuda:0, i.e. the first
    visible GPU, when CUDA is available).

    This fixes the cuda:0 vs cuda:1 device-mismatch issue by moving the model
    entirely onto one device before evaluation.

    Args:
        model: The trained model (may be wrapped with DataParallel/DDP;
            FSDP-sharded models must be unsharded before calling this).
        config: Configuration dictionary.

    Returns:
        Clean model on a single GPU (cuda:0 if available, otherwise CPU).
    """
console.print("[blue]🔄 Preparing model for evaluation on single GPU...[/blue]")
    # Determine the target device. Under CUDA_VISIBLE_DEVICES, PyTorch
    # re-indexes the visible GPUs from 0, so cuda:0 is always the first
    # visible device.
    if torch.cuda.is_available():
        visible_devices = os.environ.get("CUDA_VISIBLE_DEVICES", "all")
        target_device = "cuda:0"
        console.print(f"[blue]  🎯 Target device: {target_device} (CUDA_VISIBLE_DEVICES={visible_devices})[/blue]")
    else:
        target_device = "cpu"
        console.print("[blue]  🎯 Target device: cpu (no CUDA available)[/blue]")
    # Keep a handle on the incoming model so the except branch below can fall
    # back to it if preparation fails.
    original_model = model
    try:
        # Step 1: Remove any multi-GPU wrappers. DataParallel and
        # DistributedDataParallel expose the wrapped model via .module;
        # unwrap repeatedly in case of nested wrappers.
        while hasattr(model, "module"):
            model = model.module
            console.print("[blue]  ✅ Removed DDP/DataParallel wrapper[/blue]")
# Step 2: Move model to CPU first to clear any device state
model = model.cpu()
console.print("[blue] ✅ Model moved to CPU[/blue]")
# Step 3: Clear any cached states or buffers
if hasattr(model, 'clear_cache'):
model.clear_cache()
console.print("[blue] ✅ Model cache cleared[/blue]")
# Step 4: Move to target device
model = model.to(target_device)
console.print(f"[blue] ✅ Model moved to {target_device}[/blue]")
# Step 5: Verify all components are on the same device
_verify_model_device_consistency(model, target_device)
# Step 6: Re-attach LoRA adapters if needed
if config.get("training_recipe", "").lower() in ["lora", "qlora"]:
model = _reattach_lora_adapters(model, target_device, config)
console.print(f"[green]✅ Model prepared for single GPU evaluation on {target_device}[/green]")
return model
except Exception as e:
console.print(f"[red]❌ Failed to prepare model for single GPU evaluation: {e}[/red]")
# Fallback: return original model
console.print("[yellow]⚠️ Falling back to original model[/yellow]")
return original_model
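

# Hedged usage sketch (illustrative, not part of the original pipeline): how a
# caller might hand a freshly trained model to the helper above. The tiny
# DataParallel-wrapped Linear is an assumption standing in for a real
# checkpoint; "full" as the training recipe skips LoRA re-attachment.
def _example_prepare_for_eval() -> torch.nn.Module:
    demo_model = torch.nn.DataParallel(torch.nn.Linear(8, 8))
    demo_config: Dict[str, Any] = {"training_recipe": "full", "output_dir": "runs/humigence"}
    # Unwraps DataParallel and lands every parameter on cuda:0 (or CPU).
    return _prepare_model_for_single_gpu_eval(demo_model, demo_config)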


def _verify_model_device_consistency(model: torch.nn.Module, target_device: str) -> None:
    """
    Verify that every parameter and buffer of the model is on the target device.

    Args:
        model: The model to verify.
        target_device: Expected device (e.g., "cuda:0", "cpu").

    Raises:
        RuntimeError: If any parameter or buffer is on a different device.
    """
    expected_device = torch.device(target_device)
    issues = []
    # Check parameters
    for name, param in model.named_parameters():
        if param.device != expected_device:
            issues.append(f"Parameter {name} on {param.device}, expected {expected_device}")
    # Check buffers
    for name, buffer in model.named_buffers():
        if buffer.device != expected_device:
            issues.append(f"Buffer {name} on {buffer.device}, expected {expected_device}")
    if issues:
        console.print("[red]❌ Device consistency issues found:[/red]")
        for issue in issues:
            console.print(f"[red]  - {issue}[/red]")
        raise RuntimeError(f"Model device consistency issues: {issues}")
    console.print(f"[green]  ✅ All model components on {expected_device}[/green]")


def _reattach_lora_adapters(model: torch.nn.Module, target_device: str, config: Dict[str, Any]) -> torch.nn.Module:
    """
    Re-attach LoRA adapters if they were used during training.

    Args:
        model: The base model.
        target_device: Target device for the model.
        config: Configuration dictionary.

    Returns:
        Model with LoRA adapters re-attached, or the model unchanged if no
        adapters are found or PEFT is unavailable.
    """
try:
# Check if LoRA adapters exist
adapter_path = Path(config.get("output_dir", "runs/humigence")) / "final_model"
if adapter_path.exists() and (adapter_path / "adapter_config.json").exists():
console.print("[blue] 🔧 Re-attaching LoRA adapters...[/blue]")
            # Import PEFT lazily so evaluation still works when it is not installed
            try:
                from peft import PeftModel
# Load the model with LoRA adapters
model = PeftModel.from_pretrained(model, str(adapter_path))
model = model.to(target_device)
console.print("[blue] ✅ LoRA adapters re-attached[/blue]")
except ImportError:
console.print("[yellow] ⚠️ PEFT not available, skipping LoRA re-attachment[/yellow]")
except Exception as e:
console.print(f"[yellow] ⚠️ Failed to re-attach LoRA adapters: {e}[/yellow]")
return model
except Exception as e:
console.print(f"[yellow] ⚠️ LoRA re-attachment failed: {e}[/yellow]")
return model
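

# Expected on-disk layout for the re-attachment above. This is an assumption
# inferred from the path construction in _reattach_lora_adapters, not a
# verified contract of the training pipeline:
#
#   <output_dir>/final_model/
#       adapter_config.json           # presence of this file gates re-attachment
#       adapter_model.safetensors     # adapter weights saved by PEFT
#                                     # (adapter_model.bin on older PEFT versions)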


def _move_batch_to_device(batch: Dict[str, Any], target_device: str) -> Dict[str, Any]:
    """
    Move all tensors in a batch to the target device.

    Non-tensor values (anything without a .to() method) pass through unchanged,
    so the type hint is Dict[str, Any] rather than Dict[str, torch.Tensor].

    Args:
        batch: Dictionary of tensors (and possibly other values).
        target_device: Target device (e.g., "cuda:0", "cpu").

    Returns:
        Batch with all tensors on the target device.
    """
    device = torch.device(target_device)
    moved_batch = {}
    for key, value in batch.items():
        if hasattr(value, "to"):
            moved_batch[key] = value.to(device)
        else:
            moved_batch[key] = value
    return moved_batch
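

# Hedged sketch (illustrative): moving a tokenizer-style batch onto the eval
# device. The keys mirror a typical Hugging Face collator output and are an
# assumption, not a contract of this module.
def _example_move_batch() -> None:
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    batch = {
        "input_ids": torch.randint(0, 100, (2, 8)),
        "attention_mask": torch.ones(2, 8, dtype=torch.long),
        "source": ["train", "eval"],  # non-tensor value, passes through unchanged
    }
    moved = _move_batch_to_device(batch, device)
    assert moved["input_ids"].device == torch.device(device)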


def _move_tensors_to_cpu(*tensors) -> tuple:
    """
    Detach all tensors and move them to CPU.

    Non-tensor arguments (anything without a .detach() method) pass through
    unchanged.

    Args:
        *tensors: Variable number of tensors.

    Returns:
        Tuple of detached CPU tensors.
    """
cpu_tensors = []
for tensor in tensors:
if hasattr(tensor, "detach"):
cpu_tensors.append(tensor.detach().cpu())
else:
cpu_tensors.append(tensor)
return tuple(cpu_tensors)
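

# Hedged end-to-end sketch, guarded so importing this module stays side-effect
# free. The tiny Linear "model" and the random batch are assumptions standing
# in for a real checkpoint and dataloader.
if __name__ == "__main__":
    eval_model = _prepare_model_for_single_gpu_eval(
        torch.nn.Linear(8, 8), {"training_recipe": "full", "output_dir": "runs/humigence"}
    )
    eval_model.eval()
    device = str(next(eval_model.parameters()).device)
    demo_batch = _move_batch_to_device({"x": torch.randn(2, 8)}, device)
    with torch.no_grad():
        logits = eval_model(demo_batch["x"])
    (logits_cpu,) = _move_tensors_to_cpu(logits)
    console.print(f"demo output on {logits_cpu.device}, shape {tuple(logits_cpu.shape)}")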