"""Single-GPU evaluation helpers.

Utilities for unwrapping multi-GPU training wrappers (DDP/FSDP/DataParallel),
consolidating a model onto one device, and moving batches and tensors between
devices during evaluation.
"""

from pathlib import Path
from typing import Any, Dict

import torch
from rich.console import Console

console = Console()


def _prepare_model_for_single_gpu_eval(model, config: Dict[str, Any]) -> torch.nn.Module:
    """Prepare a model for single-GPU evaluation.

    Removes multi-GPU wrappers and ensures all components are on a single
    device (the first visible GPU). This fixes the cuda:0 vs cuda:1 device
    mismatch issue by ensuring the model is completely on one device before
    evaluation.

    Args:
        model: The trained model (may be wrapped with DDP/FSDP/DataParallel).
        config: Configuration dictionary.

    Returns:
        Clean model on a single GPU (cuda:0 if available, otherwise cpu).
    """
    console.print("[blue]🔄 Preparing model for evaluation on single GPU...[/blue]")

    if torch.cuda.is_available():
        # "cuda:0" is always the first *visible* device: CUDA_VISIBLE_DEVICES
        # remaps physical GPUs, and torch.cuda.is_available() already returns
        # False when it is set to "-1", so no extra environment check is needed.
        target_device = "cuda:0"
        console.print(f"[blue] 🎯 Target device: {target_device}[/blue]")
    else:
        target_device = "cpu"
        console.print("[blue] 🎯 Target device: cpu (no CUDA available)[/blue]")

    # Keep a handle to the original model so we can fall back on any failure.
    original_model = model
    try:
        # Unwrap DDP/DataParallel; both expose the underlying model as `.module`.
        if hasattr(model, "module"):
            model = model.module
            console.print("[blue] ✅ Removed DDP/DataParallel wrapper[/blue]")

        # Stage through CPU first so no parameters linger on other GPUs.
        model = model.cpu()
        console.print("[blue] ✅ Model moved to CPU[/blue]")

        # Drop any cached state (e.g., a kv-cache) the model may be holding.
        if hasattr(model, "clear_cache"):
            model.clear_cache()
            console.print("[blue] ✅ Model cache cleared[/blue]")

        model = model.to(target_device)
        console.print(f"[blue] ✅ Model moved to {target_device}[/blue]")

        _verify_model_device_consistency(model, target_device)

        # LoRA/QLoRA runs need their adapters re-attached after the move.
        if config.get("training_recipe", "").lower() in ["lora", "qlora"]:
            model = _reattach_lora_adapters(model, target_device, config)

        console.print(f"[green]✅ Model prepared for single GPU evaluation on {target_device}[/green]")
        return model

    except Exception as e:
        console.print(f"[red]❌ Failed to prepare model for single GPU evaluation: {e}[/red]")
        console.print("[yellow]⚠️ Falling back to original model[/yellow]")
        return original_model
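

# A minimal usage sketch for an evaluation loop; `trained_model`, `cfg`, and
# `eval_loader` are hypothetical names standing in for the caller's objects:
#
#     model = _prepare_model_for_single_gpu_eval(trained_model, cfg)
#     model.eval()
#     with torch.no_grad():
#         for batch in eval_loader:
#             batch = _move_batch_to_device(batch, "cuda:0")
#             outputs = model(**batch)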


def _verify_model_device_consistency(model: torch.nn.Module, target_device: str) -> None:
    """Verify that all model components are on the target device.

    Args:
        model: The model to verify.
        target_device: Expected device (e.g., "cuda:0", "cpu").

    Raises:
        RuntimeError: If any parameter or buffer is on another device.
    """
    target_device = torch.device(target_device)
    issues = []

    # Check parameters and buffers separately; buffers (e.g., batch-norm
    # running statistics) are the components most often left behind by
    # hand-rolled device moves.
    for name, param in model.named_parameters():
        if param.device != target_device:
            issues.append(f"Parameter {name} on {param.device}, expected {target_device}")

    for name, buffer in model.named_buffers():
        if buffer.device != target_device:
            issues.append(f"Buffer {name} on {buffer.device}, expected {target_device}")

    if issues:
        console.print("[red]❌ Device consistency issues found:[/red]")
        for issue in issues:
            console.print(f"[red] - {issue}[/red]")
        raise RuntimeError(f"Model device consistency issues: {issues}")
    else:
        console.print(f"[green] ✅ All model components on {target_device}[/green]")


def _reattach_lora_adapters(model: torch.nn.Module, target_device: str, config: Dict[str, Any]) -> torch.nn.Module:
    """Re-attach LoRA adapters if they were used during training.

    Args:
        model: The base model.
        target_device: Target device for the model.
        config: Configuration dictionary.

    Returns:
        Model with LoRA adapters re-attached.
    """
    try:
        adapter_path = Path(config.get("output_dir", "runs/humigence")) / "final_model"
        if adapter_path.exists() and (adapter_path / "adapter_config.json").exists():
            console.print("[blue] 🔧 Re-attaching LoRA adapters...[/blue]")

            try:
                from peft import PeftModel

                model = PeftModel.from_pretrained(model, str(adapter_path))
                model = model.to(target_device)
                console.print("[blue] ✅ LoRA adapters re-attached[/blue]")
            except ImportError:
                console.print("[yellow] ⚠️ PEFT not available, skipping LoRA re-attachment[/yellow]")
            except Exception as e:
                console.print(f"[yellow] ⚠️ Failed to re-attach LoRA adapters: {e}[/yellow]")

        return model

    except Exception as e:
        console.print(f"[yellow] ⚠️ LoRA re-attachment failed: {e}[/yellow]")
        return model
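

# Note: for faster inference one could instead fold the adapters into the base
# weights with PEFT's `merge_and_unload()`; they are kept separate here so the
# saved adapter checkpoint remains reusable as-is.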


def _move_batch_to_device(batch: Dict[str, torch.Tensor], target_device: str) -> Dict[str, torch.Tensor]:
    """Move all tensors in a batch to the target device.

    Args:
        batch: Dictionary of tensors.
        target_device: Target device (e.g., "cuda:0", "cpu").

    Returns:
        Batch with all tensors on the target device; non-tensor values are
        passed through unchanged.
    """
    target_device = torch.device(target_device)
    moved_batch = {}

    for key, value in batch.items():
        # Duck-type on `.to` so non-tensor entries (e.g., lists of strings)
        # pass through untouched.
        if hasattr(value, "to"):
            moved_batch[key] = value.to(target_device)
        else:
            moved_batch[key] = value

    return moved_batch
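

# Example with a typical tokenizer-style batch (key names are illustrative):
#
#     batch = {"input_ids": ids, "attention_mask": mask, "labels": labels}
#     batch = _move_batch_to_device(batch, target_device)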


def _move_tensors_to_cpu(*tensors) -> tuple:
    """Move all tensors to CPU and detach them.

    Args:
        *tensors: Variable number of tensors.

    Returns:
        Tuple of detached CPU tensors; non-tensor arguments are passed through.
    """
    cpu_tensors = []
    for tensor in tensors:
        # Detach before moving so no autograd graph is kept alive on the GPU.
        if hasattr(tensor, "detach"):
            cpu_tensors.append(tensor.detach().cpu())
        else:
            cpu_tensors.append(tensor)
    return tuple(cpu_tensors)
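

# Example: accumulate metric inputs on CPU during the eval loop so GPU memory
# stays flat (`outputs.logits` assumes a Hugging Face-style model output):
#
#     logits, labels = _move_tensors_to_cpu(outputs.logits, batch["labels"])
#     all_logits.append(logits)
#     all_labels.append(labels)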