File size: 12,512 Bytes
7275aef
 
c4b369c
7275aef
c4b369c
 
7275aef
c4b369c
 
7275aef
 
 
 
 
 
 
 
c4b369c
 
 
7275aef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4b369c
 
7275aef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
# cli/main.py
# Interactive CLI entry point for the Humigence suite: shows a menu,
# runs the configuration wizard, detects GPUs, and launches LoRA
# fine-tuning in a subprocess.

import sys
import time
from pathlib import Path

# Add the parent directory to the path so we can import from pipelines
sys.path.insert(0, str(Path(__file__).parent.parent))

# DO NOT import Unsloth here - delay until after wizard completion
# Tri-state flag: None = not yet probed; True/False once
# check_unsloth_availability() has attempted the import.
UNSLOTH_AVAILABLE = None  # Will be checked later

from cli.config_wizard import collect_training_config
from cli.atomic_eval import app as atomic_eval_app
from rich import print
from rich.console import Console
import inquirer

# Shared Rich console used by every function in this module.
console = Console()

def check_unsloth_availability():
    """Lazily probe whether the Unsloth package can be imported.

    Caches the outcome in the module-level ``UNSLOTH_AVAILABLE`` flag:
    the first call attempts the import and records True/False; later
    calls return the cached value without importing again.
    """
    global UNSLOTH_AVAILABLE
    if UNSLOTH_AVAILABLE is not None:
        return UNSLOTH_AVAILABLE
    try:
        import unsloth  # noqa: F401 - import is the availability probe
        UNSLOTH_AVAILABLE = True
    except ImportError:
        UNSLOTH_AVAILABLE = False
    return UNSLOTH_AVAILABLE

def detect_gpus():
    """Return ``(gpu_count, gpu_infos)`` for all visible CUDA devices.

    Each entry of ``gpu_infos`` is a dict with keys ``index``, ``name``
    and a human-readable ``memory`` string (GiB).  Returns ``(0, [])``
    when torch is not installed or CUDA is unavailable.
    """
    try:
        import torch
    except ImportError:
        return 0, []
    if not torch.cuda.is_available():
        return 0, []
    infos = [
        {
            "index": idx,
            "name": torch.cuda.get_device_name(idx),
            "memory": f"{torch.cuda.get_device_properties(idx).total_memory / 1024**3:.1f}GB",
        }
        for idx in range(torch.cuda.device_count())
    ]
    return len(infos), infos

def choose_training_mode(gpu_count, gpus):
    """Choose training mode based on available GPUs.

    Args:
        gpu_count: number of CUDA devices (from detect_gpus()).
        gpus: per-device info dicts with 'index', 'name', 'memory'.

    Returns:
        "single" when exactly one GPU exists, "multi" for all-GPU
        training, "single_<index>" when the user picks one of several
        GPUs, or None when no GPU is usable or the user cancels a
        prompt.
    """
    if gpu_count == 0:
        console.print("[yellow]⚠️ No GPUs detected - CPU training not supported[/yellow]")
        return None

    if gpu_count == 1:
        console.print(f"[blue]πŸ”§ Single GPU detected - using GPU 0: {gpus[0]['name']}[/blue]")
        return "single"

    # Multiple GPUs - prompt user to choose
    console.print(f"[blue]πŸ”§ {gpu_count} GPUs detected - choose training mode[/blue]")

    # Display available GPUs
    from rich.table import Table
    gpu_table = Table(show_header=True, box=None)
    gpu_table.add_column("Index", style="cyan", width=6)
    gpu_table.add_column("Name", style="white", width=40)
    gpu_table.add_column("VRAM", style="green", width=10)

    for gpu in gpus:
        gpu_table.add_row(str(gpu['index']), gpu['name'], gpu['memory'])

    console.print(gpu_table)

    choices = [
        "Multi-GPU Training (all available GPUs)",
        "Single GPU Training (choose specific GPU)"
    ]

    questions = [
        inquirer.List('training_mode',
                     message="πŸ”§ Training Mode: (Use arrow keys)",
                     choices=choices,
                     default=choices[0])
    ]

    answers = inquirer.prompt(questions)
    # BUG FIX: inquirer.prompt() returns None when the user aborts the
    # prompt (Ctrl+C) - the original crashed with TypeError on
    # answers['training_mode']. Treat abort as cancellation.
    if answers is None:
        return None
    selected_mode = answers['training_mode']

    if "Multi-GPU" in selected_mode:
        return "multi"

    # Single GPU - let user choose which one
    gpu_choices = [
        f"GPU{gpu['index']}: {gpu['name']} ({gpu['memory']})" for gpu in gpus
    ]

    questions = [
        inquirer.List('gpu_selection',
                     message="Choose GPU: (Use arrow keys)",
                     choices=gpu_choices,
                     default=gpu_choices[0])
    ]

    answers = inquirer.prompt(questions)
    if answers is None:
        # Same abort-handling as above.
        return None
    selected_gpu = answers['gpu_selection']

    # Extract GPU index from the "GPU<n>: ..." label built above
    gpu_index = int(selected_gpu.split("GPU")[1].split(":")[0])
    console.print(f"[blue]Selected GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")
    return f"single_{gpu_index}"

def show_menu():
    """Print the Humigence banner and the numbered top-level menu."""
    console.rule("[bold cyan]Humigence β€” Your AI. Your pipeline. Zero code.")
    print("[dim]A complete MLOps suite built for makers, teams, and enterprises.[/dim]\n")
    print("Options:")
    menu_lines = (
        "[bold green]1.[/bold green] Supervised Fine-Tuning πŸš€",
        "[bold yellow]2.[/bold yellow] RAG Implementation (coming soon)",
        "[bold yellow]3.[/bold yellow] EnterpriseGPT (coming soon)",
        "[bold yellow]4.[/bold yellow] Batch Inference (coming soon)",
        "[bold yellow]5.[/bold yellow] Context Length (coming soon)",
        "[bold red]6.[/bold red] Exit\n",
    )
    for entry in menu_lines:
        print(entry)

def launch_training(config, training_mode, gpus):
    """Launch LoRA training in a subprocess for the selected mode.

    Args:
        config: wizard config dict; reads "dataset_path" (required) and
            optionally "base_model"/"model_name" and "training_recipe".
        training_mode: "multi", "single", or "single_<gpu_index>".
        gpus: GPU info dicts from detect_gpus() (used for logging and
            for sizing the multi-GPU launch).

    Returns:
        True if the training subprocess exited successfully, else False.

    Note:
        Changes the current working directory to the humigence root as
        a side effect.
    """
    import os
    import subprocess

    # Change to the humigence directory
    humigence_dir = Path(__file__).parent.parent
    os.chdir(humigence_dir)

    # Map model names to Unsloth equivalents
    model_mapping = {
        "Qwen/Qwen2.5-0.5B": "unsloth/Qwen2.5-0.5B-Instruct",
        "microsoft/Phi-2": "unsloth/Phi-2",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "unsloth/TinyLlama-1.1B-Chat-v1.0"
    }

    # Use Unsloth model if available, otherwise use original
    base_model = config.get("base_model", config.get("model_name", "Qwen/Qwen2.5-0.5B"))
    model_name = model_mapping.get(base_model, base_model)

    # Determine dataset parameters
    dataset_path = config["dataset_path"]
    if dataset_path.startswith("local:"):
        # Local dataset - use as custom dataset
        dataset_name = "jsonl"
        dataset_config = dataset_path[len("local:"):]  # strip the "local:" prefix
    else:
        # Default to wikitext for demo
        dataset_name = "wikitext"
        dataset_config = "wikitext-2-raw-v1"

    # Map training recipe to precision
    training_recipe = config.get("training_recipe", "QLoRA (4-bit NF4)")
    if "QLoRA" in training_recipe:
        precision = "qlora_4bit"
    elif "BF16" in training_recipe:
        precision = "lora_bf16"
    else:
        precision = "lora_fp16"

    # Create output directory with timestamp
    timestamp = time.strftime("%Y%m%d_%H%M%S")

    if training_mode == "multi":
        # Multi-GPU training with TorchRun
        output_dir = f"./runs/humigence/out_lora_dual_{timestamp}"
        console.print("[bold green]πŸš€ Launching multi-GPU training with Unsloth...[/bold green]")

        cmd = [
            "torchrun",
            # BUG FIX: was hard-coded to 2 processes; the menu promises
            # "all available GPUs", so size the launch from the detected
            # device list.
            f"--nproc_per_node={len(gpus)}",
            "training/unsloth/train_lora_dual.py",
            "--model", model_name,
            "--dataset", dataset_name,
            "--dataset_config", dataset_config,
            "--out_dir", output_dir,
            "--max_steps", "1000",
            "--per_device_batch", "2",
            "--grad_accum", "4",
            "--learning_rate", "2e-4",
            "--block_size", "1024",
            "--lora_r", "16",
            "--lora_alpha", "32",
            "--lora_dropout", "0.0",
            "--precision", precision
        ]

        console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")

        try:
            subprocess.run(cmd, check=True, cwd=humigence_dir)
            console.print("[bold green]βœ… Multi-GPU training completed successfully![/bold green]")
            console.print(f"[blue]πŸ“ Output saved to: {output_dir}[/blue]")
            return True
        except subprocess.CalledProcessError as e:
            console.print(f"[bold red]❌ Multi-GPU training failed with return code: {e.returncode}[/bold red]")
            console.print("[yellow]πŸ”„ Falling back to single-GPU training...[/yellow]")
            # Fall through to single-GPU fallback below
            training_mode = "single"
        except FileNotFoundError:
            # torchrun binary missing from PATH - fall back to a plain
            # python3 single-GPU launch instead of crashing.
            console.print("[bold red]❌ torchrun not found on PATH[/bold red]")
            console.print("[yellow]πŸ”„ Falling back to single-GPU training...[/yellow]")
            training_mode = "single"

    if training_mode == "single" or training_mode.startswith("single_"):
        # Single-GPU training; "single_<n>" carries the chosen device.
        if training_mode.startswith("single_"):
            gpu_index = int(training_mode.split("_")[1])
            output_dir = f"./runs/humigence/out_lora_single_{timestamp}_gpu{gpu_index}"
        else:
            gpu_index = 0
            output_dir = f"./runs/humigence/out_lora_single_{timestamp}"

        console.print("[bold green]πŸš€ Launching single-GPU training with Unsloth...[/bold green]")
        console.print(f"[blue]Using GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")

        cmd = [
            "python3",
            "training/unsloth/train_lora_dual.py",
            "--model", model_name,
            "--dataset", dataset_name,
            "--dataset_config", dataset_config,
            "--out_dir", output_dir,
            "--max_steps", "1000",
            "--per_device_batch", "4",  # Larger batch for single GPU
            "--grad_accum", "2",        # Less accumulation for single GPU
            "--learning_rate", "2e-4",
            "--block_size", "1024",
            "--lora_r", "16",
            "--lora_alpha", "32",
            "--lora_dropout", "0.0",
            "--precision", precision
        ]

        console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")

        # Pin the subprocess to the chosen GPU only.
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = str(gpu_index)

        try:
            subprocess.run(cmd, check=True, cwd=humigence_dir, env=env)
            console.print("[bold green]βœ… Single-GPU training completed successfully![/bold green]")
            console.print(f"[blue]πŸ“ Output saved to: {output_dir}[/blue]")
            return True
        except subprocess.CalledProcessError as e:
            console.print(f"[bold red]❌ Single-GPU training failed with return code: {e.returncode}[/bold red]")
            return False
        except Exception as e:
            console.print(f"[bold red]❌ Single-GPU training failed: {e}[/bold red]")
            return False

    return False

def main():
    """Top-level interactive loop: show the menu and dispatch choices.

    Only option 1 (Supervised Fine-Tuning) and option 6 (Exit) are
    implemented; any other input prints a notice and re-shows the menu.
    """
    while True:
        show_menu()
        choice = console.input("[bold blue]Select an option[/bold blue]: ")

        if choice == "1":
            console.print("[bold green]Starting Supervised Fine-Tuning...[/bold green]")
            
            # Step 1: Run the configuration wizard (no Unsloth import yet)
            config_path = collect_training_config()
            
            if config_path is None:
                # User cancelled or error occurred
                console.print("[bold red]❌ Training cancelled. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue
            
            # Step 2: Load the configuration from the wizard
            import json
            with open(config_path, 'r') as f:
                config = json.load(f)
            
            # Step 3: NOW check if Unsloth dependencies are available (after wizard completion)
            if not check_unsloth_availability():
                console.print("[bold red]❌ Missing required dependencies: No module named 'unsloth'[/bold red]")
                console.print("[yellow]➑ To install, run:[/yellow]")
                console.print("[cyan]python3 training/unsloth/setup_humigence_unsloth.py[/cyan]")
                time.sleep(2)
                continue
            
            # Step 4: Detect GPUs BEFORE importing Unsloth (to avoid interference)
            gpu_count, gpus = detect_gpus()
            training_mode = choose_training_mode(gpu_count, gpus)
            
            if training_mode is None:
                console.print("[bold red]❌ No suitable training mode available. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue
            
            # Step 5: Launch training
            success = launch_training(config, training_mode, gpus)
            
            if not success:
                console.print("[bold red]❌ Training failed. Check the logs above for details.[/bold red]")
            
            # Ask if user wants to start another training session
            # NOTE(review): this banner prints even when training failed;
            # the failure message above is the only distinction - confirm
            # this is intentional before changing it.
            console.print("\n[bold cyan]Training completed![/bold cyan]")
            if console.input("[bold blue]Start another training session? (y/N)[/bold blue]: ").lower() in ['y', 'yes']:
                continue
            else:
                break
        elif choice == "6":
            console.print("[bold red]Exiting Humigence CLI. Goodbye![/bold red]")
            time.sleep(1)
            sys.exit()
        else:
            console.print("[yellow]⚠️ Option not implemented yet. Try 1 or 6.[/yellow]\n")
            time.sleep(1)

# Standard script entry guard so importing this module has no side effects.
if __name__ == "__main__":
    main()