# cli/main.py
import sys
import time
from pathlib import Path

# Add the parent directory to the path so we can import from pipelines
sys.path.insert(0, str(Path(__file__).parent.parent))

# DO NOT import Unsloth here - delay until after wizard completion
UNSLOTH_AVAILABLE = None  # Will be checked later

from cli.config_wizard import collect_training_config
from cli.atomic_eval import app as atomic_eval_app
from rich import print
from rich.console import Console
import inquirer

console = Console()

# Removed download functionality - system now only works with local datasets

def check_unsloth_availability():
    """Check if Unsloth is available (delayed import)"""
    global UNSLOTH_AVAILABLE
    if UNSLOTH_AVAILABLE is None:
        try:
            import unsloth
            UNSLOTH_AVAILABLE = True
        except ImportError:
            UNSLOTH_AVAILABLE = False
    return UNSLOTH_AVAILABLE
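
# The availability flag is memoized in UNSLOTH_AVAILABLE, so the heavy `import unsloth`
# is attempted at most once per process - and only after the wizard has completed.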

def detect_gpus():
    """Detect available GPUs"""
    try:
        import torch
        if torch.cuda.is_available():
            gpu_count = torch.cuda.device_count()
            gpus = []
            for i in range(gpu_count):
                gpus.append({
                    "index": i,
                    "name": torch.cuda.get_device_name(i),
                    "memory": f"{torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f}GB"
                })
            return gpu_count, gpus
        else:
            return 0, []
    except ImportError:
        return 0, []
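
# detect_gpus() returns (0, []) both when torch is not importable and when CUDA reports
# no devices; callers treat either case as "GPU training unavailable".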

def choose_training_mode(gpu_count, gpus):
    """Choose training mode based on available GPUs"""
    if gpu_count == 0:
        console.print("[yellow]⚠️ No GPUs detected - CPU training not supported[/yellow]")
        return None
    elif gpu_count == 1:
        console.print(f"[blue]🔧 Single GPU detected - using GPU 0: {gpus[0]['name']}[/blue]")
        return "single"
    else:
        # Multiple GPUs - prompt user to choose
        console.print(f"[blue]🔧 {gpu_count} GPUs detected - choose training mode[/blue]")

        # Display available GPUs
        from rich.table import Table
        gpu_table = Table(show_header=True, box=None)
        gpu_table.add_column("Index", style="cyan", width=6)
        gpu_table.add_column("Name", style="white", width=40)
        gpu_table.add_column("VRAM", style="green", width=10)
        for gpu in gpus:
            gpu_table.add_row(str(gpu['index']), gpu['name'], gpu['memory'])
        console.print(gpu_table)

        choices = [
            "Multi-GPU Training (all available GPUs)",
            "Single GPU Training (choose specific GPU)"
        ]
        questions = [
            inquirer.List('training_mode',
                          message="🔧 Training Mode: (Use arrow keys)",
                          choices=choices,
                          default=choices[0])
        ]
        answers = inquirer.prompt(questions)
        if answers is None:  # user aborted the prompt (e.g. Ctrl+C)
            return None
        selected_mode = answers['training_mode']

        if "Multi-GPU" in selected_mode:
            return "multi"
        else:
            # Single GPU - let user choose which one
            gpu_choices = []
            for gpu in gpus:
                gpu_choices.append(f"GPU{gpu['index']}: {gpu['name']} ({gpu['memory']})")
            questions = [
                inquirer.List('gpu_selection',
                              message="Choose GPU: (Use arrow keys)",
                              choices=gpu_choices,
                              default=gpu_choices[0])
            ]
            answers = inquirer.prompt(questions)
            if answers is None:  # user aborted the prompt
                return None
            selected_gpu = answers['gpu_selection']

            # Extract GPU index from a label like "GPU1: <name> (<memory>)"
            gpu_index = int(selected_gpu.split("GPU")[1].split(":")[0])
            console.print(f"[blue]Selected GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")
            return f"single_{gpu_index}"

def show_menu():
    console.rule("[bold cyan]Humigence — Your AI. Your pipeline. Zero code.")
    print("[dim]A complete MLOps suite built for makers, teams, and enterprises.[/dim]\n")
    print("Options:")
    print("[bold green]1.[/bold green] Supervised Fine-Tuning 🚀")
    print("[bold yellow]2.[/bold yellow] RAG Implementation (coming soon)")
    print("[bold yellow]3.[/bold yellow] EnterpriseGPT (coming soon)")
    print("[bold yellow]4.[/bold yellow] Batch Inference (coming soon)")
    print("[bold yellow]5.[/bold yellow] Context Length (coming soon)")
    print("[bold red]6.[/bold red] Exit\n")

def launch_training(config, training_mode, gpus):
    """Launch training based on the selected mode"""
    import os
    import subprocess

    # Change to the humigence directory
    humigence_dir = Path(__file__).parent.parent
    os.chdir(humigence_dir)

    # Map model names to Unsloth equivalents
    model_mapping = {
        "Qwen/Qwen2.5-0.5B": "unsloth/Qwen2.5-0.5B-Instruct",
        "microsoft/Phi-2": "unsloth/Phi-2",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "unsloth/TinyLlama-1.1B-Chat-v1.0"
    }

    # Use the Unsloth model if one is mapped, otherwise keep the original name
    base_model = config.get("base_model", config.get("model_name", "Qwen/Qwen2.5-0.5B"))
    model_name = model_mapping.get(base_model, base_model)
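
    # NOTE (assumption): the unsloth/* names above are taken to be Unsloth's pre-patched
    # mirrors of the upstream checkpoints; unlisted models are passed through unchanged.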

    # Determine dataset parameters
    dataset_path = config["dataset_path"]
    if dataset_path.startswith("local:"):
        # Local dataset - use as custom dataset
        dataset_name = "jsonl"
        dataset_config = dataset_path[len("local:"):]  # strip the "local:" prefix
    else:
        # Default to wikitext for demo
        dataset_name = "wikitext"
        dataset_config = "wikitext-2-raw-v1"

    # Map training recipe to precision
    training_recipe = config.get("training_recipe", "QLoRA (4-bit NF4)")
    if "QLoRA" in training_recipe:
        precision = "qlora_4bit"
    elif "BF16" in training_recipe:
        precision = "lora_bf16"
    else:
        precision = "lora_fp16"

    # Create output directory with timestamp
    timestamp = time.strftime("%Y%m%d_%H%M%S")

    if training_mode == "multi":
        # Multi-GPU training with torchrun
        output_dir = f"./runs/humigence/out_lora_dual_{timestamp}"
        console.print("[bold green]🚀 Launching multi-GPU training with Unsloth...[/bold green]")
        cmd = [
            "torchrun",
            "--nproc_per_node=2",  # one process per GPU; this release targets dual-GPU setups
            "training/unsloth/train_lora_dual.py",
            "--model", model_name,
            "--dataset", dataset_name,
            "--dataset_config", dataset_config,
            "--out_dir", output_dir,
            "--max_steps", "1000",
            "--per_device_batch", "2",
            "--grad_accum", "4",
            "--learning_rate", "2e-4",
            "--block_size", "1024",
            "--lora_r", "16",
            "--lora_alpha", "32",
            "--lora_dropout", "0.0",
            "--precision", precision
        ]
        console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
        try:
            subprocess.run(cmd, check=True, cwd=humigence_dir)
            console.print("[bold green]✅ Multi-GPU training completed successfully![/bold green]")
            console.print(f"[blue]📁 Output saved to: {output_dir}[/blue]")
            return True
        except subprocess.CalledProcessError as e:
            console.print(f"[bold red]❌ Multi-GPU training failed with return code: {e.returncode}[/bold red]")
            console.print("[yellow]🔄 Falling back to single-GPU training...[/yellow]")
            # Fall through to the single-GPU branch below
            training_mode = "single"
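
    # The single-GPU branch below is reached directly for "single"/"single_<n>" modes,
    # and also via the multi-GPU fallback above (which retries on GPU 0).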
if training_mode == "single" or training_mode.startswith("single_"):
# Single-GPU training
if training_mode.startswith("single_"):
gpu_index = int(training_mode.split("_")[1])
output_dir = f"./runs/humigence/out_lora_single_{timestamp}_gpu{gpu_index}"
else:
gpu_index = 0
output_dir = f"./runs/humigence/out_lora_single_{timestamp}"
console.print(f"[bold green]πŸš€ Launching single-GPU training with Unsloth...[/bold green]")
console.print(f"[blue]Using GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")
cmd = [
"python3",
"training/unsloth/train_lora_dual.py",
"--model", model_name,
"--dataset", dataset_name,
"--dataset_config", dataset_config,
"--out_dir", output_dir,
"--max_steps", "1000",
"--per_device_batch", "4", # Larger batch for single GPU
"--grad_accum", "2", # Less accumulation for single GPU
"--learning_rate", "2e-4",
"--block_size", "1024",
"--lora_r", "16",
"--lora_alpha", "32",
"--lora_dropout", "0.0",
"--precision", precision
]
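
        # Effective batch here is 4 x 2 = 8 sequences per optimizer step, versus
        # 2 x 4 x 2 GPUs = 16 on the multi-GPU path.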
console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
# Set environment for specific GPU
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = str(gpu_index)
try:
result = subprocess.run(cmd, check=True, cwd=humigence_dir, env=env)
console.print("[bold green]βœ… Single-GPU training completed successfully![/bold green]")
console.print(f"[blue]πŸ“ Output saved to: {output_dir}[/blue]")
return True
except subprocess.CalledProcessError as e:
console.print(f"[bold red]❌ Single-GPU training failed with return code: {e.returncode}[/bold red]")
return False
except Exception as e:
console.print(f"[bold red]❌ Single-GPU training failed: {e}[/bold red]")
return False
return False
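
# launch_training() returns True only when the training subprocess exits with code 0;
# an unrecognized training_mode falls through to the final `return False`.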

def main():
    while True:
        show_menu()
        choice = console.input("[bold blue]Select an option[/bold blue]: ")

        if choice == "1":
            console.print("[bold green]Starting Supervised Fine-Tuning...[/bold green]")

            # Step 1: Run the configuration wizard (no Unsloth import yet)
            config_path = collect_training_config()
            if config_path is None:
                # User cancelled or an error occurred
                console.print("[bold red]❌ Training cancelled. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue

            # Step 2: Load the configuration from the wizard
            import json
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Step 3: NOW check if Unsloth dependencies are available (after wizard completion)
            if not check_unsloth_availability():
                console.print("[bold red]❌ Missing required dependencies: No module named 'unsloth'[/bold red]")
                console.print("[yellow]➡ To install, run:[/yellow]")
                console.print("[cyan]python3 training/unsloth/setup_humigence_unsloth.py[/cyan]")
                time.sleep(2)
                continue

            # Step 4: Detect GPUs BEFORE importing Unsloth (to avoid interference)
            gpu_count, gpus = detect_gpus()
            training_mode = choose_training_mode(gpu_count, gpus)
            if training_mode is None:
                console.print("[bold red]❌ No suitable training mode available. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue

            # Step 5: Launch training
            success = launch_training(config, training_mode, gpus)
            if not success:
                console.print("[bold red]❌ Training failed. Check the logs above for details.[/bold red]")

            # Offer to start another training session
            console.print("\n[bold cyan]Training session finished.[/bold cyan]")
            if console.input("[bold blue]Start another training session? (y/N)[/bold blue]: ").lower() in ['y', 'yes']:
                continue
            else:
                break
        elif choice == "6":
            console.print("[bold red]Exiting Humigence CLI. Goodbye![/bold red]")
            time.sleep(1)
            sys.exit()
        else:
            console.print("[yellow]⚠️ Option not implemented yet. Try 1 or 6.[/yellow]\n")
            time.sleep(1)

if __name__ == "__main__":
    main()