# cli/main.py
import sys
import time
from pathlib import Path

# Add the parent directory to the path so we can import from pipelines
sys.path.insert(0, str(Path(__file__).parent.parent))

# DO NOT import Unsloth here - delay until after wizard completion
UNSLOTH_AVAILABLE = None  # Will be checked later

from cli.config_wizard import collect_training_config
from cli.atomic_eval import app as atomic_eval_app
from rich import print
from rich.console import Console
import inquirer

console = Console()

# Removed download functionality - system now only works with local datasets

def check_unsloth_availability():
    """Check if Unsloth is available (delayed import)"""
    global UNSLOTH_AVAILABLE
    if UNSLOTH_AVAILABLE is None:
        try:
            import unsloth
            UNSLOTH_AVAILABLE = True
        except ImportError:
            UNSLOTH_AVAILABLE = False
    return UNSLOTH_AVAILABLE
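
# The availability flag is memoized in UNSLOTH_AVAILABLE, so the heavy `import unsloth`
# is attempted at most once per process - and only after the wizard has completed.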

def detect_gpus():
    """Detect available GPUs"""
    try:
        import torch
        if torch.cuda.is_available():
            gpu_count = torch.cuda.device_count()
            gpus = []
            for i in range(gpu_count):
                gpus.append({
                    "index": i,
                    "name": torch.cuda.get_device_name(i),
                    "memory": f"{torch.cuda.get_device_properties(i).total_memory / 1024**3:.1f}GB"
                })
            return gpu_count, gpus
        else:
            return 0, []
    except ImportError:
        return 0, []
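
# detect_gpus() returns (0, []) both when torch is not importable and when CUDA reports
# no devices; callers treat either case as "GPU training unavailable".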

def choose_training_mode(gpu_count, gpus):
    """Choose training mode based on available GPUs"""
    if gpu_count == 0:
        console.print("[yellow]⚠️ No GPUs detected - CPU training not supported[/yellow]")
        return None
    elif gpu_count == 1:
        console.print(f"[blue]🔧 Single GPU detected - using GPU 0: {gpus[0]['name']}[/blue]")
        return "single"
    else:
        # Multiple GPUs - prompt user to choose
        console.print(f"[blue]🔧 {gpu_count} GPUs detected - choose training mode[/blue]")

        # Display available GPUs
        from rich.table import Table
        gpu_table = Table(show_header=True, box=None)
        gpu_table.add_column("Index", style="cyan", width=6)
        gpu_table.add_column("Name", style="white", width=40)
        gpu_table.add_column("VRAM", style="green", width=10)
        for gpu in gpus:
            gpu_table.add_row(str(gpu['index']), gpu['name'], gpu['memory'])
        console.print(gpu_table)

        choices = [
            "Multi-GPU Training (all available GPUs)",
            "Single GPU Training (choose specific GPU)"
        ]
        questions = [
            inquirer.List('training_mode',
                          message="🔧 Training Mode: (Use arrow keys)",
                          choices=choices,
                          default=choices[0])
        ]
        answers = inquirer.prompt(questions)
        if answers is None:  # user aborted the prompt (e.g. Ctrl+C)
            return None
        selected_mode = answers['training_mode']

        if "Multi-GPU" in selected_mode:
            return "multi"
        else:
            # Single GPU - let user choose which one
            gpu_choices = []
            for gpu in gpus:
                gpu_choices.append(f"GPU{gpu['index']}: {gpu['name']} ({gpu['memory']})")
            questions = [
                inquirer.List('gpu_selection',
                              message="Choose GPU: (Use arrow keys)",
                              choices=gpu_choices,
                              default=gpu_choices[0])
            ]
            answers = inquirer.prompt(questions)
            if answers is None:  # user aborted the prompt
                return None
            selected_gpu = answers['gpu_selection']

            # Extract GPU index from a label like "GPU1: <name> (<memory>)"
            gpu_index = int(selected_gpu.split("GPU")[1].split(":")[0])
            console.print(f"[blue]Selected GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")
            return f"single_{gpu_index}"

def show_menu():
    console.rule("[bold cyan]Humigence — Your AI. Your pipeline. Zero code.")
    print("[dim]A complete MLOps suite built for makers, teams, and enterprises.[/dim]\n")
    print("Options:")
    print("[bold green]1.[/bold green] Supervised Fine-Tuning 🚀")
    print("[bold yellow]2.[/bold yellow] RAG Implementation (coming soon)")
    print("[bold yellow]3.[/bold yellow] EnterpriseGPT (coming soon)")
    print("[bold yellow]4.[/bold yellow] Batch Inference (coming soon)")
    print("[bold yellow]5.[/bold yellow] Context Length (coming soon)")
    print("[bold red]6.[/bold red] Exit\n")

def launch_training(config, training_mode, gpus):
    """Launch training based on the selected mode"""
    import os
    import subprocess

    # Change to the humigence directory
    humigence_dir = Path(__file__).parent.parent
    os.chdir(humigence_dir)

    # Map model names to Unsloth equivalents
    model_mapping = {
        "Qwen/Qwen2.5-0.5B": "unsloth/Qwen2.5-0.5B-Instruct",
        "microsoft/Phi-2": "unsloth/Phi-2",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0": "unsloth/TinyLlama-1.1B-Chat-v1.0"
    }

    # Use the Unsloth model if one is mapped, otherwise keep the original name
    base_model = config.get("base_model", config.get("model_name", "Qwen/Qwen2.5-0.5B"))
    model_name = model_mapping.get(base_model, base_model)
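
    # NOTE (assumption): the unsloth/* names above are taken to be Unsloth's pre-patched
    # mirrors of the upstream checkpoints; unlisted models are passed through unchanged.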

    # Determine dataset parameters
    dataset_path = config["dataset_path"]
    if dataset_path.startswith("local:"):
        # Local dataset - use as custom dataset
        dataset_name = "jsonl"
        dataset_config = dataset_path[len("local:"):]  # strip the "local:" prefix
    else:
        # Default to wikitext for demo
        dataset_name = "wikitext"
        dataset_config = "wikitext-2-raw-v1"

    # Map training recipe to precision
    training_recipe = config.get("training_recipe", "QLoRA (4-bit NF4)")
    if "QLoRA" in training_recipe:
        precision = "qlora_4bit"
    elif "BF16" in training_recipe:
        precision = "lora_bf16"
    else:
        precision = "lora_fp16"

    # Create output directory with timestamp
    timestamp = time.strftime("%Y%m%d_%H%M%S")

    if training_mode == "multi":
        # Multi-GPU training with torchrun
        output_dir = f"./runs/humigence/out_lora_dual_{timestamp}"
        console.print("[bold green]🚀 Launching multi-GPU training with Unsloth...[/bold green]")
        cmd = [
            "torchrun",
            "--nproc_per_node=2",  # one process per GPU; this release targets dual-GPU setups
            "training/unsloth/train_lora_dual.py",
            "--model", model_name,
            "--dataset", dataset_name,
            "--dataset_config", dataset_config,
            "--out_dir", output_dir,
            "--max_steps", "1000",
            "--per_device_batch", "2",
            "--grad_accum", "4",
            "--learning_rate", "2e-4",
            "--block_size", "1024",
            "--lora_r", "16",
            "--lora_alpha", "32",
            "--lora_dropout", "0.0",
            "--precision", precision
        ]
        console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
        try:
            subprocess.run(cmd, check=True, cwd=humigence_dir)
            console.print("[bold green]✅ Multi-GPU training completed successfully![/bold green]")
            console.print(f"[blue]📁 Output saved to: {output_dir}[/blue]")
            return True
        except subprocess.CalledProcessError as e:
            console.print(f"[bold red]❌ Multi-GPU training failed with return code: {e.returncode}[/bold red]")
            console.print("[yellow]🔄 Falling back to single-GPU training...[/yellow]")
            # Fall through to the single-GPU branch below
            training_mode = "single"
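
    # The single-GPU branch below is reached directly for "single"/"single_<n>" modes,
    # and also via the multi-GPU fallback above (which retries on GPU 0).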
if training_mode == "single" or training_mode.startswith("single_"):
# Single-GPU training
if training_mode.startswith("single_"):
gpu_index = int(training_mode.split("_")[1])
output_dir = f"./runs/humigence/out_lora_single_{timestamp}_gpu{gpu_index}"
else:
gpu_index = 0
output_dir = f"./runs/humigence/out_lora_single_{timestamp}"
console.print(f"[bold green]πŸš€ Launching single-GPU training with Unsloth...[/bold green]")
console.print(f"[blue]Using GPU {gpu_index}: {gpus[gpu_index]['name']}[/blue]")
cmd = [
"python3",
"training/unsloth/train_lora_dual.py",
"--model", model_name,
"--dataset", dataset_name,
"--dataset_config", dataset_config,
"--out_dir", output_dir,
"--max_steps", "1000",
"--per_device_batch", "4", # Larger batch for single GPU
"--grad_accum", "2", # Less accumulation for single GPU
"--learning_rate", "2e-4",
"--block_size", "1024",
"--lora_r", "16",
"--lora_alpha", "32",
"--lora_dropout", "0.0",
"--precision", precision
]
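
        # Effective batch here is 4 x 2 = 8 sequences per optimizer step, versus
        # 2 x 4 x 2 GPUs = 16 on the multi-GPU path.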
console.print(f"[dim]Command: {' '.join(cmd)}[/dim]")
# Set environment for specific GPU
env = os.environ.copy()
env["CUDA_VISIBLE_DEVICES"] = str(gpu_index)
try:
result = subprocess.run(cmd, check=True, cwd=humigence_dir, env=env)
console.print("[bold green]βœ… Single-GPU training completed successfully![/bold green]")
console.print(f"[blue]πŸ“ Output saved to: {output_dir}[/blue]")
return True
except subprocess.CalledProcessError as e:
console.print(f"[bold red]❌ Single-GPU training failed with return code: {e.returncode}[/bold red]")
return False
except Exception as e:
console.print(f"[bold red]❌ Single-GPU training failed: {e}[/bold red]")
return False
return False
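
# launch_training() returns True only when the training subprocess exits with code 0;
# an unrecognized training_mode falls through to the final `return False`.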

def main():
    while True:
        show_menu()
        choice = console.input("[bold blue]Select an option[/bold blue]: ")

        if choice == "1":
            console.print("[bold green]Starting Supervised Fine-Tuning...[/bold green]")

            # Step 1: Run the configuration wizard (no Unsloth import yet)
            config_path = collect_training_config()
            if config_path is None:
                # User cancelled or an error occurred
                console.print("[bold red]❌ Training cancelled. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue

            # Step 2: Load the configuration from the wizard
            import json
            with open(config_path, 'r') as f:
                config = json.load(f)

            # Step 3: NOW check if Unsloth dependencies are available (after wizard completion)
            if not check_unsloth_availability():
                console.print("[bold red]❌ Missing required dependencies: No module named 'unsloth'[/bold red]")
                console.print("[yellow]➡ To install, run:[/yellow]")
                console.print("[cyan]python3 training/unsloth/setup_humigence_unsloth.py[/cyan]")
                time.sleep(2)
                continue

            # Step 4: Detect GPUs BEFORE importing Unsloth (to avoid interference)
            gpu_count, gpus = detect_gpus()
            training_mode = choose_training_mode(gpu_count, gpus)
            if training_mode is None:
                console.print("[bold red]❌ No suitable training mode available. Returning to main menu.[/bold red]")
                time.sleep(2)
                continue

            # Step 5: Launch training
            success = launch_training(config, training_mode, gpus)
            if not success:
                console.print("[bold red]❌ Training failed. Check the logs above for details.[/bold red]")

            # Offer to start another training session
            console.print("\n[bold cyan]Training session finished.[/bold cyan]")
            if console.input("[bold blue]Start another training session? (y/N)[/bold blue]: ").lower() in ['y', 'yes']:
                continue
            else:
                break
        elif choice == "6":
            console.print("[bold red]Exiting Humigence CLI. Goodbye![/bold red]")
            time.sleep(1)
            sys.exit()
        else:
            console.print("[yellow]⚠️ Option not implemented yet. Try 1 or 6.[/yellow]\n")
            time.sleep(1)

if __name__ == "__main__":
    main()