| | import os
|
| | import torch
|
| | import gc
|
| | from concurrent.futures import ThreadPoolExecutor, as_completed
|
| | from functools import partial
|
| | import psutil
|
| | import multiprocessing as mp
|
| | from datasets import load_dataset, Dataset, DatasetDict
|
| | from transformers import (
|
| | AutoTokenizer,
|
| | AutoModelForCausalLM,
|
| | TrainingArguments,
|
| | Trainer,
|
| | DataCollatorForLanguageModeling,
|
| | GPT2TokenizerFast
|
| | )
|
| | import shutil
|
| | from typing import Dict, Any, List
|
| | import warnings
|
| | import platform
|
| | import traceback
|
| | from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
|
| | import json
|
| | import tempfile
|
| | from datetime import datetime
|
| | warnings.filterwarnings("ignore")
|
| |
|
| |
|
| |
|
# --- Base model and output locations ---
MODEL_NAME = "zxc4wewewe/blackthinking"
OUTPUT_DIR = "./offsec_model"          # per-dataset checkpoints + reports
MERGED_MODELS_DIR = "./merged_models"  # LoRA-merged full models

# --- Core training hyperparameters (memory-constrained defaults) ---
MAX_LENGTH = 512              # max tokens per example after truncation
BATCH_SIZE = 1                # per-device batch size
GRADIENT_ACCUMULATION = 8     # effective batch = BATCH_SIZE * 8
EPOCHS = 3
LEARNING_RATE = 2e-5
SAVE_STEPS = 100
EVAL_STEPS = 100
LOGGING_STEPS = 50

# --- LoRA (parameter-efficient fine-tuning) settings ---
USE_LORA = True
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.1

# Hugging Face dataset repos to fine-tune on, tried in order.
DATASET_SOURCES = [
    "huihui-ai/Guilherme34_uncensor-v2",
    "zxc4wewewe/offsec",
]

# --- Dataset processing ---
NUM_WORKERS = min(2, mp.cpu_count())   # worker cap for light machines
BATCH_SIZE_TOKENIZATION = 50           # examples per tokenizer map batch
|
| |
|
| |
|
class TrainingAnalyzer:
    """Analyzes training progress and system resources.

    Records a wall-clock start time at construction and provides
    snapshot-style analysis helpers. Every method swallows exceptions
    and returns {} so analysis can never abort the training pipeline.
    """

    def __init__(self):
        # Anchor for elapsed-time computation in analyze_training().
        self.start_time = datetime.now()
        # NOTE(review): these counters are initialized but never updated
        # anywhere in this file -- confirm whether they are dead state.
        self.training_metrics = {
            "total_samples": 0,
            "processed_samples": 0,
            "training_time": 0,
            "peak_memory": 0,
            "gpu_memory": 0,
        }

    def analyze_system(self):
        """Snapshot host CPU/RAM and (if available) CUDA GPU state.

        Returns:
            Dict of system facts, or {} on any failure.
        """
        try:
            memory = psutil.virtual_memory()
            gpu_memory = 0
            if torch.cuda.is_available():
                # Currently *allocated* (not reserved) CUDA memory, in GiB.
                gpu_memory = torch.cuda.memory_allocated() / (1024**3)

            return {
                "cpu_cores": mp.cpu_count(),
                "total_memory_gb": memory.total / (1024**3),
                "available_memory_gb": memory.available / (1024**3),
                "memory_usage_percent": memory.percent,
                "gpu_memory_gb": gpu_memory,
                "cuda_available": torch.cuda.is_available(),
                "cuda_version": torch.version.cuda,
                "pytorch_version": torch.__version__,
            }
        except Exception as e:
            print(f"β οΈ System analysis failed: {e}")
            return {}

    def analyze_dataset(self, dataset):
        """Summarize per-split sample counts and column names.

        Args:
            dataset: mapping of split name -> split data (e.g. DatasetDict).

        Returns:
            Dict keyed by split name, or {} when dataset is falsy / on error.
        """
        if not dataset:
            return {}

        try:
            analysis = {}
            for split_name, split_data in dataset.items():
                # Only sized splits are analyzable; streaming splits lack len().
                if hasattr(split_data, '__len__'):
                    analysis[split_name] = {
                        "num_samples": len(split_data),
                        "columns": split_data.column_names if hasattr(split_data, 'column_names') else [],
                    }

            return analysis
        except Exception as e:
            print(f"β οΈ Dataset analysis failed: {e}")
            return {}

    def analyze_training(self, trainer, train_result):
        """Collect timing, memory and loss metrics after a training run.

        Args:
            trainer: the Trainer instance (currently unused here).
            train_result: TrainOutput returned by trainer.train().

        Returns:
            Metrics dict, or {} on failure.
        """
        try:
            current_time = datetime.now()
            # Elapsed since this analyzer was constructed -- not since
            # trainer.train() began.
            training_time = (current_time - self.start_time).total_seconds()

            memory = psutil.virtual_memory()
            peak_memory = memory.used / (1024**3)
            gpu_memory = 0
            if torch.cuda.is_available():
                gpu_memory = torch.cuda.memory_allocated() / (1024**3)

            return {
                "training_time_seconds": training_time,
                "training_time_minutes": training_time / 60,
                "peak_memory_gb": peak_memory,
                "peak_gpu_memory_gb": gpu_memory,
                "final_loss": getattr(train_result, 'training_loss', 'unknown'),
                "total_steps": getattr(train_result, 'global_step', 0),
                "samples_per_second": train_result.metrics.get('train_samples_per_second', 0) if train_result.metrics else 0,
            }
        except Exception as e:
            print(f"β οΈ Training analysis failed: {e}")
            return {}

    def generate_report(self, system_info, dataset_info, training_info):
        """Render the three analysis dicts into a human-readable report string."""
        report = f"""
{'='*60}
TRAINING ANALYSIS REPORT
{'='*60}

SYSTEM INFORMATION:
- CPU Cores: {system_info.get('cpu_cores', 'unknown')}
- Total Memory: {system_info.get('total_memory_gb', 0):.1f} GB
- Available Memory: {system_info.get('available_memory_gb', 0):.1f} GB
- Memory Usage: {system_info.get('memory_usage_percent', 0):.1f}%
- CUDA Available: {system_info.get('cuda_available', False)}
- CUDA Version: {system_info.get('cuda_version', 'unknown')}
- PyTorch Version: {system_info.get('pytorch_version', 'unknown')}
- GPU Memory Used: {system_info.get('gpu_memory_gb', 0):.2f} GB

DATASET ANALYSIS:
"""

        # One line per split, plus an indented column listing when known.
        for split_name, split_info in dataset_info.items():
            report += f"- {split_name.upper()}: {split_info.get('num_samples', 0)} samples\n"
            if split_info.get('columns'):
                report += f"  Columns: {', '.join(split_info['columns'])}\n"

        report += f"""
TRAINING PERFORMANCE:
- Training Time: {training_info.get('training_time_minutes', 0):.2f} minutes
- Final Loss: {training_info.get('final_loss', 'unknown')}
- Total Steps: {training_info.get('total_steps', 0)}
- Samples/Second: {training_info.get('samples_per_second', 0):.2f}
- Peak Memory: {training_info.get('peak_memory_gb', 0):.2f} GB
- Peak GPU Memory: {training_info.get('peak_gpu_memory_gb', 0):.2f} GB

TRAINING CONFIGURATION:
- Model: {MODEL_NAME}
- Batch Size: {BATCH_SIZE}
- Gradient Accumulation: {GRADIENT_ACCUMULATION}
- Learning Rate: {LEARNING_RATE}
- Epochs: {EPOCHS}
- LoRA Enabled: {USE_LORA}
- Max Length: {MAX_LENGTH}

{'='*60}
END REPORT
{'='*60}
"""

        return report
|
| |
|
| |
|
def safe_makedirs(path):
    """Create *path* (with parents) if missing; report success as a bool."""
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        print(f"β οΈ Failed to create directory {path}: {e}")
        return False
    return True
|
| |
|
def cleanup_gpu_memory():
    """Release cached CUDA memory and run Python garbage collection.

    Bug fix: gc.collect() now runs unconditionally -- previously it sat
    inside the CUDA guard, so CPU-only machines never collected the large
    model objects this pipeline drops between datasets.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()
|
| |
|
def load_tokenizer_robust(model_name):
    """Load tokenizer with multiple fallback strategies.

    Tries, in order: the target repo (with then without remote code),
    the stock GPT-2 fast tokenizer, and finally a tiny hand-built
    tokenizer. Guarantees a pad token on whatever loads.

    Returns:
        A tokenizer instance, or None if every strategy fails.
    """
    print(f"π Loading tokenizer for: {model_name}")

    # Ordered from most- to least-desirable; each is a thunk so failures
    # are cheap and only the chosen strategy does real work.
    strategies = [
        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True),
        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=False),
        lambda: GPT2TokenizerFast.from_pretrained("gpt2"),
        lambda: create_minimal_tokenizer(),
    ]

    for i, strategy in enumerate(strategies, 1):
        try:
            tokenizer = strategy()

            # Ensure a pad token exists: reuse EOS when available,
            # otherwise register a dedicated <|pad|> special token.
            if tokenizer.pad_token is None:
                if tokenizer.eos_token:
                    tokenizer.pad_token = tokenizer.eos_token
                else:
                    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})

            print(f"β Tokenizer loaded (strategy {i})")
            return tokenizer
        except Exception as e:
            # Truncate potentially huge HF error messages.
            print(f"β οΈ Strategy {i} failed: {str(e)[:100]}...")

    print("β All tokenizer strategies failed")
    return None
|
| |
|
def create_minimal_tokenizer():
    """Create an absolute minimal character-level tokenizer of last resort.

    Builds a tiny tokenizer.json (4 special tokens plus ASCII letters,
    digits, whitespace and basic punctuation), loads it through
    PreTrainedTokenizerFast, and removes the temp file.

    Returns:
        A PreTrainedTokenizerFast instance, or None if construction fails.

    Bug fix: the catch-all was a bare `except:` which also swallowed
    SystemExit/KeyboardInterrupt; narrowed to Exception.
    """
    try:
        from transformers import PreTrainedTokenizerFast
        import json

        # Reserved special tokens occupy ids 0-3.
        vocab = {
            "<|pad|>": 0,
            "</s>": 1,
            "<s>": 2,
            "<|unk|>": 3,
        }

        # Single characters fill the remaining vocabulary from id 4 on.
        for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
            vocab[char] = i

        tokenizer_json = {
            "version": "1.0",
            "model": {
                "type": "BPE",
                "vocab": vocab,
                "merges": []  # no merges: pure character-level tokenization
            }
        }

        # PreTrainedTokenizerFast loads from a file path, so round-trip
        # through a named temp file and delete it once loaded.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            json.dump(tokenizer_json, f)
            temp_path = f.name

        tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
        tokenizer.pad_token = "<|pad|>"
        tokenizer.eos_token = "</s>"
        tokenizer.bos_token = "<s>"

        os.unlink(temp_path)
        return tokenizer
    except Exception:
        # Best-effort helper: callers treat None as "strategy failed".
        return None
|
| |
|
def load_dataset_fallback():
    """Load dataset with comprehensive fallbacks.

    Tries each repo in DATASET_SOURCES in order; if none loads, builds a
    tiny in-memory dummy dataset so the pipeline can still run end to end.

    Returns:
        A DatasetDict containing train/test splits, or None on total failure.
    """
    print("π₯ Loading dataset...")

    for dataset_name in DATASET_SOURCES:
        try:
            print(f"π Trying: {dataset_name}")
            dataset = load_dataset(dataset_name, streaming=False)
            print(f"β Loaded: {dataset_name}")

            # Normalize to train/test: when neither split exists, carve a
            # deterministic 90/10 split out of the first available split.
            if "train" not in dataset and "test" not in dataset:
                keys = list(dataset.keys())
                if keys:
                    main_split = dataset[keys[0]]
                    dataset = main_split.train_test_split(test_size=0.1, seed=42)
                    print(f"β Created train/test split")
                else:
                    # Empty DatasetDict: try the next source.
                    continue

            return dataset
        except Exception as e:
            print(f"β οΈ Failed: {str(e)[:100]}...")

    # Last resort: a tiny synthetic prompt/response dataset so downstream
    # code paths can still be exercised offline.
    print("π Creating dummy dataset...")
    try:
        dummy_data = {
            "train": [
                {"prompt": "What is AI?", "response": "Artificial Intelligence is computer systems performing human tasks."},
                {"prompt": "How to code?", "response": "Start with basics like variables, loops, functions."},
            ] * 10,
            "test": [
                {"prompt": "Define ML", "response": "Machine Learning enables computers to learn from data."},
            ] * 3,
        }

        dataset = DatasetDict({
            split: Dataset.from_list(data)
            for split, data in dummy_data.items()
        })

        print("β Created dummy dataset")
        return dataset
    except Exception as e:
        print(f"β Dummy dataset failed: {e}")
        return None
|
| |
|
def normalize_example(example):
    """Normalize a raw dataset example into a {"prompt", "response"} pair.

    Handles three input shapes, in priority order:
      1. already has "prompt"/"response" keys;
      2. chat-style {"messages": [{"role", "content"}, ...]} -- the last
         user/human turn becomes the prompt, the last assistant/bot turn
         the response;
      3. free text under "text"/"content", split on an "Assistant:"
         marker when present, otherwise head/tail slices.
    Empty values are replaced with the placeholder "default" so
    downstream tokenization never sees an empty string.

    Bug fix: the final catch-all was a bare `except:` (also swallowing
    SystemExit/KeyboardInterrupt); narrowed to Exception.
    """
    if not example:
        return {"prompt": "default", "response": "default"}

    try:
        # Case 1: canonical format already present.
        if "prompt" in example and "response" in example:
            return {
                "prompt": str(example.get("prompt", "")).strip() or "default",
                "response": str(example.get("response", "")).strip() or "default",
            }

        # Case 2: chat transcript.
        if "messages" in example and isinstance(example["messages"], list):
            prompt, response = "", ""
            for msg in example["messages"]:
                if isinstance(msg, dict):
                    role, content = str(msg.get("role", "")), str(msg.get("content", ""))
                    if role.lower() in ["user", "human"]:
                        prompt = content
                    elif role.lower() in ["assistant", "bot"]:
                        response = content
            return {"prompt": prompt or "default", "response": response or "default"}

        # Case 3: free text; split on an "Assistant:" marker if present.
        text = str(example.get("text", example.get("content", "default")))
        if "Assistant:" in text:
            parts = text.split("Assistant:", 1)
            return {"prompt": parts[0].replace("User:", "").strip() or "default",
                    "response": parts[1].strip() or "default"}

        # Fallback: head of the text as prompt, tail as response.
        return {"prompt": text[:200] or "default",
                "response": (text[-200:] if len(text) > 200 else text) or "default"}
    except Exception:
        return {"prompt": "default", "response": "default"}
|
| |
|
def tokenize_function(examples, tokenizer):
    """Tokenize a batch of prompt/response pairs for causal-LM training.

    Args:
        examples: batch dict with parallel "prompt" and "response" lists.
        tokenizer: tokenizer callable; must expose eos_token and may
            expose pad_token_id.

    Returns:
        Dict with input_ids, attention_mask and labels; pad positions in
        labels are replaced with -100 so the loss ignores them. On any
        failure a 3-token dummy batch of matching size is returned so the
        pipeline can continue.
    """
    try:
        full_texts = [
            f"{prompt}\n\n{response}{tokenizer.eos_token}"
            for prompt, response in zip(examples["prompt"], examples["response"])
        ]

        result = tokenizer(
            full_texts,
            truncation=True,
            max_length=MAX_LENGTH,
            padding=False,
            return_tensors=None,
        )

        # Perf fix: the pad-token lookup was re-evaluated (hasattr +
        # attribute access) for every token; resolve it once per batch.
        # None means "never mask", which matches the old behavior when
        # pad_token_id was missing or None.
        pad_id = getattr(tokenizer, 'pad_token_id', None)
        result["labels"] = [
            [-100 if (pad_id is not None and token_id == pad_id) else token_id
             for token_id in labels]
            for labels in result["input_ids"]
        ]

        return result
    except Exception as e:
        print(f"β οΈ Tokenization error: {e}")
        return {
            "input_ids": [[1, 2, 3]] * len(examples["prompt"]),
            "attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
            "labels": [[1, 2, 3]] * len(examples["prompt"]),
        }
|
| |
|
def process_dataset(dataset, tokenizer):
    """Normalize and tokenize every split of *dataset*.

    Each split is mapped through normalize_example and then batch-
    tokenized. A split that fails is replaced by a small dummy split so
    training can proceed.

    Args:
        dataset: DatasetDict-like mapping of split name -> Dataset.
        tokenizer: shared tokenizer used by tokenize_function.

    Returns:
        DatasetDict of tokenized splits, or None when inputs are falsy.

    Bug fix: the inner fallback used a bare `except:` which also caught
    SystemExit/KeyboardInterrupt; narrowed to Exception.
    """
    if not dataset or not tokenizer:
        return None

    print("β‘ Processing dataset...")

    processed_splits = {}
    for split_name in dataset.keys():
        try:
            print(f"π Processing {split_name} ({len(dataset[split_name])} samples)...")

            # Step 1: map every raw example to {"prompt", "response"}.
            normalized = dataset[split_name].map(
                normalize_example,
                remove_columns=dataset[split_name].column_names,
                num_proc=1,
            )

            # Step 2: batch tokenization; cache disabled because the
            # lambda closure is not hashable across runs.
            tokenized = normalized.map(
                lambda x: tokenize_function(x, tokenizer),
                batched=True,
                batch_size=BATCH_SIZE_TOKENIZATION,
                num_proc=1,
                remove_columns=["prompt", "response"],
                load_from_cache_file=False
            )

            processed_splits[split_name] = tokenized
            print(f"β {split_name}: {len(tokenized)} samples")

        except Exception as e:
            print(f"β οΈ {split_name} failed: {e}")

            # Fallback: substitute a tiny dummy split so downstream code
            # always has something to train/evaluate on.
            try:
                dummy_tokens = tokenizer("test\n\ntest", return_tensors=None)
                dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
                processed_splits[split_name] = Dataset.from_list([dummy_tokens] * min(10, len(dataset[split_name])))
            except Exception:
                processed_splits[split_name] = Dataset.from_list([
                    {"input_ids": [1], "attention_mask": [1], "labels": [1]}
                ] * 5)

    return DatasetDict(processed_splits) if processed_splits else None
|
| |
|
def load_model(model_name, tokenizer, use_lora=True):
    """Load model with LoRA support.

    Tries progressively less demanding loading strategies (8-bit, fp16,
    plain CPU). On success, optionally wraps the model with a LoRA
    adapter and resizes its embeddings to match the tokenizer.

    Args:
        model_name: HF repo or local path.
        tokenizer: tokenizer used to size the embedding matrix (may be None).
        use_lora: call-site switch; combined with module-level USE_LORA.

    Returns:
        The loaded model, or None if every strategy fails.
    """
    print("π§ Loading model...")

    # Ordered from most memory-efficient to most compatible.
    strategies = [
        {
            "name": "8-bit + LoRA",
            "params": {
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
                "load_in_8bit": True,
            }
        },
        {
            "name": "float16",
            "params": {
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
            }
        },
        {
            "name": "CPU fallback",
            "params": {
                "low_cpu_mem_usage": True,
            }
        }
    ]

    for strategy in strategies:
        try:
            print(f"π {strategy['name']}...")
            model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])

            # LoRA is applied only when both the call-site flag and the
            # module-level USE_LORA switch are on; failure here is
            # non-fatal and the full-parameter model is used instead.
            if use_lora and USE_LORA:
                try:
                    model = prepare_model_for_kbit_training(model)
                    lora_config = LoraConfig(
                        r=LORA_R,
                        lora_alpha=LORA_ALPHA,
                        target_modules=["q_proj", "v_proj"],
                        lora_dropout=LORA_DROPOUT,
                        bias="none",
                        task_type="CAUSAL_LM"
                    )
                    model = get_peft_model(model, lora_config)
                    print("β LoRA applied")
                except Exception as e:
                    print(f"β οΈ LoRA failed: {e}")

            # Keep the embedding matrix in sync with the tokenizer vocab
            # (a pad token may have been added by load_tokenizer_robust).
            if tokenizer:
                try:
                    model.resize_token_embeddings(len(tokenizer))
                except Exception as e:
                    print(f"β οΈ Embedding resize failed: {e}")

            print(f"β Model loaded ({strategy['name']})")
            return model
        except Exception as e:
            # Truncate potentially huge HF error messages.
            print(f"β οΈ {strategy['name']} failed: {str(e)[:100]}...")

    print("β All model strategies failed")
    return None
|
| |
|
def setup_training(model, tokenizer, tokenized_dataset, dataset_name):
    """Build a Trainer (and its TrainingArguments) for one dataset.

    Args:
        model: the (possibly LoRA-wrapped) causal LM to train.
        tokenizer: tokenizer used for collation and saving.
        tokenized_dataset: DatasetDict with "train" (and optionally "test").
        dataset_name: HF repo name; slashes become underscores in paths.

    Returns:
        (trainer, output_dir) on success, (None, None) on failure.

    Bug fix: the early-exit paths previously returned a bare None while
    the exception path returned (None, None), so callers unpacking the
    result could crash; all failure paths now return the 2-tuple.
    """
    if not model or not tokenizer or not tokenized_dataset:
        return None, None

    print(f"βοΈ Setting up training for {dataset_name}...")

    try:
        train_dataset = tokenized_dataset.get("train")
        # Fall back to evaluating on train data when no test split exists.
        eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")

        if not train_dataset or len(train_dataset) == 0:
            print("β No training data")
            return None, None

        # Cap dataset sizes to keep runs fast and memory-bounded.
        max_samples = 50
        if len(train_dataset) > max_samples:
            train_dataset = train_dataset.select(range(max_samples))
        if eval_dataset and len(eval_dataset) > 10:
            eval_dataset = eval_dataset.select(range(min(10, len(eval_dataset))))

        output_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(output_dir)

        training_args = TrainingArguments(
            output_dir=output_dir,

            # Batch geometry.
            num_train_epochs=EPOCHS,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            gradient_accumulation_steps=GRADIENT_ACCUMULATION,

            # Optimizer schedule.
            learning_rate=LEARNING_RATE,
            weight_decay=0.01,
            warmup_ratio=0.1,
            lr_scheduler_type="linear",

            # Logging / checkpointing.
            logging_dir=os.path.join(output_dir, "logs"),
            logging_steps=LOGGING_STEPS,
            save_strategy="steps",
            save_steps=SAVE_STEPS,
            save_total_limit=2,

            eval_strategy="steps" if eval_dataset else "no",
            eval_steps=EVAL_STEPS if eval_dataset else None,

            # Memory-conscious settings for small GPUs / CPU hosts.
            fp16=torch.cuda.is_available(),
            bf16=False,
            dataloader_num_workers=1,
            dataloader_pin_memory=False,
            remove_unused_columns=False,

            optim="adamw_torch",
            dataloader_drop_last=True,
            gradient_checkpointing=True,

            report_to="none",
            run_name=f"training_{dataset_name}",
            tf32=False,
        )

        # Causal-LM collator (mlm=False); padding to multiples of 8 keeps
        # tensor shapes hardware-friendly.
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False,
            pad_to_multiple_of=8,
        )

        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
            processing_class=tokenizer,
            callbacks=[]
        )

        print("β Training setup complete")
        return trainer, output_dir
    except Exception as e:
        print(f"β Training setup failed: {e}")
        return None, None
|
| |
|
def train_model(trainer, dataset_name):
    """Run trainer.train() and persist the model, state and tokenizer.

    Args:
        trainer: a configured transformers Trainer (or None).
        dataset_name: label used only for log messages.

    Returns:
        (success, final_model_dir, train_result); (False, None, None) on
        any failure.

    Bug fix: `trainer.tokenizer` is deprecated (and removed in recent
    transformers) when the Trainer is built with `processing_class`, as
    setup_training does -- saving would raise AttributeError. Prefer
    processing_class and fall back to the legacy attribute.
    """
    if not trainer:
        return False, None, None

    print(f"π Training {dataset_name}...")

    try:
        train_result = trainer.train()

        output_dir = trainer.args.output_dir
        final_model_dir = os.path.join(output_dir, "final_model")
        safe_makedirs(final_model_dir)

        print("πΎ Saving model...")
        trainer.save_model(final_model_dir)
        trainer.save_state()

        print("πΎ Saving tokenizer...")
        processor = getattr(trainer, "processing_class", None) or getattr(trainer, "tokenizer", None)
        if processor is not None:
            processor.save_pretrained(final_model_dir)

        print(f"β Training complete for {dataset_name}")
        return True, final_model_dir, train_result

    except Exception as e:
        print(f"β Training failed: {e}")
        traceback.print_exc()
        return False, None, None
|
| |
|
def merge_model(base_model_path, adapter_path, dataset_name):
    """Merge LoRA adapter weights into the base model and save the result.

    Args:
        base_model_path: HF repo or local path of the base model.
        adapter_path: directory containing the trained PEFT adapter.
        dataset_name: used (slashes -> underscores) for the output folder.

    Returns:
        (True, merged_path) on success. If merging fails, the raw adapter
        files are copied aside and (True, fallback_path) is returned;
        (False, None) only when that fallback copy also fails.

    Bug fix: the tokenizer fallback used a bare `except:` which also
    swallowed SystemExit/KeyboardInterrupt; narrowed to Exception.
    """
    print(f"π Merging {dataset_name}...")

    try:
        output_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(output_path)

        # Prefer the tokenizer saved alongside the adapter; fall back to
        # the robust loader against the base model.
        try:
            tokenizer = AutoTokenizer.from_pretrained(adapter_path)
        except Exception:
            tokenizer = load_tokenizer_robust(base_model_path)

        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )

        # The adapter may have been trained after new special tokens were
        # added; align the embedding size with the tokenizer before merging.
        current_vocab_size = len(tokenizer)
        model_vocab_size = base_model.get_input_embeddings().weight.size(0)
        if current_vocab_size != model_vocab_size:
            base_model.resize_token_embeddings(current_vocab_size)

        # Fold the LoRA deltas into the base weights and drop the wrapper.
        merged_model = PeftModel.from_pretrained(base_model, adapter_path)
        merged_model = merged_model.merge_and_unload()

        merged_model.save_pretrained(output_path)
        tokenizer.save_pretrained(output_path)

        print(f"β {dataset_name} merged successfully")
        cleanup_gpu_memory()
        return True, output_path

    except Exception as e:
        print(f"β Merging {dataset_name} failed: {e}")

        # Fallback: preserve the raw adapter files so training output is
        # never lost even when merging is impossible.
        try:
            fallback_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_") + "_adapter_only")
            safe_makedirs(fallback_path)

            adapter_files = os.listdir(adapter_path)
            for file in adapter_files:
                src = os.path.join(adapter_path, file)
                dst = os.path.join(fallback_path, file)
                if os.path.isfile(src):
                    shutil.copy2(src, dst)

            print(f"β οΈ {dataset_name} adapter copied (merging failed)")
            return True, fallback_path
        except Exception as e2:
            print(f"β Fallback also failed: {e2}")
            return False, None
|
| |
|
def save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name):
    """Persist the rendered report and raw metric dicts for one dataset.

    Writes training_analysis.txt (human-readable) and
    training_metrics.json (machine-readable) under OUTPUT_DIR. Returns
    True on success, False on any failure.
    """
    try:
        text_report = analyzer.generate_report(system_info, dataset_info, training_info)

        report_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(report_dir)

        with open(os.path.join(report_dir, "training_analysis.txt"), "w") as handle:
            handle.write(text_report)

        payload = {
            "system": system_info,
            "dataset": dataset_info,
            "training": training_info,
        }
        with open(os.path.join(report_dir, "training_metrics.json"), "w") as handle:
            json.dump(payload, handle, indent=2)

        print(f"π Analysis saved for {dataset_name}")
        return True
    except Exception as e:
        print(f"β οΈ Failed to save analysis: {e}")
        return False
|
| |
|
| |
|
def main():
    """Main training pipeline with automatic model merging.

    For each repo in DATASET_SOURCES: load data, normalize + tokenize,
    load a fresh (LoRA-wrapped) model, train, analyze, save a report and
    -- when LoRA is enabled -- merge the adapter back into the base
    model.

    Returns:
        List of per-dataset result dicts, or None when the shared
        tokenizer cannot be loaded.
    """
    print("π STARTING AUTOMATED TRAINING PIPELINE")
    print(f"π§ Model: {MODEL_NAME}")
    print(f"π― LoRA: {USE_LORA} | Batch: {BATCH_SIZE} | Epochs: {EPOCHS}")
    print(f"π₯οΈ System: {platform.system()} | CUDA: {torch.cuda.is_available()}")

    analyzer = TrainingAnalyzer()

    safe_makedirs(OUTPUT_DIR)
    safe_makedirs(MERGED_MODELS_DIR)

    # One tokenizer is shared across all datasets; nothing downstream can
    # run without it, so this failure is fatal.
    print("\nπ€ LOADING SHARED TOKENIZER...")
    tokenizer = load_tokenizer_robust(MODEL_NAME)
    if not tokenizer:
        print("β CRITICAL: Tokenizer loading failed")
        return

    print(f"β Tokenizer loaded (vocab: {len(tokenizer)})")

    system_info = analyzer.analyze_system()
    print(f"π System: {system_info.get('total_memory_gb', 0):.1f}GB RAM, {system_info.get('cpu_cores', 0)} cores")

    results = []
    total_training_time = 0

    for dataset_name in DATASET_SOURCES:
        print(f"\n{'='*60}")
        print(f"π― PROCESSING DATASET: {dataset_name}")
        print(f"{'='*60}")

        # NOTE(review): load_dataset_fallback() takes no arguments and
        # walks DATASET_SOURCES itself, so every iteration of this loop
        # may load the same first-available dataset -- confirm intent.
        dataset = load_dataset_fallback()
        if not dataset:
            print(f"β Failed to load {dataset_name}")
            continue

        dataset_info = analyzer.analyze_dataset(dataset)
        print(f"π Dataset analysis: {dataset_info}")

        tokenized_dataset = process_dataset(dataset, tokenizer)
        if not tokenized_dataset:
            print(f"β Failed to process {dataset_name}")
            continue

        # A fresh model per dataset so each gets its own adapter.
        model = load_model(MODEL_NAME, tokenizer, use_lora=True)
        if not model:
            print(f"β Failed to load model for {dataset_name}")
            continue

        # setup_training may return None or (None, None) on failure;
        # both shapes are handled here.
        setup_result = setup_training(model, tokenizer, tokenized_dataset, dataset_name)
        if not setup_result or setup_result[0] is None:
            print(f"β Failed to setup training for {dataset_name}")
            continue

        trainer, model_dir = setup_result

        success, final_model_dir, train_result = train_model(trainer, dataset_name)
        if not success:
            print(f"β Training failed for {dataset_name}")
            continue

        training_info = analyzer.analyze_training(trainer, train_result)
        total_training_time += training_info.get('training_time_minutes', 0)

        save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name)

        # Merge the LoRA adapter into base weights when enabled; record
        # the outcome either way.
        if USE_LORA and success:
            merge_success, merged_path = merge_model(MODEL_NAME, final_model_dir, dataset_name)

            results.append({
                "dataset": dataset_name,
                "training_time": training_info.get('training_time_minutes', 0),
                "final_loss": training_info.get('final_loss', 'unknown'),
                "model_saved": final_model_dir,
                "model_merged": merged_path if merge_success else None,
                "success": True
            })
        else:
            results.append({
                "dataset": dataset_name,
                "training_time": training_info.get('training_time_minutes', 0),
                "final_loss": training_info.get('final_loss', 'unknown'),
                "model_saved": final_model_dir,
                "model_merged": None,
                "success": success
            })

        # Free GPU/host memory before the next dataset's model load.
        cleanup_gpu_memory()
        print(f"β {dataset_name} processing complete\n")

    # Final summary across all datasets.
    print(f"\n{'='*60}")
    print("π FINAL TRAINING SUMMARY")
    print(f"{'='*60}")

    successful_trainings = sum(1 for r in results if r['success'])
    successful_merges = sum(1 for r in results if r.get('model_merged'))

    print(f"β Total Datasets Processed: {len(results)}")
    print(f"β Successful Trainings: {successful_trainings}")
    print(f"β Successful Merges: {successful_merges}")
    print(f"β±οΈ Total Training Time: {total_training_time:.2f} minutes")

    for result in results:
        status = "β " if result['success'] else "β"
        merge_status = "π" if result.get('model_merged') else "βοΈ"
        print(f"{status} {result['dataset']}: {result['training_time']:.1f}min | Loss: {result['final_loss']} {merge_status}")

    print(f"\nπ Models saved in: {OUTPUT_DIR}")
    print(f"π Merged models in: {MERGED_MODELS_DIR}")
    print(f"{'='*60}")

    return results
|
| |
|
| |
|
| | if __name__ == "__main__":
|
| | print("π STARTING AUTOMATED TRAINING...")
|
| |
|
| | try:
|
| | results = main()
|
| |
|
| | if results:
|
| | print("π TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
|
| | else:
|
| | print("β οΈ TRAINING COMPLETED WITH ISSUES")
|
| |
|
| | except KeyboardInterrupt:
|
| | print("\nπ TRAINING STOPPED BY USER")
|
| | except Exception as e:
|
| | print(f"π₯ UNEXPECTED ERROR: {str(e)}")
|
| | traceback.print_exc()
|
| | print("β οΈ CONTINUING DESPITE ERROR...")
|
| |
|
| | print("π TRAINING PROCESS FINISHED")
|
| |
|