import os
import torch
import gc
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import partial
import psutil
import multiprocessing as mp
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling,
    GPT2TokenizerFast
)
import shutil
from typing import Dict, Any, List
import warnings
import platform
import traceback
from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
import json
import tempfile
from datetime import datetime
warnings.filterwarnings("ignore")


# ─── Configuration ───────────────────────────────────────────────────────────
MODEL_NAME = "zxc4wewewe/blackthinking"
OUTPUT_DIR = "./offsec_model"
MERGED_MODELS_DIR = "./merged_models"
MAX_LENGTH = 512
BATCH_SIZE = 1
GRADIENT_ACCUMULATION = 8
EPOCHS = 3
LEARNING_RATE = 2e-5
SAVE_STEPS = 100
EVAL_STEPS = 100
LOGGING_STEPS = 50

# LoRA Configuration
USE_LORA = True
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.1

# Dataset Configuration
DATASET_SOURCES = [
    "huihui-ai/Guilherme34_uncensor-v2",
    "zxc4wewewe/offsec",
]

# System Configuration
NUM_WORKERS = min(2, mp.cpu_count())
BATCH_SIZE_TOKENIZATION = 50

# ─── Analyzer Class ──────────────────────────────────────────────────────────
class TrainingAnalyzer:
    """Analyzes training progress and system resources"""
    
    def __init__(self):
        self.start_time = datetime.now()
        self.training_metrics = {
            "total_samples": 0,
            "processed_samples": 0,
            "training_time": 0,
            "peak_memory": 0,
            "gpu_memory": 0,
        }
    
    def analyze_system(self):
        """Analyze system resources"""
        try:
            memory = psutil.virtual_memory()
            gpu_memory = 0
            if torch.cuda.is_available():
                gpu_memory = torch.cuda.memory_allocated() / (1024**3)
            
            return {
                "cpu_cores": mp.cpu_count(),
                "total_memory_gb": memory.total / (1024**3),
                "available_memory_gb": memory.available / (1024**3),
                "memory_usage_percent": memory.percent,
                "gpu_memory_gb": gpu_memory,
                "cuda_available": torch.cuda.is_available(),
                "cuda_version": torch.version.cuda,
                "pytorch_version": torch.__version__,
            }
        except Exception as e:
            print(f"⚠️  System analysis failed: {e}")
            return {}
    
    def analyze_dataset(self, dataset):
        """Analyze dataset characteristics"""
        if not dataset:
            return {}
        
        try:
            analysis = {}
            for split_name, split_data in dataset.items():
                if hasattr(split_data, '__len__'):
                    analysis[split_name] = {
                        "num_samples": len(split_data),
                        "columns": split_data.column_names if hasattr(split_data, 'column_names') else [],
                    }
            
            return analysis
        except Exception as e:
            print(f"⚠️  Dataset analysis failed: {e}")
            return {}
    
    def analyze_training(self, trainer, train_result):
        """Analyze training results"""
        try:
            current_time = datetime.now()
            training_time = (current_time - self.start_time).total_seconds()
            
            memory = psutil.virtual_memory()
            peak_memory = memory.used / (1024**3)
            gpu_memory = 0
            if torch.cuda.is_available():
                gpu_memory = torch.cuda.memory_allocated() / (1024**3)
            
            return {
                "training_time_seconds": training_time,
                "training_time_minutes": training_time / 60,
                "peak_memory_gb": peak_memory,
                "peak_gpu_memory_gb": gpu_memory,
                "final_loss": getattr(train_result, 'training_loss', 'unknown'),
                "total_steps": getattr(train_result, 'global_step', 0),
                "samples_per_second": train_result.metrics.get('train_samples_per_second', 0) if train_result.metrics else 0,
            }
        except Exception as e:
            print(f"⚠️  Training analysis failed: {e}")
            return {}
    
    def generate_report(self, system_info, dataset_info, training_info):
        """Generate comprehensive training report"""
        report = f"""
{'='*60}
TRAINING ANALYSIS REPORT
{'='*60}

SYSTEM INFORMATION:
- CPU Cores: {system_info.get('cpu_cores', 'unknown')}
- Total Memory: {system_info.get('total_memory_gb', 0):.1f} GB
- Available Memory: {system_info.get('available_memory_gb', 0):.1f} GB
- Memory Usage: {system_info.get('memory_usage_percent', 0):.1f}%
- CUDA Available: {system_info.get('cuda_available', False)}
- CUDA Version: {system_info.get('cuda_version', 'unknown')}
- PyTorch Version: {system_info.get('pytorch_version', 'unknown')}
- GPU Memory Used: {system_info.get('gpu_memory_gb', 0):.2f} GB

DATASET ANALYSIS:
"""
        
        for split_name, split_info in dataset_info.items():
            report += f"- {split_name.upper()}: {split_info.get('num_samples', 0)} samples\n"
            if split_info.get('columns'):
                report += f"  Columns: {', '.join(split_info['columns'])}\n"
        
        report += f"""
TRAINING PERFORMANCE:
- Training Time: {training_info.get('training_time_minutes', 0):.2f} minutes
- Final Loss: {training_info.get('final_loss', 'unknown')}
- Total Steps: {training_info.get('total_steps', 0)}
- Samples/Second: {training_info.get('samples_per_second', 0):.2f}
- Peak Memory: {training_info.get('peak_memory_gb', 0):.2f} GB
- Peak GPU Memory: {training_info.get('peak_gpu_memory_gb', 0):.2f} GB

TRAINING CONFIGURATION:
- Model: {MODEL_NAME}
- Batch Size: {BATCH_SIZE}
- Gradient Accumulation: {GRADIENT_ACCUMULATION}
- Learning Rate: {LEARNING_RATE}
- Epochs: {EPOCHS}
- LoRA Enabled: {USE_LORA}
- Max Length: {MAX_LENGTH}

{'='*60}
END REPORT
{'='*60}
"""
        
        return report

# ─── Utility Functions ───────────────────────────────────────────────────────
def safe_makedirs(path):
    """Safely create directories"""
    try:
        os.makedirs(path, exist_ok=True)
        return True
    except Exception as e:
        print(f"⚠️  Failed to create directory {path}: {e}")
        return False

def cleanup_gpu_memory():
    """Clean up GPU memory"""
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()

def load_tokenizer_robust(model_name):
    """Load tokenizer with multiple fallback strategies"""
    print(f"🔄 Loading tokenizer for: {model_name}")
    
    strategies = [
        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True),
        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=False),
        lambda: GPT2TokenizerFast.from_pretrained("gpt2"),
        lambda: create_minimal_tokenizer(),
    ]
    
    for i, strategy in enumerate(strategies, 1):
        try:
            tokenizer = strategy()
            
            # Add missing special tokens
            if tokenizer.pad_token is None:
                if tokenizer.eos_token:
                    tokenizer.pad_token = tokenizer.eos_token
                else:
                    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
            
            print(f"✅ Tokenizer loaded (strategy {i})")
            return tokenizer
        except Exception as e:
            print(f"⚠️  Strategy {i} failed: {str(e)[:100]}...")
    
    print("❌ All tokenizer strategies failed")
    return None

def create_minimal_tokenizer():
    """Create absolute minimal tokenizer"""
    try:
        from transformers import PreTrainedTokenizerFast
        import json
        
        vocab = {
            "<|pad|>": 0,
            "</s>": 1,
            "<s>": 2,
            "<|unk|>": 3,
        }
        
        for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
            vocab[char] = i
        
        tokenizer_json = {
            "version": "1.0",
            "model": {
                "type": "BPE",
                "vocab": vocab,
                "merges": []
            }
        }
        
        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
            json.dump(tokenizer_json, f)
            temp_path = f.name
        
        tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
        tokenizer.pad_token = "<|pad|>"
        tokenizer.eos_token = "</s>"
        tokenizer.bos_token = "<s>"
        
        os.unlink(temp_path)
        return tokenizer
    except:
        return None

def load_dataset_fallback():
    """Load dataset with comprehensive fallbacks"""
    print("📥 Loading dataset...")
    
    for dataset_name in DATASET_SOURCES:
        try:
            print(f"🔄 Trying: {dataset_name}")
            dataset = load_dataset(dataset_name, streaming=False)
            print(f"✅ Loaded: {dataset_name}")
            
            # Ensure proper splits
            if "train" not in dataset and "test" not in dataset:
                keys = list(dataset.keys())
                if keys:
                    main_split = dataset[keys[0]]
                    dataset = main_split.train_test_split(test_size=0.1, seed=42)
                    print(f"✅ Created train/test split")
                else:
                    continue
            
            return dataset
        except Exception as e:
            print(f"⚠️  Failed: {str(e)[:100]}...")
    
    # Create dummy dataset
    print("🔄 Creating dummy dataset...")
    try:
        dummy_data = {
            "train": [
                {"prompt": "What is AI?", "response": "Artificial Intelligence is computer systems performing human tasks."},
                {"prompt": "How to code?", "response": "Start with basics like variables, loops, functions."},
            ] * 10,
            "test": [
                {"prompt": "Define ML", "response": "Machine Learning enables computers to learn from data."},
            ] * 3,
        }
        
        dataset = DatasetDict({
            split: Dataset.from_list(data) 
            for split, data in dummy_data.items()
        })
        
        print("✅ Created dummy dataset")
        return dataset
    except Exception as e:
        print(f"❌ Dummy dataset failed: {e}")
        return None

def normalize_example(example):
    """Normalize example format"""
    if not example:
        return {"prompt": "default", "response": "default"}
    
    try:
        if "prompt" in example and "response" in example:
            return {
                "prompt": str(example.get("prompt", "")).strip() or "default",
                "response": str(example.get("response", "")).strip() or "default",
            }
        
        if "messages" in example and isinstance(example["messages"], list):
            prompt, response = "", ""
            for msg in example["messages"]:
                if isinstance(msg, dict):
                    role, content = str(msg.get("role", "")), str(msg.get("content", ""))
                    if role.lower() in ["user", "human"]:
                        prompt = content
                    elif role.lower() in ["assistant", "bot"]:
                        response = content
            return {"prompt": prompt or "default", "response": response or "default"}
        
        text = str(example.get("text", example.get("content", "default")))
        if "Assistant:" in text:
            parts = text.split("Assistant:", 1)
            return {"prompt": parts[0].replace("User:", "").strip() or "default", 
                   "response": parts[1].strip() or "default"}
        
        return {"prompt": text[:200] or "default", 
               "response": (text[-200:] if len(text) > 200 else text) or "default"}
    except:
        return {"prompt": "default", "response": "default"}

def tokenize_function(examples, tokenizer):
    """Tokenize examples safely"""
    try:
        full_texts = [
            f"{prompt}\n\n{response}{tokenizer.eos_token}"
            for prompt, response in zip(examples["prompt"], examples["response"])
        ]
        
        result = tokenizer(
            full_texts,
            truncation=True,
            max_length=MAX_LENGTH,
            padding=False,
            return_tensors=None,
        )
        
        result["labels"] = [
            [-100 if (hasattr(tokenizer, 'pad_token_id') and token_id == tokenizer.pad_token_id) else token_id 
             for token_id in labels] 
            for labels in result["input_ids"]
        ]
        
        return result
    except Exception as e:
        print(f"⚠️  Tokenization error: {e}")
        return {
            "input_ids": [[1, 2, 3]] * len(examples["prompt"]),
            "attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
            "labels": [[1, 2, 3]] * len(examples["prompt"]),
        }

def process_dataset(dataset, tokenizer):
    """Process dataset efficiently"""
    if not dataset or not tokenizer:
        return None
    
    print("⚡ Processing dataset...")
    
    processed_splits = {}
    for split_name in dataset.keys():
        try:
            print(f"🔄 Processing {split_name} ({len(dataset[split_name])} samples)...")
            
            # Normalize
            normalized = dataset[split_name].map(
                normalize_example,
                remove_columns=dataset[split_name].column_names,
                num_proc=1,
            )
            
            # Tokenize
            tokenized = normalized.map(
                lambda x: tokenize_function(x, tokenizer),
                batched=True,
                batch_size=BATCH_SIZE_TOKENIZATION,
                num_proc=1,
                remove_columns=["prompt", "response"],
                load_from_cache_file=False
            )
            
            processed_splits[split_name] = tokenized
            print(f"✅ {split_name}: {len(tokenized)} samples")
            
        except Exception as e:
            print(f"⚠️  {split_name} failed: {e}")
            # Create minimal fallback
            try:
                dummy_tokens = tokenizer("test\n\ntest", return_tensors=None)
                dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
                processed_splits[split_name] = Dataset.from_list([dummy_tokens] * min(10, len(dataset[split_name])))
            except:
                processed_splits[split_name] = Dataset.from_list([
                    {"input_ids": [1], "attention_mask": [1], "labels": [1]}
                ] * 5)
    
    return DatasetDict(processed_splits) if processed_splits else None

def load_model(model_name, tokenizer, use_lora=True):
    """Load model with LoRA support"""
    print("🧠 Loading model...")
    
    strategies = [
        {
            "name": "8-bit + LoRA",
            "params": {
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
                "load_in_8bit": True,
            }
        },
        {
            "name": "float16",
            "params": {
                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
                "device_map": "auto" if torch.cuda.is_available() else None,
                "trust_remote_code": True,
                "low_cpu_mem_usage": True,
            }
        },
        {
            "name": "CPU fallback",
            "params": {
                "low_cpu_mem_usage": True,
            }
        }
    ]
    
    for strategy in strategies:
        try:
            print(f"🔄 {strategy['name']}...")
            model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])
            
            # Apply LoRA if requested
            if use_lora and USE_LORA:
                try:
                    model = prepare_model_for_kbit_training(model)
                    lora_config = LoraConfig(
                        r=LORA_R,
                        lora_alpha=LORA_ALPHA,
                        target_modules=["q_proj", "v_proj"],
                        lora_dropout=LORA_DROPOUT,
                        bias="none",
                        task_type="CAUSAL_LM"
                    )
                    model = get_peft_model(model, lora_config)
                    print("✅ LoRA applied")
                except Exception as e:
                    print(f"⚠️  LoRA failed: {e}")
            
            # Resize embeddings
            if tokenizer:
                try:
                    model.resize_token_embeddings(len(tokenizer))
                except Exception as e:
                    print(f"⚠️  Embedding resize failed: {e}")
            
            print(f"✅ Model loaded ({strategy['name']})")
            return model
        except Exception as e:
            print(f"⚠️  {strategy['name']} failed: {str(e)[:100]}...")
    
    print("❌ All model strategies failed")
    return None

def setup_training(model, tokenizer, tokenized_dataset, dataset_name):
    """Setup training configuration"""
    if not model or not tokenizer or not tokenized_dataset:
        return None
    
    print(f"⚙️  Setting up training for {dataset_name}...")
    
    try:
        train_dataset = tokenized_dataset.get("train")
        eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")
        
        if not train_dataset or len(train_dataset) == 0:
            print("❌ No training data")
            return None
        
        # Limit samples for efficiency
        max_samples = 50
        if len(train_dataset) > max_samples:
            train_dataset = train_dataset.select(range(max_samples))
        if eval_dataset and len(eval_dataset) > 10:
            eval_dataset = eval_dataset.select(range(min(10, len(eval_dataset))))
        
        output_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(output_dir)
        
        training_args = TrainingArguments(
            output_dir=output_dir,

            num_train_epochs=EPOCHS,
            per_device_train_batch_size=BATCH_SIZE,
            per_device_eval_batch_size=BATCH_SIZE,
            gradient_accumulation_steps=GRADIENT_ACCUMULATION,
            
            learning_rate=LEARNING_RATE,
            weight_decay=0.01,
            warmup_ratio=0.1,
            lr_scheduler_type="linear",
            
            logging_dir=os.path.join(output_dir, "logs"),
            logging_steps=LOGGING_STEPS,
            save_strategy="steps",
            save_steps=SAVE_STEPS,
            save_total_limit=2,
            
            eval_strategy="steps" if eval_dataset else "no",
            eval_steps=EVAL_STEPS if eval_dataset else None,
            
            fp16=torch.cuda.is_available(),
            bf16=False,
            dataloader_num_workers=1,
            dataloader_pin_memory=False,
            remove_unused_columns=False,
            
            optim="adamw_torch",
            dataloader_drop_last=True,
            gradient_checkpointing=True,
            
            report_to="none",
            run_name=f"training_{dataset_name}",
            tf32=False,
        )
        
        data_collator = DataCollatorForLanguageModeling(
            tokenizer=tokenizer,
            mlm=False,
            pad_to_multiple_of=8,
        )
        
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            data_collator=data_collator,
            processing_class=tokenizer,
            callbacks=[]
        )
        
        print("✅ Training setup complete")
        return trainer, output_dir
    except Exception as e:
        print(f"❌ Training setup failed: {e}")
        return None, None

def train_model(trainer, dataset_name):
    """Execute training and save results"""
    if not trainer:
        return False, None, None
    
    print(f"🏃 Training {dataset_name}...")
    
    try:
        train_result = trainer.train()
        
        # Save final model
        output_dir = trainer.args.output_dir
        final_model_dir = os.path.join(output_dir, "final_model")
        safe_makedirs(final_model_dir)
        
        print("💾 Saving model...")
        trainer.save_model(final_model_dir)
        trainer.save_state()
        
        print("💾 Saving tokenizer...")
        trainer.tokenizer.save_pretrained(final_model_dir)
        
        print(f"✅ Training complete for {dataset_name}")
        return True, final_model_dir, train_result
        
    except Exception as e:
        print(f"❌ Training failed: {e}")
        traceback.print_exc()
        return False, None, None

def merge_model(base_model_path, adapter_path, dataset_name):
    """Merge LoRA weights with base model"""
    print(f"🔗 Merging {dataset_name}...")
    
    try:
        output_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(output_path)
        
        # Load tokenizer from adapter
        try:
            tokenizer = AutoTokenizer.from_pretrained(adapter_path)
        except:
            tokenizer = load_tokenizer_robust(base_model_path)
        
        # Load base model
        base_model = AutoModelForCausalLM.from_pretrained(
            base_model_path,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
            device_map="auto" if torch.cuda.is_available() else None,
            trust_remote_code=True,
            low_cpu_mem_usage=True
        )
        
        # Resize embeddings to match tokenizer
        current_vocab_size = len(tokenizer)
        model_vocab_size = base_model.get_input_embeddings().weight.size(0)
        if current_vocab_size != model_vocab_size:
            base_model.resize_token_embeddings(current_vocab_size)
        
        # Load and merge LoRA adapter
        merged_model = PeftModel.from_pretrained(base_model, adapter_path)
        merged_model = merged_model.merge_and_unload()
        
        # Save merged model
        merged_model.save_pretrained(output_path)
        tokenizer.save_pretrained(output_path)
        
        print(f"✅ {dataset_name} merged successfully")
        cleanup_gpu_memory()
        return True, output_path
        
    except Exception as e:
        print(f"❌ Merging {dataset_name} failed: {e}")
        
        # Fallback: copy adapter files
        try:
            fallback_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_") + "_adapter_only")
            safe_makedirs(fallback_path)
            
            adapter_files = os.listdir(adapter_path)
            for file in adapter_files:
                src = os.path.join(adapter_path, file)
                dst = os.path.join(fallback_path, file)
                if os.path.isfile(src):
                    shutil.copy2(src, dst)
            
            print(f"⚠️  {dataset_name} adapter copied (merging failed)")
            return True, fallback_path
        except Exception as e2:
            print(f"❌ Fallback also failed: {e2}")
            return False, None

def save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name):
    """Save analysis report"""
    try:
        report = analyzer.generate_report(system_info, dataset_info, training_info)
        
        report_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
        safe_makedirs(report_dir)
        
        report_path = os.path.join(report_dir, "training_analysis.txt")
        with open(report_path, "w") as f:
            f.write(report)
        
        # Save metrics as JSON
        metrics_path = os.path.join(report_dir, "training_metrics.json")
        with open(metrics_path, "w") as f:
            json.dump({
                "system": system_info,
                "dataset": dataset_info,
                "training": training_info
            }, f, indent=2)
        
        print(f"📋 Analysis saved for {dataset_name}")
        return True
    except Exception as e:
        print(f"⚠️  Failed to save analysis: {e}")
        return False

# ─── Main Training Pipeline ───────────────────────────────────────────────────
def main():
    """Main training pipeline with automatic model merging"""
    print("🚀 STARTING AUTOMATED TRAINING PIPELINE")
    print(f"🔧 Model: {MODEL_NAME}")
    print(f"🎯 LoRA: {USE_LORA} | Batch: {BATCH_SIZE} | Epochs: {EPOCHS}")
    print(f"🖥️  System: {platform.system()} | CUDA: {torch.cuda.is_available()}")
    
    # Initialize analyzer
    analyzer = TrainingAnalyzer()
    
    # Create directories
    safe_makedirs(OUTPUT_DIR)
    safe_makedirs(MERGED_MODELS_DIR)
    
    # Load tokenizer (shared across all training)
    print("\n🔤 LOADING SHARED TOKENIZER...")
    tokenizer = load_tokenizer_robust(MODEL_NAME)
    if not tokenizer:
        print("❌ CRITICAL: Tokenizer loading failed")
        return
    
    print(f"✅ Tokenizer loaded (vocab: {len(tokenizer)})")
    
    # Analyze system
    system_info = analyzer.analyze_system()
    print(f"📊 System: {system_info.get('total_memory_gb', 0):.1f}GB RAM, {system_info.get('cpu_cores', 0)} cores")
    
    # Process each dataset
    results = []
    total_training_time = 0
    
    for dataset_name in DATASET_SOURCES:
        print(f"\n{'='*60}")
        print(f"🎯 PROCESSING DATASET: {dataset_name}")
        print(f"{'='*60}")
        
        # 1. Load dataset
        dataset = load_dataset_fallback()
        if not dataset:
            print(f"❌ Failed to load {dataset_name}")
            continue
        
        # 2. Analyze dataset
        dataset_info = analyzer.analyze_dataset(dataset)
        print(f"📊 Dataset analysis: {dataset_info}")
        
        # 3. Process dataset
        tokenized_dataset = process_dataset(dataset, tokenizer)
        if not tokenized_dataset:
            print(f"❌ Failed to process {dataset_name}")
            continue
        
        # 4. Load model
        model = load_model(MODEL_NAME, tokenizer, use_lora=True)
        if not model:
            print(f"❌ Failed to load model for {dataset_name}")
            continue
        
        # 5. Setup training
        setup_result = setup_training(model, tokenizer, tokenized_dataset, dataset_name)
        if not setup_result or setup_result[0] is None:
            print(f"❌ Failed to setup training for {dataset_name}")
            continue
        
        trainer, model_dir = setup_result
        
        # 6. Train model
        success, final_model_dir, train_result = train_model(trainer, dataset_name)
        if not success:
            print(f"❌ Training failed for {dataset_name}")
            continue
        
        # 7. Analyze training
        training_info = analyzer.analyze_training(trainer, train_result)
        total_training_time += training_info.get('training_time_minutes', 0)
        
        # 8. Save analysis report
        save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name)
        
        # 9. Merge model (if LoRA was used)
        if USE_LORA and success:
            merge_success, merged_path = merge_model(MODEL_NAME, final_model_dir, dataset_name)
            
            # Store results
            results.append({
                "dataset": dataset_name,
                "training_time": training_info.get('training_time_minutes', 0),
                "final_loss": training_info.get('final_loss', 'unknown'),
                "model_saved": final_model_dir,
                "model_merged": merged_path if merge_success else None,
                "success": True
            })
        else:
            results.append({
                "dataset": dataset_name,
                "training_time": training_info.get('training_time_minutes', 0),
                "final_loss": training_info.get('final_loss', 'unknown'),
                "model_saved": final_model_dir,
                "model_merged": None,
                "success": success
            })
        
        # Cleanup memory
        cleanup_gpu_memory()
        print(f"✅ {dataset_name} processing complete\n")
    
    # Generate final summary
    print(f"\n{'='*60}")
    print("📊 FINAL TRAINING SUMMARY")
    print(f"{'='*60}")
    
    successful_trainings = sum(1 for r in results if r['success'])
    successful_merges = sum(1 for r in results if r.get('model_merged'))
    
    print(f"✅ Total Datasets Processed: {len(results)}")
    print(f"✅ Successful Trainings: {successful_trainings}")
    print(f"✅ Successful Merges: {successful_merges}")
    print(f"⏱️  Total Training Time: {total_training_time:.2f} minutes")
    
    for result in results:
        status = "✅" if result['success'] else "❌"
        merge_status = "🔗" if result.get('model_merged') else "⏭️"
        print(f"{status} {result['dataset']}: {result['training_time']:.1f}min | Loss: {result['final_loss']} {merge_status}")
    
    print(f"\n📂 Models saved in: {OUTPUT_DIR}")
    print(f"🔗 Merged models in: {MERGED_MODELS_DIR}")
    print(f"{'='*60}")
    
    return results

# ─── Execute Training ───────────────────────────────────────────────────────
if __name__ == "__main__":
    print("🏁 STARTING AUTOMATED TRAINING...")
    
    try:
        results = main()
        
        if results:
            print("🎊 TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
        else:
            print("⚠️  TRAINING COMPLETED WITH ISSUES")
            
    except KeyboardInterrupt:
        print("\n🛑 TRAINING STOPPED BY USER")
    except Exception as e:
        print(f"💥 UNEXPECTED ERROR: {str(e)}")
        traceback.print_exc()
        print("⚠️  CONTINUING DESPITE ERROR...")
    
    print("🏁 TRAINING PROCESS FINISHED")