import os
import torch
import gc
import psutil
import multiprocessing as mp
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TrainingArguments,
Trainer,
DataCollatorForLanguageModeling,
GPT2TokenizerFast
)
import shutil
import warnings
import platform
import traceback
from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
import json
import tempfile
from datetime import datetime
warnings.filterwarnings("ignore")
# ─── Configuration ───────────────────────────────────────────────────────────
MODEL_NAME = "zxc4wewewe/blackthinking"
OUTPUT_DIR = "./offsec_model"
MERGED_MODELS_DIR = "./merged_models"
MAX_LENGTH = 512
BATCH_SIZE = 1
GRADIENT_ACCUMULATION = 8
EPOCHS = 3
LEARNING_RATE = 2e-5
SAVE_STEPS = 100
EVAL_STEPS = 100
LOGGING_STEPS = 50
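# Effective batch size per optimizer step = BATCH_SIZE * GRADIENT_ACCUMULATION = 1 * 8 = 8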
# LoRA Configuration
USE_LORA = True
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0.1
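# LoRA updates are scaled by LORA_ALPHA / LORA_R (here 16 / 8 = 2.0), so raising
# LORA_ALPHA amplifies the adapter's contribution without adding trainable parameters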
# Dataset Configuration
DATASET_SOURCES = [
"huihui-ai/Guilherme34_uncensor-v2",
"zxc4wewewe/offsec",
]
# System Configuration
NUM_WORKERS = min(2, mp.cpu_count())
BATCH_SIZE_TOKENIZATION = 50
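# Note: the tokenization .map() calls below deliberately run single-process
# (num_proc=1) for stability; NUM_WORKERS is the cap to use if they are ever
# parallelized, and BATCH_SIZE_TOKENIZATION bounds per-batch memory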
# ─── Analyzer Class ──────────────────────────────────────────────────────────
class TrainingAnalyzer:
"""Analyzes training progress and system resources"""
def __init__(self):
self.start_time = datetime.now()
self.training_metrics = {
"total_samples": 0,
"processed_samples": 0,
"training_time": 0,
"peak_memory": 0,
"gpu_memory": 0,
}
def analyze_system(self):
"""Analyze system resources"""
try:
memory = psutil.virtual_memory()
gpu_memory = 0
if torch.cuda.is_available():
gpu_memory = torch.cuda.memory_allocated() / (1024**3)
return {
"cpu_cores": mp.cpu_count(),
"total_memory_gb": memory.total / (1024**3),
"available_memory_gb": memory.available / (1024**3),
"memory_usage_percent": memory.percent,
"gpu_memory_gb": gpu_memory,
"cuda_available": torch.cuda.is_available(),
"cuda_version": torch.version.cuda,
"pytorch_version": torch.__version__,
}
except Exception as e:
print(f"⚠️ System analysis failed: {e}")
return {}
def analyze_dataset(self, dataset):
"""Analyze dataset characteristics"""
if not dataset:
return {}
try:
analysis = {}
for split_name, split_data in dataset.items():
if hasattr(split_data, '__len__'):
analysis[split_name] = {
"num_samples": len(split_data),
"columns": split_data.column_names if hasattr(split_data, 'column_names') else [],
}
return analysis
except Exception as e:
print(f"⚠️ Dataset analysis failed: {e}")
return {}
def analyze_training(self, trainer, train_result):
"""Analyze training results"""
try:
current_time = datetime.now()
training_time = (current_time - self.start_time).total_seconds()
            memory = psutil.virtual_memory()
            # RAM snapshot at end of run (psutil exposes no true peak counter)
            peak_memory = memory.used / (1024**3)
            gpu_memory = 0
            if torch.cuda.is_available():
                # high-water mark of GPU allocations, not just current usage
                gpu_memory = torch.cuda.max_memory_allocated() / (1024**3)
return {
"training_time_seconds": training_time,
"training_time_minutes": training_time / 60,
"peak_memory_gb": peak_memory,
"peak_gpu_memory_gb": gpu_memory,
"final_loss": getattr(train_result, 'training_loss', 'unknown'),
"total_steps": getattr(train_result, 'global_step', 0),
"samples_per_second": train_result.metrics.get('train_samples_per_second', 0) if train_result.metrics else 0,
}
except Exception as e:
print(f"⚠️ Training analysis failed: {e}")
return {}
def generate_report(self, system_info, dataset_info, training_info):
"""Generate comprehensive training report"""
report = f"""
{'='*60}
TRAINING ANALYSIS REPORT
{'='*60}
SYSTEM INFORMATION:
- CPU Cores: {system_info.get('cpu_cores', 'unknown')}
- Total Memory: {system_info.get('total_memory_gb', 0):.1f} GB
- Available Memory: {system_info.get('available_memory_gb', 0):.1f} GB
- Memory Usage: {system_info.get('memory_usage_percent', 0):.1f}%
- CUDA Available: {system_info.get('cuda_available', False)}
- CUDA Version: {system_info.get('cuda_version', 'unknown')}
- PyTorch Version: {system_info.get('pytorch_version', 'unknown')}
- GPU Memory Used: {system_info.get('gpu_memory_gb', 0):.2f} GB
DATASET ANALYSIS:
"""
for split_name, split_info in dataset_info.items():
report += f"- {split_name.upper()}: {split_info.get('num_samples', 0)} samples\n"
if split_info.get('columns'):
report += f" Columns: {', '.join(split_info['columns'])}\n"
report += f"""
TRAINING PERFORMANCE:
- Training Time: {training_info.get('training_time_minutes', 0):.2f} minutes
- Final Loss: {training_info.get('final_loss', 'unknown')}
- Total Steps: {training_info.get('total_steps', 0)}
- Samples/Second: {training_info.get('samples_per_second', 0):.2f}
- Peak Memory: {training_info.get('peak_memory_gb', 0):.2f} GB
- Peak GPU Memory: {training_info.get('peak_gpu_memory_gb', 0):.2f} GB
TRAINING CONFIGURATION:
- Model: {MODEL_NAME}
- Batch Size: {BATCH_SIZE}
- Gradient Accumulation: {GRADIENT_ACCUMULATION}
- Learning Rate: {LEARNING_RATE}
- Epochs: {EPOCHS}
- LoRA Enabled: {USE_LORA}
- Max Length: {MAX_LENGTH}
{'='*60}
END REPORT
{'='*60}
"""
return report
# ─── Utility Functions ───────────────────────────────────────────────────────
def safe_makedirs(path):
"""Safely create directories"""
try:
os.makedirs(path, exist_ok=True)
return True
except Exception as e:
print(f"⚠️ Failed to create directory {path}: {e}")
return False
def cleanup_gpu_memory():
"""Clean up GPU memory"""
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
def load_tokenizer_robust(model_name):
"""Load tokenizer with multiple fallback strategies"""
print(f"🔄 Loading tokenizer for: {model_name}")
strategies = [
lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True),
lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=False),
lambda: GPT2TokenizerFast.from_pretrained("gpt2"),
lambda: create_minimal_tokenizer(),
]
for i, strategy in enumerate(strategies, 1):
try:
tokenizer = strategy()
# Add missing special tokens
if tokenizer.pad_token is None:
if tokenizer.eos_token:
tokenizer.pad_token = tokenizer.eos_token
else:
tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
print(f"✅ Tokenizer loaded (strategy {i})")
return tokenizer
except Exception as e:
print(f"⚠️ Strategy {i} failed: {str(e)[:100]}...")
print("❌ All tokenizer strategies failed")
return None
def create_minimal_tokenizer():
"""Create absolute minimal tokenizer"""
try:
        from transformers import PreTrainedTokenizerFast
        vocab = {
            "<|pad|>": 0,
            "<|bos|>": 1,
            "<|eos|>": 2,
            "<|unk|>": 3,
        }
for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
vocab[char] = i
tokenizer_json = {
"version": "1.0",
"model": {
"type": "BPE",
"vocab": vocab,
"merges": []
}
}
with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
json.dump(tokenizer_json, f)
temp_path = f.name
tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
        tokenizer.pad_token = "<|pad|>"
        tokenizer.eos_token = "<|eos|>"
        tokenizer.bos_token = "<|bos|>"
os.unlink(temp_path)
return tokenizer
    except Exception:
        return None
def load_dataset_fallback(dataset_name):
    """Load the requested dataset, falling back to a dummy dataset on failure"""
    print("📥 Loading dataset...")
    try:
        print(f"🔄 Trying: {dataset_name}")
        dataset = load_dataset(dataset_name, streaming=False)
        print(f"✅ Loaded: {dataset_name}")
        # Ensure a train split exists; carve one out of the first split if needed
        if "train" not in dataset:
            keys = list(dataset.keys())
            if not keys:
                raise ValueError("Dataset has no splits")
            dataset = dataset[keys[0]].train_test_split(test_size=0.1, seed=42)
            print("✅ Created train/test split")
        return dataset
    except Exception as e:
        print(f"⚠️ Failed: {str(e)[:100]}...")
# Create dummy dataset
print("🔄 Creating dummy dataset...")
try:
dummy_data = {
"train": [
{"prompt": "What is AI?", "response": "Artificial Intelligence is computer systems performing human tasks."},
{"prompt": "How to code?", "response": "Start with basics like variables, loops, functions."},
] * 10,
"test": [
{"prompt": "Define ML", "response": "Machine Learning enables computers to learn from data."},
] * 3,
}
dataset = DatasetDict({
split: Dataset.from_list(data)
for split, data in dummy_data.items()
})
print("✅ Created dummy dataset")
return dataset
except Exception as e:
print(f"❌ Dummy dataset failed: {e}")
return None
def normalize_example(example):
"""Normalize example format"""
if not example:
return {"prompt": "default", "response": "default"}
try:
if "prompt" in example and "response" in example:
return {
"prompt": str(example.get("prompt", "")).strip() or "default",
"response": str(example.get("response", "")).strip() or "default",
}
if "messages" in example and isinstance(example["messages"], list):
prompt, response = "", ""
for msg in example["messages"]:
if isinstance(msg, dict):
role, content = str(msg.get("role", "")), str(msg.get("content", ""))
if role.lower() in ["user", "human"]:
prompt = content
elif role.lower() in ["assistant", "bot"]:
response = content
return {"prompt": prompt or "default", "response": response or "default"}
text = str(example.get("text", example.get("content", "default")))
if "Assistant:" in text:
parts = text.split("Assistant:", 1)
return {"prompt": parts[0].replace("User:", "").strip() or "default",
"response": parts[1].strip() or "default"}
return {"prompt": text[:200] or "default",
"response": (text[-200:] if len(text) > 200 else text) or "default"}
    except Exception:
        return {"prompt": "default", "response": "default"}
def tokenize_function(examples, tokenizer):
"""Tokenize examples safely"""
try:
full_texts = [
f"{prompt}\n\n{response}{tokenizer.eos_token}"
for prompt, response in zip(examples["prompt"], examples["response"])
]
result = tokenizer(
full_texts,
truncation=True,
max_length=MAX_LENGTH,
padding=False,
return_tensors=None,
)
result["labels"] = [
[-100 if (hasattr(tokenizer, 'pad_token_id') and token_id == tokenizer.pad_token_id) else token_id
for token_id in labels]
for labels in result["input_ids"]
]
return result
except Exception as e:
print(f"⚠️ Tokenization error: {e}")
return {
"input_ids": [[1, 2, 3]] * len(examples["prompt"]),
"attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
"labels": [[1, 2, 3]] * len(examples["prompt"]),
}
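# NOTE: with labels mirroring input_ids, the loss covers prompt tokens as well
# as the response. A common refinement (not done here) is to set the prompt's
# label positions to -100 so only response tokens contribute to the loss.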
def process_dataset(dataset, tokenizer):
"""Process dataset efficiently"""
if not dataset or not tokenizer:
return None
print("⚡ Processing dataset...")
processed_splits = {}
for split_name in dataset.keys():
try:
print(f"🔄 Processing {split_name} ({len(dataset[split_name])} samples)...")
# Normalize
normalized = dataset[split_name].map(
normalize_example,
remove_columns=dataset[split_name].column_names,
num_proc=1,
)
# Tokenize
tokenized = normalized.map(
lambda x: tokenize_function(x, tokenizer),
batched=True,
batch_size=BATCH_SIZE_TOKENIZATION,
num_proc=1,
remove_columns=["prompt", "response"],
load_from_cache_file=False
)
processed_splits[split_name] = tokenized
print(f"✅ {split_name}: {len(tokenized)} samples")
except Exception as e:
print(f"⚠️ {split_name} failed: {e}")
# Create minimal fallback
try:
dummy_tokens = tokenizer("test\n\ntest", return_tensors=None)
dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
                processed_splits[split_name] = Dataset.from_list([dict(dummy_tokens)] * min(10, len(dataset[split_name])))
            except Exception:
processed_splits[split_name] = Dataset.from_list([
{"input_ids": [1], "attention_mask": [1], "labels": [1]}
] * 5)
return DatasetDict(processed_splits) if processed_splits else None
def load_model(model_name, tokenizer, use_lora=True):
"""Load model with LoRA support"""
print("🧠 Loading model...")
strategies = [
{
"name": "8-bit + LoRA",
"params": {
"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
"device_map": "auto" if torch.cuda.is_available() else None,
"trust_remote_code": True,
"low_cpu_mem_usage": True,
"load_in_8bit": True,
}
},
{
"name": "float16",
"params": {
"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
"device_map": "auto" if torch.cuda.is_available() else None,
"trust_remote_code": True,
"low_cpu_mem_usage": True,
}
},
{
"name": "CPU fallback",
"params": {
"low_cpu_mem_usage": True,
}
}
]
for strategy in strategies:
try:
print(f"🔄 {strategy['name']}...")
model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])
            # Resize embeddings before wrapping with LoRA so the adapter sees
            # the final vocabulary size
            if tokenizer:
                try:
                    model.resize_token_embeddings(len(tokenizer))
                except Exception as e:
                    print(f"⚠️ Embedding resize failed: {e}")
            # Apply LoRA if requested
if use_lora and USE_LORA:
try:
model = prepare_model_for_kbit_training(model)
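                    # target_modules assumes LLaMA/Mistral-style attention naming
                    # ("q_proj"/"v_proj"); GPT-2-style models expose "c_attn"
                    # instead, so adjust these names to the loaded architecture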
lora_config = LoraConfig(
r=LORA_R,
lora_alpha=LORA_ALPHA,
target_modules=["q_proj", "v_proj"],
lora_dropout=LORA_DROPOUT,
bias="none",
task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
print("✅ LoRA applied")
except Exception as e:
print(f"⚠️ LoRA failed: {e}")
print(f"✅ Model loaded ({strategy['name']})")
return model
except Exception as e:
print(f"⚠️ {strategy['name']} failed: {str(e)[:100]}...")
print("❌ All model strategies failed")
return None
def setup_training(model, tokenizer, tokenized_dataset, dataset_name):
"""Setup training configuration"""
if not model or not tokenizer or not tokenized_dataset:
        return None, None
print(f"⚙️ Setting up training for {dataset_name}...")
try:
train_dataset = tokenized_dataset.get("train")
eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")
if not train_dataset or len(train_dataset) == 0:
print("❌ No training data")
            return None, None
        # Cap training to a small sample for quick smoke-test runs;
        # raise max_samples for a full fine-tune
        max_samples = 50
if len(train_dataset) > max_samples:
train_dataset = train_dataset.select(range(max_samples))
if eval_dataset and len(eval_dataset) > 10:
eval_dataset = eval_dataset.select(range(min(10, len(eval_dataset))))
output_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
safe_makedirs(output_dir)
training_args = TrainingArguments(
output_dir=output_dir,
num_train_epochs=EPOCHS,
per_device_train_batch_size=BATCH_SIZE,
per_device_eval_batch_size=BATCH_SIZE,
gradient_accumulation_steps=GRADIENT_ACCUMULATION,
learning_rate=LEARNING_RATE,
weight_decay=0.01,
warmup_ratio=0.1,
lr_scheduler_type="linear",
logging_dir=os.path.join(output_dir, "logs"),
logging_steps=LOGGING_STEPS,
save_strategy="steps",
save_steps=SAVE_STEPS,
save_total_limit=2,
eval_strategy="steps" if eval_dataset else "no",
eval_steps=EVAL_STEPS if eval_dataset else None,
fp16=torch.cuda.is_available(),
bf16=False,
dataloader_num_workers=1,
dataloader_pin_memory=False,
remove_unused_columns=False,
optim="adamw_torch",
dataloader_drop_last=True,
gradient_checkpointing=True,
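            # gradient_checkpointing trades recomputation for activation memory,
            # helping BATCH_SIZE=1 runs fit on small GPUs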
report_to="none",
run_name=f"training_{dataset_name}",
tf32=False,
)
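        # mlm=False selects the causal-LM path: the collator pads each batch,
        # clones input_ids into labels, and masks pad positions with -100;
        # pad_to_multiple_of=8 aligns lengths for tensor-core efficiency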
data_collator = DataCollatorForLanguageModeling(
tokenizer=tokenizer,
mlm=False,
pad_to_multiple_of=8,
)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
data_collator=data_collator,
processing_class=tokenizer,
callbacks=[]
)
print("✅ Training setup complete")
return trainer, output_dir
except Exception as e:
print(f"❌ Training setup failed: {e}")
return None, None
def train_model(trainer, dataset_name):
"""Execute training and save results"""
if not trainer:
return False, None, None
print(f"🏃 Training {dataset_name}...")
try:
train_result = trainer.train()
# Save final model
output_dir = trainer.args.output_dir
final_model_dir = os.path.join(output_dir, "final_model")
safe_makedirs(final_model_dir)
print("💾 Saving model...")
trainer.save_model(final_model_dir)
trainer.save_state()
print("💾 Saving tokenizer...")
        trainer.processing_class.save_pretrained(final_model_dir)
print(f"✅ Training complete for {dataset_name}")
return True, final_model_dir, train_result
except Exception as e:
print(f"❌ Training failed: {e}")
traceback.print_exc()
return False, None, None
def merge_model(base_model_path, adapter_path, dataset_name):
"""Merge LoRA weights with base model"""
print(f"🔗 Merging {dataset_name}...")
try:
output_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_"))
safe_makedirs(output_path)
# Load tokenizer from adapter
try:
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
        except Exception:
tokenizer = load_tokenizer_robust(base_model_path)
# Load base model
base_model = AutoModelForCausalLM.from_pretrained(
base_model_path,
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
device_map="auto" if torch.cuda.is_available() else None,
trust_remote_code=True,
low_cpu_mem_usage=True
)
# Resize embeddings to match tokenizer
current_vocab_size = len(tokenizer)
model_vocab_size = base_model.get_input_embeddings().weight.size(0)
if current_vocab_size != model_vocab_size:
base_model.resize_token_embeddings(current_vocab_size)
# Load and merge LoRA adapter
merged_model = PeftModel.from_pretrained(base_model, adapter_path)
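        # merge_and_unload folds each adapter delta into the base weights,
        # i.e. W' = W + (LORA_ALPHA / LORA_R) * (B @ A), and returns a plain
        # transformers model with no PEFT wrapper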
merged_model = merged_model.merge_and_unload()
# Save merged model
merged_model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)
print(f"✅ {dataset_name} merged successfully")
cleanup_gpu_memory()
return True, output_path
except Exception as e:
print(f"❌ Merging {dataset_name} failed: {e}")
# Fallback: copy adapter files
try:
fallback_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_") + "_adapter_only")
safe_makedirs(fallback_path)
adapter_files = os.listdir(adapter_path)
for file in adapter_files:
src = os.path.join(adapter_path, file)
dst = os.path.join(fallback_path, file)
if os.path.isfile(src):
shutil.copy2(src, dst)
print(f"⚠️ {dataset_name} adapter copied (merging failed)")
return True, fallback_path
except Exception as e2:
print(f"❌ Fallback also failed: {e2}")
return False, None
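# After a successful merge the output directory is a plain Hugging Face
# checkpoint. A minimal smoke-test sketch (the path is illustrative):
#   from transformers import AutoModelForCausalLM, AutoTokenizer
#   tok = AutoTokenizer.from_pretrained("./merged_models/<name>")
#   model = AutoModelForCausalLM.from_pretrained("./merged_models/<name>")
#   out = model.generate(**tok("Hello", return_tensors="pt"), max_new_tokens=20)
#   print(tok.decode(out[0]))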
def save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name):
"""Save analysis report"""
try:
report = analyzer.generate_report(system_info, dataset_info, training_info)
report_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
safe_makedirs(report_dir)
report_path = os.path.join(report_dir, "training_analysis.txt")
with open(report_path, "w") as f:
f.write(report)
# Save metrics as JSON
metrics_path = os.path.join(report_dir, "training_metrics.json")
with open(metrics_path, "w") as f:
json.dump({
"system": system_info,
"dataset": dataset_info,
"training": training_info
}, f, indent=2)
print(f"📋 Analysis saved for {dataset_name}")
return True
except Exception as e:
print(f"⚠️ Failed to save analysis: {e}")
return False
# ─── Main Training Pipeline ───────────────────────────────────────────────────
def main():
"""Main training pipeline with automatic model merging"""
print("🚀 STARTING AUTOMATED TRAINING PIPELINE")
print(f"🔧 Model: {MODEL_NAME}")
print(f"🎯 LoRA: {USE_LORA} | Batch: {BATCH_SIZE} | Epochs: {EPOCHS}")
print(f"🖥️ System: {platform.system()} | CUDA: {torch.cuda.is_available()}")
# Initialize analyzer
analyzer = TrainingAnalyzer()
# Create directories
safe_makedirs(OUTPUT_DIR)
safe_makedirs(MERGED_MODELS_DIR)
# Load tokenizer (shared across all training)
print("\n🔤 LOADING SHARED TOKENIZER...")
tokenizer = load_tokenizer_robust(MODEL_NAME)
if not tokenizer:
print("❌ CRITICAL: Tokenizer loading failed")
return
print(f"✅ Tokenizer loaded (vocab: {len(tokenizer)})")
# Analyze system
system_info = analyzer.analyze_system()
print(f"📊 System: {system_info.get('total_memory_gb', 0):.1f}GB RAM, {system_info.get('cpu_cores', 0)} cores")
# Process each dataset
results = []
total_training_time = 0
for dataset_name in DATASET_SOURCES:
print(f"\n{'='*60}")
print(f"🎯 PROCESSING DATASET: {dataset_name}")
print(f"{'='*60}")
# 1. Load dataset
        dataset = load_dataset_fallback(dataset_name)
if not dataset:
print(f"❌ Failed to load {dataset_name}")
continue
# 2. Analyze dataset
dataset_info = analyzer.analyze_dataset(dataset)
print(f"📊 Dataset analysis: {dataset_info}")
# 3. Process dataset
tokenized_dataset = process_dataset(dataset, tokenizer)
if not tokenized_dataset:
print(f"❌ Failed to process {dataset_name}")
continue
# 4. Load model
model = load_model(MODEL_NAME, tokenizer, use_lora=True)
if not model:
print(f"❌ Failed to load model for {dataset_name}")
continue
# 5. Setup training
setup_result = setup_training(model, tokenizer, tokenized_dataset, dataset_name)
if not setup_result or setup_result[0] is None:
print(f"❌ Failed to setup training for {dataset_name}")
continue
trainer, model_dir = setup_result
# 6. Train model
success, final_model_dir, train_result = train_model(trainer, dataset_name)
if not success:
print(f"❌ Training failed for {dataset_name}")
continue
# 7. Analyze training
training_info = analyzer.analyze_training(trainer, train_result)
total_training_time += training_info.get('training_time_minutes', 0)
# 8. Save analysis report
save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name)
# 9. Merge model (if LoRA was used)
if USE_LORA and success:
merge_success, merged_path = merge_model(MODEL_NAME, final_model_dir, dataset_name)
# Store results
results.append({
"dataset": dataset_name,
"training_time": training_info.get('training_time_minutes', 0),
"final_loss": training_info.get('final_loss', 'unknown'),
"model_saved": final_model_dir,
"model_merged": merged_path if merge_success else None,
"success": True
})
else:
results.append({
"dataset": dataset_name,
"training_time": training_info.get('training_time_minutes', 0),
"final_loss": training_info.get('final_loss', 'unknown'),
"model_saved": final_model_dir,
"model_merged": None,
"success": success
})
# Cleanup memory
cleanup_gpu_memory()
print(f"✅ {dataset_name} processing complete\n")
# Generate final summary
print(f"\n{'='*60}")
print("📊 FINAL TRAINING SUMMARY")
print(f"{'='*60}")
successful_trainings = sum(1 for r in results if r['success'])
successful_merges = sum(1 for r in results if r.get('model_merged'))
print(f"✅ Total Datasets Processed: {len(results)}")
print(f"✅ Successful Trainings: {successful_trainings}")
print(f"✅ Successful Merges: {successful_merges}")
print(f"⏱️ Total Training Time: {total_training_time:.2f} minutes")
for result in results:
status = "✅" if result['success'] else "❌"
merge_status = "🔗" if result.get('model_merged') else "⏭️"
print(f"{status} {result['dataset']}: {result['training_time']:.1f}min | Loss: {result['final_loss']} {merge_status}")
print(f"\n📂 Models saved in: {OUTPUT_DIR}")
print(f"🔗 Merged models in: {MERGED_MODELS_DIR}")
print(f"{'='*60}")
return results
# ─── Execute Training ───────────────────────────────────────────────────────
if __name__ == "__main__":
print("🏁 STARTING AUTOMATED TRAINING...")
try:
results = main()
if results:
print("🎊 TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
else:
print("⚠️ TRAINING COMPLETED WITH ISSUES")
except KeyboardInterrupt:
print("\n🛑 TRAINING STOPPED BY USER")
except Exception as e:
print(f"💥 UNEXPECTED ERROR: {str(e)}")
traceback.print_exc()
print("⚠️ CONTINUING DESPITE ERROR...")
print("🏁 TRAINING PROCESS FINISHED")