"""Model utilities and helper functions."""
import json
import os
from typing import Dict, Any, Optional
import torch
import numpy as np
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import evaluate
def load_model_and_tokenizer(model_name: str, num_labels: int = 2):
    """Fetch a pretrained sequence-classification model plus its tokenizer.

    Args:
        model_name: Hugging Face hub identifier or local checkpoint path.
        num_labels: Size of the classification head (default binary).

    Returns:
        Tuple of (model, tokenizer).
    """
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name, num_labels=num_labels
    )
    vocab = AutoTokenizer.from_pretrained(model_name)
    return classifier, vocab
def compute_metrics(eval_pred):
    """Compute accuracy and weighted F1 for a Trainer evaluation step."""
    logits, labels = eval_pred
    # Collapse per-class logits to hard class predictions.
    pred_ids = np.argmax(logits, axis=1)
    acc = evaluate.load("accuracy").compute(
        predictions=pred_ids, references=labels
    )
    f1 = evaluate.load("f1").compute(
        predictions=pred_ids, references=labels, average="weighted"
    )
    return {"accuracy": acc["accuracy"], "f1": f1["f1"]}
def detailed_evaluation(y_true, y_pred, class_names: Optional[list] = None,
                        save_path: str = "confusion_matrix.png") -> Dict[str, Any]:
    """
    Perform detailed evaluation with classification report and confusion matrix.

    Args:
        y_true: True labels
        y_pred: Predicted labels
        class_names: Names of classes for visualization
        save_path: Where to write the confusion-matrix figure
            (previously hard-coded to 'confusion_matrix.png'; default preserved)

    Returns:
        Dictionary with the classification report, confusion matrix,
        accuracy, macro F1 and weighted F1
    """
    if class_names is None:
        # NOTE(review): names are derived from labels seen in y_true only;
        # a class present solely in y_pred would be missing — confirm callers
        # always pass ground truth covering every class.
        class_names = [f"Class {i}" for i in range(len(np.unique(y_true)))]

    # Per-class precision/recall/F1 as a nested dict.
    report = classification_report(y_true, y_pred, target_names=class_names,
                                   output_dict=True)
    cm = confusion_matrix(y_true, y_pred)

    # Render and save the confusion matrix heatmap.
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names)
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()

    return {
        "classification_report": report,
        "confusion_matrix": cm.tolist(),
        "accuracy": report["accuracy"],
        "macro_f1": report["macro avg"]["f1-score"],
        "weighted_f1": report["weighted avg"]["f1-score"]
    }
def save_model_info(model_path: str, config: Dict[str, Any], metrics: Dict[str, Any]):
    """Persist model configuration and training metrics as model_info.json.

    Args:
        model_path: Directory the model lives in; the JSON file is written here.
        config: Model configuration to record.
        metrics: Training metrics to record.
    """
    payload = {
        "model_config": config,
        "training_metrics": metrics,
        "model_path": model_path,
    }
    out_file = os.path.join(model_path, "model_info.json")
    with open(out_file, "w") as f:
        json.dump(payload, f, indent=2)
def get_model_size(model) -> Dict[str, Any]:
    """Summarize a torch module's parameter/buffer footprint.

    Returns a dict with the raw parameter count and the parameter,
    buffer and combined sizes expressed in megabytes.
    """
    n_params = 0
    bytes_params = 0
    for p in model.parameters():
        n_params += p.nelement()
        bytes_params += p.nelement() * p.element_size()

    # Buffers (e.g. batch-norm running stats) live outside parameters().
    bytes_buffers = sum(b.nelement() * b.element_size() for b in model.buffers())

    mb = 1024 ** 2
    return {
        "param_count": n_params,
        "param_size_mb": bytes_params / mb,
        "buffer_size_mb": bytes_buffers / mb,
        "total_size_mb": (bytes_params + bytes_buffers) / mb,
    }
def plot_training_history(trainer_log_history: list, save_path: str = "training_history.png"):
    """Render train/eval loss and eval accuracy curves from Trainer logs.

    Args:
        trainer_log_history: List of log dicts as produced by a HF Trainer
            (`trainer.state.log_history`).
        save_path: Output image path for the combined figure.
    """
    epochs, train_losses = [], []
    eval_losses, eval_accuracies = [], []
    for entry in trainer_log_history:
        if "train_loss" in entry:
            train_losses.append(entry["train_loss"])
            epochs.append(entry["epoch"])
        if "eval_loss" in entry:
            eval_losses.append(entry["eval_loss"])
        if "eval_accuracy" in entry:
            eval_accuracies.append(entry["eval_accuracy"])

    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: training loss, plus eval loss when present.
    loss_ax.plot(epochs, train_losses, label="Train Loss", marker='o')
    if eval_losses:
        # Synthesize epoch numbers for the eval points (assumes one eval per epoch).
        eval_epochs = [i + 1 for i in range(len(eval_losses))]
        loss_ax.plot(eval_epochs, eval_losses, label="Eval Loss", marker='s')
    loss_ax.set_xlabel("Epoch")
    loss_ax.set_ylabel("Loss")
    loss_ax.set_title("Training and Evaluation Loss")
    loss_ax.legend()
    loss_ax.grid(True)

    # Right panel: eval accuracy when present.
    if eval_accuracies:
        # Same one-eval-per-epoch assumption for accuracy points.
        acc_epochs = [i + 1 for i in range(len(eval_accuracies))]
        acc_ax.plot(acc_epochs, eval_accuracies,
                    label="Eval Accuracy", marker='s', color='green')
    acc_ax.set_xlabel("Epoch")
    acc_ax.set_ylabel("Accuracy")
    acc_ax.set_title("Evaluation Accuracy")
    acc_ax.legend()
    acc_ax.grid(True)

    plt.tight_layout()
    plt.savefig(save_path, dpi=300, bbox_inches='tight')
    plt.close()
def estimate_gpu_memory(model, batch_size: int, seq_length: int) -> Dict[str, float]:
    """Give a rough GPU memory estimate (MB and GB) for training `model`.

    Sums four components: model weights, a crude activation estimate,
    gradients (assumed equal in size to the weights), and a fixed
    500 MB framework overhead.
    """
    weights_mb = get_model_size(model)["total_size_mb"]

    # Simplified activation cost: batch * seq * hidden float32 values.
    activations_mb = batch_size * seq_length * model.config.hidden_size * 4 / 1024**2

    # Gradient storage mirrors the parameter storage.
    gradients_mb = weights_mb

    overhead_mb = 500
    total_mb = weights_mb + activations_mb + gradients_mb + overhead_mb

    return {
        "model_size_mb": weights_mb,
        "activation_size_mb": activations_mb,
        "gradient_size_mb": gradients_mb,
        "overhead_mb": overhead_mb,
        "total_estimated_mb": total_mb,
        "total_estimated_gb": total_mb / 1024,
    }