| from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
| from datasets import load_dataset | |
| import torch | |
| from sklearn.metrics import classification_report, confusion_matrix | |
# Load the tokenizer and the sequence-classification model from the same
# checkpoint. NOTE(review): 'your_model_name' looks like a placeholder --
# replace it with a real checkpoint id or local path before running.
model_name = 'your_model_name'
tokenizer, model = (
    auto_cls.from_pretrained(model_name)
    for auto_cls in (AutoTokenizer, AutoModelForSequenceClassification)
)
# Load a 1% slice of the MNLI matched validation split.
# MNLI is published as the "mnli" configuration of the GLUE benchmark, so it
# is addressed as ('glue', 'mnli'). A bare 'mnli' id does not appear to
# resolve to a dataset -- TODO confirm against the installed datasets version.
dataset = load_dataset('glue', 'mnli', split='validation_matched[:1%]')
# Tokenization
def tokenize_function(examples):
    """Tokenize premise/hypothesis pairs with the module-level tokenizer.

    Args:
        examples: Mapping that provides "premise" and "hypothesis" entries
            (a batch of rows when used via ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer's output for the pair, with truncation enabled.
        No padding is requested here.
    """
    premises = examples["premise"]
    hypotheses = examples["hypothesis"]
    return tokenizer(premises, hypotheses, truncation=True)
tokenized_dataset = dataset.map(tokenize_function, batched=True)
# Ground-truth labels, materialized as a plain list for scikit-learn.
labels = list(tokenized_dataset['label'])

# Prepare batches for prediction.
# Keep only the columns the tokenizer declares as model inputs: raw text
# cannot be converted to tensors, and any other leftover column (such as
# 'label') would end up in the batch that is unpacked into the model call.
model_columns = set(tokenizer.model_input_names)
inputs = tokenized_dataset.remove_columns(
    [name for name in tokenized_dataset.column_names if name not in model_columns]
)
inputs.set_format(type="torch")


def _pad_batch(features):
    """Pad a list of tokenized examples to a common length as one batch."""
    return tokenizer.pad(features, return_tensors="pt")


# Examples were tokenized without padding, so their lengths differ and the
# default collate function cannot stack them; pad each batch dynamically.
loader = torch.utils.data.DataLoader(inputs, batch_size=8, collate_fn=_pad_batch)
# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference only: keep dropout and similar layers disabled

# Collect predictions.
# Only keys the tokenizer lists as model inputs are forwarded; anything else
# present in a batch would be passed to the model as an unexpected keyword.
input_names = set(tokenizer.model_input_names)
preds = []
with torch.no_grad():  # gradients are not needed for evaluation
    for batch in loader:
        # The batch may be a plain dict, which has no .to() method, so each
        # tensor is moved to the target device individually.
        model_inputs = {
            name: tensor.to(device)
            for name, tensor in batch.items()
            if name in input_names
        }
        outputs = model(**model_inputs)
        # Predicted class = index of the largest logit per example.
        preds.extend(outputs.logits.argmax(dim=-1).tolist())
predicted_labels = preds
# Evaluate performance.
# NOTE(review): this assumes the model's output class indices use the same
# ids as the dataset's label column -- verify against the checkpoint config.
report = classification_report(y_true=labels, y_pred=predicted_labels)
matrix = confusion_matrix(y_true=labels, y_pred=predicted_labels)
# One print with a newline separator emits the same text as three prints.
print(report, "\nМатрица путаницы:", matrix, sep="\n")