import torch
from datasets import load_dataset
from sklearn.metrics import classification_report, confusion_matrix
from transformers import AutoModelForSequenceClassification, AutoTokenizer
# Load the model and tokenizer.
# NOTE(review): 'your_model_name' is a placeholder — replace it with a real
# MNLI-finetuned checkpoint (3-way NLI head) before running.
model_name = 'your_model_name'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
# Load 1% of the MNLI matched validation split.
# Fix: MNLI's validation_matched split is published as the 'mnli' config of the
# GLUE benchmark on the Hub; load_dataset('mnli', ...) has no standalone
# dataset of that name and fails to resolve.
dataset = load_dataset('glue', 'mnli', split='validation_matched[:1%]')
# Tokenization.
def tokenize_function(examples):
    """Tokenize a batch of premise/hypothesis pairs.

    Truncates each pair to the model's maximum input length; no padding is
    applied here — batches are padded dynamically at collation time.
    """
    return tokenizer(examples["premise"], examples["hypothesis"], truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True)
# Gold labels, kept aside before the label column is dropped for inference.
labels = tokenized_dataset['label']
# Prepare batches for prediction.
# Fix: keep only the tokenizer's model inputs (input_ids, attention_mask, ...).
# The original removed just 'premise'/'hypothesis', leaving 'label' (and 'idx',
# if present) in the batch — those would be forwarded to model(**batch) and
# raise a TypeError or change the model's output (loss computation).
inputs = tokenized_dataset.remove_columns(
    [c for c in tokenized_dataset.column_names if c not in tokenizer.model_input_names]
)
inputs.set_format(type="torch")
# Fix: after truncation-only tokenization the sequences have different lengths,
# so the default collate cannot stack them into tensors — pad each batch of 8
# dynamically to its longest sequence.
loader = torch.utils.data.DataLoader(
    inputs,
    batch_size=8,
    collate_fn=lambda features: tokenizer.pad(features, return_tensors="pt"),
)
# Use the GPU if one is available.
# Fix: the query is torch.cuda.is_available() — 'isavailable' does not exist
# and raises AttributeError.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
# Run predictions.
model.eval()  # disable dropout etc. for deterministic inference
preds = []
with torch.no_grad():  # no gradients needed at eval time — saves memory/compute
    for batch in loader:
        # Fix: the batch is a dict of tensors and has no .to() method —
        # move each tensor to the device individually.
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        # argmax over the class dimension gives the predicted label id.
        preds.extend(outputs.logits.argmax(dim=-1).tolist())
predicted_labels = preds
# Evaluate performance against the gold labels.
report = classification_report(labels, predicted_labels)
matrix = confusion_matrix(labels, predicted_labels)
print(report)
print("\nМатрица путаницы:")
print(matrix)