"""Evaluate a fine-tuned mT5 checkpoint on a JSONL validation set.

Loads the model and tokenizer from ``model_path``, generates a prediction for
every validation record, prints a sample of predictions and reports
exact-match accuracy (prediction == target after stripping whitespace).
"""

from datasets import load_dataset
import torch
from transformers import MT5ForConditionalGeneration, MT5Tokenizer

# Paths to the fine-tuned model directory and the validation data.
model_path = "./mt5-finetuned"
validation_file = "mt5_validation_data-1.jsonl"

# Tokenization / generation settings used by predict().
MAX_INPUT_LENGTH = 256
MAX_OUTPUT_LENGTH = 64
NUM_BEAMS = 5

# How many (input, prediction, target) triples to print as a preview.
MAX_EXAMPLES_SHOWN = 80

# Load the model and tokenizer, and put the model into evaluation mode.
tokenizer = MT5Tokenizer.from_pretrained(model_path)
model = MT5ForConditionalGeneration.from_pretrained(model_path)
model.eval()

# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# Load the validation split; each record is read via its "text" and "target"
# fields below.
dataset = load_dataset("json", data_files={"validation": validation_file})
val_data = dataset["validation"]


def predict(text: str) -> str:
    """Generate the model's output string for a single input text.

    Args:
        text: Source text passed to the tokenizer. It is truncated to
            ``MAX_INPUT_LENGTH`` tokens.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens removed.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=MAX_INPUT_LENGTH,
    ).to(device)
    outputs = model.generate(
        **inputs,
        max_length=MAX_OUTPUT_LENGTH,
        num_beams=NUM_BEAMS,
        early_stopping=True,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


# Score every validation record by exact string match.
correct = 0
results = []

for example in val_data:
    text = example["text"]
    target = example["target"].strip()
    pred = predict(text).strip()
    results.append((text, pred, target))
    if pred == target:
        correct += 1

# Preview of the predictions.
print("📋 Примеры предсказаний:\n")
for i, (text, pred, target) in enumerate(results[:MAX_EXAMPLES_SHOWN], start=1):
    print(f"#{i}")
    print(f"📝 Вход: {text}")
    print(f"✅ Target: {target}")
    print(f"🤖 Предсказание: {pred}")
    print("-" * 50)

# Accuracy. An empty validation set is reported as 0.0 instead of raising
# ZeroDivisionError.
total = len(val_data)
accuracy = correct / total if total else 0.0
print(f"\n✅ Accuracy: {accuracy:.4f} ({correct}/{len(val_data)})\n")