|
|
from transformers import MT5ForConditionalGeneration, MT5Tokenizer |
|
|
from datasets import load_dataset |
|
|
import torch |
|
|
|
|
|
|
|
|
# Directory holding the fine-tuned MT5 checkpoint (tokenizer + weights),
# as produced by save_pretrained during training.
model_path = "./mt5-finetuned"


# JSONL validation set; each line is an object with "text" and "target" keys
# (consumed below in the evaluation loop).
validation_file = "mt5_validation_data-1.jsonl"
|
|
|
|
|
|
|
|
# Restore the fine-tuned tokenizer/model pair from the checkpoint directory.
tokenizer = MT5Tokenizer.from_pretrained(model_path)
model = MT5ForConditionalGeneration.from_pretrained(model_path)

# Inference only: switch the model to eval mode (disables dropout) and move
# the weights onto the GPU when one is available, otherwise stay on CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.eval().to(device)
|
|
|
|
|
|
|
|
# Read the JSONL file into a Hugging Face dataset; the single split is
# registered under the name "validation".
dataset = load_dataset("json", data_files={"validation": validation_file})


# The split object iterated below; each row carries "text" and "target".
val_data = dataset["validation"]
|
|
|
|
|
|
|
|
def predict(text):
    """Generate the model's output for a single input string.

    Args:
        text: Source text; tokenized with truncation to at most 256 tokens.

    Returns:
        The beam-search (5 beams) generation decoded to a string with
        special tokens stripped.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    ).to(device)
    # Explicitly disable autograd during generation: no gradients are needed
    # at inference time, and skipping graph construction saves memory.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_length=64,
            num_beams=5,
            early_stopping=True,
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
# Run the model over every validation example, collecting
# (input, prediction, target) triples and counting exact string matches.
correct = 0
results = []

# NOTE: the original loop used enumerate() but never read the index — dropped.
for example in val_data:
    text = example["text"]
    # Strip surrounding whitespace on both sides of the comparison so that
    # trailing spaces/newlines don't break an otherwise exact match.
    target = example["target"].strip()
    pred = predict(text).strip()

    results.append((text, pred, target))
    if pred == target:
        correct += 1
|
|
|
|
|
|
|
|
print("📋 Примеры предсказаний:\n")

# Show at most the first 80 collected predictions, numbered from 1.
for num, (text, pred, target) in enumerate(results[:80], start=1):
    print(f"#{num}")
    print(f"📝 Вход: {text}")
    print(f"✅ Target: {target}")
    print(f"🤖 Предсказание: {pred}")
    print("-" * 50)
|
|
|
|
|
|
|
|
# Exact-match accuracy over the full validation split.
total = len(val_data)
accuracy = correct / total
print(f"\n✅ Accuracy: {accuracy:.4f} ({correct}/{total})\n")
|
|
|