ajkndfjsdfasdf's picture
🚀 Full upload with token redacted
ff2f4fe verified
from transformers import MT5ForConditionalGeneration, MT5Tokenizer
from datasets import load_dataset
import torch
# Locations of the fine-tuned model checkpoint and the validation data
model_path = "./mt5-finetuned"
validation_file = "mt5_validation_data-1.jsonl"

# Run on the GPU when one is available, otherwise on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the model, place the model on the chosen device
# and switch it to evaluation mode
tokenizer = MT5Tokenizer.from_pretrained(model_path)
model = MT5ForConditionalGeneration.from_pretrained(model_path).to(device)
model.eval()

# Load the validation split from the JSON Lines file
dataset = load_dataset("json", data_files={"validation": validation_file})
val_data = dataset["validation"]
# Prediction function
def predict(
    text: str,
    *,
    max_input_length: int = 256,
    max_output_length: int = 64,
    num_beams: int = 5,
) -> str:
    """Generate the model's output string for a single input text.

    Uses the module-level ``tokenizer``, ``model`` and ``device``.

    Args:
        text: Input string handed to the tokenizer.
        max_input_length: ``max_length`` for tokenization; with
            ``truncation=True`` longer inputs are cut to this many tokens.
        max_output_length: ``max_length`` passed to ``model.generate``.
        num_beams: ``num_beams`` passed to ``model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=max_input_length,
    ).to(device)
    outputs = model.generate(
        **inputs,
        max_length=max_output_length,
        num_beams=num_beams,
        early_stopping=True,
    )
    # Only one input is passed in, so only the first output row is decoded.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Accuracy computation: a prediction counts as correct only when it equals
# the target exactly after stripping surrounding whitespace on both sides.
correct = 0
results = []
for example in val_data:
    text = example["text"]
    target = example["target"].strip()
    pred = predict(text).strip()
    results.append((text, pred, target))
    if pred == target:
        correct += 1

# Examples
print("📋 Примеры предсказаний:\n")
# The slice bound is the number of examples to print.
for i, (text, pred, target) in enumerate(results[:80], start=1):
    print(f"#{i}")
    print(f"📝 Вход: {text}")
    print(f"✅ Target: {target}")
    print(f"🤖 Предсказание: {pred}")
    print("-" * 50)

# Accuracy
total = len(val_data)
# Guard against an empty validation set, which would otherwise raise
# ZeroDivisionError; the report then shows 0.0000 (0/0).
accuracy = correct / total if total else 0.0
print(f"\n✅ Accuracy: {accuracy:.4f} ({correct}/{total})\n")