Mitchins's picture
Upload folder using huggingface_hub
2667b42 verified
#!/usr/bin/env python3
"""
Test script for plot arc classifier
"""
import json
import torch
from transformers import DebertaV2Tokenizer, DebertaV2ForSequenceClassification
def load_tests():
"""Load synthetic test cases"""
with open('tests/synthetic_tests.json', 'r') as f:
return json.load(f)
def run_tests():
"""Run all synthetic tests"""
print("Loading model...")
tokenizer = DebertaV2Tokenizer.from_pretrained('.')
model = DebertaV2ForSequenceClassification.from_pretrained('.')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
class_names = ['NONE', 'INTERNAL', 'EXTERNAL', 'BOTH']
class_to_idx = {name: idx for idx, name in enumerate(class_names)}
tests = load_tests()
correct = 0
total = len(tests)
print(f"Running {total} synthetic tests...\n")
for i, test in enumerate(tests, 1):
text = test['description']
expected = test['expected_class']
expected_idx = class_to_idx[expected]
# Predict
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
outputs = model(**inputs)
probabilities = torch.softmax(outputs.logits, dim=-1)
predicted_idx = torch.argmax(probabilities, dim=-1).item()
confidence = probabilities[0][predicted_idx].item()
predicted = class_names[predicted_idx]
is_correct = predicted == expected
if is_correct:
correct += 1
status = "✅ PASS"
else:
status = "❌ FAIL"
print(f"Test {i:2d}: {status}")
print(f" Text: {text[:100]}{'...' if len(text) > 100 else ''}")
print(f" Expected: {expected} | Predicted: {predicted} (conf: {confidence:.3f})")
print(f" Reasoning: {test['reasoning']}")
print()
accuracy = correct / total
print(f"Results: {correct}/{total} correct ({accuracy:.1%})")
return accuracy
if __name__ == "__main__":
run_tests()