# app.py — evaluate a fine-tuned DistilBERT binary sentiment classifier
# against labelled examples stored in test_data.yaml.
import yaml
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load labelled test samples. Expects a top-level "test_data" key holding a
# list of {"text": ..., "label": ...} mappings (labels "Positive"/"Negative").
with open("test_data.yaml", "r") as file:
    test_data = yaml.safe_load(file)["test_data"]

# Base architecture and tokenizer; num_labels=2 matches the binary sentiment head.
model_name = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Load the fine-tuned weights. map_location="cpu" keeps this working on
# machines without the GPU the checkpoint was saved on.
# NOTE(review): torch.load unpickles arbitrary code — only load checkpoints
# from a trusted source (or pass weights_only=True on torch >= 1.13).
model.load_state_dict(
    torch.load("path/to/your/fine-tuned/model.pth", map_location="cpu")
)
model.eval()  # disable dropout etc. for deterministic inference
# Evaluate classification accuracy over the labelled test set.
correct_predictions = 0
total_samples = 0
for sample in test_data:
    text = sample["text"]
    expected_label = sample["label"]
    # Tokenize and encode the input text for the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only: no_grad avoids building autograd graphs, saving
    # memory and time per sample.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # Class index 1 is treated as the positive class, index 0 as negative.
    predicted_label = "Positive" if logits.argmax().item() == 1 else "Negative"
    if predicted_label == expected_label:
        correct_predictions += 1
    total_samples += 1

# Guard against an empty test set so we never divide by zero.
accuracy = correct_predictions / total_samples if total_samples else 0.0
print(f"Accuracy on test data: {accuracy * 100:.2f}%")
# Print a per-sample comparison of expected vs. predicted labels.
print("\nModel Predictions:")
for sample in test_data:
    text = sample["text"]
    expected_label = sample["label"]
    # Tokenize and encode the input text for the model.
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    # Inference only — skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # Class index 1 is treated as the positive class, index 0 as negative.
    predicted_label = "Positive" if logits.argmax().item() == 1 else "Negative"
    print(f"Text: {text}")
    print(f"Expected Label: {expected_label}")
    print(f"Predicted Label: {predicted_label}")
    print()