"""Evaluate a fine-tuned DistilBERT sentiment classifier on YAML test data.

Expects test_data.yaml shaped as {"test_data": [{"text": ..., "label": ...}, ...]}
where label is "Positive" or "Negative", and a fine-tuned state dict on disk.
"""

import yaml
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

MODEL_NAME = "distilbert-base-uncased"
WEIGHTS_PATH = "path/to/your/fine-tuned/model.pth"  # TODO: replace placeholder path
TEST_DATA_PATH = "test_data.yaml"


def load_test_data(path):
    """Return the list of {"text", "label"} samples under the "test_data" key."""
    with open(path, "r") as file:
        return yaml.safe_load(file)["test_data"]


def load_model():
    """Load tokenizer and 2-label classifier, restore fine-tuned weights, set eval mode."""
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)
    # map_location="cpu" so a checkpoint saved on GPU still loads on CPU-only hosts;
    # the script never moves the model to a device, so inference runs on CPU anyway.
    model.load_state_dict(torch.load(WEIGHTS_PATH, map_location="cpu"))
    model.eval()
    return tokenizer, model


def predict_label(tokenizer, model, text):
    """Classify *text*: "Positive" for class index 1, "Negative" for index 0."""
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only — skip autograd bookkeeping
        logits = model(**inputs).logits
    return "Positive" if logits.argmax().item() else "Negative"


def main():
    test_data = load_test_data(TEST_DATA_PATH)
    tokenizer, model = load_model()

    # Run inference once per sample and cache the result; the original script
    # duplicated the whole tokenize/forward/argmax pipeline in a second loop.
    predictions = [predict_label(tokenizer, model, sample["text"]) for sample in test_data]

    correct = sum(pred == sample["label"] for pred, sample in zip(predictions, test_data))
    # Guard against an empty test set (the original divided unconditionally).
    accuracy = correct / len(test_data) if test_data else 0.0
    print(f"Accuracy on test data: {accuracy * 100:.2f}%")

    # Demonstrate model predictions, reusing the cached results.
    print("\nModel Predictions:")
    for sample, predicted in zip(test_data, predictions):
        print(f"Text: {sample['text']}")
        print(f"Expected Label: {sample['label']}")
        print(f"Predicted Label: {predicted}")
        print()


if __name__ == "__main__":
    main()