# Hopcroft-Skill-Classification/tests/behavioral/test_minimum_functionality.py
# DaCrow13
# Deploy to HF Spaces (Clean)
# 39d224b
"""
Minimum Functionality Tests (MFT) for Skill Classification Model
These tests verify that the model performs well on basic, straightforward examples
where the expected output is clear. The model should correctly predict skills for
simple, unambiguous cases.
Based on Ribeiro et al. (2020) "Beyond Accuracy: Behavioral Testing of NLP models"
Note: Expected labels will vary based on your actual label schema.
These tests use common programming/software engineering skill categories.
"""
import pytest
import numpy as np
@pytest.mark.mft
class TestMinimumFunctionality:
"""Test suite for minimum functionality on basic examples."""
def test_simple_bug_fix(self, predict_with_labels):
"""
Test prediction on a simple bug fix description.
Should predict basic programming and error handling skills.
"""
text = "Fixed null pointer exception in user authentication"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
# Should predict at least some skills
assert len(predictions) > 0, "Should predict at least one skill for a bug fix"
def test_database_work(self, predict_with_labels):
"""
Test prediction on database-related work.
Should predict database-related skills.
"""
text = "Implemented SQL query optimization for user table"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for database work"
def test_api_development(self, predict_with_labels):
"""
Test prediction on API development work.
Should predict API/web service related skills.
"""
text = "Created REST API endpoint for retrieving user data"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for API development"
def test_data_structure_implementation(self, predict_with_labels):
"""
Test prediction on data structure implementation.
Should predict data structure and algorithm skills.
"""
text = "Implemented binary search tree with insert and delete operations"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for data structure work"
def test_testing_work(self, predict_with_labels):
"""
Test prediction on testing-related work.
Should predict testing skills.
"""
text = "Added unit tests for authentication module using JUnit"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for testing work"
def test_frontend_work(self, predict_with_labels):
"""
Test prediction on frontend development work.
Should predict frontend/UI related skills.
"""
text = "Updated user interface with React components for login page"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for frontend work"
def test_security_work(self, predict_with_labels):
"""
Test prediction on security-related work.
Should predict security skills.
"""
text = "Implemented OAuth2 authentication with password encryption"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for security work"
def test_performance_optimization(self, predict_with_labels):
"""
Test prediction on performance optimization work.
Should predict performance/optimization skills.
"""
text = "Optimized algorithm to reduce time complexity from O(n²) to O(n log n)"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for performance work"
def test_devops_deployment(self, predict_with_labels):
"""
Test prediction on DevOps/deployment work.
Should predict DevOps skills.
"""
text = "Configured Docker container and CI/CD pipeline for automated deployment"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for DevOps work"
def test_error_handling(self, predict_with_labels):
"""
Test prediction on error handling work.
Should predict error handling skills.
"""
text = "Added try-catch blocks and proper exception handling for file operations"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for error handling work"
def test_refactoring_work(self, predict_with_labels):
"""
Test prediction on code refactoring.
Should predict code quality/refactoring skills.
"""
text = "Refactored legacy code to improve maintainability and readability"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for refactoring work"
def test_documentation_work(self, predict_with_labels):
"""
Test prediction on documentation work.
Should predict documentation skills.
"""
text = "Updated API documentation with examples and usage guidelines"
predictions = predict_with_labels(text)
print(f"\nPredictions for '{text}':")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for documentation work"
def test_empty_input(self, predict_with_labels):
"""
Test that model handles empty input gracefully.
"""
text = ""
predictions = predict_with_labels(text)
# Empty input should return some default prediction or empty list
# Should not crash
assert isinstance(predictions, list), "Should return a list for empty input"
def test_minimal_input(self, predict_with_labels):
"""
Test that model handles very short input.
"""
text = "bug"
predictions = predict_with_labels(text)
print(f"\nPredictions for minimal input '{text}':")
print(f" {predictions}")
# Should handle minimal input without crashing
assert isinstance(predictions, list), "Should return a list for minimal input"
def test_multiple_skills_in_one_task(self, predict_with_labels):
"""
Test that model can predict multiple skills for complex tasks.
A task involving multiple technologies should predict multiple relevant skills.
"""
text = (
"Implemented user authentication API with JWT tokens, "
"PostgreSQL database integration, and Redis caching"
)
predictions = predict_with_labels(text)
print(f"\nPredictions for multi-skill task:")
print(f" {predictions}")
# Complex task should predict multiple skills
assert len(predictions) >= 2, (
f"Complex multi-technology task should predict multiple skills, "
f"got {len(predictions)}: {predictions}"
)
def test_common_github_issue_format(self, predict_with_labels):
"""
Test on realistic GitHub issue format.
"""
text = """
## Description
Fixed a bug where the login API was throwing 500 errors
## Changes
- Added null check in UserService
- Improved error handling
- Updated unit tests
"""
predictions = predict_with_labels(text)
print(f"\nPredictions for GitHub-style issue:")
print(f" {predictions}")
assert len(predictions) > 0, "Should predict skills for realistic issue format"
def test_consistency_on_similar_inputs(self, predict_text):
"""
Test that similar inputs produce similar predictions.
"""
text1 = "Fixed authentication bug"
text2 = "Fixed authentication bug" # Identical
text3 = "Resolved authentication bug" # Very similar
pred1 = set(predict_text(text1))
pred2 = set(predict_text(text2))
pred3 = set(predict_text(text3))
# Identical inputs should have identical predictions
assert pred1 == pred2, "Identical inputs should produce identical predictions"
# Very similar inputs should have highly similar predictions
intersection = len(pred1 & pred3)
union = len(pred1 | pred3)
if union > 0:
similarity = intersection / union
assert similarity >= 0.7, (
f"Very similar inputs should produce similar predictions. "
f"Similarity: {similarity:.2f}"
)