"""
Minimum Functionality Tests (MFT) for Skill Classification Model

These tests verify that the model performs well on basic, straightforward examples
where the expected output is clear. The model should correctly predict skills for
simple, unambiguous cases.

Based on Ribeiro et al. (2020), "Beyond Accuracy: Behavioral Testing of NLP
Models with CheckList".

Note: Expected labels will vary based on your actual label schema, so the
assertions check that predictions are produced rather than which labels appear.
These tests use common programming/software engineering skill categories.
"""

import numpy as np
import pytest


@pytest.mark.mft
class TestMinimumFunctionality:
    """Test suite for minimum functionality on basic examples.

    Each test passes one short task description to a prediction fixture and
    checks a coarse property of the result: that it is non-empty, that it is a
    list, that it holds at least two entries, or that it is stable across
    near-identical inputs. No specific label names are asserted.

    The ``predict_with_labels`` and ``predict_text`` arguments are pytest
    fixtures that are not defined in this class. Both are called with a single
    string.

    The class carries the ``mft`` marker, so it can be selected with
    ``pytest -m mft``.
    """

    @staticmethod
    def _predict_and_show(predict_with_labels, text, *, header=None):
        """Predict skills for *text*, print them, and return the predictions.

        Args:
            predict_with_labels: The prediction fixture handed to the test.
            text: Task description to classify.
            header: Line printed above the predictions. When omitted it is
                ``Predictions for '<text>':``.

        Returns:
            The value returned by ``predict_with_labels(text)``, unchanged.
        """
        predictions = predict_with_labels(text)
        if header is None:
            header = f"Predictions for '{text}':"
        # The leading newline separates this output from pytest's own
        # progress line when output capturing is disabled.
        print(f"\n{header}")
        print(f" {predictions}")
        return predictions

    def test_simple_bug_fix(self, predict_with_labels):
        """A simple bug-fix description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Fixed null pointer exception in user authentication",
        )

        assert len(predictions) > 0, "Should predict at least one skill for a bug fix"

    def test_database_work(self, predict_with_labels):
        """A database-related description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Implemented SQL query optimization for user table",
        )

        assert len(predictions) > 0, "Should predict skills for database work"

    def test_api_development(self, predict_with_labels):
        """An API-development description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Created REST API endpoint for retrieving user data",
        )

        assert len(predictions) > 0, "Should predict skills for API development"

    def test_data_structure_implementation(self, predict_with_labels):
        """A data-structure description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Implemented binary search tree with insert and delete operations",
        )

        assert len(predictions) > 0, "Should predict skills for data structure work"

    def test_testing_work(self, predict_with_labels):
        """A testing-related description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Added unit tests for authentication module using JUnit",
        )

        assert len(predictions) > 0, "Should predict skills for testing work"

    def test_frontend_work(self, predict_with_labels):
        """A frontend description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Updated user interface with React components for login page",
        )

        assert len(predictions) > 0, "Should predict skills for frontend work"

    def test_security_work(self, predict_with_labels):
        """A security-related description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Implemented OAuth2 authentication with password encryption",
        )

        assert len(predictions) > 0, "Should predict skills for security work"

    def test_performance_optimization(self, predict_with_labels):
        """A performance-optimization description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Optimized algorithm to reduce time complexity from O(n²) to O(n log n)",
        )

        assert len(predictions) > 0, "Should predict skills for performance work"

    def test_devops_deployment(self, predict_with_labels):
        """A DevOps/deployment description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Configured Docker container and CI/CD pipeline for automated deployment",
        )

        assert len(predictions) > 0, "Should predict skills for DevOps work"

    def test_error_handling(self, predict_with_labels):
        """An error-handling description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Added try-catch blocks and proper exception handling for file operations",
        )

        assert len(predictions) > 0, "Should predict skills for error handling work"

    def test_refactoring_work(self, predict_with_labels):
        """A refactoring description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Refactored legacy code to improve maintainability and readability",
        )

        assert len(predictions) > 0, "Should predict skills for refactoring work"

    def test_documentation_work(self, predict_with_labels):
        """A documentation description yields at least one prediction."""
        predictions = self._predict_and_show(
            predict_with_labels,
            "Updated API documentation with examples and usage guidelines",
        )

        assert len(predictions) > 0, "Should predict skills for documentation work"

    def test_empty_input(self, predict_with_labels):
        """An empty string is handled and the result is still a list."""
        # Nothing is printed here; only the return type is checked.
        predictions = predict_with_labels("")

        assert isinstance(predictions, list), "Should return a list for empty input"

    def test_minimal_input(self, predict_with_labels):
        """A single-word input is handled and the result is still a list."""
        text = "bug"
        predictions = self._predict_and_show(
            predict_with_labels,
            text,
            header=f"Predictions for minimal input '{text}':",
        )

        assert isinstance(predictions, list), "Should return a list for minimal input"

    def test_multiple_skills_in_one_task(self, predict_with_labels):
        """A task naming several technologies yields at least two predictions."""
        text = (
            "Implemented user authentication API with JWT tokens, "
            "PostgreSQL database integration, and Redis caching"
        )
        predictions = self._predict_and_show(
            predict_with_labels,
            text,
            header="Predictions for multi-skill task:",
        )

        assert len(predictions) >= 2, (
            "Complex multi-technology task should predict multiple skills, "
            f"got {len(predictions)}: {predictions}"
        )

    def test_common_github_issue_format(self, predict_with_labels):
        """A multi-line, GitHub-issue-style body yields at least one prediction."""
        # Built line by line so every heading and bullet starts at column 0,
        # independent of how this method is indented.
        text = (
            "\n"
            "## Description\n"
            "Fixed a bug where the login API was throwing 500 errors\n"
            "\n"
            "## Changes\n"
            "- Added null check in UserService\n"
            "- Improved error handling\n"
            "- Updated unit tests\n"
        )
        predictions = self._predict_and_show(
            predict_with_labels,
            text,
            header="Predictions for GitHub-style issue:",
        )

        assert len(predictions) > 0, "Should predict skills for realistic issue format"

    def test_consistency_on_similar_inputs(self, predict_text):
        """Identical inputs match exactly; a one-word rewording stays close.

        Closeness is the Jaccard index (intersection size over union size) of
        the two prediction sets and must be at least 0.7. When both sets are
        empty there is nothing to compare and the check is skipped.
        """
        original = "Fixed authentication bug"
        reworded = "Resolved authentication bug"

        # The same text is predicted twice on purpose to check determinism.
        first = set(predict_text(original))
        repeat = set(predict_text(original))
        paraphrase = set(predict_text(reworded))

        assert first == repeat, "Identical inputs should produce identical predictions"

        union = first | paraphrase
        if not union:
            return

        similarity = len(first & paraphrase) / len(union)
        assert similarity >= 0.7, (
            "Very similar inputs should produce similar predictions. "
            f"Similarity: {similarity:.2f}"
        )