Spaces:
Sleeping
Sleeping
| """ | |
| Minimum Functionality Tests (MFT) for Skill Classification Model | |
| These tests verify that the model performs well on basic, straightforward examples | |
| where the expected output is clear. The model should correctly predict skills for | |
| simple, unambiguous cases. | |
| Based on Ribeiro et al. (2020) "Beyond Accuracy: Behavioral Testing of NLP Models with CheckList" | |
| Note: Expected labels will vary based on your actual label schema. | |
| These tests use common programming/software engineering skill categories. | |
| """ | |
| import pytest | |
| import numpy as np | |
| class TestMinimumFunctionality: | |
| """Test suite for minimum functionality on basic examples.""" | |
| def test_simple_bug_fix(self, predict_with_labels): | |
| """ | |
| Test prediction on a simple bug fix description. | |
| Should predict basic programming and error handling skills. | |
| """ | |
| text = "Fixed null pointer exception in user authentication" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| # Should predict at least some skills | |
| assert len(predictions) > 0, "Should predict at least one skill for a bug fix" | |
| def test_database_work(self, predict_with_labels): | |
| """ | |
| Test prediction on database-related work. | |
| Should predict database-related skills. | |
| """ | |
| text = "Implemented SQL query optimization for user table" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for database work" | |
| def test_api_development(self, predict_with_labels): | |
| """ | |
| Test prediction on API development work. | |
| Should predict API/web service related skills. | |
| """ | |
| text = "Created REST API endpoint for retrieving user data" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for API development" | |
| def test_data_structure_implementation(self, predict_with_labels): | |
| """ | |
| Test prediction on data structure implementation. | |
| Should predict data structure and algorithm skills. | |
| """ | |
| text = "Implemented binary search tree with insert and delete operations" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for data structure work" | |
| def test_testing_work(self, predict_with_labels): | |
| """ | |
| Test prediction on testing-related work. | |
| Should predict testing skills. | |
| """ | |
| text = "Added unit tests for authentication module using JUnit" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for testing work" | |
| def test_frontend_work(self, predict_with_labels): | |
| """ | |
| Test prediction on frontend development work. | |
| Should predict frontend/UI related skills. | |
| """ | |
| text = "Updated user interface with React components for login page" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for frontend work" | |
| def test_security_work(self, predict_with_labels): | |
| """ | |
| Test prediction on security-related work. | |
| Should predict security skills. | |
| """ | |
| text = "Implemented OAuth2 authentication with password encryption" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for security work" | |
| def test_performance_optimization(self, predict_with_labels): | |
| """ | |
| Test prediction on performance optimization work. | |
| Should predict performance/optimization skills. | |
| """ | |
| text = "Optimized algorithm to reduce time complexity from O(n²) to O(n log n)" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for performance work" | |
| def test_devops_deployment(self, predict_with_labels): | |
| """ | |
| Test prediction on DevOps/deployment work. | |
| Should predict DevOps skills. | |
| """ | |
| text = "Configured Docker container and CI/CD pipeline for automated deployment" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for DevOps work" | |
| def test_error_handling(self, predict_with_labels): | |
| """ | |
| Test prediction on error handling work. | |
| Should predict error handling skills. | |
| """ | |
| text = "Added try-catch blocks and proper exception handling for file operations" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for error handling work" | |
| def test_refactoring_work(self, predict_with_labels): | |
| """ | |
| Test prediction on code refactoring. | |
| Should predict code quality/refactoring skills. | |
| """ | |
| text = "Refactored legacy code to improve maintainability and readability" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for refactoring work" | |
| def test_documentation_work(self, predict_with_labels): | |
| """ | |
| Test prediction on documentation work. | |
| Should predict documentation skills. | |
| """ | |
| text = "Updated API documentation with examples and usage guidelines" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for '{text}':") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for documentation work" | |
| def test_empty_input(self, predict_with_labels): | |
| """ | |
| Test that model handles empty input gracefully. | |
| """ | |
| text = "" | |
| predictions = predict_with_labels(text) | |
| # Empty input should return some default prediction or empty list | |
| # Should not crash | |
| assert isinstance(predictions, list), "Should return a list for empty input" | |
| def test_minimal_input(self, predict_with_labels): | |
| """ | |
| Test that model handles very short input. | |
| """ | |
| text = "bug" | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for minimal input '{text}':") | |
| print(f" {predictions}") | |
| # Should handle minimal input without crashing | |
| assert isinstance(predictions, list), "Should return a list for minimal input" | |
| def test_multiple_skills_in_one_task(self, predict_with_labels): | |
| """ | |
| Test that model can predict multiple skills for complex tasks. | |
| A task involving multiple technologies should predict multiple relevant skills. | |
| """ | |
| text = ( | |
| "Implemented user authentication API with JWT tokens, " | |
| "PostgreSQL database integration, and Redis caching" | |
| ) | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for multi-skill task:") | |
| print(f" {predictions}") | |
| # Complex task should predict multiple skills | |
| assert len(predictions) >= 2, ( | |
| f"Complex multi-technology task should predict multiple skills, " | |
| f"got {len(predictions)}: {predictions}" | |
| ) | |
| def test_common_github_issue_format(self, predict_with_labels): | |
| """ | |
| Test on realistic GitHub issue format. | |
| """ | |
| text = """ | |
| ## Description | |
| Fixed a bug where the login API was throwing 500 errors | |
| ## Changes | |
| - Added null check in UserService | |
| - Improved error handling | |
| - Updated unit tests | |
| """ | |
| predictions = predict_with_labels(text) | |
| print(f"\nPredictions for GitHub-style issue:") | |
| print(f" {predictions}") | |
| assert len(predictions) > 0, "Should predict skills for realistic issue format" | |
| def test_consistency_on_similar_inputs(self, predict_text): | |
| """ | |
| Test that similar inputs produce similar predictions. | |
| """ | |
| text1 = "Fixed authentication bug" | |
| text2 = "Fixed authentication bug" # Identical | |
| text3 = "Resolved authentication bug" # Very similar | |
| pred1 = set(predict_text(text1)) | |
| pred2 = set(predict_text(text2)) | |
| pred3 = set(predict_text(text3)) | |
| # Identical inputs should have identical predictions | |
| assert pred1 == pred2, "Identical inputs should produce identical predictions" | |
| # Very similar inputs should have highly similar predictions | |
| intersection = len(pred1 & pred3) | |
| union = len(pred1 | pred3) | |
| if union > 0: | |
| similarity = intersection / union | |
| assert similarity >= 0.7, ( | |
| f"Very similar inputs should produce similar predictions. " | |
| f"Similarity: {similarity:.2f}" | |
| ) | |