File size: 9,952 Bytes
225af6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
"""
Minimum Functionality Tests (MFT) for Skill Classification Model

These tests verify that the model performs well on basic, straightforward examples
where the expected output is clear. The model should correctly predict skills for
simple, unambiguous cases.

Based on Ribeiro et al. (2020) "Beyond Accuracy: Behavioral Testing of NLP models"

Note: Expected labels will vary based on your actual label schema. 
These tests use common programming/software engineering skill categories.
"""
import pytest
import numpy as np


@pytest.mark.mft
class TestMinimumFunctionality:
    """Minimum Functionality Tests (MFT) on basic, unambiguous examples.

    Each basic case is a short, single-domain task description for which the
    classifier should confidently predict at least one skill. Exact labels
    depend on the deployed label schema, so only non-emptiness is asserted.
    Based on Ribeiro et al. (2020), "Beyond Accuracy: Behavioral Testing of
    NLP models".
    """

    # Simple, single-skill-domain task descriptions. All of them share the
    # same contract ("predicts at least one skill"), so they are driven
    # through one parametrized test instead of twelve copy-pasted methods.
    # The ids name the skill domain each case covers.
    BASIC_CASES = [
        pytest.param("Fixed null pointer exception in user authentication", id="bug-fix"),
        pytest.param("Implemented SQL query optimization for user table", id="database"),
        pytest.param("Created REST API endpoint for retrieving user data", id="api"),
        pytest.param("Implemented binary search tree with insert and delete operations", id="data-structures"),
        pytest.param("Added unit tests for authentication module using JUnit", id="testing"),
        pytest.param("Updated user interface with React components for login page", id="frontend"),
        pytest.param("Implemented OAuth2 authentication with password encryption", id="security"),
        pytest.param("Optimized algorithm to reduce time complexity from O(n²) to O(n log n)", id="performance"),
        pytest.param("Configured Docker container and CI/CD pipeline for automated deployment", id="devops"),
        pytest.param("Added try-catch blocks and proper exception handling for file operations", id="error-handling"),
        pytest.param("Refactored legacy code to improve maintainability and readability", id="refactoring"),
        pytest.param("Updated API documentation with examples and usage guidelines", id="documentation"),
    ]

    @staticmethod
    def _report(text, predictions):
        """Print the model's predictions for *text* (visible with ``pytest -s``)."""
        print(f"\nPredictions for '{text}':")
        print(f"  {predictions}")

    @pytest.mark.parametrize("text", BASIC_CASES)
    def test_basic_task_predicts_at_least_one_skill(self, predict_with_labels, text):
        """A clear, single-domain task description must yield >= 1 skill."""
        predictions = predict_with_labels(text)
        self._report(text, predictions)
        assert len(predictions) > 0, (
            f"Should predict at least one skill for basic task: {text!r}"
        )

    def test_empty_input(self, predict_with_labels):
        """Empty input must be handled gracefully (no crash, list returned)."""
        predictions = predict_with_labels("")
        # An empty string may legitimately produce an empty prediction list;
        # the only requirement is that the return type stays a list.
        assert isinstance(predictions, list), "Should return a list for empty input"

    def test_minimal_input(self, predict_with_labels):
        """A single-word input must be handled without crashing."""
        text = "bug"
        predictions = predict_with_labels(text)
        self._report(text, predictions)
        assert isinstance(predictions, list), "Should return a list for minimal input"

    def test_multiple_skills_in_one_task(self, predict_with_labels):
        """A task spanning several technologies should predict several skills."""
        text = (
            "Implemented user authentication API with JWT tokens, "
            "PostgreSQL database integration, and Redis caching"
        )
        predictions = predict_with_labels(text)
        self._report(text, predictions)
        assert len(predictions) >= 2, (
            f"Complex multi-technology task should predict multiple skills, "
            f"got {len(predictions)}: {predictions}"
        )

    def test_common_github_issue_format(self, predict_with_labels):
        """A realistic, markdown-formatted GitHub issue should still classify."""
        text = """
        ## Description
        Fixed a bug where the login API was throwing 500 errors
        
        ## Changes
        - Added null check in UserService
        - Improved error handling
        - Updated unit tests
        """
        predictions = predict_with_labels(text)
        self._report("GitHub-style issue", predictions)
        assert len(predictions) > 0, "Should predict skills for realistic issue format"

    def test_consistency_on_similar_inputs(self, predict_text):
        """Identical inputs must match exactly; near-identical ones closely.

        Similarity between predictions for paraphrased inputs is measured
        with Jaccard overlap (|A & B| / |A | B|) and must be >= 0.7.
        """
        text1 = "Fixed authentication bug"
        text2 = "Fixed authentication bug"  # Identical
        text3 = "Resolved authentication bug"  # Very similar

        pred1 = set(predict_text(text1))
        pred2 = set(predict_text(text2))
        pred3 = set(predict_text(text3))

        # Determinism: byte-identical inputs must give identical predictions.
        assert pred1 == pred2, "Identical inputs should produce identical predictions"

        union = len(pred1 | pred3)
        # If both prediction sets are empty the Jaccard ratio is undefined;
        # treat that as trivially consistent and skip the threshold check.
        if union > 0:
            similarity = len(pred1 & pred3) / union
            assert similarity >= 0.7, (
                f"Very similar inputs should produce similar predictions. "
                f"Similarity: {similarity:.2f}"
            )