File size: 5,693 Bytes
6631b9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
"""

Error Classification & Taxonomy

Classifies errors into 10+ error types with severity and fixability assessment

"""

import re
from typing import Dict, Any, List


def classify_error(error: Dict[str, Any]) -> Dict[str, Any]:
    """
    Classify an error into the taxonomy with severity and fixability.

    The types "calculation_error", "logical_error", "operation_mismatch"
    and "semantic_error" map directly to a fixed classification. Any other
    type is classified from the error's content by
    ``_infer_classification``. Secondary labels are always collected by
    ``_detect_additional_error_types``.

    Args:
        error: Error dictionary. The keys read here are ``type``,
            ``found``, ``correct`` and ``operation``; all are optional.
            ``found``/``correct``/``operation`` may be ``None`` or
            non-string values (e.g. numbers); they are converted to text
            before being analysed.

    Returns:
        A shallow copy of ``error`` extended with ``category``,
        ``error_description``, ``severity``, ``fixability_score``,
        ``fixable`` and ``additional_types``. The input dictionary is not
        modified; same-named keys already present in it are overwritten
        in the copy.
    """
    def _text(key: str) -> str:
        # dict.get's default only applies when the key is absent, so an
        # explicit None (or a numeric value) must be normalised here;
        # otherwise the helpers below fail on .lower() / re.search().
        value = error.get(key)
        return "" if value is None else str(value)

    error_type = error.get("type", "unknown")
    found = _text("found")
    correct = _text("correct")
    operation = _text("operation")

    # Fixed classifications for the error types the detectors emit directly.
    classification_map = {
        "calculation_error": {
            "category": "Arithmetic Error",
            "description": "Calculation mistakes in arithmetic operations",
            "severity": "HIGH",
            "fixability": 0.92,  # 92% fixable
            "fixable": True,
        },
        "logical_error": {
            "category": "Logical Error",
            "description": "Contradictions, circular reasoning, or logical inconsistencies",
            "severity": "MEDIUM",
            "fixability": 0.60,  # 60% fixable
            "fixable": False,
        },
        "operation_mismatch": {
            "category": "Operation Mismatch",
            "description": "Text describes one operation but math uses another",
            "severity": "HIGH",
            "fixability": 0.68,  # 68% fixable
            "fixable": True,
        },
        "semantic_error": {
            "category": "Semantic Error",
            "description": "Meaning doesn't match the mathematical expression",
            "severity": "MEDIUM",
            "fixability": 0.45,  # 45% fixable
            "fixable": False,
        },
    }

    # A non-string type (e.g. None or a list) can never be a known key;
    # the isinstance guard also avoids a TypeError on unhashable values.
    classification = (
        classification_map.get(error_type)
        if isinstance(error_type, str)
        else None
    )
    if classification is None:
        # Unknown type: fall back to inferring the category from content.
        classification = _infer_classification(error, found, correct, operation)

    # Secondary labels are content-based and independent of the main category.
    additional_types = _detect_additional_error_types(found, correct, operation)

    # Work on a copy so the caller's dictionary is left untouched.
    enhanced_error = error.copy()
    enhanced_error.update({
        "category": classification["category"],
        "error_description": classification["description"],
        "severity": classification["severity"],
        "fixability_score": classification["fixability"],
        "fixable": classification["fixable"],
        "additional_types": additional_types,
    })

    return enhanced_error


def _infer_classification(error: Dict[str, Any], found: str, correct: str, operation: str) -> Dict[str, Any]:
    """Infer error classification from content when type is unknown."""
    found_lower = found.lower()
    correct_lower = correct.lower()
    
    # Check for algebraic errors (variables)
    if re.search(r'[a-zA-Z]', found):
        return {
            "category": "Algebraic Error",
            "description": "Wrong operations on variables or algebraic expressions",
            "severity": "HIGH",
            "fixability": 0.75,
            "fixable": True
        }
    
    # Check for unit errors
    if re.search(r'\b(kg|g|m|cm|km|lb|oz|ft|in)\b', found_lower):
        return {
            "category": "Unit Error",
            "description": "Wrong units or unit conversions",
            "severity": "MEDIUM",
            "fixability": 0.70,
            "fixable": True
        }
    
    # Check for sign errors
    if operation in ['+', '-']:
        # Check if result has wrong sign
        found_nums = re.findall(r'-?\d+\.?\d*', found)
        correct_nums = re.findall(r'-?\d+\.?\d*', correct)
        if found_nums and correct_nums:
            try:
                found_result = float(found_nums[-1])
                correct_result = float(correct_nums[-1])
                if abs(found_result) == abs(correct_result) and found_result != correct_result:
                    return {
                        "category": "Sign Error",
                        "description": "Wrong positive/negative sign",
                        "severity": "HIGH",
                        "fixability": 0.90,
                        "fixable": True
                    }
            except:
                pass
    
    # Default to arithmetic error
    return {
        "category": "Arithmetic Error",
        "description": "Calculation mistakes",
        "severity": "HIGH",
        "fixability": 0.85,
        "fixable": True
    }


def _detect_additional_error_types(found: str, correct: str, operation: str) -> List[str]:
    """Detect additional error types from content."""
    additional_types = []
    found_lower = found.lower()
    
    # Check for notation errors
    if re.search(r'[≈~≈]', found) or re.search(r'\b(about|around|approximately)\b', found_lower):
        additional_types.append("Notation Error")
    
    # Check for order of operations issues
    if re.search(r'\d+\s*[+\-*/]\s*\d+\s*[+\-*/]\s*\d+', found):
        # Multiple operations without parentheses might indicate order issue
        if '(' not in found and '(' in correct:
            additional_types.append("Order of Operations")
    
    # Check for conceptual errors (complex patterns)
    if len(found.split()) > 10:  # Very long expressions might indicate conceptual issues
        additional_types.append("Conceptual Error")
    
    return additional_types