File size: 3,822 Bytes
462a39a
c3731ea
 
462a39a
 
c3731ea
462a39a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3731ea
 
462a39a
c3731ea
462a39a
 
c3731ea
 
462a39a
c3731ea
462a39a
 
c3731ea
 
462a39a
c3731ea
462a39a
 
c3731ea
462a39a
 
c3731ea
 
462a39a
c3731ea
 
462a39a
c3731ea
462a39a
c3731ea
462a39a
 
 
 
 
 
 
 
 
 
 
 
 
 
c3731ea
462a39a
 
 
 
 
c3731ea
462a39a
 
c3731ea
462a39a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from __future__ import annotations

import re


def normalize_category(category: str | None) -> str:
    c = (category or "").strip().lower()

    if c in {"quantitative", "quant", "q", "math"}:
        return "Quantitative"
    if c in {"datainsight", "data_insight", "data insight", "di", "data"}:
        return "DataInsight"
    if c in {"verbal", "v"}:
        return "Verbal"
    if c in {"general", "", "unknown", "none", "null"}:
        return "General"

    return category or "General"


def _has_word(text: str, *words: str) -> bool:
    """Return True when any of *words* occurs in *text* as a standalone word.

    A trailing plural "s" is tolerated ("ratios", "tables"), but the word may
    not be embedded in a longer one: "mod" does not fire on "mode" or "model",
    "ratio" not on "duration", "mean" not on "meaning", "table" not on
    "acceptable". *text* must already be lowercased.
    """
    return any(
        re.search(rf"(?<![a-z]){re.escape(word)}s?(?![a-z])", text)
        for word in words
    )


def _classify_quantitative(q: str) -> dict:
    """Pick topic/type for a lowercased Quantitative question; first rule wins."""
    category = "Quantitative"

    if "percent" in q or "%" in q:
        # A percent question that also talks about a sequence of changes is
        # treated as a successive-percent problem.
        sequential = _has_word(q, "then") or any(
            k in q
            for k in ("after", "followed by", "successive", "increase", "decrease", "discount")
        )
        kind = "successive_percent" if sequential else "percent_change"
        return {"category": category, "topic": "percent", "type": kind}

    # NOTE: any colon still counts as ratio notation (e.g. "3:2").
    if _has_word(q, "ratio") or ":" in q:
        return {"category": category, "topic": "ratio", "type": "ratio_total"}

    if "probability" in q or "chosen at random" in q:
        return {"category": category, "topic": "probability", "type": "simple_probability"}

    # "mod" must be a whole word, otherwise "mode"/"model" land here.
    if "divisible" in q or "remainder" in q or _has_word(q, "mod"):
        return {"category": category, "topic": "number_theory", "type": "remainder_or_divisibility"}

    if "|" in q:
        return {"category": category, "topic": "algebra", "type": "absolute_value"}

    if any(k in q for k in ("circle", "radius", "circumference", "triangle", "perimeter", "area")):
        return {"category": category, "topic": "geometry", "type": "geometry"}

    if "average" in q or "median" in q or _has_word(q, "mean"):
        return {"category": category, "topic": "statistics", "type": "average"}

    if "sequence" in q:
        return {"category": category, "topic": "sequence", "type": "sequence"}

    if "=" in q:
        return {"category": category, "topic": "algebra", "type": "equation"}

    return {"category": category, "topic": "quant", "type": "general"}


def _classify_data_insight(q: str) -> dict:
    """Pick topic/type for a lowercased DataInsight question; first rule wins."""
    category = "DataInsight"

    if "percent" in q or "%" in q:
        return {"category": category, "topic": "percent", "type": "percent_change"}

    if "median" in q or "distribution" in q or _has_word(q, "mean"):
        return {"category": category, "topic": "statistics", "type": "distribution"}

    if _has_word(q, "table") or any(k in q for k in ("correlation", "scatter", "trend", "chart")):
        return {"category": category, "topic": "data", "type": "correlation_or_graph"}

    return {"category": category, "topic": "data", "type": "general"}


def _classify_verbal(q: str) -> dict:
    """Pick topic/type for a lowercased Verbal question; first rule wins."""
    category = "Verbal"

    if "meaning" in q or "definition" in q:
        return {"category": category, "topic": "vocabulary", "type": "definition"}

    if "grammatically" in q or "sentence correction" in q:
        return {"category": category, "topic": "grammar", "type": "sentence_correction"}

    if "argument" in q or "author" in q:
        return {"category": category, "topic": "reasoning", "type": "argument_analysis"}

    return {"category": category, "topic": "verbal", "type": "general"}


def classify_question(question_text: str, category: str | None = None) -> dict:
    """Classify a question by keyword into a category, topic and type.

    The category is first canonicalized with ``normalize_category``. When it
    is one of Quantitative, DataInsight or Verbal, that category's keyword
    rules are applied in order and the first match wins. For any other
    category the function infers Quantitative or DataInsight from keywords in
    the question text.

    Short keywords that commonly occur inside unrelated words ("mod", "ratio",
    "mean", "table", "then") are matched as standalone words; the remaining
    keywords and all symbols are substring matches on the lowercased text.

    Args:
        question_text: The question; ``None`` or empty is treated as "".
        category: Optional category label in any spelling that
            ``normalize_category`` accepts.

    Returns:
        A dict with the string keys ``"category"``, ``"topic"`` and
        ``"type"``. When nothing can be inferred, the result is
        ``{"category": "General", "topic": "unknown", "type": "unknown"}``.
    """
    q = (question_text or "").lower()
    normalized = normalize_category(category)

    if normalized == "Quantitative":
        return _classify_quantitative(q)
    if normalized == "DataInsight":
        return _classify_data_insight(q)
    if normalized == "Verbal":
        return _classify_verbal(q)

    # No usable category: guess one from the text, trying quantitative cues
    # before data-presentation cues.
    if _has_word(q, "ratio") or any(
        k in q
        for k in ("percent", "%", "remainder", "divisible", "probability", "circle", "triangle", "=")
    ):
        return _classify_quantitative(q)

    if _has_word(q, "table") or any(k in q for k in ("chart", "scatter", "trend", "distribution")):
        return _classify_data_insight(q)

    return {"category": "General", "topic": "unknown", "type": "unknown"}