j-js committed on
Commit
462a39a
·
verified ·
1 Parent(s): 6204838

Update question_classifier.py

Browse files
Files changed (1) hide show
  1. question_classifier.py +60 -44
question_classifier.py CHANGED
@@ -1,66 +1,82 @@
1
- import re
2
 
3
 
4
- def classify_question(question_text: str, category: str) -> dict:
5
- q = question_text.lower()
6
 
7
- if category == "Quantitative":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  if "percent" in q or "%" in q:
10
- if "then" in q:
11
- return {"topic": "percent", "type": "successive_percent"}
12
- return {"topic": "percent", "type": "percent_change"}
13
 
14
- if "ratio" in q:
15
- return {"topic": "ratio", "type": "ratio_total"}
16
 
17
  if "probability" in q or "chosen at random" in q:
18
- return {"topic": "probability", "type": "simple_probability"}
19
 
20
- if "divisible" in q or "remainder" in q:
21
- return {"topic": "number_theory", "type": "divisibility"}
22
 
23
  if "|" in q:
24
- return {"topic": "algebra", "type": "absolute_value"}
25
 
26
- if "circle" in q or "radius" in q or "circumference" in q:
27
- return {"topic": "geometry", "type": "circle_geometry"}
28
 
29
- if "average" in q or "mean" in q:
30
- return {"topic": "statistics", "type": "average"}
31
 
32
  if "sequence" in q:
33
- return {"topic": "sequence", "type": "geometric_sequence"}
34
 
35
  if "=" in q:
36
- return {"topic": "algebra", "type": "equation"}
37
-
38
- return {"topic": "quant", "type": "general"}
39
-
40
- if category == "DataInsight":
41
-
42
- if "percent" in q:
43
- return {"topic": "percent", "type": "percent_change"}
44
 
45
- if "mean" in q or "median" in q:
46
- return {"topic": "statistics", "type": "distribution"}
47
-
48
- if "correlation" in q or "scatter" in q:
49
- return {"topic": "statistics", "type": "correlation"}
50
-
51
- return {"topic": "data", "type": "general"}
52
-
53
- if category == "Verbal":
54
-
55
- if "meaning" in q:
56
- return {"topic": "vocabulary", "type": "definition"}
57
-
58
- if "grammatically" in q:
59
- return {"topic": "grammar", "type": "sentence_correction"}
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  if "argument" in q or "author" in q:
62
- return {"topic": "reasoning", "type": "argument_analysis"}
 
 
 
 
63
 
64
- return {"topic": "verbal", "type": "general"}
 
65
 
66
- return {"topic": "unknown", "type": "unknown"}
 
1
+ from __future__ import annotations
2
 
3
 
4
# Lookup keys are lower-cased with every space, underscore and hyphen removed,
# so "data insight", "data_insight" and "Data-Insight" all share one entry.
_CATEGORY_ALIASES = {
    "quantitative": "Quantitative",
    "quant": "Quantitative",
    "q": "Quantitative",
    "math": "Quantitative",
    "datainsight": "DataInsight",
    "di": "DataInsight",
    "data": "DataInsight",
    "verbal": "Verbal",
    "v": "Verbal",
    "general": "General",
    "": "General",
    "unknown": "General",
    "none": "General",
    "null": "General",
}


def normalize_category(category: str | None) -> str:
    """Map a free-form category label onto its canonical name.

    Matching ignores case, surrounding whitespace and the separators
    space / underscore / hyphen.

    Args:
        category: Raw label supplied by the caller. ``None`` and other falsy
            values are treated as "no category". Non-string values are
            converted with ``str()`` instead of raising.

    Returns:
        ``"Quantitative"``, ``"DataInsight"``, ``"Verbal"`` or ``"General"``
        for a recognised alias; otherwise the original label with surrounding
        whitespace stripped.
    """
    raw = str(category).strip() if category else ""
    # Turn underscores/hyphens into spaces, then drop all whitespace, so every
    # separator variant collapses to the same lookup key.
    key = "".join(raw.lower().replace("_", " ").replace("-", " ").split())
    return _CATEGORY_ALIASES.get(key, raw)
17
+
18
+
19
# Regex fragment for the spelled-out forms of "percent".
_PERCENT_WORD = r"percent(?:s|age|ages)?"


def _word_hits(text: str, *patterns: str) -> int:
    """Count whole-word matches of the given regex fragments in *text*.

    Each fragment is wrapped in word boundaries, so ``"mod"`` does not match
    inside "model" and ``"ratios?"`` does not match inside "operation".
    Fragments must not contain capturing groups.
    """
    import re  # function-scope import keeps this block self-contained

    return sum(len(re.findall(rf"\b(?:{p})\b", text)) for p in patterns)


def _mentions_percent(q: str) -> bool:
    """Return True when *q* contains a percent sign or the word "percent"."""
    return "%" in q or _word_hits(q, _PERCENT_WORD) > 0


def _is_successive_percent(q: str) -> bool:
    """Decide whether a percent question describes more than one change."""
    # Explicit sequencing words are enough on their own.
    if _word_hits(q, "then", "followed by", r"successive(?:ly)?"):
        return True
    # "increase", "decrease", "discount" and "after" also appear in ordinary
    # single-step questions, so they only count when at least two percentages
    # are mentioned.
    percent_mentions = q.count("%") + _word_hits(q, _PERCENT_WORD)
    change_words = _word_hits(
        q,
        "after",
        r"(?:increas|decreas)(?:e|es|ed|ing)",
        r"discount(?:s|ed|ing)?",
    )
    return percent_mentions >= 2 and change_words > 0


def _classify_quantitative(q: str) -> tuple[str, str]:
    """Return ``(topic, type)`` for a lower-cased Quantitative question."""
    if _mentions_percent(q):
        if _is_successive_percent(q):
            return "percent", "successive_percent"
        return "percent", "percent_change"
    # A colon only signals a ratio when it sits between two numbers ("3:4");
    # a colon used as punctuation ("Note: ...") must not count.
    if _word_hits(q, "ratios?", r"\d+\s*:\s*\d+"):
        return "ratio", "ratio_total"
    if _word_hits(q, "probability", "chosen at random"):
        return "probability", "simple_probability"
    if _word_hits(q, "divisible", "remainders?", "mod", "modulo", "modulus"):
        return "number_theory", "remainder_or_divisibility"
    if "|" in q:
        return "algebra", "absolute_value"
    if _word_hits(
        q, "circles?", "radius", "circumference", "triangles?", "perimeters?", "areas?"
    ):
        return "geometry", "geometry"
    # "mean" is matched exactly so that "meaning" and "which means" are ignored.
    if _word_hits(q, "averages?", "averaged", "mean", "medians?"):
        return "statistics", "average"
    if _word_hits(q, "sequences?"):
        return "sequence", "sequence"
    if "=" in q:
        return "algebra", "equation"
    return "quant", "general"


def _classify_data_insight(q: str) -> tuple[str, str]:
    """Return ``(topic, type)`` for a lower-cased DataInsight question."""
    if _mentions_percent(q):
        return "percent", "percent_change"
    if _word_hits(q, "mean", "medians?", "distributions?"):
        return "statistics", "distribution"
    if _word_hits(
        q, "correlations?", "scatter", "scatterplots?", "trends?", "tables?", "charts?"
    ):
        return "data", "correlation_or_graph"
    return "data", "general"


def _classify_verbal(q: str) -> tuple[str, str]:
    """Return ``(topic, type)`` for a lower-cased Verbal question."""
    if _word_hits(q, "meanings?", "definitions?"):
        return "vocabulary", "definition"
    if _word_hits(q, "grammatically", "sentence correction"):
        return "grammar", "sentence_correction"
    if _word_hits(q, "arguments?", "authors?"):
        return "reasoning", "argument_analysis"
    return "verbal", "general"


def classify_question(question_text: str, category: str | None = None) -> dict:
    """Classify a question into a category, topic and question type.

    The category is normalised with ``normalize_category``. When it is one of
    Quantitative / DataInsight / Verbal, that category's keyword rules are
    applied in priority order. For any other category the question text is
    scanned for quantitative cues first, then data cues.

    Keywords are matched as whole words rather than substrings, so "model"
    is not read as "mod" and "operation" is not read as "ratio".

    Args:
        question_text: The question to classify. Falsy values are treated as
            an empty question.
        category: Optional category label in any spelling that
            ``normalize_category`` understands.

    Returns:
        A dict with the keys ``"category"``, ``"topic"`` and ``"type"``. When
        nothing matches, the result is
        ``{"category": "General", "topic": "unknown", "type": "unknown"}``.
    """
    q = str(question_text or "").lower()
    normalized = normalize_category(category)

    if normalized == "Quantitative":
        topic, kind = _classify_quantitative(q)
    elif normalized == "DataInsight":
        topic, kind = _classify_data_insight(q)
    elif normalized == "Verbal":
        topic, kind = _classify_verbal(q)
    elif (
        _mentions_percent(q)
        or "=" in q
        or _word_hits(
            q, "ratios?", "remainders?", "divisible", "probability", "circles?", "triangles?"
        )
    ):
        # No usable category: quantitative cues in the text take precedence.
        normalized = "Quantitative"
        topic, kind = _classify_quantitative(q)
    elif _word_hits(
        q, "tables?", "charts?", "scatter", "scatterplots?", "trends?", "distributions?"
    ):
        normalized = "DataInsight"
        topic, kind = _classify_data_insight(q)
    else:
        return {"category": "General", "topic": "unknown", "type": "unknown"}

    return {"category": normalized, "topic": topic, "type": kind}