ayush2917 committed on
Commit
7dcc5b5
·
verified ·
1 Parent(s): d78a152

Create parser.py

Browse files
Files changed (1) hide show
  1. app/utils/parser.py +60 -0
app/utils/parser.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/utils/parser.py
2
+ import re
3
+ from typing import List, Dict
4
+
5
# Lookahead marking where an option's text ends: the next option label, the
# answer line, the explanation line, or the end of the chunk.  ``\Z`` is used
# instead of ``$`` because the pattern runs under re.MULTILINE, where ``$``
# would stop at the end of every line and truncate multi-line options.
_OPTION_STOP = r'(?=\n[ \t]*[A-D]\)|\n[ \t]*(?i:Answer|Explanation):|\Z)'


def _extract_option(content: str, letter: str) -> str:
    """Return the text of option *letter* ("A".."D") in *content*, or ""."""
    # Preferred form: the label starts its own line.  Anchoring prevents a
    # stray "A)" inside the question text (e.g. "(NASA)") from being taken
    # for the option label.  ``[ \t]*`` (not ``\s*``) after the label keeps
    # an empty option from swallowing the following line.
    match = re.search(
        rf'^[ \t]*{letter}\)[ \t]*(.*?)' + _OPTION_STOP,
        content,
        re.DOTALL | re.MULTILINE,
    )
    if match is None:
        # Legacy fallback: accept a label anywhere in the chunk so loosely
        # formatted output (e.g. "**A)** text") is still picked up.
        match = re.search(
            rf'{letter}\)\s*(.*?)(?=\n[A-D]\)|\nAnswer:|\nExplanation:|$)',
            content,
            re.DOTALL,
        )
    return match.group(1).strip() if match else ""


def parse_questions(raw_text: str) -> List[Dict]:
    """Parse a model-generated block into a list of question dicts.

    Each question is expected to look like::

        1. Question ...
        A) ...
        B) ...
        C) ...
        D) ...
        Answer: X
        Explanation: ...

    Args:
        raw_text: The raw text to parse.  ``\\r\\n`` and lone ``\\r`` line
            endings are normalised to ``\\n`` before parsing.

    Returns:
        One dict per question, in input order, with the keys:

        * ``"number"``: 1-based position of the question in the result.
        * ``"question"``: question text without its leading "N." marker.
        * ``"options"``: four strings for A-D ("" for a missing option).
        * ``"answer"``: upper-case letter A-D, or "" if none was found.
        * ``"explanation"``: text after "Explanation:", or "".

        Returns ``[]`` for empty or whitespace-only input.  Leading text
        that is not numbered and carries no options or answer (an
        introduction such as "Here are your questions:") is skipped.
    """
    if not raw_text:
        return []

    # Normalise every line-ending flavour so the "\n"-based patterns work.
    text = raw_text.replace("\r\n", "\n").replace("\r", "\n").strip()

    # Split before each line that starts with "<digits>."; the (?!\d) guard
    # keeps a line starting with a decimal such as "3.14" inside its chunk.
    parts = re.split(r'\n(?=\d+\.(?!\d))', text)

    questions: List[Dict] = []
    for part in parts:
        content = part.strip()
        if not content:
            continue

        # Question text is everything before the line holding option A,
        # minus the leading "N." marker.
        q_text = re.split(r'\n[ \t]*A\)', content, maxsplit=1)[0]
        q_text = re.sub(r'^\s*\d+\.\s*', '', q_text).strip()

        opts = [_extract_option(content, letter) for letter in "ABCD"]

        # \b rejects words that merely start with a-d ("Answer: all ...").
        ans_m = re.search(r'Answer:\s*([A-D])\b', content, re.IGNORECASE)
        answer = ans_m.group(1).upper() if ans_m else ""

        expl_m = re.search(r'Explanation:\s*([\s\S]*)', content, re.IGNORECASE)
        explanation = expl_m.group(1).strip() if expl_m else ""

        # Only the first chunk can be unnumbered (the split happens at
        # numbered lines).  With no options and no answer it is preamble,
        # not a question, so it must not be emitted or counted.
        is_numbered = re.match(r'\d+\.', content) is not None
        if not is_numbered and not any(opts) and not answer:
            continue

        questions.append({
            # Position among emitted questions, so skipped preamble does
            # not shift the numbering.
            "number": len(questions) + 1,
            "question": q_text,
            "options": opts,
            "answer": answer,
            "explanation": explanation,
        })
    return questions