ayush2917 committed on
Commit
65cb957
·
verified ·
1 Parent(s): 7cc81f7

Update app/utils/parser.py

Browse files
Files changed (1) hide show
  1. app/utils/parser.py +42 -28
app/utils/parser.py CHANGED
@@ -4,50 +4,42 @@ from typing import List, Dict
4
 
5
  def parse_questions(raw_text: str) -> List[Dict]:
6
  """
7
- Parse a model-generated block into list of question dicts:
8
- Each block expected like:
9
- 1. Question ...
10
- A) ...
11
- B) ...
12
- C) ...
13
- D) ...
14
- Answer: X
15
- Explanation: ...
16
  """
17
  if not raw_text:
18
  return []
19
 
20
- # normalize line endings
21
  text = raw_text.replace("\r\n", "\n").strip()
22
- # split by question number (keep lines starting with digit + dot)
23
- parts = re.split(r'\n(?=\d+\.)', text)
 
 
 
 
 
 
 
 
 
24
  questions = []
25
  for idx, part in enumerate(parts):
26
  part = part.strip()
27
  if not part:
28
  continue
29
- # number
30
- num_match = re.match(r'^\s*(\d+)\.\s*(.*)', part, re.DOTALL)
31
- if num_match:
32
- # remove leading "1. " from the part content
33
- content = part
34
- else:
35
- content = part
36
-
37
- # extract question (up to A))
38
- q_text = re.split(r'\nA\)', content, maxsplit=1)[0]
39
  q_text = re.sub(r'^\s*\d+\.\s*', '', q_text).strip()
40
 
41
- # extract options
42
  opts = []
43
  for letter in ['A','B','C','D']:
44
- m = re.search(rf'{letter}\)\s*(.*?)(?=\n[A-D]\)|\nAnswer:|\nExplanation:|$)', content, re.DOTALL)
45
- opts.append(m.group(1).strip() if m else "")
46
 
47
- ans_m = re.search(r'Answer:\s*([A-D])', content, re.IGNORECASE)
48
  answer = ans_m.group(1).upper() if ans_m else ""
49
 
50
- expl_m = re.search(r'Explanation:\s*([\s\S]*)', content, re.IGNORECASE)
51
  explanation = expl_m.group(1).strip() if expl_m else ""
52
 
53
  questions.append({
@@ -55,6 +47,28 @@ def parse_questions(raw_text: str) -> List[Dict]:
55
  "question": q_text,
56
  "options": opts,
57
  "answer": answer,
58
- "explanation": explanation
 
59
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  return questions
 
4
 
# A new numbered chunk starts at a newline followed by "<digits>.". The (?!\d)
# guard stops a line that begins with a decimal such as "3.14" from being
# mistaken for a question/solution label.
_NUMBERED_SPLIT = re.compile(r'\n(?=\d+\.(?!\d))')
_NUMBER_LABEL = re.compile(r'\s*(\d+)\.(?!\d)\s*')

# The solutions header must begin a line (optionally behind markdown "#"/"*"
# decoration) so the word "solutions:" inside a question is not taken as one.
_SOLUTIONS_HEADER = re.compile(
    r'^[ \t#*]*(?:Detailed[ \t]+)?Solutions:[ \t*]*',
    re.IGNORECASE | re.MULTILINE,
)

_STEM_END = re.compile(r'\n[ \t]*A\)')
# Tolerates decoration between the label and the letter: "Answer: (B)",
# "Answer: **B**". The trailing \b rejects words such as "Answer: Bravo".
_ANSWER = re.compile(r'Answer:[\s*(\[]*([A-D])\b', re.IGNORECASE)
_EXPLANATION = re.compile(r'Explanation:\s*([\s\S]*)', re.IGNORECASE)

_OPTION_LETTERS = ('A', 'B', 'C', 'D')
# Option labels are anchored to the start of a line so "(see part A)" in the
# question text is not read as option A. "[ \t]*" (not "\s*") after the label
# keeps an empty option from swallowing the following line. The terminators
# for Answer/Explanation are case-insensitive to agree with _ANSWER and
# _EXPLANATION; \Z is used because the text can span several lines.
_OPTION_PATTERNS = {
    letter: re.compile(
        rf'(?:^|\n)[ \t]*{letter}\)[ \t]*(.*?)'
        r'(?=\n[ \t]*[A-D]\)|\n[ \t*]*(?i:answer:|explanation:)|\Z)',
        re.DOTALL,
    )
    for letter in _OPTION_LETTERS
}


def _split_solutions(sol_section: str) -> List[tuple]:
    """Split the solutions section into ``(label_or_None, body)`` pairs."""
    entries = []
    for chunk in _NUMBERED_SPLIT.split(sol_section):
        chunk = chunk.strip()
        if not chunk:
            continue
        label = _NUMBER_LABEL.match(chunk)
        if label:
            entries.append((int(label.group(1)), chunk[label.end():].strip()))
        else:
            entries.append((None, chunk))
    return entries


def _match_solutions(entries: List[tuple], labels: List, count: int) -> List[str]:
    """Return one solution string per question ("" where none applies)."""
    sol_labels = [label for label, _ in entries if label is not None]
    labels_usable = (
        bool(sol_labels)
        and len(set(sol_labels)) == len(sol_labels)
        and all(label is not None for label in labels)
        and len(set(labels)) == len(labels)
    )
    if labels_usable:
        # Matching on the number keeps solutions aligned even when one is
        # missing or the section opens with an unnumbered intro line.
        lookup = {label: body for label, body in entries if label is not None}
        return [lookup.get(label, "") for label in labels]

    # Ambiguous or absent labels: fall back to positional order, ignoring
    # unnumbered intro text whenever numbered entries exist.
    bodies = [body for label, body in entries if label is not None]
    if not bodies:
        bodies = [body for _, body in entries]
    return [bodies[i] if i < len(bodies) else "" for i in range(count)]


def parse_questions(raw_text: str) -> List[Dict]:
    """Parse model-generated multiple-choice text into a list of question dicts.

    Expected layout per question::

        1. Question text
        A) ...
        B) ...
        C) ...
        D) ...
        Answer: X
        Explanation: ...

    optionally followed by a ``Detailed Solutions:`` / ``Solutions:`` section
    holding numbered solutions.

    Args:
        raw_text: Raw model output. Empty or ``None`` yields ``[]``.

    Returns:
        One dict per question with keys ``number``, ``question``, ``options``
        (always four strings, ``""`` for a missing option), ``answer`` (an
        upper-case letter A-D, or ``""``), ``explanation`` and
        ``detailed_solution``. ``detailed_solution`` falls back to the
        explanation when no matching solution is found.
    """
    if not raw_text:
        return []

    text = raw_text.replace("\r\n", "\n").replace("\r", "\n").strip()

    header = _SOLUTIONS_HEADER.search(text)
    if header:
        main_text = text[:header.start()].strip()
        sol_section = text[header.end():].strip()
    else:
        main_text, sol_section = text, ""

    questions = []
    labels = []  # number label parsed from each kept chunk (None if absent)
    for chunk in _NUMBERED_SPLIT.split(main_text):
        chunk = chunk.strip()
        if not chunk:
            continue

        label = _NUMBER_LABEL.match(chunk)
        stem = _STEM_END.split(chunk, maxsplit=1)[0]
        if label:
            stem = stem[label.end():]

        options = []
        for letter in _OPTION_LETTERS:
            found = _OPTION_PATTERNS[letter].search(chunk)
            options.append(found.group(1).strip() if found else "")

        answer_m = _ANSWER.search(chunk)

        # An unnumbered chunk with no options and no answer is intro text
        # ("Here are your questions:"), not a question.
        if label is None and answer_m is None and not any(options):
            continue

        explanation_m = _EXPLANATION.search(chunk)
        questions.append({
            # NOTE(review): the line that sets "number" is hidden in the diff
            # this was reconstructed from; a 1-based position is assumed.
            "number": len(questions) + 1,
            "question": stem.strip(),
            "options": options,
            "answer": answer_m.group(1).upper() if answer_m else "",
            "explanation": explanation_m.group(1).strip() if explanation_m else "",
            "detailed_solution": "",
        })
        labels.append(int(label.group(1)) if label else None)

    if sol_section:
        solutions = _match_solutions(
            _split_solutions(sol_section), labels, len(questions)
        )
    else:
        solutions = [""] * len(questions)

    for question, solution in zip(questions, solutions):
        question["detailed_solution"] = solution or question["explanation"]

    return questions