|
|
|
|
|
import re |
|
|
from typing import List, Dict |
|
|
|
|
|
def parse_questions(raw_text: str) -> List[Dict]:
    """Parse a model-generated quiz block into a list of question dicts.

    The expected layout is a sequence of numbered items::

        1. Question text
        A) first option
        B) second option
        C) third option
        D) fourth option
        Answer: B
        Explanation: free text

    optionally followed by a ``Detailed Solutions:`` (or ``Solutions:``)
    section holding one numbered solution per question.

    Args:
        raw_text: The raw text to parse. ``None`` or an empty string yields
            an empty list.

    Returns:
        One dict per question, in input order, with the keys ``number``
        (1-based position among the parsed questions), ``question``,
        ``options`` (always four strings for A-D, ``""`` when an option is
        missing), ``answer`` (upper-case letter or ``""``), ``explanation``
        and ``detailed_solution``. The detailed solution is taken by position
        from the solutions section and falls back to the explanation when no
        solution exists for that position.
    """
    if not raw_text:
        return []

    # Normalise every newline flavour; all patterns below key off "\n".
    text = raw_text.replace("\r\n", "\n").replace("\r", "\n").strip()

    # Peel off the trailing solutions section, if there is one.
    sol_match = re.search(
        r'(?:Detailed Solutions:|Solutions:)\s*([\s\S]*)$', text, re.IGNORECASE
    )
    if sol_match:
        sol_section = sol_match.group(1).strip()
        main_text = text[:sol_match.start(0)].strip()
    else:
        sol_section = ""
        main_text = text

    questions: List[Dict] = []
    for chunk in re.split(r'\n(?=\d+\.)', main_text):
        chunk = chunk.strip()
        if not chunk:
            continue
        # Number by emitted position so skipped chunks never leave gaps.
        entry = _parse_question_chunk(chunk, len(questions) + 1)
        # Text ahead of the first numbered item (e.g. "Here are your
        # questions:") carries no number, options or answer; it is preamble,
        # not a question.
        is_numbered = _LEADING_NUMBER.match(chunk) is not None
        if not is_numbered and not any(entry["options"]) and not entry["answer"]:
            continue
        questions.append(entry)

    solutions: List[str] = []
    if sol_section:
        for piece in re.split(r'\n(?=\d+\.)', sol_section):
            piece = piece.strip()
            if piece:
                solutions.append(_LEADING_NUMBER.sub('', piece).strip())

    # Solutions are matched to questions by position; questions without one
    # reuse their short explanation.
    for position, question in enumerate(questions):
        if position < len(solutions):
            question["detailed_solution"] = solutions[position]
        else:
            question["detailed_solution"] = question["explanation"]

    return questions


# "12. " style prefix at the very start of a chunk.
_LEADING_NUMBER = re.compile(r'^\s*\d+\.\s*')

# An option's text ends at the next option line, at an Answer/Explanation
# line (any letter case, matching the case-insensitive lookups below), or at
# the end of the chunk.
_OPTION_END = r'(?=\n[ \t]*[A-D]\)|\n[ \t]*(?i:Answer|Explanation):|$)'

# Option markers must begin a line (optionally indented); otherwise text such
# as "P(A)" inside the question would be mistaken for option A. Only
# horizontal whitespace is skipped after the marker so that an empty option
# does not swallow the following line.
_OPTION_PATTERNS = {
    letter: re.compile(
        r'(?:^|\n)[ \t]*' + letter + r'\)[ \t]*(.*?)' + _OPTION_END,
        re.DOTALL,
    )
    for letter in ("A", "B", "C", "D")
}

# The word boundary keeps "Answer: Both ..." from being read as answer "B".
_ANSWER_PATTERN = re.compile(r'Answer:\s*([A-D])\b', re.IGNORECASE)
_EXPLANATION_PATTERN = re.compile(r'Explanation:\s*([\s\S]*)', re.IGNORECASE)
_FIRST_OPTION_LINE = re.compile(r'\n[ \t]*A\)')


def _parse_question_chunk(chunk: str, number: int) -> Dict:
    """Parse one stripped question chunk into its dict, labelled ``number``."""
    # The question is everything before the first "A)" line, minus its number.
    question_text = _FIRST_OPTION_LINE.split(chunk, maxsplit=1)[0]
    question_text = _LEADING_NUMBER.sub('', question_text).strip()

    options = []
    for letter in ("A", "B", "C", "D"):
        found = _OPTION_PATTERNS[letter].search(chunk)
        options.append(found.group(1).strip() if found else "")

    answer_match = _ANSWER_PATTERN.search(chunk)
    answer = answer_match.group(1).upper() if answer_match else ""

    # The explanation runs to the end of the chunk.
    explanation_match = _EXPLANATION_PATTERN.search(chunk)
    explanation = explanation_match.group(1).strip() if explanation_match else ""

    return {
        "number": number,
        "question": question_text,
        "options": options,
        "answer": answer,
        "explanation": explanation,
        "detailed_solution": "",
    }
|
|
|