Spaces:

ayush2917
/

gate-chemical-engineering-practice

Sleeping

App Files Files Community

ayush2917 committed on Nov 21, 2025

Commit

65cb957

verified ·

1 Parent(s): 7cc81f7

Update app/utils/parser.py

Browse files

Files changed (1) hide show

app/utils/parser.py +42 -28

app/utils/parser.py CHANGED Viewed

@@ -4,50 +4,42 @@ from typing import List, Dict
 def parse_questions(raw_text: str) -> List[Dict]:
     """
-    Parse a model-generated block into list of question dicts:
-    Each block expected like:
-    1. Question ...
-    A) ...
-    B) ...
-    C) ...
-    D) ...
-    Answer: X
-    Explanation: ...
     """
     if not raw_text:
         return []
-    # normalize line endings
     text = raw_text.replace("\r\n", "\n").strip()
-    # split by question number (keep lines starting with digit + dot)
-    parts = re.split(r'\n(?=\d+\.)', text)
     questions = []
     for idx, part in enumerate(parts):
         part = part.strip()
         if not part:
             continue
-        # number
-        num_match = re.match(r'^\s*(\d+)\.\s*(.*)', part, re.DOTALL)
-        if num_match:
-            # remove leading "1. " from the part content
-            content = part
-        else:
-            content = part
-        # extract question (up to A))
-        q_text = re.split(r'\nA\)', content, maxsplit=1)[0]
         q_text = re.sub(r'^\s*\d+\.\s*', '', q_text).strip()
-        # extract options
         opts = []
         for letter in ['A','B','C','D']:
-            m = re.search(rf'{letter}\)\s*(.*?)(?=\n[A-D]\)|\nAnswer:|\nExplanation:|$)', content, re.DOTALL)
-            opts.append(m.group(1).strip() if m else "")
-        ans_m = re.search(r'Answer:\s*([A-D])', content, re.IGNORECASE)
         answer = ans_m.group(1).upper() if ans_m else ""
-        expl_m = re.search(r'Explanation:\s*([\s\S]*)', content, re.IGNORECASE)
         explanation = expl_m.group(1).strip() if expl_m else ""
         questions.append({
@@ -55,6 +47,28 @@ def parse_questions(raw_text: str) -> List[Dict]:
             "question": q_text,
             "options": opts,
             "answer": answer,
-            "explanation": explanation
         })
     return questions

 def parse_questions(raw_text: str) -> List[Dict]:
     """
+    Parse a model-generated block into list of question dicts with keys:
+    number, question, options [A,B,C,D], answer, explanation, detailed_solution (optional)
     """
     if not raw_text:
         return []
     text = raw_text.replace("\r\n", "\n").strip()
+    # Try to separate Detailed Solutions section if present
+    sol_section = ""
+    m = re.search(r'(?:Detailed Solutions:|Solutions:)\s*([\s\S]*)$', text, re.IGNORECASE)
+    if m:
+        sol_section = m.group(1).strip()
+        main_text = text[:m.start(0)].strip()
+    else:
+        main_text = text
+    parts = re.split(r'\n(?=\d+\.)', main_text)
     questions = []
     for idx, part in enumerate(parts):
         part = part.strip()
         if not part:
             continue
+        q_text = re.split(r'\nA\)', part, maxsplit=1)[0]
         q_text = re.sub(r'^\s*\d+\.\s*', '', q_text).strip()
         opts = []
         for letter in ['A','B','C','D']:
+            m_opt = re.search(rf'{letter}\)\s*(.*?)(?=\n[A-D]\)|\nAnswer:|\nExplanation:|$)', part, re.DOTALL)
+            opts.append(m_opt.group(1).strip() if m_opt else "")
+        ans_m = re.search(r'Answer:\s*([A-D])', part, re.IGNORECASE)
         answer = ans_m.group(1).upper() if ans_m else ""
+        expl_m = re.search(r'Explanation:\s*([\s\S]*?)(?=$)', part, re.IGNORECASE)
         explanation = expl_m.group(1).strip() if expl_m else ""
         questions.append({
             "question": q_text,
             "options": opts,
             "answer": answer,
+            "explanation": explanation,
+            "detailed_solution": ""
         })
+    # Map detailed solutions if present
+    if sol_section:
+        sol_parts = re.split(r'\n(?=\d+\.)', sol_section)
+        sol_list = []
+        for sp in sol_parts:
+            sp = sp.strip()
+            if not sp:
+                continue
+            sp_text = re.sub(r'^\s*\d+\.\s*', '', sp).strip()
+            sol_list.append(sp_text)
+        for i, q in enumerate(questions):
+            if i < len(sol_list):
+                q['detailed_solution'] = sol_list[i]
+            else:
+                q['detailed_solution'] = q.get('explanation', '')
+    else:
+        for q in questions:
+            q['detailed_solution'] = q.get('explanation', '')
     return questions