| import re |
| from typing import Any |
|
|
# Table-header / boilerplate fragments (compared lowercased and stripped)
# that the tokeniser must ignore entirely — column titles, cognitive-level
# legend words, and form fields like "USN".
SKIP_LINES = {
    "q. no.", "questions", "marks", "cl", "co", "co no.", "co description",
    "cognitive", "level", "remember", "understand", "apply", "analyze",
    "analyse", "evaluate", "create", "course outcomes", "usn", "no.",
}
|
|
# Maps the Bloom's-taxonomy spellings seen in papers ("CLn", "BTLn", bare
# "Ln"; keys stored lowercase) to the canonical "CLn" form used in output.
BLOOMS_MAP = {
    "cl1": "CL1", "cl2": "CL2", "cl3": "CL3",
    "cl4": "CL4", "cl5": "CL5", "cl6": "CL6",
    "btl1": "CL1", "btl2": "CL2", "btl3": "CL3",
    "btl4": "CL4", "btl5": "CL5", "btl6": "CL6",
    "l1": "CL1", "l2": "CL2", "l3": "CL3",
    "l4": "CL4", "l5": "CL5", "l6": "CL6",
}
|
|
# Line classifiers for the tokeniser. All are whole-line matches except
# IS_PAGE_BREAK, which only needs the marker to appear at line start
# (re.match anchors at position 0, not end of string).
IS_PART_HEADER = re.compile(r'^\s*PART\s+[A-Z]\s*$', re.IGNORECASE)   # e.g. "PART A"
IS_OR = re.compile(r'^\s*OR\s*$', re.IGNORECASE)                       # alternative-question separator
IS_PAGE_BREAK = re.compile(r'---\s*PAGE BREAK\s*---', re.IGNORECASE)   # extractor's page marker
IS_QUESTION_NUM = re.compile(r'^\s*(\d{1,2})\s*$')                     # bare 1-2 digit number (ambiguous)
IS_SUB_LABEL = re.compile(r'^\s*([a-e])\s*$', re.IGNORECASE)           # sub-question letter a–e
IS_BLOOMS = re.compile(r'^\s*(CL[1-6]|BTL[1-6]|L[1-6])\s*$', re.IGNORECASE)  # Bloom's level cell
IS_CO = re.compile(r'^\s*(CO\s*\d{1,2})\s*$', re.IGNORECASE)           # course-outcome cell
|
|
|
|
def normalise_co(raw: str) -> str:
    """Normalise a course-outcome label to the canonical ``COn`` form.

    If *raw* contains digits, the first run of digits wins (``"CO 3"`` ->
    ``"CO3"``); otherwise the text is uppercased with whitespace removed.
    """
    digits = re.search(r'\d+', raw)
    if digits is not None:
        return f"CO{digits.group()}"
    return raw.strip().upper().replace(" ", "")
|
|
|
|
def clean(line: str) -> str:
    """Return *line* with leading and trailing whitespace removed."""
    stripped = line.strip()
    return stripped
|
|
|
|
def is_skip(line: str) -> bool:
    """Return True if *line* is blank or known table-header boilerplate.

    Membership is tested against SKIP_LINES, which holds lowercased,
    stripped header fragments.
    """
    # Hoist the clean() call — the original evaluated it twice — and test
    # emptiness first so blank lines skip the set lookup entirely.
    stripped = clean(line)
    return not stripped or stripped.lower() in SKIP_LINES
|
|
|
|
| |
| |
| |
| |
|
|
# Token kinds emitted by tokenise(). TOKEN_MARKS is declared for symmetry
# but the tokeniser emits bare numbers as "AMBIG" instead, leaving the
# question-number/marks decision to the parser.
TOKEN_PART = "PART"
TOKEN_OR = "OR"
TOKEN_QNUM = "QNUM"
TOKEN_SUB = "SUB"
TOKEN_BLOOMS = "BLOOMS"
TOKEN_CO = "CO"
TOKEN_MARKS = "MARKS"
TOKEN_TEXT = "TEXT"
|
|
|
|
def tokenise(lines: list[str]) -> list[tuple[str, str]]:
    """Classify raw text lines into (token-kind, value) pairs.

    Classification is order-sensitive: structural markers are tried before
    content. A bare 1-2 digit number cannot be distinguished here between a
    question number and a marks value, so it is emitted as an "AMBIG" token
    for the parser to resolve with surrounding context.
    """
    out: list[tuple[str, str]] = []
    for raw_line in lines:
        stripped = clean(raw_line)
        # Drop blanks, page-break markers and table-header noise outright.
        if not stripped or IS_PAGE_BREAK.match(stripped) or is_skip(stripped):
            continue
        if IS_PART_HEADER.match(stripped):
            out.append((TOKEN_PART, stripped))
        elif IS_OR.match(stripped):
            out.append((TOKEN_OR, stripped))
        elif IS_BLOOMS.match(stripped):
            # Normalise e.g. "btl3" / "L3" to the canonical "CL3" form.
            compact = stripped.strip().upper().replace(" ", "")
            out.append((TOKEN_BLOOMS, BLOOMS_MAP.get(compact.lower(), compact)))
        elif IS_CO.match(stripped):
            out.append((TOKEN_CO, normalise_co(stripped)))
        elif IS_SUB_LABEL.match(stripped):
            out.append((TOKEN_SUB, stripped.strip().lower()))
        elif IS_QUESTION_NUM.match(stripped):
            # Question number or marks value — defer the decision.
            out.append(("AMBIG", stripped.strip()))
        else:
            out.append((TOKEN_TEXT, stripped))
    return out
|
|
|
|
def parse_question_paper(text: str) -> dict[str, Any]:
    """Parse extracted question-paper text into structured question parts.

    The token stream from ``tokenise`` is folded with a small state machine:
    "AMBIG" bare numbers resolve to marks when a sub-question is open and
    still awaiting marks, otherwise to question numbers; a sub-question is
    committed once marks, Bloom's level and CO have been seen in that order.

    Returns a dict with:
        question_parts: list of part dicts (partNumber, question1Number,
            question2Number, subQuestions).
        total_marks: sum of all committed sub-question marks.
        warnings: human-readable notes about missing data.
    """
    lines = text.splitlines()
    tokens = tokenise(lines)
    warnings: list[str] = []

    parts: list[dict] = []
    part_number = 0
    # Question numbers of the current main/alternative (OR) pair.
    current_q1: int | None = None
    current_q2: int | None = None
    after_or = False

    # State of the sub-question currently being assembled.
    sub_label = ""
    marks = 0
    blooms = ""
    co_number = ""
    in_sub = False
    # Ordering flags: marks must precede Bloom's, Bloom's must precede CO.
    saw_marks = False
    saw_blooms = False

    def get_or_create_part() -> dict:
        # Return the part for the current part_number, creating it on first
        # use with best-guess question numbers (q2 defaults to q1 + 1).
        for p in parts:
            if p["partNumber"] == part_number:
                return p
        p = {
            "partNumber": part_number,
            "question1Number": current_q1 or 1,
            "question2Number": current_q2 or (current_q1 + 1 if current_q1 else 2),
            "subQuestions": [],
        }
        parts.append(p)
        return p

    def commit_sub():
        # Flush the assembled sub-question into the current part and clear
        # the per-sub state for the next one.
        nonlocal in_sub, sub_label, marks, blooms, co_number, saw_marks, saw_blooms
        p = get_or_create_part()
        if after_or and current_q2:
            p["question2Number"] = current_q2
        p["subQuestions"].append({
            "label": sub_label,
            "marks": marks,
            "bloomsLevel": blooms,
            "co_number": co_number,
        })
        in_sub = False
        sub_label = ""
        marks = 0
        blooms = ""
        co_number = ""
        saw_marks = False
        saw_blooms = False

    def reset_sub():
        # Discard a partially-assembled sub-question without recording it.
        nonlocal in_sub, sub_label, marks, blooms, co_number, saw_marks, saw_blooms
        in_sub = False
        sub_label = ""
        marks = 0
        blooms = ""
        co_number = ""
        saw_marks = False
        saw_blooms = False

    for kind, value in tokens:

        if kind == TOKEN_PART:
            # A new PART header abandons any half-built sub-question.
            reset_sub()
            after_or = False
            continue

        if kind == TOKEN_OR:
            if in_sub:
                reset_sub()
            # NOTE(review): after_or is only cleared by a PART header, so a
            # later question pair inside the same part would be folded into
            # this OR pair — confirm whether papers ever have that layout.
            after_or = True
            continue

        if kind == "AMBIG":
            # Bare number: marks if a sub-question is open and still waiting
            # for marks; otherwise a question number.
            val = int(value)
            if in_sub and not saw_marks and 1 <= val <= 25:
                marks = val
                saw_marks = True
                continue
            elif not in_sub:
                if not after_or:
                    # First question of a new pair; assume the alternative is
                    # the next consecutive number until told otherwise.
                    current_q1 = val
                    current_q2 = val + 1
                    part_number += 1
                else:
                    # Post-OR question number; patch the part if it already
                    # exists (it may have been created with the guess above).
                    current_q2 = val
                    for p in parts:
                        if p["partNumber"] == part_number:
                            p["question2Number"] = val
                continue
            # Open sub with marks already seen: the number is dropped.
            continue

        if kind == TOKEN_SUB:
            # New sub-question label: commit the previous sub only if fully
            # populated, otherwise discard its partial state.
            if in_sub and saw_marks and saw_blooms and co_number:
                commit_sub()
            elif in_sub:
                reset_sub()
            sub_label = value
            in_sub = True
            continue

        if in_sub:
            # Enforce column order: Bloom's only after marks, CO only after
            # Bloom's. CO is the last column, so seeing it commits the sub.
            if kind == TOKEN_BLOOMS and saw_marks:
                blooms = value
                saw_blooms = True
                continue

            if kind == TOKEN_CO and saw_blooms:
                co_number = value
                commit_sub()
                continue

        if kind == TOKEN_TEXT:
            # Question wording is not captured; only metadata is extracted.
            continue

        continue

    # Flush a trailing sub-question that at least has marks recorded.
    if in_sub and marks > 0:
        commit_sub()

    if not parts:
        warnings.append("No sub-questions detected. The paper layout may be unusual.")
    else:
        missing_co = sum(1 for p in parts for sq in p["subQuestions"] if not sq["co_number"])
        missing_bl = sum(1 for p in parts for sq in p["subQuestions"] if not sq["bloomsLevel"])
        # NOTE(review): the "β" in these messages looks like a mis-encoded
        # dash — confirm the intended glyph before changing user-facing text.
        if missing_co:
            warnings.append(f"{missing_co} sub-question(s) have no CO detected β please fill manually.")
        if missing_bl:
            warnings.append(f"{missing_bl} sub-question(s) have no Bloom's level detected β please fill manually.")

    total_marks = sum(sq["marks"] for p in parts for sq in p["subQuestions"])

    return {
        "question_parts": parts,
        "total_marks": total_marks,
        "warnings": warnings,
    }