"""Component 6 evaluation helpers."""

from __future__ import annotations

import ast
import json
import re
from pathlib import Path
from typing import Dict, List

# One indentation level in regenerated code (same unit that
# restore_code_from_structured uses when it rebuilds lines).
_INDENT_UNIT = "    "

# Multi-character operators are listed before the single-character fallback
# so that "==", "!=", "+=", "**", "//", "->" ... are spaced as one unit
# instead of being torn apart into "= =", "* *", "- >".
_OPERATOR_RE = re.compile(
    r"\s*(\*\*=|//=|<<=|>>=|\*\*|//|->|[=!<>:+\-*/%&|^@]=|[=+\-*/%])\s*"
)

# Line prefixes / whole lines that mark a line as plausible Python.
_STATEMENT_PREFIXES = (
    "def ",
    "class ",
    "if ",
    "for ",
    "while ",
    "try:",
    "except",
    "with ",
    "return ",
    "import ",
    "from ",
    "print(",
    "elif ",
    "else:",
    "finally:",
    "raise ",
)
_BARE_STATEMENTS = frozenset({"return", "pass", "break", "continue", "raise"})

# Clauses that belong to an earlier compound statement and therefore sit one
# level *out* from the body that precedes them.
_CONTINUATION_RE = re.compile(r"(?:elif\b|else\s*:|except\b|finally\s*:)")


def python_syntax_ok(code: str) -> bool:
    """Return True when *code* parses as Python source, False otherwise."""
    try:
        ast.parse(code)
    except Exception:
        # Deliberately broad: this is a yes/no probe, and ast.parse can fail
        # with more than SyntaxError (e.g. RecursionError on deep nesting).
        return False
    return True


def save_json(path: str, payload: Dict) -> None:
    """Write *payload* to *path* as indented UTF-8 JSON.

    Missing parent directories are created first.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(
        json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8"
    )


def _normalize_punctuation_spacing(text: str) -> str:
    """Normalize whitespace around punctuation and arithmetic operators.

    Whitespace before ``, . : ; ) ] }`` and after ``( [ {`` is removed,
    assignment/arithmetic/comparison operators get exactly one space on each
    side, and runs of spaces/tabs collapse to a single space.

    The rewrite is purely textual: operators inside string literals are
    spaced as well.
    """
    text = re.sub(r"\s+([,.:;\)\]\}])", r"\1", text)
    text = re.sub(r"([\(\[\{])\s+", r"\1", text)
    text = _OPERATOR_RE.sub(r" \1 ", text)
    return re.sub(r"[ \t]+", " ", text).strip()


def _remove_non_python_noise(line: str) -> str:
    """Drop characters this pipeline treats as noise: NUL, braces, semicolons."""
    # NOTE(review): the search string below is EMPTY in the text under review,
    # which makes str.replace insert "1" between every character. This looks
    # like a special-token literal that was lost before the file reached
    # review; restore the original token from version control.
    line = line.replace("", "1")
    for junk in ("\u0000", "{", "}", ";"):
        line = line.replace(junk, "")
    return line


def _fix_identifier_spacing(line: str) -> str:
    """Join identifiers that were split by spaces, using underscores.

    Handles function names (``def my func(`` -> ``def my_func(``), class
    names, and the left-hand side of a simple assignment. Leading
    indentation of the line is preserved in all three cases.
    """
    # def name with spaces -> def name_with_spaces
    m = re.match(r"^(\s*def\s+)([A-Za-z_][A-Za-z0-9_\s]*)(\s*\(.*)$", line)
    if m:
        fn = re.sub(r"\s+", "_", m.group(2).strip())
        line = f"{m.group(1)}{fn}{m.group(3)}"

    # class name with spaces -> class Name_With_Spaces
    m = re.match(r"^(\s*class\s+)([A-Za-z_][A-Za-z0-9_\s]*)(\s*:.*)$", line)
    if m:
        cn = re.sub(r"\s+", "_", m.group(2).strip())
        line = f"{m.group(1)}{cn}{m.group(3)}"

    # assignment lhs spaces -> underscore.
    if "=" in line and "==" not in line:
        lhs, rhs = line.split("=", 1)
        lhs_clean = lhs.strip()
        # Only plain identifier words qualify; "x +", "a <", "f(a" do not.
        if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_\s]*", lhs_clean):
            leading = lhs[: len(lhs) - len(lhs.lstrip())]
            lhs_clean = re.sub(r"\s+", "_", lhs_clean)
            line = f"{leading}{lhs_clean} = {rhs.strip()}"
    return line


def _looks_like_python_line(line: str) -> bool:
    """Heuristically decide whether *line* is a Python statement.

    True for lines starting with a known statement keyword, for bare
    ``return``/``pass``/``break``/``continue``/``raise``, and for lines that
    begin with ``identifier =``. Blank lines are never Python-looking.
    """
    s = line.strip()
    if not s:
        return False
    if s in _BARE_STATEMENTS or s.startswith(_STATEMENT_PREFIXES):
        return True
    return re.match(r"^[A-Za-z_][A-Za-z0-9_]*\s*=", s) is not None


def _trim_to_code(lines: List[str]) -> List[str]:
    """Drop the noisy preamble and any later line that is not plausibly code.

    Everything before the first Python-looking line is discarded. After that
    point blank lines, Python-looking lines and lines that start with a space
    (i.e. indented lines) are kept.
    """
    start = next(
        (i for i, line in enumerate(lines) if _looks_like_python_line(line)),
        len(lines),
    )
    return [
        line
        for line in lines[start:]
        if not line.strip()
        or _looks_like_python_line(line)
        or line.startswith(" ")
    ]


def _best_effort_python_format(lines: List[str]) -> List[str]:
    """Re-indent *lines* from scratch using simple block heuristics.

    Existing indentation is ignored. A line ending in ``:`` opens a block;
    ``return``/``raise`` (with or without operand), ``pass``, ``break`` and
    ``continue`` close the current block *after* being emitted inside it;
    ``elif``/``else``/``except``/``finally`` step back out to line up with
    the statement they continue. Blank lines are emitted as empty strings.
    """
    out: List[str] = []
    indent = 0
    # True right after a terminator already stepped out one level, so a
    # following else/except/finally must not step out a second time.
    block_just_closed = False

    for raw in lines:
        line = raw.strip()
        if not line:
            out.append("")
            continue

        if _CONTINUATION_RE.match(line) and not block_just_closed:
            indent = max(0, indent - 1)

        out.append(_INDENT_UNIT * indent + line)
        block_just_closed = False

        if line.endswith(":"):
            indent += 1
        elif line in _BARE_STATEMENTS or line.startswith(("return ", "raise ")):
            # Emit first, dedent afterwards: a terminator that is the only
            # statement of a block (e.g. "if x:" / "pass") must stay inside it.
            indent = max(0, indent - 1)
            block_just_closed = True
    return out


def _clean_structured_line(tokens: List[str]) -> str:
    """Join *tokens* with spaces and run the per-line cleanup pipeline."""
    line = " ".join(tokens).strip()
    line = _remove_non_python_noise(line)
    line = _normalize_punctuation_spacing(line)
    return _fix_identifier_spacing(line)


def restore_code_from_structured(decoded: str) -> str:
    """Rebuild Python source text from a decoded structured token string.

    Steps: strip special tokens, optionally keep only the text after a
    marker, turn underscores into spaces, split on whitespace, then assemble
    lines driven by indent / dedent / newline tokens. Each line goes through
    noise removal, punctuation spacing and identifier re-joining; finally
    non-code lines are trimmed and the result is re-indented.

    Returns the reconstructed source with surrounding whitespace removed.
    """
    # NOTE(review): every special-token literal in this function (the five
    # stripped tokens, the split marker, and the indent / dedent / newline
    # tokens) is an EMPTY string in the text under review, and the marker in
    # the ``in`` test reads as two newlines. They appear to have been lost
    # before review. As written, ``split("", 1)`` raises ValueError and the
    # three ``tok == ""`` tests can never match because str.split() yields no
    # empty tokens. Restore the original literals from version control.
    text = decoded
    for tok in ["", "", "", "", ""]:
        text = text.replace(tok, "")
    if "\n\n" in text:
        text = text.split("", 1)[1]

    text = text.replace("_", " ")
    tokens = text.strip().split()

    lines: List[str] = []
    current_tokens: List[str] = []
    indent = 0

    for tok in tokens:
        if tok == "":  # indent token
            indent += 1
            continue
        if tok == "":  # dedent token
            indent = max(0, indent - 1)
            continue
        if tok == "":  # newline token: flush the pending line
            line = _clean_structured_line(current_tokens)
            lines.append(_INDENT_UNIT * indent + line if line else "")
            current_tokens = []
            continue
        current_tokens.append(tok)

    # Flush a trailing line that was not terminated by a newline token.
    if current_tokens:
        line = _clean_structured_line(current_tokens)
        if line:
            lines.append(_INDENT_UNIT * indent + line)

    lines = _trim_to_code(lines)
    lines = _best_effort_python_format(lines)

    # The final strip() also removes leading/trailing blank lines, so no
    # separate trimming pass over the list is needed.
    return "\n".join(lines).strip()