""" Component 6 evaluation helpers. """ from __future__ import annotations import ast import json import re from pathlib import Path from typing import Dict, List def python_syntax_ok(code: str) -> bool: try: ast.parse(code) return True except Exception: return False def save_json(path: str, payload: Dict) -> None: p = Path(path) p.parent.mkdir(parents=True, exist_ok=True) p.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8") def _normalize_punctuation_spacing(text: str) -> str: text = re.sub(r"\s+([,.:;\)\]\}])", r"\1", text) text = re.sub(r"([\(\[\{])\s+", r"\1", text) text = re.sub(r"\s*=\s*", " = ", text) text = re.sub(r"\s*\+\s*", " + ", text) text = re.sub(r"\s*-\s*", " - ", text) text = re.sub(r"\s*\*\s*", " * ", text) text = re.sub(r"\s*/\s*", " / ", text) text = re.sub(r"\s*%\s*", " % ", text) return re.sub(r"[ \t]+", " ", text).strip() def _remove_non_python_noise(line: str) -> str: line = line.replace("", "1") line = line.replace("\u0000", "") line = line.replace("{", "") line = line.replace("}", "") line = line.replace(";", "") return line def _fix_identifier_spacing(line: str) -> str: # def name with spaces -> def name_with_spaces m = re.match(r"^(\s*def\s+)([A-Za-z_][A-Za-z0-9_\s]*)(\s*\(.*)$", line) if m: fn = re.sub(r"\s+", "_", m.group(2).strip()) line = f"{m.group(1)}{fn}{m.group(3)}" # class name with spaces -> class Name_With_Spaces m = re.match(r"^(\s*class\s+)([A-Za-z_][A-Za-z0-9_\s]*)(\s*:.*)$", line) if m: cn = re.sub(r"\s+", "_", m.group(2).strip()) line = f"{m.group(1)}{cn}{m.group(3)}" # assignment lhs spaces -> underscore. if "=" in line and "==" not in line: lhs, rhs = line.split("=", 1) lhs_clean = lhs.strip() if re.fullmatch(r"[A-Za-z_][A-Za-z0-9_\s]*", lhs_clean): lhs_clean = re.sub(r"\s+", "_", lhs_clean) line = f"{lhs_clean} = {rhs.strip()}" return line def _looks_like_python_line(line: str) -> bool: if not line.strip(): return False starts = ( "def ", "class ", "if ", "for ", "while ", "try:", "except", "with ", "return ", "import ", "from ", "print(", ) s = line.strip() if s.startswith(starts): return True if re.match(r"^[A-Za-z_][A-Za-z0-9_]*\s*=", s): return True return False def _trim_to_code(lines: List[str]) -> List[str]: # Drop noisy preamble lines until first plausible Python line. i = 0 while i < len(lines) and not _looks_like_python_line(lines[i]): i += 1 lines = lines[i:] if i < len(lines) else [] # Keep only plausible lines after start; allow blank lines. out = [] for line in lines: if not line.strip(): out.append(line) continue if _looks_like_python_line(line) or line.startswith(" "): out.append(line) return out def _best_effort_python_format(lines: List[str]) -> List[str]: out: List[str] = [] indent = 0 for raw in lines: line = raw.strip() if not line: out.append("") continue if line in {"return", "pass", "break", "continue"}: indent = max(0, indent - 1) out.append((" " * indent) + line) if line.endswith(":"): indent += 1 return out def restore_code_from_structured(decoded: str) -> str: text = decoded for tok in ["", "", "", "", ""]: text = text.replace(tok, "") if "" in text: text = text.split("", 1)[1] text = text.replace("_", " ") tokens = text.strip().split() lines: List[str] = [] current_tokens: List[str] = [] indent = 0 for tok in tokens: if tok == "": indent += 1 continue if tok == "": indent = max(0, indent - 1) continue if tok == "": line = " ".join(current_tokens).strip() line = _remove_non_python_noise(line) line = _normalize_punctuation_spacing(line) line = _fix_identifier_spacing(line) if line: lines.append((" " * indent) + line) else: lines.append("") current_tokens = [] continue current_tokens.append(tok) if current_tokens: line = " ".join(current_tokens).strip() line = _remove_non_python_noise(line) line = _normalize_punctuation_spacing(line) line = _fix_identifier_spacing(line) if line: lines.append((" " * indent) + line) lines = _trim_to_code(lines) lines = _best_effort_python_format(lines) while lines and not lines[0].strip(): lines.pop(0) while lines and not lines[-1].strip(): lines.pop() return "\n".join(lines).strip()