""" spec_validator_v2.py Strict JSON schema validator + generator for Django backend specs. """ import json import re from copy import deepcopy from typing import Dict, Any, List, Tuple from jsonschema import Draft202012Validator, ValidationError from langchain_core.prompts import ChatPromptTemplate from langchain_core.output_parsers import JsonOutputParser # ===================================================== # FILE LOADERS # ===================================================== def load_schema(path: str) -> Dict[str, Any]: with open(path, "r", encoding="utf-8") as f: return json.load(f) def load_json(path: str) -> Dict[str, Any]: with open(path, "r", encoding="utf-8") as f: return json.load(f) # ===================================================== # VALIDATION HELPERS # ===================================================== def _format_error(err: ValidationError) -> Dict[str, str]: path = "/" + "/".join(map(str, err.absolute_path)) if err.absolute_path else "/" return {"path": path, "message": err.message} def validate_schema( spec: Dict[str, Any], schema: Dict[str, Any] ) -> List[Dict[str, str]]: validator = Draft202012Validator(schema) errors = sorted(validator.iter_errors(spec), key=lambda e: e.path) return [_format_error(e) for e in errors] # ===================================================== # SAFE NORMALIZATIONS (SCHEMA-COMPATIBLE ONLY) # ===================================================== def _to_pascal_case(name: str) -> str: return "".join(part.capitalize() for part in re.split(r"[_\\s-]+", name) if part) def normalize_spec(spec: Dict[str, Any]) -> List[str]: """ Normalize names without introducing new keys. """ warnings = [] # Normalize project name if "project_name" in spec: normalized = re.sub(r"[^a-z0-9_]", "_", spec["project_name"].lower()) if normalized != spec["project_name"]: warnings.append("Normalized project_name") spec["project_name"] = normalized # Normalize model names models = spec.get("apps", {}).get("core", {}).get("models", {}) new_models = {} for model_name, model_def in models.items(): new_name = _to_pascal_case(model_name) if new_name != model_name: warnings.append(f"Renamed model '{model_name}' → '{new_name}'") new_models[new_name] = model_def if new_models: spec["apps"]["core"]["models"] = new_models return warnings # ===================================================== # MAIN VALIDATION FUNCTION # ===================================================== def validate_and_clean_spec( spec: Dict[str, Any], schema: Dict[str, Any], auto_fix: bool = True ) -> Tuple[bool, Dict[str, Any], List[Dict[str, str]], List[str]]: cleaned = deepcopy(spec) warnings: List[str] = [] if auto_fix: warnings.extend(normalize_spec(cleaned)) errors = validate_schema(cleaned, schema) if errors: return False, cleaned, errors, warnings return True, cleaned, [], warnings # ===================================================== # JSON SPEC GENERATION (LLM) # ===================================================== def generate_valid_json_spec( *, user_prompt: str, llm, retries: int = 3, ) -> Dict[str, Any]: """ Generates schema-valid JSON using LLM + strict validation. """ system_prompt = """ You are a JSON compiler. Return ONLY a JSON object that EXACTLY matches this schema. Do NOT add, remove, or rename keys. Do NOT include explanations or formatting. Schema: {{ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Minimal Django Backend Generator Spec", "type": "object", "required": [ "project_name", "database", "auth", "apps", "api_config" ], "properties": {{ "project_name": {{ "type": "string" }}, "database": {{ "type": "object", "required": ["engine", "name"], "properties": {{ "engine": {{ "type": "string", "enum": ["sqlite", "postgresql"] }}, "name": {{ "type": "string" }} }} }}, "auth": {{ "type": "object", "required": ["type"], "properties": {{ "type": {{ "type": "string", "enum": ["jwt", "session"] }} }} }}, "apps": {{ "type": "object", "required": ["core"], "properties": {{ "core": {{ "type": "object", "required": ["models", "apis"], "properties": {{ "models": {{ "type": "object", "patternProperties": {{ "^[A-Z][a-zA-Z0-9]*$": {{ "type": "object", "required": ["fields"], "properties": {{ "fields": {{ "type": "object", "patternProperties": {{ "^[a-z_][a-z0-9_]*$": {{ "type": "object", "required": ["type"], "properties": {{ "type": {{ "type": "string", "enum": [ "CharField", "TextField", "EmailField", "IntegerField", "BooleanField", "DateField", "DateTimeField", "ForeignKey" ] }}, "to": {{ "type": "string" }} }} }} }} }} }} }} }} }}, "apis": {{ "type": "object", "patternProperties": {{ "^[A-Z][a-zA-Z0-9]*$": {{ "type": "array", "items": {{ "type": "string", "enum": ["list", "create", "retrieve", "update", "delete"] }} }} }} }} }} }} }} }}, "api_config": {{ "type": "object", "required": ["base_url"], "properties": {{ "base_url": {{ "type": "string" }} }} }} }} }} """ prompt = ChatPromptTemplate.from_messages([ ("system", system_prompt), ("user", "{user_prompt}") ]) # usage schema = load_schema('spec_schema.json') chain = prompt | llm | JsonOutputParser() for attempt in range(1, retries + 1): try: result = chain.invoke({"user_prompt": user_prompt}) valid, cleaned, errors, _ = validate_and_clean_spec( result, schema, auto_fix=False ) if valid: return cleaned raise ValueError(errors) except Exception as e: if attempt == retries: raise RuntimeError(f"Failed after {retries} attempts: {e}") raise RuntimeError("Unreachable") # ===================================================== # CLI (OPTIONAL) # ===================================================== if __name__ == "__main__": import argparse parser = argparse.ArgumentParser(description="Validate Django backend JSON spec") parser.add_argument("spec", help="Path to spec.json") parser.add_argument("schema", help="Path to schema.json") args = parser.parse_args() schema = load_schema(args.schema) spec = load_json(args.spec) valid, cleaned, errors, warnings = validate_and_clean_spec(spec, schema) print("VALID:", valid) if warnings: print("WARNINGS:") for w in warnings: print("-", w) if errors: print("ERRORS:") for e in errors: print(f"{e['path']}: {e['message']}") else: print("Spec is valid.")