""" Validate generated code for common issues. """ import ast import re from typing import Optional class CodeChecker: """Validate generated Space code for correctness and common issues.""" def check(self, files: dict, sdk: str) -> dict: """ Validate all files in the repo. Returns dict with: valid: bool errors: list of error strings warnings: list of warning strings file_checks: dict of filename -> {valid, issues} """ errors = [] warnings = [] file_checks = {} for filename, content in files.items(): check = self._check_file(filename, content, sdk) file_checks[filename] = check errors.extend(check.get("errors", [])) warnings.extend(check.get("warnings", [])) # Cross-file checks cross_issues = self._cross_file_checks(files, sdk) errors.extend(cross_issues.get("errors", [])) warnings.extend(cross_issues.get("warnings", [])) return { "valid": len(errors) == 0, "errors": errors, "warnings": warnings, "file_checks": file_checks, } def _check_file(self, filename: str, content: str, sdk: str) -> dict: """Check a single file.""" errors = [] warnings = [] if not content or not content.strip(): errors.append(f"{filename}: File is empty") return {"valid": False, "errors": errors, "warnings": warnings} if filename.endswith(".py"): py_result = self._check_python(filename, content) errors.extend(py_result["errors"]) warnings.extend(py_result["warnings"]) elif filename == "Dockerfile": docker_result = self._check_dockerfile(content) errors.extend(docker_result["errors"]) warnings.extend(docker_result["warnings"]) elif filename == "requirements.txt": req_result = self._check_requirements(content) errors.extend(req_result["errors"]) warnings.extend(req_result["warnings"]) elif filename == "README.md": if "---" not in content: warnings.append("README.md: Missing YAML frontmatter") elif filename.endswith(".html"): html_result = self._check_html(filename, content) errors.extend(html_result["errors"]) warnings.extend(html_result["warnings"]) valid = len(errors) == 0 return {"valid": valid, "errors": errors, "warnings": warnings} def _check_python(self, filename: str, content: str) -> dict: """Validate Python code syntax and common patterns.""" errors = [] warnings = [] # Syntax check try: ast.parse(content) except SyntaxError as e: errors.append(f"{filename}: Python syntax error at line {e.lineno}: {e.msg}") return {"errors": errors, "warnings": warnings} # Check for common issues if "import " not in content and "from " not in content: warnings.append(f"{filename}: No imports found") # Check for dangerous patterns dangerous_patterns = [ (r"os\.system\(", "os.system() call found - potential security risk"), (r"eval\(", "eval() call found - potential security risk"), (r"exec\(", "exec() call found - potential security risk"), (r"__import__\(", "__import__() call found - potential security risk"), ] for pattern, msg in dangerous_patterns: if re.search(pattern, content): warnings.append(f"{filename}: {msg}") # Check for hardcoded tokens/secrets secret_patterns = [ (r'(?:token|key|secret|password)\s*=\s*["\'][^"\']{10,}["\']', "Possible hardcoded secret"), ] for pattern, msg in secret_patterns: if re.search(pattern, content, re.IGNORECASE): warnings.append(f"{filename}: {msg}") return {"errors": errors, "warnings": warnings} def _check_dockerfile(self, content: str) -> dict: """Validate Dockerfile content.""" errors = [] warnings = [] if "FROM" not in content: errors.append("Dockerfile: Missing FROM instruction") if "EXPOSE" not in content: warnings.append("Dockerfile: Missing EXPOSE instruction") if "7860" not in content: warnings.append("Dockerfile: Port 7860 not found (required for HF Spaces)") if "CMD" not in content and "ENTRYPOINT" not in content: errors.append("Dockerfile: Missing CMD or ENTRYPOINT") return {"errors": errors, "warnings": warnings} def _check_requirements(self, content: str) -> dict: """Validate requirements.txt.""" errors = [] warnings = [] lines = [l.strip() for l in content.strip().split("\n") if l.strip() and not l.strip().startswith("#")] if not lines: warnings.append("requirements.txt: No dependencies listed") for line in lines: # Basic format check if " " in line and ";" not in line and "#" not in line: warnings.append(f"requirements.txt: Suspicious line: '{line}'") return {"errors": errors, "warnings": warnings} def _check_html(self, filename: str, content: str) -> dict: """Basic HTML validation.""" errors = [] warnings = [] if " tag or DOCTYPE") # Check for unclosed tags (very basic) for tag in ["html", "head", "body"]: open_count = len(re.findall(f"<{tag}[\\s>]", content, re.IGNORECASE)) close_count = len(re.findall(f"", content, re.IGNORECASE)) if open_count > close_count: warnings.append(f"{filename}: Unclosed <{tag}> tag") return {"errors": errors, "warnings": warnings} def _cross_file_checks(self, files: dict, sdk: str) -> dict: """Perform checks across multiple files.""" errors = [] warnings = [] if sdk == "gradio": if "app.py" not in files: errors.append("Missing app.py (required for Gradio Spaces)") if "requirements.txt" not in files: warnings.append("Missing requirements.txt") if "README.md" not in files: warnings.append("Missing README.md") # Check that requirements includes gradio req = files.get("requirements.txt", "") if "gradio" not in req.lower(): warnings.append("requirements.txt: 'gradio' not listed as dependency") elif sdk == "docker": if "Dockerfile" not in files: errors.append("Missing Dockerfile (required for Docker Spaces)") if "README.md" not in files: warnings.append("Missing README.md") elif sdk == "static": if "index.html" not in files: errors.append("Missing index.html (required for Static Spaces)") return {"errors": errors, "warnings": warnings}