Spaces:
Sleeping
Sleeping
| """ | |
| Validate generated code for common issues. | |
| """ | |
| import ast | |
| import re | |
| from typing import Optional | |
class CodeChecker:
    """Validate generated Space code for correctness and common issues.

    Every ``_check_*`` helper returns a dict with two lists of
    human-readable strings: ``errors`` (which make the repo invalid) and
    ``warnings`` (which do not).
    """

    # Files that are legitimately allowed to be empty.
    _EMPTY_OK = frozenset({"__init__.py", ".gitkeep"})

    # (call name, warning text), in the order the warnings are reported.
    _DANGEROUS_CALLS = (
        ("os.system", "os.system() call found - potential security risk"),
        ("eval", "eval() call found - potential security risk"),
        ("exec", "exec() call found - potential security risk"),
        ("__import__", "__import__() call found - potential security risk"),
    )

    # (compiled pattern, warning text) for hardcoded-credential heuristics.
    _SECRET_PATTERNS = (
        (
            re.compile(
                r'(?:token|key|secret|password)\s*=\s*["\'][^"\']{10,}["\']',
                re.IGNORECASE,
            ),
            "Possible hardcoded secret",
        ),
    )

    # Whitespace around these requirement operators is legal and is
    # collapsed before looking for stray spaces.
    _REQ_OPERATOR_RE = re.compile(r"\s*(===|==|>=|<=|~=|!=|<|>|,|@)\s*")

    # "gradio" as a standalone name: matches "gradio", "gradio[oauth]",
    # "gradio==4.0" and ".../gradio.git" but not "gradio_client",
    # "gradio-client" or "gradio-app".
    _GRADIO_REQ_RE = re.compile(r"(?<![\w-])gradio(?![\w-])", re.IGNORECASE)

    def check(self, files: dict, sdk: str) -> dict:
        """
        Validate all files in the repo.

        Args:
            files: mapping of filename -> file content.
            sdk: Space SDK name ("gradio", "docker" or "static").

        Returns dict with:
            valid: bool
            errors: list of error strings
            warnings: list of warning strings
            file_checks: dict of filename -> {valid, errors, warnings}
        """
        errors = []
        warnings = []
        file_checks = {}

        for filename, content in files.items():
            result = self._check_file(filename, content, sdk)
            file_checks[filename] = result
            errors.extend(result.get("errors", []))
            warnings.extend(result.get("warnings", []))

        # Cross-file checks
        cross_issues = self._cross_file_checks(files, sdk)
        errors.extend(cross_issues.get("errors", []))
        warnings.extend(cross_issues.get("warnings", []))

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "file_checks": file_checks,
        }

    def _check_file(self, filename: str, content: str, sdk: str) -> dict:
        """Check a single file, dispatching on its name/extension.

        Returns a dict with ``valid``, ``errors`` and ``warnings``.
        """
        errors = []
        warnings = []

        if not content or not content.strip():
            # Package markers and placeholder files are empty by design.
            basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
            if basename not in self._EMPTY_OK:
                errors.append(f"{filename}: File is empty")
            return {"valid": not errors, "errors": errors, "warnings": warnings}

        result = None
        if filename.endswith(".py"):
            result = self._check_python(filename, content)
        elif filename == "Dockerfile":
            result = self._check_dockerfile(content)
        elif filename == "requirements.txt":
            result = self._check_requirements(content)
        elif filename == "README.md":
            # Frontmatter only counts when it opens the file; a "---"
            # horizontal rule further down is not frontmatter.
            if not content.lstrip("\ufeff").lstrip().startswith("---"):
                warnings.append("README.md: Missing YAML frontmatter")
        elif filename.endswith(".html"):
            result = self._check_html(filename, content)

        if result is not None:
            errors.extend(result["errors"])
            warnings.extend(result["warnings"])

        return {"valid": not errors, "errors": errors, "warnings": warnings}

    @staticmethod
    def _dangerous_call_name(node: ast.Call) -> Optional[str]:
        """Return the flagged name for a call node, or None if it is benign.

        Only bare ``eval``/``exec``/``__import__`` calls and ``os.system``
        are flagged, so method calls such as ``model.eval()`` are ignored.
        """
        func = node.func
        if isinstance(func, ast.Name) and func.id in ("eval", "exec", "__import__"):
            return func.id
        if (
            isinstance(func, ast.Attribute)
            and func.attr == "system"
            and isinstance(func.value, ast.Name)
            and func.value.id == "os"
        ):
            return "os.system"
        return None

    def _check_python(self, filename: str, content: str) -> dict:
        """Validate Python code syntax and common patterns.

        A syntax error is reported as an error and stops further checks.
        Missing imports, dangerous calls and possible hardcoded secrets are
        reported as warnings.
        """
        errors = []
        warnings = []

        # Syntax check
        try:
            tree = ast.parse(content)
        except SyntaxError as exc:
            errors.append(
                f"{filename}: Python syntax error at line {exc.lineno}: {exc.msg}"
            )
            return {"errors": errors, "warnings": warnings}
        except ValueError as exc:
            # Older interpreters raise ValueError (not SyntaxError) for
            # source containing NUL bytes.
            errors.append(f"{filename}: Python syntax error: {exc}")
            return {"errors": errors, "warnings": warnings}

        nodes = list(ast.walk(tree))

        # Check for common issues
        if not any(isinstance(n, (ast.Import, ast.ImportFrom)) for n in nodes):
            warnings.append(f"{filename}: No imports found")

        # Check for dangerous patterns (on the AST, so comments, strings
        # and look-alike method names do not trigger false positives).
        called = {
            self._dangerous_call_name(n) for n in nodes if isinstance(n, ast.Call)
        }
        for name, msg in self._DANGEROUS_CALLS:
            if name in called:
                warnings.append(f"{filename}: {msg}")

        # Check for hardcoded tokens/secrets
        for pattern, msg in self._SECRET_PATTERNS:
            if pattern.search(content):
                warnings.append(f"{filename}: {msg}")

        return {"errors": errors, "warnings": warnings}

    @staticmethod
    def _has_instruction(content: str, name: str) -> bool:
        """Return True if a Dockerfile line starts with instruction *name*.

        Matching is case-insensitive and anchored to the start of a line,
        so mentions inside comments or arguments are not counted.
        """
        pattern = rf"^[ \t]*{name}(?![\w-])"
        return re.search(pattern, content, re.IGNORECASE | re.MULTILINE) is not None

    def _check_dockerfile(self, content: str) -> dict:
        """Validate Dockerfile content.

        Missing FROM or CMD/ENTRYPOINT is an error; missing EXPOSE or no
        mention of port 7860 is a warning.
        """
        errors = []
        warnings = []

        if not self._has_instruction(content, "FROM"):
            errors.append("Dockerfile: Missing FROM instruction")
        if not self._has_instruction(content, "EXPOSE"):
            warnings.append("Dockerfile: Missing EXPOSE instruction")
        if "7860" not in content:
            warnings.append("Dockerfile: Port 7860 not found (required for HF Spaces)")
        if not (
            self._has_instruction(content, "CMD")
            or self._has_instruction(content, "ENTRYPOINT")
        ):
            errors.append("Dockerfile: Missing CMD or ENTRYPOINT")

        return {"errors": errors, "warnings": warnings}

    def _check_requirements(self, content: str) -> dict:
        """Validate requirements.txt.

        Warns when no dependencies are listed and when a line still
        contains a space after legal operator spacing is collapsed.
        """
        errors = []
        warnings = []

        entries = [
            stripped
            for stripped in (raw.strip() for raw in content.splitlines())
            if stripped and not stripped.startswith("#")
        ]
        if not entries:
            warnings.append("requirements.txt: No dependencies listed")

        for entry in entries:
            # pip option lines (-r, -e, --index-url ...) legitimately
            # contain spaces; lines with markers or inline comments are
            # not judged.
            if entry.startswith("-") or ";" in entry or "#" in entry:
                continue
            # "pkg >= 1.0, < 2" and "pkg @ url" are valid specifiers.
            compact = self._REQ_OPERATOR_RE.sub(r"\1", entry)
            if " " in compact:
                warnings.append(f"requirements.txt: Suspicious line: '{entry}'")

        return {"errors": errors, "warnings": warnings}

    def _check_html(self, filename: str, content: str) -> dict:
        """Basic HTML validation.

        Warns when neither an <html> tag nor a DOCTYPE is present and when
        html/head/body have more opening than closing tags.
        """
        errors = []
        warnings = []

        lowered = content.lower()
        if "<html" not in lowered and "<!doctype" not in lowered:
            warnings.append(f"{filename}: Missing <html> tag or DOCTYPE")

        # Check for unclosed tags (very basic)
        for tag in ("html", "head", "body"):
            open_count = len(re.findall(rf"<{tag}[\s>]", content, re.IGNORECASE))
            # Whitespace before ">" is allowed in a closing tag.
            close_count = len(re.findall(rf"</{tag}\s*>", content, re.IGNORECASE))
            if open_count > close_count:
                warnings.append(f"{filename}: Unclosed <{tag}> tag")

        return {"errors": errors, "warnings": warnings}

    def _declares_gradio(self, requirements: str) -> bool:
        """Return True if a non-comment requirements line names gradio."""
        for raw in requirements.splitlines():
            # Drop whole-line and trailing comments before matching.
            spec = re.split(r"(?:^|\s)#", raw, maxsplit=1)[0]
            if self._GRADIO_REQ_RE.search(spec):
                return True
        return False

    def _cross_file_checks(self, files: dict, sdk: str) -> dict:
        """Perform checks across multiple files.

        Verifies that the files required by *sdk* are present and, for
        Gradio Spaces, that requirements.txt declares gradio.
        """
        errors = []
        warnings = []

        if sdk == "gradio":
            has_requirements = "requirements.txt" in files
            if "app.py" not in files:
                errors.append("Missing app.py (required for Gradio Spaces)")
            if not has_requirements:
                warnings.append("Missing requirements.txt")
            if "README.md" not in files:
                warnings.append("Missing README.md")
            # Check that requirements includes gradio. Skipped when the
            # file is absent, since that is already reported above.
            if has_requirements:
                # Content may be None for an empty file entry.
                requirements = files.get("requirements.txt") or ""
                if not self._declares_gradio(requirements):
                    warnings.append(
                        "requirements.txt: 'gradio' not listed as dependency"
                    )
        elif sdk == "docker":
            if "Dockerfile" not in files:
                errors.append("Missing Dockerfile (required for Docker Spaces)")
            if "README.md" not in files:
                warnings.append("Missing README.md")
        elif sdk == "static":
            if "index.html" not in files:
                errors.append("Missing index.html (required for Static Spaces)")

        return {"errors": errors, "warnings": warnings}