"""
Validate generated code for common issues.
"""
import ast
import re
from typing import Optional
class CodeChecker:
    """Validate generated Space code for correctness and common issues.

    The checker keeps no state between calls: every method works only on its
    arguments, so one instance can be reused for any number of repositories.
    """

    # Risky calls and their warning text, in the order they are reported.
    _DANGEROUS_CALLS = (
        ("os.system", "os.system() call found - potential security risk"),
        ("eval", "eval() call found - potential security risk"),
        ("exec", "exec() call found - potential security risk"),
        ("__import__", "__import__() call found - potential security risk"),
    )

    # A quoted literal of 10+ characters assigned to a secret-looking name.
    _SECRET_PATTERNS = (
        (
            re.compile(
                r'(?:token|key|secret|password)\s*=\s*["\'][^"\']{10,}["\']',
                re.IGNORECASE,
            ),
            "Possible hardcoded secret",
        ),
    )

    # Version operators, list commas and the direct-reference "@" may be
    # surrounded by spaces in a valid requirement specifier.
    _REQ_SEPARATOR_RE = re.compile(r"\s*(===|==|~=|!=|<=|>=|<|>|,|@)\s*")

    # An inline comment in a requirements file needs whitespace before "#";
    # a bare "#" can be part of a URL fragment such as "#egg=name".
    _REQ_INLINE_COMMENT_RE = re.compile(r"\s+#")

    def check(self, files: dict, sdk: str) -> dict:
        """Validate all files in the repo.

        Args:
            files: Mapping of filename -> file content (text).
            sdk: Space SDK name; "gradio", "docker" and "static" trigger
                SDK-specific cross-file checks, other values trigger none.

        Returns:
            dict with:
                valid: bool, True when no errors were collected
                errors: list of error strings
                warnings: list of warning strings
                file_checks: dict of filename -> {valid, errors, warnings}
        """
        errors = []
        warnings = []
        file_checks = {}

        for filename, content in files.items():
            result = self._check_file(filename, content, sdk)
            file_checks[filename] = result
            errors.extend(result.get("errors", []))
            warnings.extend(result.get("warnings", []))

        # Cross-file checks
        cross_issues = self._cross_file_checks(files, sdk)
        errors.extend(cross_issues.get("errors", []))
        warnings.extend(cross_issues.get("warnings", []))

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "file_checks": file_checks,
        }

    def _check_file(self, filename: str, content: str, sdk: str) -> dict:
        """Check a single file, dispatching on its name.

        Args:
            filename: Name of the file as keyed in the repo mapping.
            content: Text content of the file.
            sdk: Space SDK name (currently not used by per-file checks).

        Returns:
            dict with keys "valid" (bool), "errors" and "warnings" (lists).
        """
        errors = []
        warnings = []

        if not content or not content.strip():
            errors.append(f"{filename}: File is empty")
            return {"valid": False, "errors": errors, "warnings": warnings}

        result = None
        if filename.endswith(".py"):
            result = self._check_python(filename, content)
        elif filename == "Dockerfile":
            result = self._check_dockerfile(content)
        elif filename == "requirements.txt":
            result = self._check_requirements(content)
        elif filename == "README.md":
            # Frontmatter only counts when it opens the document; a "---"
            # horizontal rule further down is not a YAML block.
            if not content.lstrip("\ufeff").lstrip().startswith("---"):
                warnings.append("README.md: Missing YAML frontmatter")
        elif filename.endswith(".html"):
            result = self._check_html(filename, content)

        if result is not None:
            errors.extend(result["errors"])
            warnings.extend(result["warnings"])

        return {"valid": not errors, "errors": errors, "warnings": warnings}

    @staticmethod
    def _call_name(func: ast.expr) -> Optional[str]:
        """Return a comparable name for the callee of an ``ast.Call``.

        ``f(...)`` yields ``"f"``, ``mod.f(...)`` yields ``"mod.f"`` and
        ``builtins.f(...)`` yields ``"f"``. Any other callee shape (nested
        attributes, subscripts, call results) yields ``None``.
        """
        if isinstance(func, ast.Name):
            return func.id
        if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            if func.value.id == "builtins":
                return func.attr
            return f"{func.value.id}.{func.attr}"
        return None

    def _check_python(self, filename: str, content: str) -> dict:
        """Validate Python code syntax and common patterns.

        Risky calls are detected on the parsed syntax tree rather than by
        text search, so method calls such as ``model.eval()``, names such as
        ``literal_eval`` and text inside comments or strings are not flagged.

        Args:
            filename: Name used as the prefix of every reported message.
            content: Python source text.

        Returns:
            dict with keys "errors" and "warnings" (lists of strings). A
            source that cannot be parsed yields one error and no warnings.
        """
        errors = []
        warnings = []

        # Syntax check
        try:
            tree = ast.parse(content)
        except SyntaxError as e:
            errors.append(f"{filename}: Python syntax error at line {e.lineno}: {e.msg}")
            return {"errors": errors, "warnings": warnings}
        except ValueError as e:
            # Some interpreter versions raise ValueError instead of
            # SyntaxError, e.g. for source containing NUL bytes.
            errors.append(f"{filename}: Python source could not be parsed: {e}")
            return {"errors": errors, "warnings": warnings}

        has_import = False
        called = set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                has_import = True
            elif isinstance(node, ast.Call):
                name = self._call_name(node.func)
                if name is not None:
                    called.add(name)

        # Check for common issues
        if not has_import:
            warnings.append(f"{filename}: No imports found")

        # Check for dangerous patterns (one warning per kind, fixed order)
        for call, msg in self._DANGEROUS_CALLS:
            if call in called:
                warnings.append(f"{filename}: {msg}")

        # Check for hardcoded tokens/secrets
        for pattern, msg in self._SECRET_PATTERNS:
            if pattern.search(content):
                warnings.append(f"{filename}: {msg}")

        return {"errors": errors, "warnings": warnings}

    @staticmethod
    def _dockerfile_instructions(content: str) -> set:
        """Return the upper-cased instruction keywords used in a Dockerfile.

        Comment lines and blank lines are ignored, and lines that continue a
        previous instruction (previous line ended with a backslash) are not
        treated as instructions. Here-document bodies are not recognised and
        are read as ordinary lines.
        """
        instructions = set()
        continued = False
        for raw in content.splitlines():
            line = raw.strip()
            if not line or line.startswith("#"):
                # Skipped lines do not end a pending continuation.
                continue
            if not continued:
                instructions.add(line.split(None, 1)[0].upper())
            continued = line.endswith("\\")
        return instructions

    def _check_dockerfile(self, content: str) -> dict:
        """Validate Dockerfile content.

        Instruction keywords are matched case-insensitively and only where
        they start an instruction, so a lowercase ``from`` is accepted and a
        keyword that appears only in a comment is not.

        Args:
            content: Dockerfile text.

        Returns:
            dict with keys "errors" and "warnings" (lists of strings).
        """
        errors = []
        warnings = []
        instructions = self._dockerfile_instructions(content)

        if "FROM" not in instructions:
            errors.append("Dockerfile: Missing FROM instruction")
        if "EXPOSE" not in instructions:
            warnings.append("Dockerfile: Missing EXPOSE instruction")
        # The port may be given via EXPOSE, ENV or the start command, so any
        # occurrence in the file is accepted.
        if "7860" not in content:
            warnings.append("Dockerfile: Port 7860 not found (required for HF Spaces)")
        if "CMD" not in instructions and "ENTRYPOINT" not in instructions:
            errors.append("Dockerfile: Missing CMD or ENTRYPOINT")

        return {"errors": errors, "warnings": warnings}

    def _check_requirements(self, content: str) -> dict:
        """Validate requirements.txt.

        A line is reported as suspicious when its requirement part still
        contains whitespace after removing an inline comment, an environment
        marker, and spaces around version operators, commas or ``@``. Option
        lines (starting with ``-``, e.g. ``--extra-index-url``) are skipped.

        Args:
            content: requirements.txt text.

        Returns:
            dict with keys "errors" (always empty) and "warnings".
        """
        errors = []
        warnings = []

        entries = [
            stripped
            for stripped in (raw.strip() for raw in content.splitlines())
            if stripped and not stripped.startswith("#")
        ]
        if not entries:
            warnings.append("requirements.txt: No dependencies listed")

        for entry in entries:
            if entry.startswith("-"):
                # pip option line; its arguments are separated by spaces.
                continue
            spec = self._REQ_INLINE_COMMENT_RE.split(entry, maxsplit=1)[0]
            spec = spec.split(";", 1)[0].strip()
            spec = self._REQ_SEPARATOR_RE.sub(r"\1", spec)
            if re.search(r"\s", spec):
                warnings.append(f"requirements.txt: Suspicious line: '{entry}'")

        return {"errors": errors, "warnings": warnings}

    def _check_html(self, filename: str, content: str) -> dict:
        """Basic HTML validation.

        Args:
            filename: Name used as the prefix of every reported message.
            content: HTML text.

        Returns:
            dict with keys "errors" (always empty) and "warnings".
        """
        errors = []
        warnings = []

        lowered = content.lower()
        if "<html" not in lowered and "<!doctype" not in lowered:
            warnings.append(f"{filename}: Missing <html> tag or DOCTYPE")

        # Check for unclosed tags (very basic): compare open/close counts.
        for tag in ("html", "head", "body"):
            open_count = len(re.findall(rf"<{tag}[\s>]", content, re.IGNORECASE))
            # Whitespace is permitted before ">" in an end tag.
            close_count = len(re.findall(rf"</{tag}\s*>", content, re.IGNORECASE))
            if open_count > close_count:
                warnings.append(f"{filename}: Unclosed <{tag}> tag")

        return {"errors": errors, "warnings": warnings}

    def _cross_file_checks(self, files: dict, sdk: str) -> dict:
        """Perform checks across multiple files.

        Args:
            files: Mapping of filename -> file content (text).
            sdk: Space SDK name; only "gradio", "docker" and "static" have
                checks, any other value returns empty lists.

        Returns:
            dict with keys "errors" and "warnings" (lists of strings).
        """
        errors = []
        warnings = []

        if sdk == "gradio":
            if "app.py" not in files:
                errors.append("Missing app.py (required for Gradio Spaces)")
            if "requirements.txt" not in files:
                warnings.append("Missing requirements.txt")
            if "README.md" not in files:
                warnings.append("Missing README.md")
            # Check that requirements includes gradio. Only meaningful when
            # the file exists; its absence is already reported above.
            if "requirements.txt" in files:
                req = files["requirements.txt"] or ""
                if "gradio" not in req.lower():
                    warnings.append("requirements.txt: 'gradio' not listed as dependency")
        elif sdk == "docker":
            if "Dockerfile" not in files:
                errors.append("Missing Dockerfile (required for Docker Spaces)")
            if "README.md" not in files:
                warnings.append("Missing README.md")
        elif sdk == "static":
            if "index.html" not in files:
                errors.append("Missing index.html (required for Static Spaces)")

        return {"errors": errors, "warnings": warnings}
|