# autoapp-builder/app/validators/code_checker.py
# ruslanmv's picture
# feat: complete AutoApp Builder - AI-powered HF Space generator
# 2c304fc verified
"""
Validate generated code for common issues.
"""
import ast
import re
from typing import Optional
class CodeChecker:
    """Validate generated Space code for correctness and common issues.

    Every ``_check_*`` helper returns a dict with ``"errors"`` and
    ``"warnings"`` lists of human-readable strings. Errors make the overall
    result invalid; warnings are advisory only.
    """

    # Builtins whose direct invocation is reported as a security warning.
    _DANGEROUS_BUILTINS = ("eval", "exec", "__import__")

    # Report order for dangerous-call warnings (kept stable for callers that
    # compare warning lists).
    _DANGEROUS_REPORT_ORDER = ("os.system", "eval", "exec", "__import__")

    # Assignment of a string literal of 10+ chars to a secret-looking name.
    _SECRET_PATTERNS = [
        (
            r'(?:token|key|secret|password)\s*=\s*["\'][^"\']{10,}["\']',
            "Possible hardcoded secret",
        ),
    ]

    # Whitespace around version operators, commas and "@" is legal in a
    # requirement specifier, so it is collapsed before looking for stray spaces.
    _REQ_OPERATOR_SPACING = re.compile(r"\s*(===|==|~=|!=|<=|>=|<|>|,|@)\s*")

    # First word of every line that starts with a letter; comment lines start
    # with "#" and therefore never match.
    _DOCKER_INSTRUCTION = re.compile(r"^[ \t]*([A-Za-z]+)", re.MULTILINE)

    def check(self, files: dict, sdk: str) -> dict:
        """Validate all files in the repo.

        Args:
            files: Mapping of filename -> file content.
            sdk: Space SDK name; ``"gradio"``, ``"docker"`` and ``"static"``
                trigger SDK-specific cross-file checks.

        Returns:
            dict with:
                valid: bool, True when no errors were collected
                errors: list of error strings
                warnings: list of warning strings
                file_checks: dict of filename -> {valid, errors, warnings}
        """
        errors = []
        warnings = []
        file_checks = {}

        for filename, content in files.items():
            result = self._check_file(filename, content, sdk)
            file_checks[filename] = result
            errors.extend(result.get("errors", []))
            warnings.extend(result.get("warnings", []))

        # Cross-file checks
        cross_issues = self._cross_file_checks(files, sdk)
        errors.extend(cross_issues.get("errors", []))
        warnings.extend(cross_issues.get("warnings", []))

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "file_checks": file_checks,
        }

    def _check_file(self, filename: str, content: str, sdk: str) -> dict:
        """Check a single file, dispatching on its name or extension.

        Args:
            filename: Name of the file as keyed in the repo mapping.
            content: Text content of the file.
            sdk: Space SDK name (accepted for interface symmetry; the
                per-file checks do not currently depend on it).

        Returns:
            dict with ``valid``, ``errors`` and ``warnings``.
        """
        errors = []
        warnings = []

        if not content or not content.strip():
            errors.append(f"{filename}: File is empty")
            return {"valid": False, "errors": errors, "warnings": warnings}

        result = None
        if filename.endswith(".py"):
            result = self._check_python(filename, content)
        elif filename == "Dockerfile":
            result = self._check_dockerfile(content)
        elif filename == "requirements.txt":
            result = self._check_requirements(content)
        elif filename == "README.md":
            # A bare "---" somewhere in the body is just a horizontal rule;
            # the frontmatter block has to open the file.
            if not self._has_frontmatter(content):
                warnings.append("README.md: Missing YAML frontmatter")
        elif filename.endswith(".html"):
            result = self._check_html(filename, content)

        if result is not None:
            errors.extend(result["errors"])
            warnings.extend(result["warnings"])

        return {"valid": not errors, "errors": errors, "warnings": warnings}

    @staticmethod
    def _has_frontmatter(content: str) -> bool:
        """Return True when *content* opens with a ``---`` delimited block."""
        lines = content.lstrip("\ufeff").strip().splitlines()
        if not lines or lines[0].strip() != "---":
            return False
        # The block must also be closed by a second "---" line.
        return any(line.strip() == "---" for line in lines[1:])

    def _check_python(self, filename: str, content: str) -> dict:
        """Validate Python code syntax and common patterns.

        A syntax error is reported as an error and stops further checks.
        Missing imports, dangerous calls and hardcoded-looking secrets are
        reported as warnings.
        """
        errors = []
        warnings = []

        # Syntax check
        try:
            tree = ast.parse(content)
        except SyntaxError as e:
            errors.append(
                f"{filename}: Python syntax error at line {e.lineno}: {e.msg}"
            )
            return {"errors": errors, "warnings": warnings}
        except ValueError as e:
            # Older Python versions raise ValueError (not SyntaxError) for
            # source containing NUL bytes; report it instead of crashing.
            errors.append(f"{filename}: Python source could not be parsed: {e}")
            return {"errors": errors, "warnings": warnings}

        # Look for real import statements rather than the substring "import ",
        # which also appears in comments and strings.
        has_import = any(
            isinstance(node, (ast.Import, ast.ImportFrom))
            for node in ast.walk(tree)
        )
        if not has_import:
            warnings.append(f"{filename}: No imports found")

        # Dangerous calls are detected on the AST so that method calls such as
        # model.eval() or ast.literal_eval() are not misreported.
        found = self._dangerous_calls(tree)
        for name in self._DANGEROUS_REPORT_ORDER:
            if name in found:
                warnings.append(
                    f"{filename}: {name}() call found - potential security risk"
                )

        # Check for hardcoded tokens/secrets
        for pattern, msg in self._SECRET_PATTERNS:
            if re.search(pattern, content, re.IGNORECASE):
                warnings.append(f"{filename}: {msg}")

        return {"errors": errors, "warnings": warnings}

    @classmethod
    def _dangerous_calls(cls, tree: ast.AST) -> set:
        """Collect names of dangerous callables invoked anywhere in *tree*.

        Recognised forms are bare ``eval(...)``, ``exec(...)``,
        ``__import__(...)``, their ``builtins.<name>(...)`` spellings, and
        ``os.system(...)``.
        """
        found = set()
        for node in ast.walk(tree):
            if not isinstance(node, ast.Call):
                continue
            func = node.func
            if isinstance(func, ast.Name):
                if func.id in cls._DANGEROUS_BUILTINS:
                    found.add(func.id)
            elif isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
                owner = func.value.id
                if owner == "os" and func.attr == "system":
                    found.add("os.system")
                elif owner == "builtins" and func.attr in cls._DANGEROUS_BUILTINS:
                    found.add(func.attr)
        return found

    def _check_dockerfile(self, content: str) -> dict:
        """Validate Dockerfile content.

        Instruction keywords are matched at the start of a line and
        case-insensitively, so a lowercase ``from`` is accepted and a keyword
        that only appears inside a comment is not.
        """
        errors = []
        warnings = []

        instructions = {
            word.upper() for word in self._DOCKER_INSTRUCTION.findall(content)
        }

        if "FROM" not in instructions:
            errors.append("Dockerfile: Missing FROM instruction")
        if "EXPOSE" not in instructions:
            warnings.append("Dockerfile: Missing EXPOSE instruction")
        if "7860" not in content:
            warnings.append("Dockerfile: Port 7860 not found (required for HF Spaces)")
        if "CMD" not in instructions and "ENTRYPOINT" not in instructions:
            errors.append("Dockerfile: Missing CMD or ENTRYPOINT")

        return {"errors": errors, "warnings": warnings}

    def _check_requirements(self, content: str) -> dict:
        """Validate requirements.txt.

        Warns when no dependency is listed and when a line still contains a
        space after legal spacing (around version operators, commas and
        ``@``) has been collapsed. Pip option lines (``-r``, ``-e``,
        ``--extra-index-url`` ...) and lines carrying environment markers or
        inline comments are not inspected.
        """
        errors = []
        warnings = []

        lines = [
            stripped
            for stripped in (raw.strip() for raw in content.splitlines())
            if stripped and not stripped.startswith("#")
        ]
        if not lines:
            warnings.append("requirements.txt: No dependencies listed")

        for line in lines:
            if line.startswith("-"):
                # Option lines legitimately contain "<flag> <value>".
                continue
            if ";" in line or "#" in line:
                continue
            compact = self._REQ_OPERATOR_SPACING.sub(r"\1", line)
            if " " in compact:
                warnings.append(f"requirements.txt: Suspicious line: '{line}'")

        return {"errors": errors, "warnings": warnings}

    def _check_html(self, filename: str, content: str) -> dict:
        """Basic HTML validation.

        Warns when neither an ``<html`` tag nor a doctype is present, and when
        ``html``, ``head`` or ``body`` is opened more often than closed.
        """
        errors = []
        warnings = []

        lowered = content.lower()
        if "<html" not in lowered and "<!doctype" not in lowered:
            warnings.append(f"{filename}: Missing <html> tag or DOCTYPE")

        # Check for unclosed tags (very basic)
        for tag in ["html", "head", "body"]:
            open_count = len(re.findall(f"<{tag}[\\s>]", content, re.IGNORECASE))
            close_count = len(re.findall(f"</{tag}>", content, re.IGNORECASE))
            if open_count > close_count:
                warnings.append(f"{filename}: Unclosed <{tag}> tag")

        return {"errors": errors, "warnings": warnings}

    def _cross_file_checks(self, files: dict, sdk: str) -> dict:
        """Perform checks across multiple files.

        Verifies that the files required by the given SDK are present in
        *files*; an unrecognised SDK yields no issues.
        """
        errors = []
        warnings = []

        if sdk == "gradio":
            if "app.py" not in files:
                errors.append("Missing app.py (required for Gradio Spaces)")
            if "requirements.txt" not in files:
                warnings.append("Missing requirements.txt")
            if "README.md" not in files:
                warnings.append("Missing README.md")
            # Check that requirements includes gradio
            req = files.get("requirements.txt", "")
            if "gradio" not in req.lower():
                warnings.append("requirements.txt: 'gradio' not listed as dependency")
        elif sdk == "docker":
            if "Dockerfile" not in files:
                errors.append("Missing Dockerfile (required for Docker Spaces)")
            if "README.md" not in files:
                warnings.append("Missing README.md")
        elif sdk == "static":
            if "index.html" not in files:
                errors.append("Missing index.html (required for Static Spaces)")

        return {"errors": errors, "warnings": warnings}