File size: 7,330 Bytes
2c304fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
"""
Validate generated code for common issues.
"""

import ast
import re
from typing import Optional


class CodeChecker:
    """Validate generated Space code for correctness and common issues.

    Every ``_check_*`` helper returns a dict with ``errors`` and ``warnings``
    lists of human-readable strings. Errors make the repo invalid; warnings
    are advisory only.
    """

    # Builtins whose direct call is reported as a potential security risk.
    _RISKY_BUILTINS = ("eval", "exec", "__import__")

    # Assignment of a quoted literal (10+ chars) to a secret-looking name.
    _SECRET_RE = re.compile(
        r'(?:token|key|secret|password)\s*=\s*["\'][^"\']{10,}["\']',
        re.IGNORECASE,
    )

    # A "#" at line start or preceded by whitespace starts a comment in a
    # requirements file; "#egg=..." URL fragments are therefore left alone.
    _REQ_COMMENT_RE = re.compile(r"(^|\s+)#.*$")

    # Optional padding around version operators, commas, extras and
    # parentheses, which is legal inside a requirement specifier.
    _REQ_PADDING_RE = re.compile(
        r"\s*(===|==|~=|!=|<=|>=|<|>|,|\[|\]|\(|\))\s*"
    )

    def check(self, files: dict, sdk: str) -> dict:
        """Validate all files in the repo.

        Args:
            files: Mapping of filename -> file content.
            sdk: Space SDK name; "gradio", "docker" and "static" enable
                SDK-specific cross-file checks, other values enable none.

        Returns:
            dict with:
                valid: bool, True when no errors were collected
                errors: list of error strings (per-file, then cross-file)
                warnings: list of warning strings (per-file, then cross-file)
                file_checks: dict of filename -> {valid, errors, warnings}
        """
        errors = []
        warnings = []
        file_checks = {}

        for filename, content in files.items():
            result = self._check_file(filename, content, sdk)
            file_checks[filename] = result
            errors.extend(result.get("errors", []))
            warnings.extend(result.get("warnings", []))

        cross_issues = self._cross_file_checks(files, sdk)
        errors.extend(cross_issues.get("errors", []))
        warnings.extend(cross_issues.get("warnings", []))

        return {
            "valid": not errors,
            "errors": errors,
            "warnings": warnings,
            "file_checks": file_checks,
        }

    def _check_file(self, filename: str, content: str, sdk: str) -> dict:
        """Check a single file, dispatching on its name or extension.

        Args:
            filename: Name of the file; selects which checker runs.
            content: File content. Falsy or whitespace-only content is
                reported as an empty-file error.
            sdk: Space SDK name (accepted for interface symmetry; the
                per-file checks do not read it).

        Returns:
            dict with ``valid`` (bool), ``errors`` and ``warnings``.
        """
        errors = []
        warnings = []

        if not content or not content.strip():
            errors.append(f"{filename}: File is empty")
            return {"valid": False, "errors": errors, "warnings": warnings}

        result = None
        if filename.endswith(".py"):
            result = self._check_python(filename, content)
        elif filename == "Dockerfile":
            result = self._check_dockerfile(content)
        elif filename == "requirements.txt":
            result = self._check_requirements(content)
        elif filename == "README.md":
            if not self._has_frontmatter(content):
                warnings.append("README.md: Missing YAML frontmatter")
        elif filename.endswith(".html"):
            result = self._check_html(filename, content)

        if result is not None:
            errors.extend(result["errors"])
            warnings.extend(result["warnings"])

        return {"valid": not errors, "errors": errors, "warnings": warnings}

    @staticmethod
    def _has_frontmatter(content: str) -> bool:
        """Return True when *content* opens with a ``---`` delimited block.

        A ``---`` appearing only later in the document (for example a
        markdown horizontal rule) does not count as front matter.
        """
        lines = content.lstrip("\ufeff").strip().splitlines()
        if not lines or lines[0].strip() != "---":
            return False
        return any(line.strip() == "---" for line in lines[1:])

    @classmethod
    def _risky_call_label(cls, func: ast.AST) -> Optional[str]:
        """Return the display name of a risky call target, or None.

        Recognises bare ``eval`` / ``exec`` / ``__import__``, the same names
        accessed through ``builtins.``, and ``os.system``.
        """
        if isinstance(func, ast.Name):
            return func.id if func.id in cls._RISKY_BUILTINS else None
        if isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            owner, attr = func.value.id, func.attr
            if owner == "os" and attr == "system":
                return "os.system"
            if owner == "builtins" and attr in cls._RISKY_BUILTINS:
                return attr
        return None

    def _check_python(self, filename: str, content: str) -> dict:
        """Validate Python code syntax and common patterns.

        A source that fails to parse yields a single error and no further
        checks. Otherwise the parsed tree is inspected, so method calls such
        as ``model.eval()`` and text inside comments or strings are not
        mistaken for risky builtin calls or for imports.

        Returns:
            dict with ``errors`` and ``warnings`` lists.
        """
        errors = []
        warnings = []

        try:
            tree = ast.parse(content)
        except SyntaxError as e:
            errors.append(f"{filename}: Python syntax error at line {e.lineno}: {e.msg}")
            return {"errors": errors, "warnings": warnings}
        except ValueError as e:
            # Some interpreter versions raise ValueError rather than
            # SyntaxError for unparseable input (e.g. NUL bytes).
            errors.append(f"{filename}: Python source could not be parsed: {e}")
            return {"errors": errors, "warnings": warnings}

        has_import = False
        risky_found = set()
        for node in ast.walk(tree):
            if isinstance(node, (ast.Import, ast.ImportFrom)):
                has_import = True
            elif isinstance(node, ast.Call):
                label = self._risky_call_label(node.func)
                if label is not None:
                    risky_found.add(label)

        if not has_import:
            warnings.append(f"{filename}: No imports found")

        # Fixed reporting order, at most one warning per kind of call.
        for label in ("os.system",) + self._RISKY_BUILTINS:
            if label in risky_found:
                warnings.append(
                    f"{filename}: {label}() call found - potential security risk"
                )

        if self._SECRET_RE.search(content):
            warnings.append(f"{filename}: Possible hardcoded secret")

        return {"errors": errors, "warnings": warnings}

    @staticmethod
    def _dockerfile_instructions(content: str) -> set:
        """Return the upper-cased instruction names used in a Dockerfile.

        Comment and blank lines are skipped, and lines that continue a
        previous instruction (after a trailing backslash) are not treated as
        new instructions.
        """
        instructions = set()
        continued = False
        for raw in content.splitlines():
            line = raw.strip()
            if not line or line.startswith("#"):
                continue
            if not continued:
                instructions.add(line.split(None, 1)[0].upper())
            continued = line.endswith("\\")
        return instructions

    def _check_dockerfile(self, content: str) -> dict:
        """Validate Dockerfile content.

        Instructions are recognised by the first word of each instruction
        line, case-insensitively, so a keyword that only appears in a comment
        or argument does not satisfy a check. The port check is a plain
        substring search because the port may legitimately appear in any
        instruction.

        Returns:
            dict with ``errors`` and ``warnings`` lists.
        """
        errors = []
        warnings = []

        instructions = self._dockerfile_instructions(content)

        if "FROM" not in instructions:
            errors.append("Dockerfile: Missing FROM instruction")
        if "EXPOSE" not in instructions:
            warnings.append("Dockerfile: Missing EXPOSE instruction")
        if "7860" not in content:
            warnings.append("Dockerfile: Port 7860 not found (required for HF Spaces)")
        if not instructions & {"CMD", "ENTRYPOINT"}:
            errors.append("Dockerfile: Missing CMD or ENTRYPOINT")

        return {"errors": errors, "warnings": warnings}

    @classmethod
    def _requirement_lines(cls, content: str) -> list:
        """Return the non-blank lines of a requirements file, comments removed."""
        stripped = (cls._REQ_COMMENT_RE.sub("", raw).strip() for raw in content.splitlines())
        return [line for line in stripped if line]

    @classmethod
    def _is_suspicious_requirement(cls, line: str) -> bool:
        """Return True when a requirement line has unexplained whitespace.

        Option lines (``-r``, ``-e``, ``--index-url`` ...), direct references
        and URLs are accepted as-is. Environment markers, per-requirement
        options and padding around version operators are ignored before
        looking for leftover whitespace (e.g. ``pip install numpy``).
        """
        if line.startswith("-"):
            return False
        spec = line.rstrip("\\").strip()  # drop a line-continuation marker
        spec = spec.split(";", 1)[0]  # drop the environment marker
        spec = re.split(r"\s+--", spec, maxsplit=1)[0]  # drop e.g. --hash=...
        if "@" in spec or "://" in spec:
            return False
        compact = cls._REQ_PADDING_RE.sub(r"\1", spec.strip())
        return re.search(r"\s", compact) is not None

    def _check_requirements(self, content: str) -> dict:
        """Validate requirements.txt.

        Warns when no dependency lines are present and for each line that
        still contains whitespace after legal requirement syntax is set aside.

        Returns:
            dict with ``errors`` (always empty) and ``warnings`` lists.
        """
        errors = []
        warnings = []

        lines = self._requirement_lines(content)
        if not lines:
            warnings.append("requirements.txt: No dependencies listed")

        for line in lines:
            if self._is_suspicious_requirement(line):
                warnings.append(f"requirements.txt: Suspicious line: '{line}'")

        return {"errors": errors, "warnings": warnings}

    def _check_html(self, filename: str, content: str) -> dict:
        """Basic HTML validation.

        Warns when neither an ``<html`` tag nor a DOCTYPE is present, and when
        ``html``, ``head`` or ``body`` is opened more often than it is closed.

        Returns:
            dict with ``errors`` (always empty) and ``warnings`` lists.
        """
        errors = []
        warnings = []

        lowered = content.lower()
        if "<html" not in lowered and "<!doctype" not in lowered:
            warnings.append(f"{filename}: Missing <html> tag or DOCTYPE")

        # Very basic balance check; "[\s>]" keeps <head> from matching <header>.
        for tag in ("html", "head", "body"):
            open_count = len(re.findall(f"<{tag}[\\s>]", content, re.IGNORECASE))
            close_count = len(re.findall(f"</{tag}>", content, re.IGNORECASE))
            if open_count > close_count:
                warnings.append(f"{filename}: Unclosed <{tag}> tag")

        return {"errors": errors, "warnings": warnings}

    def _cross_file_checks(self, files: dict, sdk: str) -> dict:
        """Perform checks across multiple files.

        Verifies that the files required by the given SDK are present. For
        Gradio Spaces it also checks that an existing requirements.txt
        mentions gradio on a dependency line; a missing file is reported once
        as missing rather than additionally as lacking gradio.

        Returns:
            dict with ``errors`` and ``warnings`` lists.
        """
        errors = []
        warnings = []

        if sdk == "gradio":
            if "app.py" not in files:
                errors.append("Missing app.py (required for Gradio Spaces)")
            if "requirements.txt" not in files:
                warnings.append("Missing requirements.txt")
            if "README.md" not in files:
                warnings.append("Missing README.md")

            if "requirements.txt" in files:
                # Content may be None/empty; _check_file already reports that.
                req = files["requirements.txt"] or ""
                listed = any(
                    "gradio" in line.lower()
                    for line in self._requirement_lines(req)
                )
                if not listed:
                    warnings.append("requirements.txt: 'gradio' not listed as dependency")

        elif sdk == "docker":
            if "Dockerfile" not in files:
                errors.append("Missing Dockerfile (required for Docker Spaces)")
            if "README.md" not in files:
                warnings.append("Missing README.md")

        elif sdk == "static":
            if "index.html" not in files:
                errors.append("Missing index.html (required for Static Spaces)")

        return {"errors": errors, "warnings": warnings}