File size: 5,254 Bytes
a750766
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python3
"""
Validate README.md for Hugging Face Space compatibility.

This script checks that the README.md file has:
- Valid YAML frontmatter
- Required fields for HF Spaces (sdk, app_port for docker)
- Correct format and values
"""

import sys
import re
from pathlib import Path
from typing import Dict, List, Tuple

# Required fields for Docker SDK.
# Each value is either a list of accepted values or a predicate that returns
# True when the parsed value is acceptable.
REQUIRED_DOCKER_FIELDS = {
    "sdk": ["docker"],
    # bool is a subclass of int, so it is excluded explicitly: a frontmatter
    # line such as `app_port: true` must not count as a valid port number.
    "app_port": lambda x: (
        isinstance(x, int) and not isinstance(x, bool) and 1 <= x <= 65535
    ),
}

# Optional but recommended fields
RECOMMENDED_FIELDS = ["title", "emoji", "colorFrom", "colorTo"]

# Valid color values (accepted for both colorFrom and colorTo)
VALID_COLORS = {"red", "yellow", "green", "blue", "indigo", "purple", "pink", "gray"}

# Valid SDK values
VALID_SDKS = {"gradio", "docker", "static"}

# Valid hardware flavors (from HF docs), checked against 'suggested_hardware'
VALID_HARDWARE = {
    "cpu-basic", "cpu-upgrade",
    "t4-small", "t4-medium", "l4x1", "l4x4",
    "a10g-small", "a10g-large", "a10g-largex2", "a10g-largex4", "a100-large",
    "v5e-1x1", "v5e-2x2", "v5e-2x4",
}


# Opening '---' line, lazily-matched body, then a closing '---' line that may
# be followed by a newline OR by the end of the file.
_FRONTMATTER_RE = re.compile(r'^---\s*\n(.*?)\n---\s*(?:\n|\Z)', re.DOTALL)


def _strip_inline_comment(raw: str) -> str:
    """Remove a trailing YAML comment (whitespace followed by '#') from a scalar."""
    if not raw:
        return raw
    if raw[0] in '"\'':
        # Quoted scalar: a '#' inside the quotes is data. Only text after the
        # closing quote can be a comment; anything else is left untouched.
        closing = raw.find(raw[0], 1)
        if closing != -1 and re.fullmatch(r'\s+#.*', raw[closing + 1:]):
            return raw[:closing + 1]
        return raw
    if raw.startswith('#'):
        # `key: # comment` carries no value at all.
        return ''
    return re.sub(r'\s+#.*$', '', raw)


def extract_yaml_frontmatter(content: str) -> Tuple[Dict, int, int]:
    """Extract YAML frontmatter from README.md content.

    Only flat ``key: value`` lines are interpreted; this is a deliberately
    small parser, not a full YAML implementation. Blank lines and full-line
    ``#`` comments are skipped, and inline comments are removed from values.
    Surrounding quotes are stripped from values. ``true``/``false`` (any case)
    become booleans and strings of ASCII digits become ints; this conversion
    also applies to values that were quoted.

    Args:
        content: Full text of the README file.

    Returns:
        A tuple ``(data, start, end)``. ``data`` maps each key to its parsed
        value, ``start`` is 0 and ``end`` is the offset just past the closing
        ``---`` line. When ``content`` does not begin with a frontmatter
        block the result is ``({}, -1, -1)``.
    """
    match = _FRONTMATTER_RE.match(content)
    if match is None:
        return {}, -1, -1

    yaml_dict = {}
    for line in match.group(1).split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue

        key, sep, rest = line.partition(':')
        if not sep:
            # Not a `key: value` line (e.g. a bare list item); ignore it.
            continue

        value = _strip_inline_comment(rest.strip()).strip('"\'')

        lowered = value.lower()
        if lowered == 'true':
            value = True
        elif lowered == 'false':
            value = False
        # str.isdigit() alone also accepts characters such as '²' that int()
        # rejects with ValueError, so restrict the conversion to ASCII digits.
        elif value.isascii() and value.isdigit():
            value = int(value)

        yaml_dict[key.strip()] = value

    return yaml_dict, 0, match.end()


def validate_readme(readme_path: Path) -> List[str]:
    """Validate a README.md file and return the list of problems found.

    Checks performed on the YAML frontmatter:
    - 'sdk' is present and one of VALID_SDKS
    - for the docker SDK, 'app_port' is an integer between 1 and 65535
    - 'colorFrom' / 'colorTo', when present, are in VALID_COLORS
    - 'suggested_hardware', when present, is in VALID_HARDWARE
    - the deprecated 'hardware' field is not used
    - 'emoji' and 'title' are present (reported with a "Warning:" prefix)
    - 'pinned', when present, is a boolean

    Note that "Warning:" entries are returned in the same list as hard
    errors, so a caller that treats any entry as a failure (as this script's
    main() does) will also fail on warnings.

    Args:
        readme_path: Location of the README.md file to check.

    Returns:
        Human-readable messages, one per problem; empty when the file passes.
    """
    # is_file() rather than exists(): a directory at this path is reported as
    # a missing README instead of crashing in read_text().
    if not readme_path.is_file():
        return [f"README.md not found at {readme_path}"]

    try:
        content = readme_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        return [f"Could not read {readme_path}: {exc}"]

    yaml_data, start, _end = extract_yaml_frontmatter(content)
    if start == -1:
        return ["README.md must start with YAML frontmatter (--- ... ---)"]

    errors: List[str] = []

    # Check SDK. Allowed values are sorted so the message is deterministic.
    sdk = yaml_data.get("sdk")
    if not sdk:
        errors.append("Missing required field: 'sdk'")
    elif sdk not in VALID_SDKS:
        errors.append(f"Invalid 'sdk' value: {sdk}. Must be one of: {', '.join(sorted(VALID_SDKS))}")

    # For Docker SDK, check app_port
    if sdk == "docker":
        app_port = yaml_data.get("app_port")
        if app_port is None:
            errors.append("Missing required field for Docker SDK: 'app_port'")
        else:
            # bool is an int subclass; `app_port: true` must not pass as port 1.
            is_valid_port = (
                isinstance(app_port, int)
                and not isinstance(app_port, bool)
                and 1 <= app_port <= 65535
            )
            if not is_valid_port:
                errors.append(f"Invalid 'app_port' value: {app_port}. Must be an integer between 1 and 65535")

    # Check colors if present
    allowed_colors = ', '.join(sorted(VALID_COLORS))
    for field in ("colorFrom", "colorTo"):
        color = yaml_data.get(field)
        if color and color not in VALID_COLORS:
            errors.append(f"Invalid '{field}' value: {color}. Must be one of: {allowed_colors}")

    # Check suggested_hardware if present
    hardware = yaml_data.get("suggested_hardware")
    if hardware and hardware not in VALID_HARDWARE:
        errors.append(f"Invalid 'suggested_hardware' value: {hardware}. Must be one of: {', '.join(sorted(VALID_HARDWARE))}")

    # Warn about deprecated 'hardware' field
    if "hardware" in yaml_data:
        errors.append("Deprecated field 'hardware' found. Use 'suggested_hardware' instead (per HF Spaces docs)")

    # Recommended (not required) fields
    if "emoji" not in yaml_data:
        errors.append("Warning: 'emoji' field is recommended for better Space appearance")
    if "title" not in yaml_data:
        errors.append("Warning: 'title' field is recommended")

    # Check that pinned is boolean if present
    if "pinned" in yaml_data and not isinstance(yaml_data["pinned"], bool):
        errors.append(f"Invalid 'pinned' value: {yaml_data['pinned']}. Must be boolean (true/false)")

    return errors


def main() -> None:
    """Validate the repository README.md and exit with a status code.

    The README is looked up one directory above the directory containing
    this script. Every message returned by validate_readme() is printed to
    stderr and the process exits with status 1; when there are none, a
    success line is printed to stdout and the process exits with status 0.
    """
    repo_root = Path(__file__).parent.parent
    readme_path = repo_root / "README.md"

    errors = validate_readme(readme_path)
    if not errors:
        # The check mark was previously stored as mis-decoded bytes.
        print("✅ README.md is valid for Hugging Face Spaces")
        sys.exit(0)

    print("❌ README.md validation failed:", file=sys.stderr)
    for error in errors:
        print(f"  - {error}", file=sys.stderr)
    sys.exit(1)


# Run the validation only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()