File size: 5,254 Bytes
a750766 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
#!/usr/bin/env python3
"""
Validate README.md for Hugging Face Space compatibility.
This script checks that the README.md file has:
- Valid YAML frontmatter
- Required fields for HF Spaces (sdk, app_port for docker)
- Correct format and values
"""
import sys
import re
from pathlib import Path
from typing import Dict, List, Tuple
# Requirements that apply when the Space declares the Docker SDK. Each entry
# maps a field name to either a list of accepted values or a predicate that
# the parsed value has to satisfy.
REQUIRED_DOCKER_FIELDS = {
    "sdk": ["docker"],
    "app_port": lambda x: isinstance(x, int) and 1 <= x <= 65535,
}

# Fields that are not mandatory but are recommended.
RECOMMENDED_FIELDS = [
    "title",
    "emoji",
    "colorFrom",
    "colorTo",
]

# Color names accepted for 'colorFrom' / 'colorTo'.
VALID_COLORS = {
    "red",
    "yellow",
    "green",
    "blue",
    "indigo",
    "purple",
    "pink",
    "gray",
}

# Values accepted for the 'sdk' field.
VALID_SDKS = {
    "gradio",
    "docker",
    "static",
}

# Hardware flavors accepted for 'suggested_hardware' (taken from the HF docs).
VALID_HARDWARE = {
    "cpu-basic",
    "cpu-upgrade",
    "t4-small",
    "t4-medium",
    "l4x1",
    "l4x4",
    "a10g-small",
    "a10g-large",
    "a10g-largex2",
    "a10g-largex4",
    "a100-large",
    "v5e-1x1",
    "v5e-2x2",
    "v5e-2x4",
}
def _strip_inline_comment(raw: str) -> str:
    """Return *raw* (an unparsed YAML scalar) without a trailing ``# comment``.

    A ``#`` is treated as the start of a comment only when it is the first
    character or is preceded by whitespace, and only outside a leading quoted
    string, so ``"My # App"`` keeps its ``#``.
    """
    raw = raw.strip()
    if raw.startswith('#'):
        # "key: # note" -- there is no value, only a comment.
        return ''
    if raw[:1] in ('"', "'"):
        closing = raw.find(raw[0], 1)
        if closing != -1:
            # Keep the quoted text intact; drop only a comment that follows it.
            if re.match(r'\s+#', raw[closing + 1:]):
                return raw[:closing + 1]
            return raw
    return re.split(r'\s+#', raw, maxsplit=1)[0]


def extract_yaml_frontmatter(content: str) -> Tuple[Dict, int, int]:
    """Extract the YAML frontmatter at the very start of README content.

    This is a deliberately small parser, not a YAML implementation: it only
    understands flat ``key: value`` lines. Lines without a colon (for example
    list items) are ignored, and indentation is not interpreted.

    Args:
        content: Full text of the README file.

    Returns:
        A ``(data, start, end)`` tuple. ``data`` maps each key to its parsed
        value: ``true``/``false`` (any case) become ``bool``, values made only
        of ASCII digits become ``int``, everything else stays a ``str`` with
        surrounding quotes removed. ``start`` is always 0 and ``end`` is the
        offset just past the closing ``---`` line. When the content does not
        begin with a frontmatter block the result is ``({}, -1, -1)``.
    """
    # The closing '---' must sit at the start of a line (lookbehind) and may be
    # the last thing in the file, so a missing trailing newline and an empty
    # frontmatter block ("---\n---") are both accepted.
    match = re.match(r'^---\s*\n(.*?)(?<=\n)---\s*(?:\n|\Z)', content, re.DOTALL)
    if not match:
        return {}, -1, -1

    yaml_dict = {}
    for line in match.group(1).split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if ':' not in line:
            continue

        key, value = line.split(':', 1)
        key = key.strip()
        value = _strip_inline_comment(value).strip('"\'')

        lowered = value.lower()
        if lowered == 'true':
            value = True
        elif lowered == 'false':
            value = False
        elif re.fullmatch(r'[0-9]+', value):
            # ASCII digits only: str.isdigit() also accepts characters such as
            # superscripts that int() rejects with ValueError.
            value = int(value)
        yaml_dict[key] = value

    return yaml_dict, 0, match.end()
def validate_readme(readme_path: Path) -> List[str]:
    """Validate a README.md for Hugging Face Spaces and return the problems found.

    Args:
        readme_path: Location of the README.md file to check.

    Returns:
        A list of human-readable messages; an empty list means the file passed
        every check. Recommendations (prefixed with ``Warning:``) are returned
        in the same list as hard errors, so callers that treat a non-empty
        list as failure will also fail on them.
    """
    if not readme_path.exists():
        return [f"README.md not found at {readme_path}"]

    try:
        content = readme_path.read_text(encoding='utf-8')
    except (OSError, UnicodeDecodeError) as exc:
        # Report unreadable files (directory, permissions, wrong encoding) as a
        # validation problem instead of crashing with a traceback.
        return [f"Could not read {readme_path}: {exc}"]

    yaml_data, start, _end = extract_yaml_frontmatter(content)
    if start == -1:
        return ["README.md must start with YAML frontmatter (--- ... ---)"]

    errors = []

    # SDK is always required and must be one of the known values.
    sdk = yaml_data.get("sdk")
    if not sdk:
        errors.append("Missing required field: 'sdk'")
    elif sdk not in VALID_SDKS:
        errors.append(f"Invalid 'sdk' value: {sdk}. Must be one of: {', '.join(sorted(VALID_SDKS))}")

    # The Docker SDK additionally requires a usable port number.
    if sdk == "docker":
        app_port = yaml_data.get("app_port")
        if app_port is None:
            errors.append("Missing required field for Docker SDK: 'app_port'")
        elif (
            # bool is a subclass of int, so "app_port: true" would otherwise
            # be accepted as port 1.
            isinstance(app_port, bool)
            or not isinstance(app_port, int)
            or not (1 <= app_port <= 65535)
        ):
            errors.append(f"Invalid 'app_port' value: {app_port}. Must be an integer between 1 and 65535")

    # Colors are optional, but when given they must be known names. The
    # allowed values are sorted so the message is the same on every run.
    for field in ("colorFrom", "colorTo"):
        color = yaml_data.get(field)
        if color and color not in VALID_COLORS:
            errors.append(f"Invalid '{field}' value: {color}. Must be one of: {', '.join(sorted(VALID_COLORS))}")

    # suggested_hardware is optional; 'hardware' is its deprecated spelling.
    hardware = yaml_data.get("suggested_hardware")
    if hardware and hardware not in VALID_HARDWARE:
        errors.append(f"Invalid 'suggested_hardware' value: {hardware}. Must be one of: {', '.join(sorted(VALID_HARDWARE))}")
    if "hardware" in yaml_data:
        errors.append("Deprecated field 'hardware' found. Use 'suggested_hardware' instead (per HF Spaces docs)")

    # Recommended (not required) presentation fields.
    if "emoji" not in yaml_data:
        errors.append("Warning: 'emoji' field is recommended for better Space appearance")
    if "title" not in yaml_data:
        errors.append("Warning: 'title' field is recommended")

    # 'pinned' is optional but has to be a real boolean when present.
    if "pinned" in yaml_data and not isinstance(yaml_data["pinned"], bool):
        errors.append(f"Invalid 'pinned' value: {yaml_data['pinned']}. Must be boolean (true/false)")

    return errors
def main():
    """Validate the repository README.md and exit with a status code.

    The README is looked up one directory above the directory containing this
    script. Every message returned by ``validate_readme`` is printed to
    stderr and the process exits with status 1; when there are none a success
    line is printed to stdout and the process exits with status 0.
    """
    repo_root = Path(__file__).parent.parent
    readme_path = repo_root / "README.md"

    errors = validate_readme(readme_path)
    if errors:
        print("❌ README.md validation failed:", file=sys.stderr)
        for error in errors:
            print(f" - {error}", file=sys.stderr)
        sys.exit(1)

    print("✅ README.md is valid for Hugging Face Spaces")
    sys.exit(0)


if __name__ == "__main__":
    main()
|