open-finance-llm-8b / scripts /validate_hf_readme.py
jeanbaptdzd's picture
Update to vLLM 0.9.2 with Qwen3 support, remove PRIIPS functionality, add HF Space validation hook
a750766
#!/usr/bin/env python3
"""
Validate README.md for Hugging Face Space compatibility.
This script checks that the README.md file has:
- Valid YAML frontmatter
- Required fields for HF Spaces (sdk, app_port for docker)
- Correct format and values
"""
import sys
import re
from pathlib import Path
from typing import Dict, List, Tuple
# Fields that must be present when the Space uses the Docker SDK.
# Each entry maps a field name to either a list of accepted values or a
# predicate that returns True for an acceptable value.
REQUIRED_DOCKER_FIELDS = {
    "sdk": ["docker"],
    "app_port": lambda port: isinstance(port, int) and 0 < port < 65536,
}

# Fields that are optional but recommended.
RECOMMENDED_FIELDS = [
    "title",
    "emoji",
    "colorFrom",
    "colorTo",
]

# Values accepted for the 'colorFrom' / 'colorTo' fields.
VALID_COLORS = {
    "red",
    "yellow",
    "green",
    "blue",
    "indigo",
    "purple",
    "pink",
    "gray",
}

# Values accepted for the 'sdk' field.
VALID_SDKS = {
    "gradio",
    "docker",
    "static",
}

# Values accepted for the 'suggested_hardware' field (from HF docs).
VALID_HARDWARE = {
    "cpu-basic",
    "cpu-upgrade",
    "t4-small",
    "t4-medium",
    "l4x1",
    "l4x4",
    "a10g-small",
    "a10g-large",
    "a10g-largex2",
    "a10g-largex4",
    "a100-large",
    "v5e-1x1",
    "v5e-2x2",
    "v5e-2x4",
}
def _strip_inline_comment(value: str) -> str:
    """Drop a trailing YAML ``# comment`` that is not inside a quoted scalar."""
    if value[:1] in ('"', "'"):
        # Quoted scalar: a '#' inside the quotes is data, so only cut what
        # follows the closing quote, and only if it is empty or a comment.
        closing = value.find(value[0], 1)
        if closing != -1:
            rest = value[closing + 1:].strip()
            if not rest or rest.startswith('#'):
                return value[:closing + 1]
        return value
    if value.startswith('#'):
        # "key: # comment" -> the value itself is empty.
        return ''
    # In a plain scalar a comment starts at '#' preceded by whitespace.
    comment = re.search(r'\s#', value)
    return value[:comment.start()].rstrip() if comment else value


def extract_yaml_frontmatter(content: str) -> Tuple[Dict, int, int]:
    """Extract YAML frontmatter from README.md content.

    Only flat ``key: value`` pairs are understood; this is a deliberately
    small parser, not a full YAML implementation.

    Args:
        content: Full text of the README file.

    Returns:
        A ``(data, start, end)`` tuple. ``data`` maps each key to its value,
        with ``true``/``false`` converted to ``bool`` and ASCII digit strings
        converted to ``int``. ``start`` is 0 and ``end`` is the offset just
        past the closing ``---`` delimiter. When the content does not begin
        with a frontmatter block the result is ``({}, -1, -1)``.
    """
    # The closing delimiter may be followed by a newline or by end of input,
    # so a README consisting only of frontmatter is still recognised.
    match = re.match(r'^---\s*\n(.*?)\n---(?:\s*\n|\s*\Z)', content, re.DOTALL)
    if not match:
        return {}, -1, -1

    yaml_dict: Dict = {}
    for raw_line in match.group(1).split('\n'):
        line = raw_line.strip()
        # Skip blanks, full-line comments and anything that is not key: value.
        if not line or line.startswith('#') or ':' not in line:
            continue

        key, _, value = line.partition(':')
        value = _strip_inline_comment(value.strip()).strip('"\'')

        lowered = value.lower()
        if lowered == 'true':
            parsed = True
        elif lowered == 'false':
            parsed = False
        elif value.isascii() and value.isdigit():
            # isascii() guards against characters such as superscript digits
            # that satisfy isdigit() but make int() raise ValueError.
            parsed = int(value)
        else:
            parsed = value
        yaml_dict[key.strip()] = parsed

    return yaml_dict, 0, match.end()
def validate_readme(readme_path: Path) -> List[str]:
    """Validate a README.md file for Hugging Face Space compatibility.

    Args:
        readme_path: Path to the README.md file to check.

    Returns:
        A list of human-readable messages; empty when nothing was found.
        Messages about recommended-but-optional fields start with
        ``"Warning:"``; every other message describes a hard error.
    """
    # is_file() also rejects a directory named README.md, which would
    # otherwise make read_text() raise.
    if not readme_path.is_file():
        return [f"README.md not found at {readme_path}"]

    content = readme_path.read_text(encoding='utf-8')

    yaml_data, start, _end = extract_yaml_frontmatter(content)
    if start == -1:
        return ["README.md must start with YAML frontmatter (--- ... ---)"]

    errors: List[str] = []

    # Check SDK. Allowed values are sorted so the message is stable across
    # runs (set iteration order of strings is not).
    sdk = yaml_data.get("sdk")
    if not sdk:
        errors.append("Missing required field: 'sdk'")
    elif sdk not in VALID_SDKS:
        errors.append(
            f"Invalid 'sdk' value: {sdk}. Must be one of: {', '.join(sorted(VALID_SDKS))}"
        )

    # For Docker SDK, check app_port
    if sdk == "docker":
        app_port = yaml_data.get("app_port")
        if app_port is None:
            errors.append("Missing required field for Docker SDK: 'app_port'")
        elif (
            # bool is a subclass of int, so "app_port: true" must be
            # rejected explicitly.
            isinstance(app_port, bool)
            or not isinstance(app_port, int)
            or not (1 <= app_port <= 65535)
        ):
            errors.append(
                f"Invalid 'app_port' value: {app_port}. Must be an integer between 1 and 65535"
            )

    # Check colors if present
    for field in ("colorFrom", "colorTo"):
        color = yaml_data.get(field)
        if color and color not in VALID_COLORS:
            errors.append(
                f"Invalid '{field}' value: {color}. Must be one of: {', '.join(sorted(VALID_COLORS))}"
            )

    # Check suggested_hardware if present
    hardware = yaml_data.get("suggested_hardware")
    if hardware and hardware not in VALID_HARDWARE:
        errors.append(
            f"Invalid 'suggested_hardware' value: {hardware}. Must be one of: {', '.join(sorted(VALID_HARDWARE))}"
        )

    # Flag the deprecated 'hardware' field
    if "hardware" in yaml_data:
        errors.append(
            "Deprecated field 'hardware' found. Use 'suggested_hardware' instead (per HF Spaces docs)"
        )

    # Recommended (optional) fields
    if "emoji" not in yaml_data:
        errors.append("Warning: 'emoji' field is recommended for better Space appearance")
    if "title" not in yaml_data:
        errors.append("Warning: 'title' field is recommended")

    # Check that pinned is boolean if present
    if "pinned" in yaml_data and not isinstance(yaml_data["pinned"], bool):
        errors.append(
            f"Invalid 'pinned' value: {yaml_data['pinned']}. Must be boolean (true/false)"
        )

    return errors
def main():
    """Validate the repository README.md and exit with a status code.

    The README is looked up one directory above this script's directory.
    Messages from ``validate_readme`` that start with ``"Warning:"`` are
    printed but do not fail the run; any other message is an error and
    makes the process exit with status 1. Exits with status 0 otherwise.
    """
    repo_root = Path(__file__).parent.parent
    readme_path = repo_root / "README.md"

    findings = validate_readme(readme_path)
    # Recommended-field notices are advisory: report them, but only real
    # errors decide the exit status.
    warnings = [msg for msg in findings if msg.startswith("Warning:")]
    errors = [msg for msg in findings if not msg.startswith("Warning:")]

    if warnings:
        print("⚠️ README.md validation warnings:", file=sys.stderr)
        for warning in warnings:
            print(f" - {warning}", file=sys.stderr)

    if errors:
        print("❌ README.md validation failed:", file=sys.stderr)
        for error in errors:
            print(f" - {error}", file=sys.stderr)
        sys.exit(1)

    print("✅ README.md is valid for Hugging Face Spaces")
    sys.exit(0)


if __name__ == "__main__":
    main()