File size: 1,683 Bytes
9ec4919
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
"""Check GitHub Pages metadata needed for search and social previews."""

from __future__ import annotations

import sys
import xml.etree.ElementTree as ET
from pathlib import Path


# Files inspected by this check, given relative to the current working
# directory.
LANDING_PAGE = Path("docs", "index.html")
SITEMAP = Path("docs", "sitemap.xml")
ROBOTS = Path("docs", "robots.txt")

# Canonical address of the site; it must appear in the landing page's href,
# in the sitemap, and (with "sitemap.xml" appended) in robots.txt.
CANONICAL_URL = "https://chaoyue0307.github.io/awesome-loop-engineering/"

# Substrings that must each occur verbatim somewhere in the landing page.
REQUIRED_HTML_SNIPPETS = [
    "<title>Awesome Loop Engineering</title>",
    'name="description"',
    'href="' + CANONICAL_URL + '"',
    'property="og:title"',
    'property="og:image"',
    'name="twitter:card"',
    "application/ld+json",
]


def _read_text(path: Path, failures: list[str]) -> str | None:
    """Return the UTF-8 text of *path*, or record a failure and return None."""
    try:
        return path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as error:
        # A missing or undecodable file is a check failure, not a crash, so
        # the remaining files are still inspected and reported together.
        failures.append(f"{path}: cannot read file: {error}")
        return None


def main() -> int:
    """Validate the landing page, sitemap and robots.txt metadata.

    Every problem found is collected and printed to stderr as a bulleted
    list. A file that cannot be read is reported as a failure and the
    checks that need its content are skipped.

    Returns:
        0 when every check passes, 1 when at least one failure was recorded.
    """
    failures: list[str] = []

    html = _read_text(LANDING_PAGE, failures)
    if html is not None:
        failures.extend(
            f"{LANDING_PAGE}: missing {snippet}"
            for snippet in REQUIRED_HTML_SNIPPETS
            if snippet not in html
        )

    sitemap_text = _read_text(SITEMAP, failures)
    if sitemap_text is not None:
        if CANONICAL_URL not in sitemap_text:
            failures.append(f"{SITEMAP}: missing canonical URL")
        # Parse only to confirm well-formedness; the tree itself is unused.
        try:
            ET.fromstring(sitemap_text)
        except ET.ParseError as error:
            failures.append(f"{SITEMAP}: invalid XML: {error}")

    robots = _read_text(ROBOTS, failures)
    if robots is not None:
        if "Allow: /" not in robots:
            failures.append(f"{ROBOTS}: missing Allow rule")
        if f"Sitemap: {CANONICAL_URL}sitemap.xml" not in robots:
            failures.append(f"{ROBOTS}: missing sitemap URL")

    if failures:
        print("Pages metadata check failed:", file=sys.stderr)
        for failure in failures:
            print(f"- {failure}", file=sys.stderr)
        return 1

    return 0


if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())