File size: 2,113 Bytes
7134ce7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import os
import re

import requests

from swift.utils import get_logger

# Module-level logger used by every function in this script; obtained from
# the `get_logger` helper imported from `swift.utils`.
logger = get_logger()


def check_link(url, timeout=5):
    """Return True when *url* answers with HTTP 200, following redirects.

    A HEAD request is tried first because it avoids downloading the body.
    Some servers do not implement HEAD and reply 405 (Method Not Allowed) or
    501 (Not Implemented) even though the resource exists; in that case the
    URL is probed again with a streamed GET so that such links are not
    reported as broken.

    Args:
        url: Absolute HTTP(S) URL to probe.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        bool: True if the final response status is 200, False for any other
        status or when the request raises ``requests.RequestException``.
    """
    try:
        response = requests.head(url, timeout=timeout, allow_redirects=True)
        if response.status_code in (405, 501):
            # HEAD is unsupported here; retry with GET. ``stream=True`` defers
            # the body download, and closing right away releases the
            # connection without reading the body.
            response = requests.get(url, timeout=timeout, allow_redirects=True, stream=True)
            response.close()
        return response.status_code == 200
    except requests.RequestException:
        # Connection errors, timeouts, invalid URLs, etc. all count as broken.
        return False


def extract_links_from_md(file_path):
    """Return the targets of every inline Markdown link found in *file_path*.

    The file is read as UTF-8 and scanned for ``[text](target)`` constructs
    (image links ``![alt](target)`` match the same pattern).  Targets are
    returned in document order, duplicates included.

    Surrounding whitespace and an optional trailing quoted title are removed,
    so ``[doc](guide.md "User guide")`` yields ``guide.md`` instead of
    ``guide.md "User guide"``, which could never resolve as a path or URL.

    Args:
        file_path: Path of the Markdown file to scan.

    Returns:
        list[str]: The extracted link targets (empty if the file has none).
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    links = []
    for target in re.findall(r'\[.*?\]\((.*?)\)', content):
        target = target.strip()
        # Drop a link title: whitespace followed by a single- or
        # double-quoted string at the very end of the target.
        target = re.sub(r'''\s+(?:"[^"]*"|'[^']*')$''', '', target)
        links.append(target)
    return links


def check_links_in_folder(folder_path):
    """Log the validity of every link in the Markdown files under *folder_path*.

    The directory tree is walked recursively.  For each ``.md`` file (except
    the names in the skip list) every link returned by
    ``extract_links_from_md`` is classified and logged:

    * ``http://`` / ``https://`` links are probed with ``check_link``.  Each
      distinct URL is requested only once per run; repeated occurrences
      reuse the first result.
    * Pure in-page anchors (``#section``) and links with another URI scheme
      (``mailto:``, ``ftp:``, ...) are logged as skipped.
    * Everything else is treated as a path relative to the directory of the
      Markdown file, with any ``#fragment`` removed, and checked for
      existence on disk.

    Args:
        folder_path: Root directory to scan.

    Returns:
        None.  Results are only written to the module logger.
    """
    # Markdown files that are never checked.
    skipped_files = ('Supported-models-and-datasets.md', )
    # A URI scheme prefix such as ``mailto:``.  At least two characters are
    # required so a Windows drive letter (``C:``) is not taken for a scheme.
    scheme_pattern = re.compile(r'[A-Za-z][A-Za-z0-9+.\-]+:')
    # url -> bool; avoids re-requesting a URL that appears many times.
    http_results = {}

    for root, _, files in os.walk(folder_path):
        for file in files:
            if not file.endswith('.md') or file in skipped_files:
                continue
            file_path = os.path.join(root, file)
            logger.info(f'Checking links in file: {file_path}')

            for link in extract_links_from_md(file_path):
                if link.startswith(('http://', 'https://')):
                    if link not in http_results:
                        http_results[link] = check_link(link)
                    if http_results[link]:
                        logger.info(f'✅ Link is valid: {link}')
                    elif 'huggingface.co' in link:
                        # Failures on huggingface.co are logged without the
                        # ❌ marker, unlike other broken links.
                        logger.info(f'Link is broken: {link}')
                    else:
                        logger.info(f'❌ Link is broken: {link}')
                    continue

                # Local reference: ignore the fragment when resolving the path.
                path = link.rsplit('#', 1)[0]
                if not path or scheme_pattern.match(link):
                    # Anchor-only link or a non-file URI; nothing to resolve.
                    logger.info(f'Skipping non-HTTP link: {link}')
                    continue

                path = os.path.abspath(os.path.join(root, path))
                if os.path.exists(path):
                    logger.info(f'✅ Link is valid: {link}')
                else:
                    logger.info(f'❌ Link is broken: {link}')


if __name__ == '__main__':
    # Script entry point: check every Markdown file beneath the current
    # working directory.
    check_links_in_folder('./')