| import os | |
| import re | |
| import requests | |
| from swift.utils import get_logger | |
| logger = get_logger() | |
def check_link(url):
    """Report whether ``url`` answers a HEAD request with HTTP 200.

    Redirects are followed and the request is abandoned after 5 seconds.

    Args:
        url: The HTTP(S) address to probe.

    Returns:
        True if the final response status code is exactly 200; False for any
        other status code or if ``requests`` raises a ``RequestException``
        (connection failure, timeout, invalid URL, ...).
    """
    try:
        reply = requests.head(url, allow_redirects=True, timeout=5)
    except requests.RequestException:
        # Any transport-level failure is treated as a dead link.
        return False
    return reply.status_code == 200
def extract_links_from_md(file_path):
    """Collect the destinations of all inline Markdown links in a file.

    Every ``[text](destination)`` occurrence (image links included) is
    matched. The destination is normalised so that callers receive only the
    URL or path:

    * surrounding whitespace is removed;
    * an optional trailing title (``"..."`` or ``'...'``) is dropped, so
      ``[t](page.md "Title")`` yields ``page.md``;
    * an angle-bracketed destination (``<my file.md>``) is unwrapped.

    Args:
        file_path: Path of the Markdown file, read as UTF-8.

    Returns:
        A list of destination strings in order of appearance. Duplicates are
        kept, and an empty ``()`` target produces an empty string.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Destination (bare or <bracketed>) followed by a quoted link title.
    title_pattern = re.compile(r'''(<[^>]*>|\S+)\s+(?:"[^"]*"|'[^']*')''')

    links = []
    for target in re.findall(r'\[.*?\]\((.*?)\)', content):
        target = target.strip()
        with_title = title_pattern.fullmatch(target)
        if with_title:
            # Keep only the destination; the title is not part of the link.
            target = with_title.group(1)
        if len(target) >= 2 and target.startswith('<') and target.endswith('>'):
            # Angle brackets only delimit the destination in Markdown source.
            target = target[1:-1]
        links.append(target)
    return links
def check_links_in_folder(folder_path):
    """Recursively check every link found in the Markdown files of a folder.

    Each ``.md`` file below ``folder_path`` is scanned with
    ``extract_links_from_md`` and one log line is emitted per link:

    * ``http://`` / ``https://`` links are probed with ``check_link``; a
      failing ``huggingface.co`` link is logged without the cross marker.
    * Any other link is treated as a path relative to the directory of the
      Markdown file (the part after the last ``#`` is ignored) and is valid
      when that path exists on disk.
    * Links that consist only of a fragment (empty path) are logged as
      skipped.

    Args:
        folder_path: Root directory to walk.

    Returns:
        None. Results are reported through the module logger only.
    """
    # NOTE(review): the original skip list named this file twice; a second,
    # different file name may have been intended -- confirm.
    skipped_names = ('Supported-models-and-datasets.md',)
    web_prefixes = ('http://', 'https://')

    for current_dir, _, filenames in os.walk(folder_path):
        for filename in filenames:
            if not filename.endswith('.md') or filename in skipped_names:
                continue

            md_path = os.path.join(current_dir, filename)
            logger.info(f'Checking links in file: {md_path}')

            for link in extract_links_from_md(md_path):
                if link.startswith(web_prefixes):
                    # Remote link: ask the server.
                    if check_link(link):
                        logger.info(f'✅ Link is valid: {link}')
                    elif 'huggingface.co' in link:
                        logger.info(f'Link is broken: {link}')
                    else:
                        logger.info(f'❌ Link is broken: {link}')
                    continue

                # Local link: drop the fragment and resolve against the
                # directory that contains the Markdown file.
                target = link.rsplit('#', 1)[0]
                if not target:
                    logger.info(f'Skipping non-HTTP link: {link}')
                    continue

                resolved = os.path.abspath(os.path.join(current_dir, target))
                if os.path.exists(resolved):
                    logger.info(f'✅ Link is valid: {link}')
                else:
                    logger.info(f'❌ Link is broken: {link}')
if __name__ == '__main__':
    # Script entry point: scan the current working directory recursively.
    check_links_in_folder(folder_path='./')