BechusRantus's picture
Upload folder using huggingface_hub
7134ce7 verified
import os
import re
import requests
from swift.utils import get_logger
# Module-level logger obtained from swift.utils; every link-check result below
# is reported through it at INFO level.
logger = get_logger()
def check_link(url):
    """Return True when *url* ultimately answers with HTTP 200.

    A cheap HEAD request (following redirects) is tried first. Some servers
    do not implement HEAD and answer 405 (Method Not Allowed) or 501
    (Not Implemented) even though the page exists; in that case the check is
    retried with a streamed GET so the link is not misreported as broken.

    Args:
        url: Absolute http(s) URL to probe.

    Returns:
        True if the final response status is 200, False for any other status
        or when the request fails (timeout, DNS error, invalid URL, ...).
    """
    try:
        response = requests.head(url, timeout=5, allow_redirects=True)
        if response.status_code in (405, 501):
            # stream=True fetches only the headers; the body is never read,
            # and close() releases the connection right away.
            response = requests.get(url, timeout=5, allow_redirects=True, stream=True)
            response.close()
        return response.status_code == 200
    except requests.RequestException:
        return False
# Inline markdown link/image: captures everything between "(" and the first ")".
_MD_LINK_RE = re.compile(r'\[.*?\]\((.*?)\)')
# Optional trailing link title, e.g. the `"Title"` in `[text](url "Title")`.
_MD_TITLE_RE = re.compile(r'''\s+(?:"[^"]*"|'[^']*')\s*$''')


def extract_links_from_md(file_path):
    """Return the destinations of all inline markdown links in a file.

    The file is read as UTF-8 and scanned for ``[text](target)`` constructs
    (image links ``![alt](target)`` match as well). Each captured target is
    normalized so that only the destination remains:

    * surrounding whitespace is removed,
    * a trailing quoted title (``"..."`` or ``'...'``) is dropped,
    * the ``<...>`` wrapper around a destination is removed.

    Args:
        file_path: Path of the markdown file to scan.

    Returns:
        A list of link destinations in order of appearance (duplicates kept).
    """
    with open(file_path, 'r', encoding='utf-8') as md_file:
        content = md_file.read()

    links = []
    for target in _MD_LINK_RE.findall(content):
        # Without this, `doc.md "Title"` would be checked as one bogus path.
        target = _MD_TITLE_RE.sub('', target.strip())
        if target.startswith('<') and target.endswith('>'):
            target = target[1:-1]
        links.append(target)
    return links
def check_links_in_folder(folder_path):
    """Check every link in all ``.md`` files below *folder_path* and log the result.

    The directory tree is walked recursively. For each markdown file (except
    the ones listed in ``skipped_files``) every extracted link is classified:

    * ``http://`` / ``https://`` links are probed with ``check_link``; each
      distinct URL is requested only once per run.
    * Pure anchors (``#section``) and links using another URI scheme
      (``mailto:``, ``ftp:``, ...) are skipped.
    * Anything else is treated as a path relative to the markdown file's
      directory (a trailing ``#fragment`` is ignored) and must exist on disk.

    Args:
        folder_path: Root directory to scan.

    Returns:
        None. Results are reported through the module logger only.
    """
    skipped_files = {'Supported-models-and-datasets.md'}
    # A URI scheme of two or more characters; a single letter is deliberately
    # not matched so Windows drive paths such as ``C:\\docs`` stay file paths.
    other_scheme = re.compile(r'^[A-Za-z][A-Za-z0-9+.\-]+:')
    # URL -> bool, so a URL repeated across files costs a single request.
    http_results = {}

    for root, _, files in os.walk(folder_path):
        for filename in files:
            if not filename.endswith('.md') or filename in skipped_files:
                continue

            file_path = os.path.join(root, filename)
            logger.info(f'Checking links in file: {file_path}')

            for link in extract_links_from_md(file_path):
                if link.startswith(('http://', 'https://')):
                    if link not in http_results:
                        http_results[link] = check_link(link)
                    if http_results[link]:
                        logger.info(f'✅ Link is valid: {link}')
                    elif 'huggingface.co' in link:
                        # Logged without the ❌ marker.
                        # NOTE(review): presumably because huggingface.co can be
                        # unreachable from some networks - confirm.
                        logger.info(f'Link is broken: {link}')
                    else:
                        logger.info(f'❌ Link is broken: {link}')
                    continue

                # Local target: drop the "#fragment" part before hitting the disk.
                path = link.rsplit('#', 1)[0]
                if not path or other_scheme.match(link):
                    # In-page anchor, or a non-file scheme such as mailto:.
                    logger.info(f'Skipping non-HTTP link: {link}')
                    continue

                path = os.path.abspath(os.path.join(root, path))
                if os.path.exists(path):
                    logger.info(f'✅ Link is valid: {link}')
                else:
                    logger.info(f'❌ Link is broken: {link}')
if __name__ == '__main__':
    # Script entry point: scan the current working directory recursively.
    check_links_in_folder('./')