Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| """ Link tester. | |
| This little utility reads all the python files in the repository, | |
| scans for links pointing to S3 and tests the links one by one. Raises an error | |
| at the end of the scan if at least one link was reported broken. | |
| """ | |
| import os | |
| import re | |
| import sys | |
| import requests | |
# Matches a quoted string literal that starts with "https://s3".
# Groups: (1) the opening quote character, (2) the "https://s3" prefix,
# (3) the rest of the link up to the closing quote.
# The suffix group is lazy so the match stops at the FIRST closing quote;
# a greedy `.*` would merge several links written on the same line into one.
REGEXP_FIND_S3_LINKS = r"""([\"'])(https:\/\/s3)(.*?)\1"""
def list_python_files_in_repository():
    """List all Python source files in the repository.

    Walks the current working directory recursively, so the script is
    expected to be executed from the repository root. Any directory whose
    path contains "templates" is skipped.

    Returns:
        A list of paths (relative to the current directory) of every file
        whose name ends in ".py".
    """
    source_code_files = []
    for path, _subdirs, files in os.walk("."):
        if "templates" in path:
            continue
        for name in files:
            # Match on the extension only: a substring test for ".py" would
            # also pick up names such as "module.pyx" or "script.py.bak".
            if name.endswith(".py"):
                source_code_files.append(os.path.join(path, name))
    return source_code_files
def find_all_links(file_paths):
    """Collect the links found in every file of `file_paths`.

    Each path is handed to `scan_code_for_links`; the per-file results are
    concatenated, in order, into a single flat list.
    """
    return [
        link
        for file_path in file_paths
        for link in scan_code_for_links(file_path)
    ]
def scan_code_for_links(source):
    """Scan one file for S3 links using REGEXP_FIND_S3_LINKS.

    Args:
        source: path of the file to read.

    Returns:
        A list of link strings, one per regex match, in order of appearance.
    """
    # Read as UTF-8 explicitly (the default encoding of Python source files)
    # instead of depending on the platform's locale encoding.
    with open(source, "r", encoding="utf-8") as source_file:
        content = source_file.read()
    raw_links = re.findall(REGEXP_FIND_S3_LINKS, content)
    # Each match is a (quote, prefix, suffix) tuple; the link is prefix + suffix.
    return [prefix + suffix for _, prefix, suffix in raw_links]
def check_all_links(links, timeout=10):
    """Check that the provided links are valid.

    A link is considered valid if a HEAD request to it returns a 200
    status code. Any other status code, or a request that fails outright
    (connection error, timeout, malformed URL, ...), marks the link broken.

    Args:
        links: iterable of URLs to check.
        timeout: seconds to wait for each request before giving up.

    Returns:
        The list of links that were found broken, in input order.
    """
    broken_links = []
    for link in links:
        try:
            # Without a timeout, `requests` can wait indefinitely on a
            # server that never answers.
            head = requests.head(link, timeout=timeout)
        except requests.RequestException:
            # One unreachable link must not abort the whole scan.
            broken_links.append(link)
            continue
        if head.status_code != 200:
            broken_links.append(link)
    return broken_links
if __name__ == "__main__":
    # Announce the scan before starting it: checking the links goes over the
    # network, so the message should not appear only once the work is done.
    print("Looking for broken links to pre-trained models/configs/tokenizers...")
    file_paths = list_python_files_in_repository()
    links = find_all_links(file_paths)
    broken_links = check_all_links(links)
    if broken_links:
        print("The following links did not respond:")
        for link in broken_links:
            print("- {}".format(link))
        # Exit with a non-zero status so the caller sees the failure.
        sys.exit(1)
    print("All links are ok.")