Spaces:
Runtime error
Runtime error
| import os | |
| import gzip | |
| import shutil | |
| from urllib.request import urlretrieve | |
| from tqdm import tqdm | |
def download_large_file(url: str, output_file: str) -> None:
    """Download *url* to *output_file* unless that file already exists.

    The payload is first written to ``output_file + ".part"`` and only moved
    to its final name once the transfer has finished. This way an interrupted
    or failed download never leaves a truncated file behind that the
    existence check below would mistake for a finished download.

    Args:
        url: Address of the resource to fetch.
        output_file: Destination path. Missing parent directories are
            created.

    Raises:
        Whatever ``urlretrieve`` raises when the transfer fails (for example
        ``urllib.error.URLError``), or ``OSError`` if the destination cannot
        be written.
    """
    if os.path.exists(output_file):
        # Already downloaded by a previous run; nothing to do.
        return

    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        # urlretrieve opens the destination for writing and does not create
        # missing directories itself.
        os.makedirs(parent_dir, exist_ok=True)

    partial_file = output_file + ".part"
    try:
        urlretrieve(url, partial_file)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_file, output_file)
    finally:
        # After a successful rename the partial file is gone, so this only
        # cleans up after a failed or interrupted transfer.
        if os.path.exists(partial_file):
            os.remove(partial_file)
def unzip_file(input_file):
    """Decompress a gzip archive next to itself, unless already done.

    The output path is *input_file* with its last extension removed, so
    ``xxx.tsv.gz`` is decompressed to ``xxx.tsv``. Data is first written to
    ``<output>.part`` and renamed once decompression has finished, so a
    failure part-way through (for example a truncated archive) does not leave
    a partial file that later runs would treat as a finished result.

    Args:
        input_file: Path to the gzip-compressed file.

    Raises:
        EOFError, OSError: If the archive is truncated, is not valid gzip
            data, or the files cannot be read or written.
    """
    # Input file has the format xxx.tsv.gz
    output_file = os.path.splitext(input_file)[0]
    if os.path.exists(output_file):
        # Already decompressed by a previous run; nothing to do.
        return

    partial_file = output_file + ".part"
    try:
        with gzip.open(input_file, "rb") as f_in, open(partial_file, "wb") as f_out:
            # Streams in chunks, so the archive is never held fully in memory.
            shutil.copyfileobj(f_in, f_out)
        # Publish the file under its final name only once it is complete.
        os.replace(partial_file, output_file)
    finally:
        # After a successful rename the partial file is gone, so this only
        # cleans up after a failed or interrupted decompression.
        if os.path.exists(partial_file):
            os.remove(partial_file)
if __name__ == "__main__":
    # Fetch each listed dataset dump into the local data directory and
    # decompress it next to the downloaded archive.
    imdb_url = "https://datasets.imdbws.com"
    data_dir = "data"
    filenames = [
        "name.basics.tsv.gz",
        "title.basics.tsv.gz",
        "title.ratings.tsv.gz",
        "title.principals.tsv.gz",
    ]

    # The target directory must exist before anything is written into it;
    # without this a fresh checkout fails on the first download.
    os.makedirs(data_dir, exist_ok=True)

    for filename in tqdm(filenames):
        url = f"{imdb_url}/{filename}"
        output_file = os.path.join(data_dir, filename)
        download_large_file(url, output_file)
        unzip_file(output_file)