| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | """ |
| | M4 Dataset |
| | """ |
| | import logging |
| | import os |
| | from collections import OrderedDict |
| | from dataclasses import dataclass |
| | from glob import glob |
| |
|
| | import numpy as np |
| | import pandas as pd |
| | import patoolib |
| | from tqdm import tqdm |
| | import logging |
| | import os |
| | import pathlib |
| | import sys |
| | from urllib import request |
| |
|
| |
|
def url_file_name(url: str) -> str:
    """
    Extract the trailing file-name component of a URL.

    :param url: URL to extract file name from.
    :return: File name (text after the last '/'), or '' for an empty URL.
    """
    if not url:
        return ''
    # rsplit from the right: only the last '/'-separated component is needed.
    return url.rsplit('/', 1)[-1]
| |
|
| |
|
def download(url: str, file_path: str) -> None:
    """
    Download a file to the given path, skipping the download if the file
    already exists.

    :param url: URL to download.
    :param file_path: Where to download the content.
    """

    def progress(count: int, block_size: int, total_size: int) -> None:
        # urlretrieve passes total_size == -1 when the server sends no
        # Content-Length header; guard against dividing by zero/negative.
        if total_size <= 0:
            sys.stdout.write('\rDownloading {} to {} ({} bytes)'.format(
                url, file_path, count * block_size))
        else:
            # Cap at 100%: the last block is usually only partially filled.
            progress_pct = min(
                float(count * block_size) / float(total_size) * 100.0, 100.0)
            sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(
                url, file_path, progress_pct))
        sys.stdout.flush()

    if not os.path.isfile(file_path):
        # Some mirrors reject the default Python user agent; spoof a browser.
        opener = request.build_opener()
        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
        request.install_opener(opener)
        pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
        f, _ = request.urlretrieve(url, file_path, progress)
        sys.stdout.write('\n')
        sys.stdout.flush()
        file_info = os.stat(f)
        logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
    else:
        file_info = os.stat(file_path)
        logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
| |
|
| |
|
@dataclass()
class M4Dataset:
    """In-memory container for the cached M4 competition dataset."""
    ids: np.ndarray
    groups: np.ndarray
    frequencies: np.ndarray
    horizons: np.ndarray
    values: np.ndarray

    @staticmethod
    def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
        """
        Load cached dataset.

        :param training: Load training part if training is True, test part otherwise.
        :param dataset_file: Directory holding 'M4-info.csv' plus the cached
            'training.npz' / 'test.npz' files.
        :return: Populated M4Dataset.
        """
        info = pd.read_csv(os.path.join(dataset_file, 'M4-info.csv'))
        # Pick the cached values file that matches the requested split.
        cache_name = 'training.npz' if training else 'test.npz'
        cached_values = np.load(os.path.join(dataset_file, cache_name),
                                allow_pickle=True)
        return M4Dataset(ids=info.M4id.values,
                         groups=info.SP.values,
                         frequencies=info.Frequency.values,
                         horizons=info.Horizon.values,
                         values=cached_values)
| |
|
| |
|
@dataclass()
class M4Meta:
    """Static metadata for the six M4 seasonal patterns.

    These are plain class attributes (no annotations), shared constants
    rather than dataclass fields.
    """
    seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
    horizons = [6, 8, 18, 13, 14, 48]
    frequencies = [1, 4, 12, 1, 1, 24]
    # Per-pattern lookup tables, derived from the parallel lists above.
    horizons_map = dict(zip(seasonal_patterns, horizons))
    frequency_map = dict(zip(seasonal_patterns, frequencies))
    history_size = dict(zip(seasonal_patterns, [1.5, 1.5, 1.5, 10, 10, 10]))
| |
|
| |
|
def load_m4_info(info_file: str = None) -> pd.DataFrame:
    """
    Load M4Info file.

    :param info_file: Optional path to the M4-info CSV. When None, falls back
        to the module-level INFO_FILE_PATH for backward compatibility.
    :return: Pandas DataFrame of M4Info.
    """
    # NOTE(review): INFO_FILE_PATH is not defined in this chunk of the file;
    # presumably it is a module-level constant defined elsewhere — verify,
    # otherwise the default call path raises NameError.
    return pd.read_csv(info_file if info_file is not None else INFO_FILE_PATH)
| |
|