| import os | |
| import pandas as pd | |
| import matplotlib.pyplot as plt | |
| import re | |
| from collections import OrderedDict | |
| import numpy as np | |
def remove_duplicates_and_convert_npy(val_list):
    """Deduplicate rows by their first element and convert to a float32 array.

    Rows that share the same first element are collapsed into one; the LAST
    occurrence wins, while the first-seen order of keys is preserved
    (OrderedDict insertion semantics).

    Parameters
    ----------
    val_list : list of tuple
        Each tuple is one parsed row of string-convertible numbers;
        element 0 is the deduplication key.

    Returns
    -------
    np.ndarray
        float32 array of shape (num_unique_rows, row_length). An empty
        input yields a (0, 0) array instead of raising IndexError, which
        the original element-count lookup did.
    """
    deduped = OrderedDict((row[0], tuple(row[1:])) for row in val_list)
    rows = [(key,) + rest for key, rest in deduped.items()]
    if not rows:
        # Guard: original implementation crashed here on empty input
        # (it indexed rows[0] to size the output array).
        return np.zeros((0, 0), dtype=np.float32)
    # np.array performs the str -> float conversion in one pass,
    # replacing the manual nested fill loop.
    return np.array(rows, dtype=np.float32)
def temporal_smoothing(training_statistics, stride=10, window_size=100):
    """Smooth training statistics with a rolling window, sampled at a stride.

    The first column of ``training_statistics`` is assumed to be the
    iteration number; all remaining columns are the values to smooth.

    Parameters
    ----------
    training_statistics : np.ndarray
        2D array; column 0 holds iteration numbers, later columns hold stats.
    stride : int
        Step between consecutive sampled output rows.
    window_size : int
        Rolling-window length. The first complete window ends at row
        ``window_size - 1``, which is where sampling starts, so no NaN
        rows (from incomplete windows) appear in the output.

    Returns
    -------
    smoothed_mean : np.ndarray
        Iteration column followed by the windowed means.
    smoothed_std : np.ndarray
        Iteration column followed by the windowed (sample, ddof=1)
        standard deviations.
    """
    slice_obj = slice(window_size - 1, None, stride)
    iter_slice = training_statistics[slice_obj, 0:1]
    rolling_frame = pd.DataFrame(training_statistics[:, 1:]).rolling(
        window=window_size, center=False)
    # Bug fix: DataFrame.as_matrix() was removed in pandas 1.0; to_numpy()
    # is the supported replacement and returns the same ndarray.
    smoothed_mean = rolling_frame.mean().to_numpy()[slice_obj, :]
    smoothed_std = rolling_frame.std().to_numpy()[slice_obj, :]
    smoothed_mean = np.concatenate([iter_slice, smoothed_mean], axis=1)
    smoothed_std = np.concatenate([iter_slice, smoothed_std], axis=1)
    return smoothed_mean, smoothed_std
def parse_log(file_path, regex):
    """Extract regex matches from a log file as a deduplicated float array.

    Parameters
    ----------
    file_path : str
        Path of the log file to read.
    regex : str
        Pattern applied with ``re.findall``; each match becomes one row,
        with its first group used as the dedup key by
        ``remove_duplicates_and_convert_npy``.

    Returns
    -------
    np.ndarray
        float32 array of the parsed, deduplicated rows.
    """
    with open(file_path) as log_file:
        matches = re.findall(regex, log_file.read())
    return remove_duplicates_and_convert_npy(matches)