import os
import re
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


def remove_duplicates_and_convert_npy(val_list):
    """Deduplicate a list of tuples by their first element (keeping the
    last value seen for each key, in first-seen order) and convert the
    result to a float32 NumPy array."""
    tmp_dict = OrderedDict()
    for ele in val_list:
        # Later entries with the same key overwrite earlier ones.
        tmp_dict[ele[0]] = tuple(ele[1:])
    val_list = [(k,) + v for k, v in tmp_dict.items()]
    ret_npy = np.zeros((len(val_list), len(val_list[0])), dtype=np.float32)
    for i, ele_tuple in enumerate(val_list):
        for j, ele in enumerate(ele_tuple):
            ret_npy[i, j] = float(ele)
    return ret_npy


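# A minimal sketch of how remove_duplicates_and_convert_npy behaves; the
# input tuples below are illustrative assumptions, not data from a real log:
#
#     >>> remove_duplicates_and_convert_npy([('1', '0.5'), ('1', '0.4'), ('2', '0.3')])
#     array([[1. , 0.4],
#            [2. , 0.3]], dtype=float32)
#
# The duplicate iteration '1' collapses to its last value, 0.4.

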
def temporal_smoothing(training_statistics, stride=10, window_size=100):
    """Smooth training statistics with a rolling window.

    We always assume the first column of ``training_statistics`` is the
    iteration number and the remaining columns are the statistics.

    Parameters
    ----------
    training_statistics : np.ndarray
        Shape (num_records, 1 + num_statistics), iteration in column 0.
    stride : int
        Step between consecutive smoothed samples.
    window_size : int
        Number of records in each rolling window.

    Returns
    -------
    smoothed_mean : np.ndarray
        Iteration column followed by the rolling means.
    smoothed_std : np.ndarray
        Iteration column followed by the rolling standard deviations.
    """
    # Keep only rows with a full window behind them, one every `stride`.
    slice_obj = slice(window_size - 1, None, stride)
    iter_slice = training_statistics[slice_obj, 0:1]
    rolling_frame = pd.DataFrame(training_statistics[:, 1:]).rolling(
        window=window_size, center=False)
    # DataFrame.as_matrix() was removed in pandas 1.0; use to_numpy().
    smoothed_mean = rolling_frame.mean().to_numpy()[slice_obj, :]
    smoothed_std = rolling_frame.std().to_numpy()[slice_obj, :]
    # Prepend the iteration column so outputs keep the input layout.
    smoothed_mean = np.concatenate([iter_slice, smoothed_mean], axis=1)
    smoothed_std = np.concatenate([iter_slice, smoothed_std], axis=1)
    return smoothed_mean, smoothed_std


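# A minimal sketch of temporal_smoothing on synthetic data; the array below
# is an illustrative assumption, not project output:
#
#     stats = np.stack([np.arange(1000.0), np.random.randn(1000)], axis=1)
#     mean, std = temporal_smoothing(stats, stride=10, window_size=100)
#     # mean.shape == std.shape == (91, 2): column 0 is the iteration,
#     # column 1 the rolling mean / std over the preceding 100 records.

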
def parse_log(file_path, regex):
    """Extract numeric training statistics from a log file.

    Parameters
    ----------
    file_path : str
        Path to the log file.
    regex : str
        Regular expression whose capture groups give the iteration number
        followed by the statistics, one match per logged record.

    Returns
    -------
    np.ndarray
        Deduplicated float32 array with one row per iteration.
    """
    with open(file_path) as f:
        content = f.read()
    ret = re.findall(regex, content)
    ret = remove_duplicates_and_convert_npy(ret)
    return ret


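# A minimal sketch of the end-to-end flow with a hypothetical log format;
# neither "train.log" nor the "Iter ..." line format is taken from this
# project:
#
#     # Matches lines such as "Iter 100, loss=0.523"
#     stats = parse_log("train.log", r"Iter (\d+), loss=([\d.]+)")
#     mean, std = temporal_smoothing(stats)
#     plt.plot(mean[:, 0], mean[:, 1])  # smoothed loss vs. iteration
#     plt.show()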