File size: 1,690 Bytes
6021dd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import os
import pandas as pd
import matplotlib.pyplot as plt
import re
from collections import OrderedDict
import numpy as np

def remove_duplicates_and_convert_npy(val_list):
    """Deduplicate tuples by their first element and convert to a float32 array.

    Later tuples with the same first element overwrite earlier ones, while the
    row order of first appearance is preserved (insertion-ordered dict).

    Parameters
    ----------
    val_list : list of tuple of str
        Typically the output of ``re.findall`` with capture groups; each tuple's
        first element acts as the dedup key (e.g. the iteration number).

    Returns
    -------
    np.ndarray
        Shape ``(n_unique, tuple_len)`` float32 array; shape ``(0, 0)`` when
        ``val_list`` is empty.
    """
    # OrderedDict keeps the position of first insertion but stores the
    # latest value seen for each key, i.e. "last occurrence wins".
    tmp_dist = OrderedDict()
    for ele in val_list:
        tmp_dist[ele[0]] = ele[1:]
    deduped = [(k,) + v for k, v in tmp_dist.items()]
    # Guard: the original code indexed deduped[0], which raised IndexError
    # on empty input (e.g. a log file with no regex matches).
    if not deduped:
        return np.zeros((0, 0), dtype=np.float32)
    ret_npy = np.zeros((len(deduped), len(deduped[0])), dtype=np.float32)
    for i, ele_tuple in enumerate(deduped):
        for j, val in enumerate(ele_tuple):
            ret_npy[i, j] = float(val)
    return ret_npy


def temporal_smoothing(training_statistics, stride=10, window_size=100):
    """Compute rolling mean/std of training statistics, subsampled by stride.

    We always assume the first axis in statistics is the iteration and that
    column 0 holds the iteration number; columns 1..N hold the statistics.

    Parameters
    ----------
    training_statistics : np.ndarray
        2D array of shape ``(n_iters, 1 + n_stats)``.
    stride : int
        Subsampling step applied after the rolling window.
    window_size : int
        Rolling window length; the first ``window_size - 1`` rows are dropped
        (they would contain NaN from the incomplete window).

    Returns
    -------
    smoothed_mean : np.ndarray
        Iteration column followed by the rolling means, subsampled.
    smoothed_std : np.ndarray
        Iteration column followed by the rolling (sample) stds, subsampled.
    """
    # Start at window_size - 1 so every kept row has a full window behind it.
    slice_obj = slice(window_size - 1, None, stride)
    iter_slice = training_statistics[slice_obj, 0:1]
    rolling_frame = pd.DataFrame(training_statistics[:, 1:]).rolling(window=window_size, center=False)
    # NOTE: DataFrame.as_matrix() was removed in pandas 0.25; to_numpy() is
    # the supported replacement and returns the same ndarray view of values.
    smoothed_mean = rolling_frame.mean().to_numpy()[slice_obj, :]
    smoothed_std = rolling_frame.std().to_numpy()[slice_obj, :]
    smoothed_mean = np.concatenate([iter_slice, smoothed_mean], axis=1)
    smoothed_std = np.concatenate([iter_slice, smoothed_std], axis=1)
    return smoothed_mean, smoothed_std


def parse_log(file_path, regex):
    """Extract regex matches from a log file as a float32 numpy array.

    Parameters
    ----------
    file_path : str
        Path of the log file to read.
    regex : str
        Pattern with capture groups; passed straight to ``re.findall``.

    Returns
    -------
    np.ndarray
        Deduplicated matches converted via
        ``remove_duplicates_and_convert_npy``.
    """
    with open(file_path) as log_file:
        matches = re.findall(regex, log_file.read())
    return remove_duplicates_and_convert_npy(matches)