|
|
|
|
|
""" |
|
|
log_parser |
|
|
|
|
|
tools to parse log_train and log_err |
|
|
""" |
|
|
|
|
|
from __future__ import absolute_import |
|
|
from __future__ import print_function |
|
|
|
|
|
import numpy as np |
|
|
import os |
|
|
import re |
|
|
import sys |
|
|
|
|
|
__author__ = "Xin Wang" |
|
|
__email__ = "wangxin@nii.ac.jp" |
|
|
__copyright__ = "Copyright 2020, Xin Wang" |
|
|
|
|
|
|
|
|
def f_read_log_err(file_path, train_num, val_num):
    """Parse a log_err file into per-epoch average errors.

    log_train, log_val = f_read_log_err(log_err, num_train_utt, num_val_utt)

    A line is used as data only if it does not contain 'skip' and its
    first character can be converted by int(). Every data line is parsed
    as comma-separated float32 values. The data rows are cut into
    consecutive chunks of (train_num + val_num) rows; inside each chunk
    the first train_num rows are averaged as training errors and the
    following val_num rows as validation errors. Rows left over after the
    last complete chunk are ignored.

    input:
    -----
      file_path: path to the log_err file
      train_num: how many training utterances
      val_num: how many validation utterances

    output:
    ------
      log_train: np.array, average error values per epoch on training set
      log_val: np.array, average error values per epoch on validation set

    Both outputs have shape (epoch_num, col), where col is the number of
    values on the first data line. If the file has no data line, both
    outputs are empty arrays of shape (0, 0).

    The number of data rows is printed to stdout.

    raises:
    ------
      ZeroDivisionError: if train_num + val_num is 0
    """
    data_str = []
    with open(file_path, 'r') as file_ptr:
        for line in file_ptr:
            if 'skip' in line:
                continue
            try:
                # data lines start with a digit; anything else is ignored
                int(line[:1])
            except ValueError:
                continue
            data_str.append(line)

    row = len(data_str)
    # the column count comes from the first data line; 0 if nothing parsed
    if data_str:
        col = len(np.fromstring(data_str[0], dtype=np.float32, sep=','))
    else:
        col = 0

    data = np.zeros([row, col])
    for idx, line in enumerate(data_str):
        data[idx, :] = np.fromstring(line, dtype=np.float32, sep=',')

    print(data.shape[0])
    total_num = train_num + val_num
    # only complete epochs are averaged
    epoch_num = data.shape[0] // total_num
    data_train = np.zeros([epoch_num, data.shape[1]])
    data_val = np.zeros([epoch_num, data.shape[1]])

    for x in range(epoch_num):
        temp_data = data[x * total_num:(x + 1) * total_num, :]
        train_part = temp_data[0:train_num, :]
        val_part = temp_data[train_num:(train_num + val_num), :]
        data_train[x, :] = np.mean(train_part, axis=0)
        data_val[x, :] = np.mean(val_part, axis=0)

    return data_train, data_val
|
|
|
|
|
|
|
|
def f_read_log_train(file_path):
    """Parse the result table of a log_train file.

    data_train, data_val, time_per_epoch = f_read_log_train(path_to_log_train)

    Lines are collected only after a line containing 'Duration' has been
    seen, and only if they contain more than two '|' characters. For each
    collected line, the '|'-separated field 1 is read as the epoch
    duration, and fields 2 and 3 are read as '/'-separated error values
    for the training and validation sets. A collected line whose duration
    field is not a number keeps an all-zero row in every output.

    input:
    -----
      file_path: path to the log_train file

    output:
    ------
      data_train: np.array (row, 3), error values per epoch on training set
      data_val: np.array (row, 3), error values per epoch on validation set
      time_per_epoch: np.array (row,), training time per epoch
    """
    table_rows = []
    header_seen = False
    with open(file_path, 'r') as file_ptr:
        for text in file_ptr:
            if header_seen and text.count('|') > 2:
                table_rows.append(text)
            # the flag is raised after the check, so the 'Duration' line
            # itself is never collected the first time it appears
            header_seen = header_seen or 'Duration' in text

    num_rows = len(table_rows)
    time_per_epoch = np.zeros(num_rows)
    data_train = np.zeros([num_rows, 3])
    data_val = np.zeros([num_rows, 3])

    for row_idx, text in enumerate(table_rows):
        fields = text.split('|')
        try:
            duration = float(fields[1])
        except ValueError:
            # not a result row (e.g. a separator line): leave zeros
            continue
        time_per_epoch[row_idx] = duration

        trn_values = fields[2].split('/')
        val_values = fields[3].split('/')
        # the number of training values decides how many columns are read
        for col_idx in range(len(trn_values)):
            data_train[row_idx, col_idx] = float(trn_values[col_idx])
            data_val[row_idx, col_idx] = float(val_values[col_idx])

    return data_train, data_val, time_per_epoch
|
|
|
|
|
|
|
|
def read_log_err_pytorch(file_path, merge_epoch=False):
    """Parse a log_err file whose data lines carry 'Time:' and 'Loss:'.

    trn_data, val_data = read_log_err_pytorch(file_path, merge_epoch)

    A data line is any line containing 'Time:'. Its set size is the
    integer found between the first '/' and the next ','. Its values are
    the floats that follow 'Loss:' in each ','-separated field. The first
    set size seen is treated as the training set size; a different size
    is treated as the validation set size.

    input:
    -----
      file_path: path to the log_err file
      merge_epoch: if True, average every trn_utt_num training rows (and
                   every val_utt_num validation rows) into one row

    output:
    ------
      trn_data: np.array, one row per training data line, or one row per
                complete group of training rows when merge_epoch is True
      val_data: np.array, same for the validation data lines; it has zero
                rows when no validation lines are found

    None is returned (after printing "Cannot parse file") when the file
    has no data line. A short summary of the counts is printed to stdout.
    """
    def set_size(line):
        # integer between the first '/' and the following ','
        return int(line.split('/')[1].split(',')[0])

    def get_data(line):
        # floats following 'Loss:' in every ','-separated field
        return [float(x.split(":")[1])
                for x in line.split(',') if "Loss:" in x]

    def merge(data, utt_num):
        # average consecutive groups of utt_num rows; incomplete trailing
        # groups are dropped. No rows when the set size is unknown or 0.
        if not utt_num:
            return np.zeros([0, data.shape[1]])
        group_num = data.shape[0] // utt_num
        merged = np.zeros([group_num, data.shape[1]])
        for idx in range(group_num):
            chunk = data[idx * utt_num:(idx + 1) * utt_num, :]
            merged[idx, :] = chunk.mean(axis=0)
        return merged

    # read the file once and keep (set size, values) of every data line
    records = []
    with open(file_path, 'r') as file_ptr:
        for line in file_ptr:
            if "Time:" in line:
                records.append((set_size(line), get_data(line)))

    trn_utt_num = None
    val_utt_num = None
    trn_total_num = 0
    val_total_num = 0
    col_num = 0
    for temp_num, values in records:
        # the column count is taken from the last data line
        col_num = len(values)
        if trn_utt_num is None:
            trn_utt_num = temp_num
        if temp_num != val_utt_num and temp_num != trn_utt_num:
            val_utt_num = temp_num
        if trn_utt_num == temp_num:
            trn_total_num += 1
        if val_utt_num == temp_num:
            val_total_num += 1

    if trn_utt_num is None:
        print("Cannot parse file")
        return
    if val_utt_num is None:
        print("Trn %d, no val" % (trn_utt_num))
    else:
        print("Trn %d, val %d" % (trn_utt_num, val_utt_num))
    print("Trn data %d, val data %d" % (trn_total_num, val_total_num))

    trn_data = np.zeros([trn_total_num, col_num])
    val_data = np.zeros([val_total_num, col_num])
    trn_utt_cnt = 0
    val_utt_cnt = 0
    for temp_num, values in records:
        if trn_utt_num == temp_num:
            trn_data[trn_utt_cnt, :] = np.array(values)
            trn_utt_cnt += 1
        if val_utt_num == temp_num:
            val_data[val_utt_cnt, :] = np.array(values)
            val_utt_cnt += 1

    if merge_epoch:
        # each set is merged on its own, so a missing or shorter
        # validation part neither crashes nor leaves training rows at zero
        return merge(trn_data, trn_utt_num), merge(val_data, val_utt_num)
    return trn_data, val_data
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: only print the tool name.
    print("logParser")
|
|
|