File size: 5,695 Bytes
ca1888b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
#!/usr/bin/env python
"""
log_parser
tools to parse log_train and log_err
"""
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
import os
import re
import sys
__author__ = "Xin Wang"
__email__ = "wangxin@nii.ac.jp"
__copyright__ = "Copyright 2020, Xin Wang"
def f_read_log_err(file_path, train_num, val_num):
    """Read a log_err file and average the error values per epoch.

    log_train, log_val = f_read_log_err(file_path, train_num, val_num)

    A line is treated as data when it starts with a decimal digit and
    does not contain the word 'skip'. Each data line is parsed as
    comma-separated numbers. Data rows are grouped into consecutive
    chunks of (train_num + val_num) rows; in each chunk the first
    train_num rows are averaged as training data and the following
    val_num rows as validation data. Trailing rows that do not fill a
    complete chunk are ignored. The number of data rows is printed.

    input:
    -----
      file_path: path to the log_err file
      train_num: how many training utterances (rows) per epoch
      val_num:   how many validation utterances (rows) per epoch

    output:
    ------
      log_train: np.array, average error values per epoch on training set
      log_val:   np.array, average error values per epoch on validation set

      Both have shape (epoch_num, number_of_columns). When the file has
      no data line at all, two empty arrays of shape (0, 0) are returned.

    raises:
    ------
      ZeroDivisionError: if train_num + val_num == 0 and data exists
    """
    data_str = []
    with open(file_path, 'r') as file_ptr:
        for line in file_ptr:
            # Same filter as int(line[0]) succeeding: first character
            # must be a decimal digit, and 'skip' lines are dropped.
            if 'skip' not in line and line[:1].isdecimal():
                data_str.append(line)

    row = len(data_str)
    print(row)
    if row == 0:
        # Nothing to average; avoid indexing into an empty list.
        return np.zeros([0, 0]), np.zeros([0, 0])

    # Parse every line once; the first line fixes the column count.
    parsed = [np.fromstring(line, dtype=np.float32, sep=',')
              for line in data_str]
    col = len(parsed[0])
    data = np.zeros([row, col])
    for idx, values in enumerate(parsed):
        data[idx, :] = values

    total_num = train_num + val_num
    # Only complete epochs are kept.
    epoch_num = row // total_num
    data_train = np.zeros([epoch_num, col])
    data_val = np.zeros([epoch_num, col])

    for epoch in range(epoch_num):
        chunk = data[epoch * total_num:(epoch + 1) * total_num, :]
        data_train[epoch, :] = np.mean(chunk[0:train_num, :], axis=0)
        data_val[epoch, :] = np.mean(
            chunk[train_num:(train_num + val_num), :], axis=0)

    return data_train, data_val
def f_read_log_train(file_path):
    """Parse the per-epoch table of a log_train file.

    data_train, data_val, time_per_epoch = f_read_log_train(file_path)

    Lines are collected only after a line containing 'Duration' has
    been seen, and only when they contain more than two '|' characters.
    For each collected line, split on '|': field 1 is the epoch time,
    field 2 the training errors and field 3 the validation errors, the
    latter two separated by '/'.

    input:
    -----
      file_path: path to the log_train file

    output:
    ------
      data_train: np.array (rows, 3), error values per epoch on training set
      data_val:   np.array (rows, 3), error values per epoch on validation set
      time_per_epoch: np.array (rows,), training time per epoch

      A collected line whose time field is not a number keeps an
      all-zero row in the three outputs.
    """
    table_lines = []
    header_seen = False
    with open(file_path, 'r') as file_ptr:
        for text in file_ptr:
            # The header check comes second so that the line holding
            # 'Duration' is itself never collected.
            if header_seen and text.count('|') > 2:
                table_lines.append(text)
            if 'Duration' in text:
                header_seen = True

    num_rows = len(table_lines)
    time_per_epoch = np.zeros(num_rows)
    data_train = np.zeros([num_rows, 3])
    data_val = np.zeros([num_rows, 3])

    for row_idx, text in enumerate(table_lines):
        fields = text.split('|')
        try:
            time_per_epoch[row_idx] = float(fields[1])
        except ValueError:
            # No numeric duration on this line: leave its row as zeros.
            continue
        trn_fields = fields[2].split('/')
        val_fields = fields[3].split('/')
        for col_idx, trn_value in enumerate(trn_fields):
            data_train[row_idx, col_idx] = float(trn_value)
            data_val[row_idx, col_idx] = float(val_fields[col_idx])

    return data_train, data_val, time_per_epoch
def read_log_err_pytorch(file_path, merge_epoch=False):
    """Read per-utterance loss values from a PyTorch-style log_err file.

    trn_data, val_data = read_log_err_pytorch(file_path, merge_epoch)

    A line is a data line when it contains 'Time:'. Its set size is the
    integer between the first '/' and the following ','; its loss values
    are taken from every comma-separated field containing 'Loss:'.
    The set size of the first data line identifies training lines; data
    lines with any other set size are counted as validation lines.
    A short summary of the sizes and counts is printed.

    input:
    -----
      file_path:   path to the log file
      merge_epoch: if True, average every block of <set size> rows into
                   one row per epoch (incomplete trailing blocks are
                   dropped); training and validation are merged
                   independently. Default False.

    output:
    ------
      trn_data: np.array, loss values on the training set
      val_data: np.array, loss values on the validation set
                (zero rows when the log has no validation lines)

      Returns None, after printing a message, when the file has no
      data line.
    """
    def set_size(line):
        # "<idx>/<total>, ..." -> <total>
        return int(line.split('/')[1].split(',')[0])

    def get_data(line):
        return [float(x.split(":")[1])
                for x in line.split(',') if "Loss:" in x]

    def merge(data, utt_num):
        # Average consecutive blocks of utt_num rows, one row per epoch.
        epoch_num = data.shape[0] // utt_num
        merged = np.zeros([epoch_num, data.shape[1]])
        for idx in range(epoch_num):
            block = data[idx * utt_num:(idx + 1) * utt_num, :]
            merged[idx, :] = block.mean(axis=0)
        return merged

    # Read the file once, keeping (set size, loss values) per data line.
    records = []
    with open(file_path, 'r') as file_ptr:
        for line in file_ptr:
            if "Time:" in line:
                records.append((set_size(line), get_data(line)))

    if not records:
        print("Cannot parse file")
        return

    trn_utt_num = records[0][0]
    val_utt_num = None
    trn_total_num = 0
    val_total_num = 0
    for temp_num, _ in records:
        if temp_num == trn_utt_num:
            trn_total_num += 1
        else:
            # The most recent non-training size is the validation size.
            val_utt_num = temp_num
            val_total_num += 1
    # Column count follows the last data line.
    col_num = len(records[-1][1])

    if val_utt_num is None:
        print("Trn %d, no val" % (trn_utt_num))
    else:
        print("Trn %d, val %d" % (trn_utt_num, val_utt_num))
    print("Trn data %d, val data %d" % (trn_total_num, val_total_num))

    trn_data = np.zeros([trn_total_num, col_num])
    val_data = np.zeros([val_total_num, col_num])
    trn_utt_cnt = 0
    val_utt_cnt = 0
    for temp_num, data in records:
        if temp_num == trn_utt_num:
            trn_data[trn_utt_cnt, :] = np.array(data)
            trn_utt_cnt += 1
        elif temp_num == val_utt_num:
            # NOTE: if more than two set sizes occur, lines whose size is
            # not the final validation size are counted but not stored,
            # so trailing rows of val_data stay zero.
            val_data[val_utt_cnt, :] = np.array(data)
            val_utt_cnt += 1

    if not merge_epoch:
        return trn_data, val_data

    trn_data_new = merge(trn_data, trn_utt_num)
    if val_utt_num is None:
        # No validation lines: nothing to merge (avoids int // None).
        val_data_new = np.zeros([0, col_num])
    else:
        val_data_new = merge(val_data, val_utt_num)
    return trn_data_new, val_data_new
# When run as a script the module only prints its name.
if __name__ == "__main__":
    print("logParser")
|