File size: 3,863 Bytes
ca1888b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
#!/usr/bin/env python
"""
stats.py
Tools to calculate statistics
"""
from __future__ import absolute_import
import os
import sys
import numpy as np
import core_scripts.other_tools.display as nii_display
import core_scripts.data_io.conf as nii_dconf
__author__ = "Xin Wang"
__email__ = "wangxin@nii.ac.jp"
__copyright__ = "Copyright 2020, Xin Wang"
def f_var2std(var):
    """Convert variance to standard deviation, with flooring.

    std = f_var2std(var)

    Args:
      var: np.array, variance

    Return:
      std: np.array, standard deviation

    std = sqrt(var), except that std is set to 1.0 wherever
      * var is negative (sqrt is undefined there), or
      * sqrt(var) is smaller than nii_dconf.std_floor.
    The input array is not modified.
    """
    negative_idx = var < 0
    # sqrt of a negative entry yields NaN and an "invalid value" warning.
    # Those entries are overwritten just below, so silence the warning
    # here instead of leaking it to the caller.
    with np.errstate(invalid="ignore"):
        std = np.sqrt(var)
    std[negative_idx] = 1.0
    # too-small std is replaced by 1.0
    floored_idx = std < nii_dconf.std_floor
    std[floored_idx] = 1.0
    return std
def f_online_mean_std(data, mean_old, var_old, cnt_old):
    """Merge one more chunk of data into an accumulated mean and variance.

    mean, var, count = f_online_mean_std(data, mean_old, var_old, cnt_old)

    Online (chunk-by-chunk) algorithm to accumulate mean and variance.

    Args:
      data: input data as np.array, in shape [length, dimension]
            or [length] for single-dimension data
      mean_old: mean accumulated so far, np.array [dimension]
      var_old: variance accumulated so far, np.array [dimension]
      cnt_old: how many data rows have been accumulated before
               this call

    Return:
      mean: updated mean, np.array [dimension]
      var: updated variance, np.array [dimension]
      count: accumulated data number, = cnt_old + data.shape[0]

    If data is empty (data.shape[0] == 0), the inputs mean_old, var_old
    and cnt_old are returned unchanged.

    Raises:
      ValueError: re-raised after the dimension diagnostics when the
                  update fails and nii_display.f_die did not stop the
                  program

    Ref. parallel algorithm
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    """
    try:
        # how many time steps (number of rows) in this data
        cnt_this = data.shape[0]

        # if input data is empty, don't update
        if cnt_this == 0:
            return mean_old, var_old, cnt_old

        if data.ndim == 1:
            # single dimension data, 1d array -> scalar statistics
            mean_this = data.mean()
            var_this = data.var()
        else:
            # multiple dimension data, statistics over the rows
            mean_this = data.mean(axis=0)
            var_this = data.var(axis=0)

        # difference of accumulated mean and data mean
        diff_mean = mean_this - mean_old

        # update count
        updated_count = cnt_old + cnt_this

        # update mean
        new_mean = mean_old + diff_mean * (float(cnt_this) / updated_count)

        # update var
        if cnt_old == 0:
            # this is the first data: its variance is the result
            if data.ndim == 1:
                # var must be returned as an array, not a scalar
                new_var = np.zeros([1], dtype=nii_dconf.h_dtype)
                new_var[0] = var_this
            else:
                new_var = var_this
        else:
            # not first data: weighted sum of both variances plus the
            # between-chunk term. The denominator equals
            # (cnt_old + cnt_this)^2 / (cnt_old * cnt_this).
            new_var = (var_old * (float(cnt_old) / updated_count)
                       + var_this * (float(cnt_this) / updated_count)
                       + (diff_mean * diff_mean
                          / (float(cnt_this) / cnt_old
                             + float(cnt_old) / cnt_this
                             + 2.0)))
        # done
        return new_mean, new_var, updated_count

    except ValueError:
        # report a mismatch between data and the accumulated statistics
        if data.ndim > 1:
            if data.shape[1] != mean_old.shape[0] or \
               data.shape[1] != var_old.shape[0]:
                nii_display.f_print("Dimension incompatible", "error")
                nii_display.f_die("Error in online mean var calculation")
        else:
            if mean_old.shape[0] != 1 or \
               var_old.shape[0] != 1:
                nii_display.f_print("Dimension incompatible", "error")
                nii_display.f_die("Error in online mean var calculation")
        # Either the shapes were compatible or f_die returned: do not
        # fall through and silently return None, surface the error.
        raise
# Script entry point: intentionally a no-op, nothing is executed when this
# file is run directly.
if __name__ == "__main__":
    pass
|