File size: 3,863 Bytes
ca1888b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
"""
stats.py

Tools to calculate statistics

"""
from __future__ import absolute_import

import os
import sys
import numpy as np
import core_scripts.other_tools.display as nii_display
import core_scripts.data_io.conf as nii_dconf

__author__ = "Xin Wang"
__email__ = "wangxin@nii.ac.jp"
__copyright__ = "Copyright 2020, Xin Wang"


def f_var2std(var):
    """
    std = f_var2std(var)

    Convert variance to standard deviation, replacing unusable values.

    Args:
     var: np.array, variance

    Return:
     std: np.array, standard deviation (a new array, var is not modified)

    std = sqrt(variance). Entries whose variance is negative, or whose
    std is smaller than nii_dconf.std_floor, are set to 1.0.
    """
    negative_idx = var < 0

    # sqrt of a negative variance gives NaN and triggers numpy's "invalid
    # value" floating-point error handling. Those entries are overwritten
    # right below, so the warning (or exception, under np.seterr) is noise.
    with np.errstate(invalid="ignore"):
        std = np.sqrt(var)
    std[negative_idx] = 1.0

    # too-small std is replaced by 1.0 rather than clipped to the floor
    floored_idx = std < nii_dconf.std_floor
    std[floored_idx] = 1.0
    return std
    

def f_online_mean_std(data, mean_old, var_old, cnt_old):
    """
    mean, var, count = f_online_mean_std(data, mean_old, var_old, cnt_old)

    Online algorithm to accumulate mean and variance over batches of data.
    The variance is the population variance (np.var with default ddof).

    Args:
      data: input data as numpy.array, in shape [length, dimension],
        or [length] for single-dimension data

      mean_old: mean accumulated so far, np.array [dimension]

      var_old: variance accumulated so far, np.array [dimension]

      cnt_old: how many data rows have been accumulated before
        this call.

    Return:
      mean: updated mean, np.array [dimension]
      var: updated variance, np.array [dimension]
      count: accumulated data number, = cnt_old + data.shape[0]

      If data is empty, (mean_old, var_old, cnt_old) is returned as is.

    Raises:
      ValueError: re-raised when the computation fails with a ValueError
        that the dimension checks in the handler did not flag.

    Ref. parallel algorithm
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    """

    try:
        # how many time steps (number of rows) in this data
        cnt_this = data.shape[0]

        # if input data is empty, don't update
        if cnt_this == 0:
            return mean_old, var_old, cnt_old

        if data.ndim == 1:
            # single dimension data, 1d array -> scalar statistics
            mean_this = data.mean()
            var_this = data.var()
        else:
            # multiple dimension data, statistics over the rows
            mean_this = data.mean(axis=0)
            var_this = data.var(axis=0)

        # difference of accumulated mean and data mean
        diff_mean = mean_this - mean_old

        # update count
        updated_count = cnt_old + cnt_this

        # update mean
        new_mean = mean_old + diff_mean * (float(cnt_this) / updated_count)

        # update var
        if cnt_old == 0:
            # if this is the first data
            if data.ndim == 1:
                # remember that var is array, not scalar
                new_var = np.zeros([1], dtype=nii_dconf.h_dtype)
                new_var[0] = var_this
            else:
                new_var = var_this
        else:
            # not first data: weighted sum of the two variances plus the
            # correction term diff^2 * n_old * n_this / (n_old + n_this)^2,
            # written here as diff^2 / (n_this/n_old + n_old/n_this + 2)
            new_var = (var_old * (float(cnt_old) / updated_count)
                       + var_this * (float(cnt_this) / updated_count)
                       + (diff_mean * diff_mean
                          / (float(cnt_this) / cnt_old
                             + float(cnt_old) / cnt_this
                             + 2.0)))
        # done
        return new_mean, new_var, updated_count

    except ValueError:
        # report a dimension mismatch between data and the accumulators
        if data.ndim > 1:
            if data.shape[1] != mean_old.shape[0] or \
               data.shape[1] != var_old.shape[0]:
                nii_display.f_print("Dimension incompatible", "error")
                nii_display.f_die("Error in online mean var calculation")
        else:
            if mean_old.shape[0] != 1 or \
               var_old.shape[0] != 1:
                nii_display.f_print("Dimension incompatible", "error")
                nii_display.f_die("Error in online mean var calculation")
        # Never fall through and return None: propagate the error so the
        # caller does not fail later when unpacking the result.
        raise
            

if __name__ == "__main__":
    # Nothing is executed when this file is run directly.
    pass