"""
This module provides functions to perform full Procrustes analysis.

This code was originally written by Justin Kuczynski and ported over from
scikit-bio by Yoshiki Vazquez-Baeza.
"""
|
|
| import numpy as np |
| from scipy.linalg import orthogonal_procrustes |
|
|
|
|
# Names exported by ``from <this module> import *``.
__all__ = ['procrustes']
|
|
|
|
def procrustes(data1, data2):
    r"""Procrustes analysis, a similarity test for two data sets.

    Each input matrix is a set of points or vectors (the rows of the matrix).
    The dimension of the space is the number of columns of each matrix. Given
    two identically sized matrices, procrustes standardizes both such that:

    - :math:`tr(AA^{T}) = 1`.

    - Both sets of points are centered around the origin.

    Procrustes ([1]_, [2]_) then applies the optimal transform to the second
    matrix (including scaling/dilation, rotations, and reflections) to minimize
    :math:`M^{2}=\sum(data1-data2)^{2}`, or the sum of the squares of the
    pointwise differences between the two input datasets.

    This function was not designed to handle datasets with different numbers of
    datapoints (rows). If two data sets have different dimensionality
    (different number of columns), simply add columns of zeros to the smaller
    of the two.

    Parameters
    ----------
    data1 : array_like
        Matrix whose n rows represent points in k-dimensional (columns) space.
        `data1` is the reference data: after it is standardized, the data
        from `data2` will be transformed to fit the pattern in `data1`
        (must have >1 unique points).
    data2 : array_like
        n rows of data in k space to be fit to `data1`. Must be the same
        shape ``(numrows, numcols)`` as `data1` (must have >1 unique points).

    Returns
    -------
    mtx1 : ndarray
        A standardized (centered, unit Frobenius norm) version of `data1`.
    mtx2 : ndarray
        The orientation of `data2` that best fits `data1`. Centered, but not
        necessarily :math:`tr(AA^{T}) = 1`.
    disparity : float
        :math:`M^{2}` as defined above.

    Raises
    ------
    ValueError
        If the input arrays are not two-dimensional.
        If the shape of the input arrays is different.
        If the input arrays have zero columns or zero rows.
        If either input has zero norm after centering, i.e. all of its
        points coincide.

    See Also
    --------
    scipy.linalg.orthogonal_procrustes
    scipy.spatial.distance.directed_hausdorff : Another similarity test
      for two data sets

    Notes
    -----
    - The disparity should not depend on the order of the input matrices, but
      the output matrices will, as only the first output matrix is guaranteed
      to be scaled such that :math:`tr(AA^{T}) = 1`.

    - Duplicate data points are generally ok, duplicating a data point will
      increase its effect on the procrustes fit.

    - Both inputs are centered and rescaled to unit Frobenius norm before the
      fit, so the disparity is measured on standardized data and does not
      depend on the original position or overall scale of either input.

    - The inputs are copied and converted to ``float64``; the caller's arrays
      are never modified.

    References
    ----------
    .. [1] Krzanowski, W. J. (2000). "Principles of Multivariate analysis".
    .. [2] Gower, J. C. (1975). "Generalized procrustes analysis".

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.spatial import procrustes

    The matrix ``b`` is a rotated, shifted, scaled and mirrored version of
    ``a`` here:

    >>> a = np.array([[1, 3], [1, 2], [1, 1], [2, 1]], 'd')
    >>> b = np.array([[4, -2], [4, -4], [4, -6], [2, -6]], 'd')
    >>> mtx1, mtx2, disparity = procrustes(a, b)
    >>> round(disparity)
    0

    """
    # Work on private float64 copies so the in-place arithmetic below never
    # touches the caller's data.
    mtx1 = np.array(data1, dtype=np.float64, copy=True)
    mtx2 = np.array(data2, dtype=np.float64, copy=True)

    if mtx1.ndim != 2 or mtx2.ndim != 2:
        raise ValueError("Input matrices must be two-dimensional")
    if mtx1.shape != mtx2.shape:
        raise ValueError("Input matrices must be of same shape")
    if mtx1.size == 0:
        raise ValueError("Input matrices must be >0 rows and >0 cols")

    # Translate all the data to the origin (subtract the column means).
    mtx1 -= np.mean(mtx1, 0)
    mtx2 -= np.mean(mtx2, 0)

    # Frobenius norms of the centered matrices.
    norm1 = np.linalg.norm(mtx1)
    norm2 = np.linalg.norm(mtx2)

    # A zero norm after centering means every row equals the mean, so there
    # is nothing to scale or rotate.
    if norm1 == 0 or norm2 == 0:
        raise ValueError("Input matrices must contain >1 unique points")

    # Change scaling of the data such that trace(mtx * mtx') = 1.
    mtx1 /= norm1
    mtx2 /= norm2

    # Transform mtx2 to minimize disparity: R maps mtx1 onto mtx2, so its
    # transpose is applied to mtx2, together with the scale factor s.
    R, s = orthogonal_procrustes(mtx1, mtx2)
    mtx2 = np.dot(mtx2, R.T) * s

    # Measure the dissimilarity between the two datasets.
    disparity = np.sum(np.square(mtx1 - mtx2))

    return mtx1, mtx2, disparity
|
|
|
|