| | import numpy as np |
| | from scipy.spatial.distance import squareform |
| | from fastcluster import linkage |
| | import umap |
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | def seriation(Z,N,cur_index): |
| | ''' |
| | input: |
| | - Z is a hierarchical tree (dendrogram) |
| | - N is the number of points given to the clustering process |
| | - cur_index is the position in the tree for the recursive traversal |
| | output: |
| | - order implied by the hierarchical tree Z |
| | |
| | seriation computes the order implied by a hierarchical tree (dendrogram) |
| | ''' |
| | if cur_index < N: |
| | return [cur_index] |
| | else: |
| | left = int(Z[cur_index-N,0]) |
| | right = int(Z[cur_index-N,1]) |
| | return (seriation(Z,N,left) + seriation(Z,N,right)) |
| |
|
| | def compute_serial_matrix(dist_mat,method="ward"): |
| | ''' |
| | input: |
| | - dist_mat is a distance matrix |
| | - method = ["ward","single","average","complete"] |
| | output: |
| | - seriated_dist is the input dist_mat, |
| | but with re-ordered rows and columns |
| | according to the seriation, i.e. the |
| | order implied by the hierarchical tree |
| | - res_order is the order implied by |
| | the hierarhical tree |
| | - res_linkage is the hierarhical tree (dendrogram) |
| | |
| | compute_serial_matrix transforms a distance matrix into |
| | a sorted distance matrix according to the order implied |
| | by the hierarchical tree (dendrogram) |
| | ''' |
| | N = len(dist_mat) |
| | flat_dist_mat = squareform(dist_mat) |
| | res_linkage = linkage(flat_dist_mat, method=method,preserve_input=True) |
| | res_order = seriation(res_linkage, N, N + N-2) |
| | seriated_dist = np.zeros((N,N)) |
| | a,b = np.triu_indices(N,k=1) |
| | seriated_dist[a,b] = dist_mat[ [res_order[i] for i in a], [res_order[j] for j in b]] |
| | seriated_dist[b,a] = seriated_dist[a,b] |
| | |
| | return seriated_dist, res_order, res_linkage |
| |
|
| | def compute_ordered_matrix(sim_matrix,dist_matrix, model_names): |
| | if len(sim_matrix) >= 2: |
| | |
| | ordered_dist_matrix, order, Z = compute_serial_matrix(dist_matrix) |
| | ordered_sim_matrix = sim_matrix[order][:, order] |
| | ordered_model_names = [model_names[i] for i in order] |
| | else: |
| | ordered_sim_matrix = sim_matrix |
| | ordered_model_names = model_names |
| |
|
| | return ordered_sim_matrix, ordered_model_names |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| | def compute_umap(dist_matrix,d=2): |
| | embedding = umap.UMAP(densmap=True,n_components=d, metric='precomputed',random_state=42).fit_transform(dist_matrix) |
| | return embedding |