Spaces:
Sleeping
Sleeping
Commit ·
f4051e1
1
Parent(s): 883a832
add dependencies
Browse files
- app.py +174 -1
- requirements.txt +5 -0
- utils/utils.py +0 -819
app.py
CHANGED
|
@@ -1,11 +1,184 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
import numpy as np
|
| 4 |
import pandas as pd
|
|
|
|
| 5 |
import matplotlib.pyplot as plt
|
| 6 |
from pathlib import Path
|
| 7 |
from tensorflow.keras.models import model_from_json
|
| 8 |
-
from utils.utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
|
| 11 |
model_depths = ['1.5M', '2M', '2.4M', '4.88M', '5M', '6.29M', '8.5M', '12.5M', '16.5M', '25M', '32M', '50M', '100M', '150M']
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import os
|
| 3 |
+
import scipy
|
| 4 |
+
from scipy.sparse import tril, triu
|
| 5 |
import numpy as np
|
| 6 |
import pandas as pd
|
| 7 |
+
import matplotlib
|
| 8 |
import matplotlib.pyplot as plt
|
| 9 |
from pathlib import Path
|
| 10 |
from tensorflow.keras.models import model_from_json
|
| 11 |
+
from utils.utils import draw_heatmap, load_chr_ratio_matrix_from_sparse
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
def get_chromosome_from_filename(filename):
    """Extract the chromosome string (e.g. ``chr6``) from a data file name.

    The chromosome token may appear as the file-name prefix, as the
    file-name suffix (after the last dot), or somewhere in the middle.

    Args:
        filename (:obj:`str`) : name of anchor to anchor file

    Returns:
        Chromosome string of form chr<>
    """
    marker = filename.find('chr')      # position of the chromosome token
    if marker == 0:                    # chromosome is the file prefix
        return filename[:filename.find('.')]
    suffix_dot = filename.rfind('.')   # position of the extension separator
    if marker > suffix_dot:            # chromosome is the file suffix
        return filename[marker:]
    return filename[marker:suffix_dot]  # chromosome sits mid-name
| 33 |
+
|
| 34 |
+
|
def draw_heatmap(matrix, color_scale, ax=None, return_image=False):
    """
    Display ratio heatmap containing only strong signals (values > 1 or 0.98th quantile)

    Args:
        matrix (:obj:`numpy.array`) : ratio matrix to be displayed
        color_scale (:obj:`int`) : max ratio value to be considered strongest by color mapping; pass 0 to derive the scale from the matrix itself
        ax (:obj:`matplotlib.axes.Axes`) : axes which will contain the heatmap. If None, new axes are created
        return_image (:obj:`bool`) : set to True to return the image obtained from drawing the heatmap with the generated color map

    Returns:
        ``numpy.array`` : if ``return_image`` is set to True, return the heatmap as an array
    """
    # Build colour-bin boundaries ("breaks"): edges start just above a ratio
    # of 1 so values <= 1 fall below the first bin and render white.
    if color_scale != 0:
        # Caller-supplied fixed scale; the final bin is capped at the matrix max.
        breaks = np.append(np.arange(1.001, color_scale, (color_scale - 1.001) / 18), np.max(matrix))
    elif np.max(matrix) < 2:
        # Weak-signal matrix: spread bins evenly up to the observed maximum.
        breaks = np.arange(1.001, np.max(matrix), (np.max(matrix) - 1.001) / 19)
    else:
        # Derive the scale from the 0.95 quantile so outliers don't wash out the map.
        # NOTE(review): the docstring mentions the 0.98 quantile but the code uses
        # q=0.95 — confirm which is intended.
        step = (np.quantile(matrix, q=0.95) - 1) / 18
        up = np.quantile(matrix, q=0.95) + 0.011
        if up < 2:  # enforce a minimum dynamic range of [1, 2]
            up = 2
            step = 0.999 / 18
        breaks = np.append(np.arange(1.001, up, step), np.max(matrix))

    n_bin = 20  # Discretizes the interpolation into bins
    # White -> red ramp, one colour per bin.
    colors = ["#FFFFFF", "#FFE4E4", "#FFD7D7", "#FFC9C9", "#FFBCBC", "#FFAEAE", "#FFA1A1", "#FF9494", "#FF8686",
              "#FF7979", "#FF6B6B", "#FF5E5E", "#FF5151", "#FF4343", "#FF3636", "#FF2828", "#FF1B1B", "#FF0D0D",
              "#FF0000"]
    cmap_name = 'my_list'
    # Create the colormap
    cm = matplotlib.colors.LinearSegmentedColormap.from_list(
        cmap_name, colors, N=n_bin)
    norm = matplotlib.colors.BoundaryNorm(breaks, 20)
    # Fewer bins will result in "coarser" colomap interpolation
    if ax is None:
        _, ax = plt.subplots()
    img = ax.imshow(matrix, cmap=cm, norm=norm, interpolation='nearest')
    if return_image:
        # NOTE(review): plt.close() closes the *current* figure, which may not be
        # the figure that a caller-supplied `ax` belongs to — confirm intent.
        plt.close()
        return img.get_array()
| 76 |
+
|
| 77 |
+
|
def anchor_list_to_dict(anchors):
    """
    Converts the array of anchor names to a dictionary mapping each anchor to its chromosomal index

    Args:
        anchors (:obj:`numpy.array`) : array of anchor name values

    Returns:
        `dict` : dictionary mapping each anchor to its index from the array
    """
    # Dict comprehension replaces the manual loop-and-assign idiom.
    return {anchor: i for i, anchor in enumerate(anchors)}
| 92 |
+
|
| 93 |
+
|
def anchor_to_locus(anchor_dict):
    """
    Function to convert an anchor name to its genomic locus which can be easily vectorized

    Args:
        anchor_dict (:obj:`dict`) : dictionary mapping each anchor to its chromosomal index

    Returns:
        `function` : function which returns the locus of an anchor name
    """
    # The dict's own item accessor is exactly the closure the original built:
    # calling it with an anchor returns its index and raises KeyError for
    # unknown anchors, just as `anchor_dict[anchor]` would.
    return anchor_dict.__getitem__
| 107 |
+
|
| 108 |
+
|
| 109 |
+
|
def load_chr_ratio_matrix_from_sparse(dir_name, file_name, anchor_dir, sparse_dir=None, anchor_list=None, chr_name=None, dummy=5, ignore_sparse=False, force_symmetry=True, use_raw=False):
    """
    Loads data as a sparse matrix by either reading a precompiled sparse matrix or an anchor to anchor file which is converted to sparse CSR format.
    Ratio values are computed using the observed (obs) and expected (exp) values:

    .. math::
        ratio = \\frac{obs + dummy}{exp + dummy}

    Args:
        dir_name (:obj:`str`) : directory containing the anchor to anchor or precompiled (.npz) sparse matrix file
        file_name (:obj:`str`) : name of anchor to anchor or precompiled (.npz) sparse matrix file
        anchor_dir (:obj:`str`) : directory containing the reference anchor ``.bed`` files
        sparse_dir (:obj:`str`) : optional override for the module-level precompiled-matrix cache directory
        anchor_list (:obj:`pandas.DataFrame`) : optional preloaded anchor reference list; read from ``anchor_dir`` when None
        chr_name (:obj:`str`) : chromosome name; derived from ``file_name`` when None
        dummy (:obj:`int`) : dummy value to used when computing ratio values
        ignore_sparse (:obj:`bool`) : set to True to ignore precompiled sparse matrices even if they exist
        force_symmetry (:obj:`bool`) : set to True to mirror the matrix when it is one-sided
        use_raw (:obj:`bool`) : set to True to store raw observed counts instead of ratio values

    Returns:
        ``scipy.sparse.csr_matrix``: sparse matrix of ratio values

    Raises:
        ValueError: if neither ``anchor_list`` nor ``anchor_dir`` is supplied when one is required
    """
    global data_dir
    global sparse_data_dir
    if chr_name is None:
        chr_name = get_chromosome_from_filename(file_name)
    sparse_rep_dir = dir_name[dir_name[: -1].rfind('/') + 1:]  # directory where the pre-compiled sparse matrices are saved
    if sparse_dir is not None:
        sparse_data_dir = sparse_dir
    os.makedirs(os.path.join(sparse_data_dir, sparse_rep_dir), exist_ok=True)
    if file_name.endswith('.npz'):  # loading pre-combined and pre-compiled sparse data
        sparse_matrix = scipy.sparse.load_npz(dir_name + file_name)
    else:  # load from file name
        if file_name + '.npz' in os.listdir(os.path.join(sparse_data_dir, sparse_rep_dir)) and not ignore_sparse:  # check if pre-compiled data already exists
            sparse_matrix = scipy.sparse.load_npz(os.path.join(sparse_data_dir, sparse_rep_dir, file_name + '.npz'))
        else:  # otherwise generate sparse matrix from anchor2anchor file and save pre-compiled data
            if anchor_list is None:
                if anchor_dir is None:
                    # BUG FIX: the original used `assert '<message>'`, which asserts a
                    # non-empty (truthy) string and therefore could never fail.
                    raise ValueError('You must supply either an anchor reference list or the directory containing one')
                anchor_list = pd.read_csv(os.path.join(anchor_dir, '%s.bed' % chr_name), sep='\t',
                                          names=['chr', 'start', 'end', 'anchor'])  # read anchor list file
            matrix_size = len(anchor_list)  # matrix size is needed to construct sparse CSR matrix
            anchor_dict = anchor_list_to_dict(anchor_list['anchor'].values)  # convert to anchor --> index dictionary
            try:  # first try reading anchor to anchor file as <a1> <a2> <obs> <exp>
                chr_anchor_file = pd.read_csv(
                    os.path.join(dir_name, file_name),
                    delimiter='\t',
                    names=['anchor1', 'anchor2', 'obs', 'exp'],
                    usecols=['anchor1', 'anchor2', 'obs', 'exp'])  # read chromosome anchor to anchor file
                rows = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor1'].values)  # convert anchor names to row indices
                cols = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor2'].values)  # convert anchor names to column indices
                ratio = (chr_anchor_file['obs'] + dummy) / (chr_anchor_file['exp'] + dummy)  # compute matrix ratio value
                sparse_matrix = scipy.sparse.csr_matrix((ratio, (rows, cols)), shape=(matrix_size, matrix_size))  # construct sparse CSR matrix
            # BUG FIX: narrowed from a bare `except:` (which also swallowed
            # KeyboardInterrupt/SystemExit). pandas raises ValueError (or its
            # subclass ParserError) on a column mismatch, and np.vectorize
            # raises KeyError for anchors missing from the reference list —
            # both previously triggered this fallback.
            except (ValueError, KeyError):  # otherwise read anchor to anchor file as <a1> <a2> <ratio>
                chr_anchor_file = pd.read_csv(
                    os.path.join(dir_name, file_name),
                    delimiter='\t',
                    names=['anchor1', 'anchor2', 'ratio'],
                    usecols=['anchor1', 'anchor2', 'ratio'])
                rows = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor1'].values)  # convert anchor names to row indices
                cols = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor2'].values)  # convert anchor names to column indices
                if use_raw:
                    # NOTE(review): this 3-column fallback has no 'obs' column, so
                    # use_raw=True raises KeyError here — presumably a dead path;
                    # confirm before relying on use_raw with ratio-format files.
                    sparse_matrix = scipy.sparse.csr_matrix((chr_anchor_file['obs'], (rows, cols)), shape=(
                        matrix_size, matrix_size))  # construct sparse CSR matrix
                else:
                    sparse_matrix = scipy.sparse.csr_matrix((chr_anchor_file['ratio'], (rows, cols)), shape=(matrix_size, matrix_size))  # construct sparse CSR matrix
            if force_symmetry:
                upper_sum = triu(sparse_matrix, k=1).sum()
                lower_sum = tril(sparse_matrix, k=-1).sum()
                if upper_sum == 0 or lower_sum == 0:  # matrix is one-sided: mirror it
                    sparse_matrix = sparse_matrix + sparse_matrix.transpose()
                sparse_triu = scipy.sparse.triu(sparse_matrix)
                sparse_matrix = sparse_triu + sparse_triu.transpose()
            if not ignore_sparse:
                scipy.sparse.save_npz(os.path.join(sparse_data_dir, sparse_rep_dir, file_name), sparse_matrix)  # save precompiled data
    return sparse_matrix
| 182 |
|
| 183 |
|
# Parameter-count labels for the available model variants; presumably used to
# select a checkpoint in the Gradio interface — TODO confirm against the UI code.
model_depths = ['1.5M', '2M', '2.4M', '4.88M', '5M', '6.29M', '8.5M', '12.5M', '16.5M', '25M', '32M', '50M', '100M', '150M']
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
tensorflow
|
| 2 |
+
numpy<2.0
|
| 3 |
+
pandas
|
| 4 |
+
matplotlib
|
| 5 |
+
scipy
|
utils/utils.py
DELETED
|
@@ -1,819 +0,0 @@
|
|
| 1 |
-
import math
|
| 2 |
-
import os
|
| 3 |
-
import re
|
| 4 |
-
import cv2
|
| 5 |
-
import random
|
| 6 |
-
import pickle
|
| 7 |
-
import numpy as np
|
| 8 |
-
import tensorflow.keras.backend as K
|
| 9 |
-
import pandas as pd
|
| 10 |
-
import matplotlib.pyplot as plt
|
| 11 |
-
import matplotlib.colors
|
| 12 |
-
import matplotlib.cm
|
| 13 |
-
import scipy.sparse
|
| 14 |
-
from scipy.sparse import coo_matrix, csr_matrix, triu, tril
|
| 15 |
-
import scipy.ndimage
|
| 16 |
-
|
# Chromosome name --> integer label for the 24 human chromosomes (1-22, X, Y).
chromosome_labels = {'chr1': 0, 'chr2': 1, 'chr3': 2, 'chr4': 3, 'chr5': 4, 'chr6': 5, 'chr7': 6, 'chr8': 7, 'chr9': 8,
                     'chr10': 9, 'chr11': 10, 'chr12': 11, 'chr13': 12, 'chr14': 13, 'chr15': 14, 'chr16': 15, 'chr17': 16, 'chr18': 17,
                     'chr19': 18, 'chr20': 19, 'chr21': 20, 'chr22': 21, 'chrX': 22, 'chrY': 23}

# Default cache locations; note load_chr_ratio_matrix_from_sparse may rebind
# sparse_data_dir at runtime via its `sparse_dir` argument (module global).
data_dir = 'data/'
sparse_data_dir = 'data/sparse/'
# Create the cache directories at import time, tolerating prior existence.
try:
    os.mkdir(data_dir)
except FileExistsError:
    pass
try:
    os.mkdir(sparse_data_dir)
except FileExistsError:
    pass
| 31 |
-
|
| 32 |
-
|
def open_anchor_to_anchor(filename):
    '''
    Read a tab delimited anchor to anchor file as a DataFrame
    Args:
        filename (:obj:`str`) : full path to anchor to anchor file

    Returns:
        ``pandas.DataFrame``: if reading a normalized anchor to anchor file, columns are ``a1 a2 obs exp ratio``
        and if reading a denoised or enhanced anchor to anchor file, columns are ``a1 a2 ratio``
    '''
    # First read is only used to sniff the column count; the file is re-read
    # below with explicit column names for the detected format.
    df = pd.read_csv(filename, sep='\t')
    n_cols = len(df.columns)
    if n_cols == 4:  # if before denoise top loops
        df = pd.read_csv(filename,
                         sep='\t',
                         names=['anchor1', 'anchor2', 'obs', 'exp'])
        # Dummy value of 5 smooths ratios for low-count pairs (matches the
        # default `dummy` in load_chr_ratio_matrix_from_sparse).
        df['ratio'] = (df['obs'] + 5) / (df['exp'] + 5)
    elif n_cols == 5:  # includes p-value
        df = pd.read_csv(filename,
                         sep='\t',
                         names=['anchor1', 'anchor2', 'obs', 'exp', 'p_val'])
        df['ratio'] = (df['obs'] + 5) / (df['exp'] + 5)
    else:  # after denoise has no obs or exp
        df = pd.read_csv(filename,
                         sep='\t',
                         names=['anchor1', 'anchor2', 'ratio'])
        df = df[['anchor1', 'anchor2', 'ratio']]
    return df
| 61 |
-
|
| 62 |
-
|
def open_full_genome(data_dir):
    '''
    Concatenate every per-chromosome anchor to anchor file in a directory
    into a single genome-wide DataFrame.

    Args:
        data_dir (:obj:`str`) : directory containing per-chromosome anchor to anchor
            files (NOTE: this parameter shadows the module-level ``data_dir`` global)

    Returns:
        ``pandas.DataFrame`` : concatenation of all recognized anchor to anchor files
    '''
    genome = pd.DataFrame()
    print('Opening genome-wide anchor to anchor...')
    for chr_file in os.listdir(data_dir):
        # Only files matching the two known naming conventions are included.
        if 'anchor_2_anchor' in chr_file or 'denoised.anchor.to.anchor' in chr_file:
            print(chr_file)
            genome = pd.concat([genome, open_anchor_to_anchor(data_dir + '/' + chr_file)])
    return genome
| 79 |
-
|
| 80 |
-
|
def get_chromosome_from_filename(filename):
    """Extract the chromosome string (e.g. ``chr6``) from a data file name.

    The chromosome token may appear as the file-name prefix, as the
    file-name suffix (after the last dot), or somewhere in the middle.

    Args:
        filename (:obj:`str`) : name of anchor to anchor file

    Returns:
        Chromosome string of form chr<>
    """
    marker = filename.find('chr')      # position of the chromosome token
    if marker == 0:                    # chromosome is the file prefix
        return filename[:filename.find('.')]
    suffix_dot = filename.rfind('.')   # position of the extension separator
    if marker > suffix_dot:            # chromosome is the file suffix
        return filename[marker:]
    return filename[marker:suffix_dot]  # chromosome sits mid-name
| 99 |
-
|
| 100 |
-
|
def locus_to_anchor(chr_name, locus, anchor_dir):
    """Return the chromosomal index of the anchor whose interval contains *locus*.

    Args:
        chr_name (:obj:`str`) : chromosome name of form chr<>; must match the
            anchor file name ``<chr_name>.bed`` under *anchor_dir*
        locus (:obj:`int`) : genomic coordinate to locate
        anchor_dir (:obj:`str`) : directory (including trailing separator,
            since it is string-concatenated) containing the reference
            anchor ``.bed`` files

    Returns:
        `int` : index of the first anchor whose [start, end] interval
        contains the locus

    Raises:
        IndexError: if no anchor on the chromosome contains the locus
    """
    anchor_list = pd.read_csv(anchor_dir + '%s.bed' % chr_name, sep='\t',
                              names=['chr', 'start', 'end', 'anchor'])  # read anchor list file
    loci_indices = (anchor_list['start'] <= locus) & (locus <= anchor_list['end']) & (
        anchor_list['chr'] == chr_name)
    # BUG FIX: removed a stray debug `print` of the matched index that was
    # leftover instrumentation and polluted stdout on every call.
    return int(np.where(loci_indices)[0][0])
| 108 |
-
|
| 109 |
-
|
def save_samples(input_dir, target_dir, matrix_size, multi_input=False, dir_3=None, combined_dir=None, anchor_dir=None, name='sample', chr_name='chr6', locus_start=25922605, locus_end=26709867, force_size=128, force_symmetry=True):
    """
    Saves sample matrices for use in training visualizations

    Args:
        input_dir (:obj:`str`) : directory containing input anchor to anchor files
        target_dir (:obj:`str`) : directory containing target anchor to anchor files
        matrix_size (:obj:`int`) : size of each sample matrix
        multi_input (:obj:`bool`) : set to True to save samples from each of the multiple input sets in ``input_dir``
        dir_3 (:obj:`str`) : optional directory containing third set of input anchor to anchor files
        combined_dir (:obj:`str`) : optional directory containing combined target anchor to anchor files
        anchor_dir (:obj:`str`) : directory containing anchor reference ``.bed`` files
        name (:obj:`str`) : each saved sample file will begin with this string

    NOTE(review): the original docstring documented ``chr_index``/``locus``
    parameters that do not exist in the signature; the actual parameters are
    ``chr_name``, ``locus_start``/``locus_end``, ``force_size`` and
    ``force_symmetry``.
    """
    global data_dir
    global sparse_data_dir
    # Ensure the sparse cache directory exists (mirrors the module init).
    try:
        os.mkdir(sparse_data_dir)
    except FileExistsError as e:
        pass
    if multi_input:
        # In multi-input mode, each subdirectory of input_dir is one input set.
        input_folder_1 = os.listdir(input_dir)[0] + '/'
        input_folder_2 = os.listdir(input_dir)[1] + '/'
        try:
            input_folder_3 = os.listdir(input_dir)[2] + '/'
        except IndexError:  # fewer than three input sets is tolerated
            pass
    # NOTE(review): references input_folder_1, which is only bound when
    # multi_input is True — this likely raises NameError for the single-input
    # path; confirm the intended indentation/branching.
    chr_index = min(int(chr_name.replace('chr', '')), len(os.listdir(input_dir + input_folder_1)) - 1)
    print('Saving samples from', chr_name, '...')
    if (name == 'enhance' or name == 'val_enhance') and multi_input:
        # Enhance mode: one input set paired directly with the target set.
        matrix_1 = load_chr_ratio_matrix_from_sparse(input_dir + input_folder_1, os.listdir(input_dir + input_folder_1)[chr_index], anchor_dir, force_symmetry=force_symmetry)
        matrix_2 = load_chr_ratio_matrix_from_sparse(target_dir, os.listdir(target_dir)[chr_index], anchor_dir, force_symmetry=force_symmetry)
        matrix_3 = None
        combined_matrix = None
    else:
        if multi_input:
            # Three input sets plus a combined target.
            matrix_1 = load_chr_ratio_matrix_from_sparse(input_dir + input_folder_1, os.listdir(input_dir + input_folder_1)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            matrix_2 = load_chr_ratio_matrix_from_sparse(input_dir + input_folder_2, os.listdir(input_dir + input_folder_2)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            matrix_3 = load_chr_ratio_matrix_from_sparse(input_dir + input_folder_3, os.listdir(input_dir + input_folder_3)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            combined_matrix = load_chr_ratio_matrix_from_sparse(target_dir, os.listdir(target_dir)[chr_index], anchor_dir, force_symmetry=force_symmetry)
        else:
            # Single input/target pair with optional third and combined sets.
            matrix_1 = load_chr_ratio_matrix_from_sparse(input_dir, os.listdir(input_dir)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            matrix_2 = load_chr_ratio_matrix_from_sparse(target_dir, os.listdir(target_dir)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            if dir_3 is not None:
                matrix_3 = load_chr_ratio_matrix_from_sparse(dir_3, os.listdir(dir_3)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            else:
                matrix_3 = None
            if combined_dir is not None:
                combined_matrix = load_chr_ratio_matrix_from_sparse(combined_dir, os.listdir(combined_dir)[chr_index], anchor_dir, force_symmetry=force_symmetry)
            else:
                combined_matrix = None
    # Convert the genomic window to anchor indices, then center a window of
    # force_size anchors on its midpoint.
    i = locus_to_anchor(chr_name, locus_start, anchor_dir)
    j = locus_to_anchor(chr_name, locus_end, anchor_dir)
    mid = int((i + j) / 2)
    i = max(0, mid - int(force_size / 2))
    j = i + force_size
    rows = slice(i, j)
    cols = slice(i, j)
    tile_1 = matrix_1[rows, cols].A  # densify the sparse sub-matrix
    tile_2 = matrix_2[rows, cols].A
    tile_1 = np.expand_dims(tile_1, -1)  # add channel dimension
    tile_1 = np.expand_dims(tile_1, 0)  # model expects a list of inputs
    tile_2 = np.expand_dims(tile_2, -1)
    tile_2 = np.expand_dims(tile_2, 0)
    if matrix_3 is not None:
        # NOTE(review): tiles 3 and combined slice [i:i+matrix_size, j:j+matrix_size]
        # (off-diagonal, sized matrix_size) while tiles 1/2 use the square
        # force_size window — confirm this asymmetry is intentional.
        tile_3 = matrix_3[i:i + matrix_size, j:j + matrix_size].A
        tile_3 = np.expand_dims(tile_3, -1)
        tile_3 = np.expand_dims(tile_3, 0)
        np.save('%s%s_3' % (data_dir, name), tile_3)
    if combined_matrix is not None:
        combined_tile = combined_matrix[i:i + matrix_size, j:j + matrix_size].A
        combined_tile = np.expand_dims(combined_tile, -1)
        combined_tile = np.expand_dims(combined_tile, 0)
        np.save('%s%s_combined' % (data_dir, name), combined_tile)
    np.save('%s%s_1' % (data_dir, name), tile_1)
    np.save('%s%s_2' % (data_dir, name), tile_2)
| 188 |
-
|
| 189 |
-
|
def load_chr_ratio_matrix_from_sparse(dir_name, file_name, anchor_dir, sparse_dir=None, anchor_list=None, chr_name=None, dummy=5, ignore_sparse=False, force_symmetry=True, use_raw=False):
    """
    Loads data as a sparse matrix by either reading a precompiled sparse matrix or an anchor to anchor file which is converted to sparse CSR format.
    Ratio values are computed using the observed (obs) and expected (exp) values:

    .. math::
        ratio = \\frac{obs + dummy}{exp + dummy}

    Args:
        dir_name (:obj:`str`) : directory containing the anchor to anchor or precompiled (.npz) sparse matrix file
        file_name (:obj:`str`) : name of anchor to anchor or precompiled (.npz) sparse matrix file
        anchor_dir (:obj:`str`) : directory containing the reference anchor ``.bed`` files
        dummy (:obj:`int`) : dummy value to used when computing ratio values
        ignore_sparse (:obj:`bool`) : set to True to ignore precompiled sparse matrices even if they exist

    Returns:
        ``scipy.sparse.csr_matrix``: sparse matrix of ratio values
    """
    global data_dir
    global sparse_data_dir
    if chr_name is None:
        chr_name = get_chromosome_from_filename(file_name)
    sparse_rep_dir = dir_name[dir_name[: -1].rfind('/') + 1:]  # directory where the pre-compiled sparse matrices are saved
    if sparse_dir is not None:
        sparse_data_dir = sparse_dir  # rebinds the module-level cache directory
    os.makedirs(os.path.join(sparse_data_dir, sparse_rep_dir), exist_ok=True)
    if file_name.endswith('.npz'):  # loading pre-combined and pre-compiled sparse data
        sparse_matrix = scipy.sparse.load_npz(dir_name + file_name)
    else:  # load from file name
        if file_name + '.npz' in os.listdir(os.path.join(sparse_data_dir, sparse_rep_dir)) and not ignore_sparse:  # check if pre-compiled data already exists
            sparse_matrix = scipy.sparse.load_npz(os.path.join(sparse_data_dir, sparse_rep_dir, file_name + '.npz'))
        else:  # otherwise generate sparse matrix from anchor2anchor file and save pre-compiled data
            if anchor_list is None:
                if anchor_dir is None:
                    # NOTE(review): this asserts a non-empty string, which is always
                    # truthy — it never fails, so a missing anchor source falls
                    # through to the read_csv below and crashes there instead.
                    assert 'You must supply either an anchor reference list or the directory containing one'
                anchor_list = pd.read_csv(os.path.join(anchor_dir, '%s.bed' % chr_name), sep='\t',
                                          names=['chr', 'start', 'end', 'anchor'])  # read anchor list file
            matrix_size = len(anchor_list)  # matrix size is needed to construct sparse CSR matrix
            anchor_dict = anchor_list_to_dict(anchor_list['anchor'].values)  # convert to anchor --> index dictionary
            try:  # first try reading anchor to anchor file as <a1> <a2> <obs> <exp>
                chr_anchor_file = pd.read_csv(
                    os.path.join(dir_name, file_name),
                    delimiter='\t',
                    names=['anchor1', 'anchor2', 'obs', 'exp'],
                    usecols=['anchor1', 'anchor2', 'obs', 'exp'])  # read chromosome anchor to anchor file
                rows = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor1'].values)  # convert anchor names to row indices
                cols = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor2'].values)  # convert anchor names to column indices
                ratio = (chr_anchor_file['obs'] + dummy) / (chr_anchor_file['exp'] + dummy)  # compute matrix ratio value
                sparse_matrix = scipy.sparse.csr_matrix((ratio, (rows, cols)), shape=(matrix_size, matrix_size))  # construct sparse CSR matrix
            # NOTE(review): bare `except:` also catches KeyboardInterrupt and
            # SystemExit; a narrower (ValueError, KeyError) would suffice.
            except:  # otherwise read anchor to anchor file as <a1> <a2> <ratio>
                chr_anchor_file = pd.read_csv(
                    os.path.join(dir_name, file_name),
                    delimiter='\t',
                    names=['anchor1', 'anchor2', 'ratio'],
                    usecols=['anchor1', 'anchor2', 'ratio'])
                rows = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor1'].values)  # convert anchor names to row indices
                cols = np.vectorize(anchor_to_locus(anchor_dict))(chr_anchor_file['anchor2'].values)  # convert anchor names to column indices
                if use_raw:
                    # NOTE(review): this branch reads 'obs', which the 3-column
                    # fallback format does not have — raises KeyError; confirm.
                    sparse_matrix = scipy.sparse.csr_matrix((chr_anchor_file['obs'], (rows, cols)), shape=(
                        matrix_size, matrix_size))  # construct sparse CSR matrix
                else:
                    sparse_matrix = scipy.sparse.csr_matrix((chr_anchor_file['ratio'], (rows, cols)), shape=(matrix_size, matrix_size))  # construct sparse CSR matrix
            if force_symmetry:
                upper_sum = triu(sparse_matrix, k=1).sum()
                lower_sum = tril(sparse_matrix, k=-1).sum()
                if upper_sum == 0 or lower_sum == 0:  # matrix is one-sided: mirror it
                    sparse_matrix = sparse_matrix + sparse_matrix.transpose()
                sparse_triu = scipy.sparse.triu(sparse_matrix)
                sparse_matrix = sparse_triu + sparse_triu.transpose()
            if not ignore_sparse:
                scipy.sparse.save_npz(os.path.join(sparse_data_dir, sparse_rep_dir, file_name), sparse_matrix)  # save precompiled data
    return sparse_matrix
| 262 |
-
|
| 263 |
-
|
def split_matrix(input_filename,
                 input_matrix,
                 target_matrix,
                 input_batch,
                 target_batch,
                 matrix_size,
                 step_size,
                 batch_size,
                 n_matrices,
                 start_index,
                 normalize,
                 shuffle,
                 random_steps,
                 diagonal_only,
                 upper_triangular_only):
    """
    Generator function to split input and target sparse matrices into patches which are used for training and prediction.

    Args:
        input_filename (:obj:`str`): name of file which is being used to generate ratio matrix patches
        input_matrix (:obj:`scipy.sparse.csr_matrix`) : sparse CSR input matrix
        target_matrix (:obj:`scipy.sparse.csr_matrix`) : sparse CSR target matrix
        input_batch (:obj:`numpy.array`) : current array of samples in the input batch being generated
        target_batch (:obj:`numpy.array`) : current array of samples in the target batch being generated
        matrix_size (:obj:`int`) : size of each patch; pass -1 to yield the whole chromosome as one patch
        step_size (:obj:`int`) : size of steps used when generating batches. Values less than ``matrix size`` will include overlapping regions
        batch_size (:obj:`int`) : number of patches to use in each batch
        n_matrices (:obj:`int`) : current number of matrix patches in the batch being generated
        start_index (:obj:`int`) : starting anchor index of the matrix splitting, ensures batches are not identical across epochs
        normalize (:obj:`bool`) : set to True to normalize all ratio values between ``[0, 1]``
        shuffle (:obj:`bool`) : set to True to randomly split the matrix instead of sliding across sequentially
        random_steps (:obj:`bool`) : set to True add a random offset to each step between patch indices
        diagonal_only (:obj:`bool`) : set to True to only generate patches along the diagonal of the matrix
        upper_triangular_only (:obj:`bool`) : set to True to skip patches below the diagonal

    Returns:
        (``numpy.array``, ``numpy.array``, ``str``): input batch, target batch, and batch label
    """
    if matrix_size == -1:
        # Whole-chromosome mode: densify and add batch + channel dimensions.
        input_matrix = np.expand_dims(np.expand_dims(input_matrix.A, 0), -1)
        target_matrix = np.expand_dims(np.expand_dims(target_matrix.A, 0), -1)
        yield input_matrix, target_matrix, input_filename + '_full_chr'
    else:
        if random_steps:  # random offset from step size intervals
            start_index = np.random.randint(0, step_size)
        row_indices = np.arange(start_index, input_matrix.shape[0], step_size)
        col_indices = np.arange(start_index, input_matrix.shape[1], step_size)
        if shuffle:  # shuffle slicing indices
            np.random.shuffle(row_indices)
            np.random.shuffle(col_indices)
        for i in row_indices:
            for j in col_indices:
                if abs(i - j) > 384:  # max distance from diagonal with actual values
                    continue
                if diagonal_only and i != j:
                    continue
                if upper_triangular_only and i < j:
                    continue
                input_tile = input_matrix[i:i + matrix_size, j:j + matrix_size].A
                target_tile = target_matrix[i:i + matrix_size, j:j + matrix_size].A
                #input_tile = np.expand_dims(input_tile, axis=-1)
                #target_tile = np.expand_dims(target_tile, axis=-1)
                input_batch.append(input_tile)
                target_batch.append(target_tile)
                n_matrices += 1
                if n_matrices == batch_size:
                    try:
                        # Reshape fails with ValueError when edge tiles are
                        # smaller than matrix_size; that batch is discarded.
                        input_batch = np.reshape(np.array(input_batch), (n_matrices, matrix_size, matrix_size, 1))
                        target_batch = np.reshape(np.array(target_batch), (n_matrices, matrix_size, matrix_size, 1))
                        if normalize:
                            # NOTE(review): normalize_matrix is not defined in this
                            # file chunk — presumably a sibling helper; confirm.
                            input_batch = normalize_matrix(input_batch)
                            target_batch = normalize_matrix(target_batch)

                        yield input_batch, target_batch, input_filename + '_' + str(i)
                    except ValueError as e:  # reached end of valid values
                        input_batch = []
                        target_batch = []
                        n_matrices = 0
                        pass
                    # Reset the accumulators for the next batch.
                    input_batch = []
                    target_batch = []
                    n_matrices = 0
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
def generate_batches_from_chr(input_dir,
                              target_dir,
                              matrix_size,
                              batch_size,
                              anchor_dir=None,
                              step_size=64,
                              multi_input=False,
                              shuffle=False,
                              random_steps=False,
                              normalize=False,
                              diagonal_only=False,
                              upper_triangular_only=False,
                              force_symmetry=True,
                              ignore_XY=True,
                              ignore_even_chr=False,
                              ignore_odd_chr=False):
    """
    Generator function which generates batches of input target pairs to train the model:

    .. code-block:: python
        :linenos:

        for epoch_i in range(epochs):
            for input_batch, target_batch, batch_label in generate_batches_from_chr(input_dir,
                                                                                    target_dir,
                                                                                    matrix_size=128,
                                                                                    batch_size=64,
                                                                                    step_size=64,
                                                                                    shuffle=True,
                                                                                    random_steps=True,
                                                                                    anchor_dir=anchor_dir):
                step_start_time = time.time()
                loss = model.train_on_batch(noisy_batch, target_batch)
                print("%d-%d %ds [Loss: %.3f][PSNR: %.3f, Jaccard: %.3f]" %
                      (epoch_i,
                       step_i,
                       time.time() - step_start_time,
                       loss[0],
                       loss[1],
                       loss[2]
                       ))
                step_i += 1

    Args:
        input_dir (:obj:`str`) : directory containing all input data to be generated
        target_dir (:obj:`str`) : directory containing all target data to be generated
        matrix_size (:obj:`int`) : size of each patch that the full ratio matrix is divided into
        batch_size (:obj:`int`) : number of patches to use in each batch
        anchor_dir (:obj:`str`) : directory containing the reference anchor ``.bed`` files
        step_size (:obj:`int`) : size of steps used when generating batches. Values less than ``matrix size`` will include overlapping regions
        multi_input (:obj:`bool`) : set to True to save samples from each of the multiple input sets in ``input_dir``
        shuffle (:obj:`bool`) : set to True to randomly split the matrix instead of sliding across sequentially
        random_steps (:obj:`bool`) : set to True add a random offset to each step between patch indices
        diagonal_only (:obj:`bool`) : set to True to only generate patches along the diagonal of the matrix
        upper_triangular_only (:obj:`bool`) : set to True to only generate patches from the upper triangle of the matrix
        normalize (:obj:`bool`) : set to True to normalize all ratio values between ``[0, 1]``
        force_symmetry (:obj:`bool`) : passed through to ``load_chr_ratio_matrix_from_sparse`` when loading each chromosome matrix
        ignore_XY (:obj:`bool`) : set to True to ignore chromosomes X and Y when generating batches
        ignore_even_chr (:obj:`bool`) : set to True to ignore all even numbered chromosomes
        ignore_odd_chr (:obj:`bool`) : set to True to ignore all odd numbered chromosomes

    Returns:
        (``numpy.array``, ``numpy.array``, ``str``): input batch, target batch, and batch label
    """
    input_batch = []
    target_batch = []
    if multi_input:
        input_folders = os.listdir(input_dir)  # get list of all folders in input dir
        input_files = sorted(os.listdir(input_dir + input_folders[0]))  # get list of input files (assume all inputs have same name pattern)
        target_files = sorted(os.listdir(target_dir))
        '''
        # remove duplicates of chromosomes
        tmp = []
        for f in input_files:
            if '.p_val' in f and f.replace('.p_val', '') in input_files:
                tmp.append(f.replace('.p_val', ''))
        if len(tmp) > 0:
            input_files = tmp
        print(input_files)
        '''
    else:
        input_files = sorted(os.listdir(input_dir))
        target_files = sorted(os.listdir(target_dir))

    if shuffle:  # shuffle chromosome file order (input/target pairs kept aligned by zipping first)
        c = list(zip(input_files, target_files))
        random.shuffle(c)
        input_files, target_files = zip(*c)

    if ignore_XY:
        # drop sex chromosomes from both file lists
        remove_XY = lambda files: [f for f in files if 'chrX' not in f and 'chrY' not in f]
        input_files = remove_XY(input_files)
        target_files = remove_XY(target_files)

    if ignore_odd_chr:
        # fun one-liner to remove all odd-numbered chromosomes
        # (the chromosome number is the text between 'chr' and '.matrix' in the filename)
        remove_odds = lambda files: [f for f in files if f[f.index('chr') + 3:f.index('.matrix')].isdigit() and int(f[f.index('chr') + 3:f.index('.matrix')]) % 2 == 0]
        input_files = remove_odds(input_files)
        target_files = remove_odds(target_files)
    elif ignore_even_chr:
        remove_evens = lambda files: [f for f in files if f[f.index('chr') + 3:f.index('.matrix')].isdigit() and int(f[f.index('chr') + 3:f.index('.matrix')]) % 2 != 0]
        input_files = remove_evens(input_files)
        target_files = remove_evens(target_files)

    for input_file, target_file in zip(input_files, target_files):
        n_matrices = 0
        start_index = 0
        if multi_input:
            # one shared target per chromosome, paired with every input set in turn
            target_matrix = load_chr_ratio_matrix_from_sparse(target_dir, target_file, anchor_dir, force_symmetry=force_symmetry)
            for input_folder in input_folders:
                input_folder += '/'
                input_matrix = load_chr_ratio_matrix_from_sparse(input_dir + input_folder, input_file, anchor_dir, force_symmetry=force_symmetry)
                for input_batch, target_batch, figure_title in split_matrix(input_filename=input_folder + input_file,
                                                                            input_matrix=input_matrix,
                                                                            target_matrix=target_matrix,
                                                                            input_batch=input_batch,
                                                                            target_batch=target_batch,
                                                                            matrix_size=matrix_size,
                                                                            step_size=step_size,
                                                                            batch_size=batch_size,
                                                                            n_matrices=n_matrices,
                                                                            start_index=start_index,
                                                                            normalize=normalize,
                                                                            shuffle=shuffle,
                                                                            random_steps=random_steps,
                                                                            diagonal_only=diagonal_only,
                                                                            upper_triangular_only=upper_triangular_only):
                    yield input_batch, target_batch, figure_title
                # NOTE(review): indentation reconstructed — resets assumed to follow the
                # split loop so state is clean before the next input folder; confirm
                input_batch = []
                target_batch = []
                n_matrices = 0
        else:
            input_matrix = load_chr_ratio_matrix_from_sparse(input_dir, input_file, anchor_dir, force_symmetry=force_symmetry)
            target_matrix = load_chr_ratio_matrix_from_sparse(target_dir, target_file, anchor_dir, force_symmetry=force_symmetry)
            for input_batch, target_batch, figure_title in split_matrix(input_filename=input_file,
                                                                        input_matrix=input_matrix,
                                                                        target_matrix=target_matrix,
                                                                        input_batch=input_batch,
                                                                        target_batch=target_batch,
                                                                        matrix_size=matrix_size,
                                                                        step_size=step_size,
                                                                        batch_size=batch_size,
                                                                        n_matrices=n_matrices,
                                                                        start_index=start_index,
                                                                        normalize=normalize,
                                                                        shuffle=shuffle,
                                                                        random_steps=random_steps,
                                                                        diagonal_only=diagonal_only,
                                                                        upper_triangular_only=upper_triangular_only):
                yield input_batch, target_batch, figure_title
            # reset accumulators before moving to the next chromosome file
            input_batch = []
            target_batch = []
            n_matrices = 0
|
| 501 |
-
def get_matrices_from_loci(input_dir,
                           target_dir,
                           matrix_size,
                           loci,
                           anchor_dir=None):
    """
    Generator function for getting sample matrices at specific loci

    Args:
        input_dir (:obj:`str`) : directory containing all input data to be generated
        target_dir (:obj:`str`) : directory containing all target data to be generated
        matrix_size (:obj:`int`) : size of each patch that the full ratio matrix is divided into
        loci (:obj:`dict`) : dictionary of chromosome locus pairs
        anchor_dir (:obj:`str`) : directory containing the reference anchor ``.bed`` files

    Returns:
        (``numpy.array``, ``numpy.array``, ``str``, ``int``, ``int``): input matrix, target matrix, chromosome name, locus, and anchor index
    """
    input_files = sorted_nicely(os.listdir(input_dir))
    target_files = sorted_nicely(os.listdir(target_dir))

    for file_1, file_2 in zip(input_files, target_files):
        chr_name = get_chromosome_from_filename(file_1)
        if chr_name in loci.keys():
            anchor_list = pd.read_csv(anchor_dir + '%s.bed' % chr_name, sep='\t',
                                      names=['chr', 'start', 'end', 'anchor'])  # read anchor list file
        else:
            continue  # no locus requested for this chromosome
        input_matrix = load_chr_ratio_matrix_from_sparse(input_dir, file_1, anchor_dir)
        target_matrix = load_chr_ratio_matrix_from_sparse(target_dir, file_2, anchor_dir)

        # boolean mask: True for anchors whose [start, end] interval contains the requested locus
        loci_indices = (anchor_list['start'] <= loci[chr_name]) & (loci[chr_name] <= anchor_list['end']) & (anchor_list['chr'] == chr_name)

        for i, locus in enumerate(loci_indices):
            if locus:
                # square diagonal tile anchored at the matching anchor index
                input_tile = input_matrix[i:i + matrix_size, i:i + matrix_size].A
                target_tile = target_matrix[i:i + matrix_size, i:i + matrix_size].A
                # add trailing channel dim then leading batch dim -> (1, size, size, 1)
                input_tile = np.expand_dims(input_tile, axis=-1)
                target_tile = np.expand_dims(target_tile, axis=-1)
                input_tile = np.expand_dims(input_tile, axis=0)
                target_tile = np.expand_dims(target_tile, axis=0)

                yield input_tile, target_tile, chr_name, loci[chr_name], i
|
| 546 |
-
def get_top_loops(matrix_data_dir, reference_dir, num_top_loops=None, q=None, dummy=5):
    """
    Ranks the ratio values of all chromosomes and computes the cutoff value for taking the top ``num_top_loops`` or the ``q`` th quantile

    Results are cached on disk in ``data_dir + 'top_loop_values.pickle'`` keyed by
    ``matrix_data_dir`` plus the quantile/rank used, so repeated calls are cheap.

    Args:
        matrix_data_dir (:obj:`str`) : directory containing the anchor to anchor files used to count loops
        reference_dir (:obj:`str`) : directory containing the reference anchor ``.bed`` files
        num_top_loops (:obj:`str`) : number of top loops to consider
        q (:obj:`str`) : quantile range of loops to consider
        dummy (:obj:`str`) : dummy value to use to calculate each ratio value

    Returns:
        ``float`` : cutoff value for top loops
    """
    global data_dir  # module-level cache location
    if 'top_loop_values.pickle' in os.listdir(data_dir):
        with open(data_dir + 'top_loop_values.pickle', 'rb') as handle:
            top_loop_values = pickle.load(handle)
    else:
        top_loop_values = {}
    if q is not None:  # select top loops based on quantile not quantity
        if matrix_data_dir + str(q) in top_loop_values.keys():
            genome_min_loop_value = top_loop_values[matrix_data_dir + str(q)]
        else:
            top_loops = np.array([])
            for file in os.listdir(matrix_data_dir):
                sparse = load_chr_ratio_matrix_from_sparse(matrix_data_dir, file, reference_dir, dummy=dummy)
                sparse = scipy.sparse.triu(sparse)  # keep upper triangle so each loop is counted once
                nonzero_indices = sparse.nonzero()
                top_loops = np.append(top_loops, sparse.tocsr()[nonzero_indices].A)
            genome_min_loop_value = np.quantile(top_loops, q=q)
            top_loop_values[matrix_data_dir + str(q)] = genome_min_loop_value
            print('%s %.4f quantile loops cutoff value: %f' % (matrix_data_dir, q, genome_min_loop_value))
    else:  # select top loops based on rank
        if matrix_data_dir + str(num_top_loops) in top_loop_values.keys():
            genome_min_loop_value = top_loop_values[matrix_data_dir + str(num_top_loops)]
        else:
            top_loops = np.array([])
            for file in os.listdir(matrix_data_dir):
                sparse = load_chr_ratio_matrix_from_sparse(matrix_data_dir, file, reference_dir, dummy=dummy)
                sparse = scipy.sparse.triu(sparse)
                # keep only the running top-k across chromosomes to bound memory
                loop_list = np.append(top_loops, sparse.data)
                top_loops = loop_list[np.argsort(-loop_list)[:num_top_loops]]
            genome_min_loop_value = top_loops[-1]  # smallest value among the top-k = cutoff
            top_loop_values[matrix_data_dir + str(num_top_loops)] = genome_min_loop_value
            print('%s top %d loops cutoff value: %f' % (matrix_data_dir, num_top_loops, genome_min_loop_value))
    # NOTE(review): original indentation lost — cache write assumed to happen on every
    # call (harmless if nothing changed); confirm it was not branch-local
    with open(data_dir + 'top_loop_values.pickle', 'wb') as handle:
        pickle.dump(top_loop_values, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return genome_min_loop_value
-
|
| 598 |
-
def anchor_list_to_dict(anchors):
    """
    Converts the array of anchor names to a dictionary mapping each anchor to its chromosomal index

    If an anchor name appears more than once, the index of its last occurrence wins
    (same behavior as the previous loop-based implementation).

    Args:
        anchors (:obj:`numpy.array`) : array of anchor name values

    Returns:
        `dict` : dictionary mapping each anchor to its index from the array
    """
    # dict comprehension replaces the manual append loop; identical result
    return {anchor: i for i, anchor in enumerate(anchors)}
-
|
| 614 |
-
def anchor_to_locus(anchor_dict):
    """
    Function to convert an anchor name to its genomic locus which can be easily vectorized

    Args:
        anchor_dict (:obj:`dict`) : dictionary mapping each anchor to its chromosomal index

    Returns:
        `function` : function which returns the locus of an anchor name
    """
    def lookup(anchor):
        # closure over anchor_dict: raises KeyError for unknown anchors
        return anchor_dict[anchor]

    return lookup
-
|
| 629 |
-
def sorted_nicely(l):
    """
    Sorts an iterable object according to file system defaults
    (natural order: embedded digit runs compare numerically, e.g. chr2 < chr10)

    Args:
        l (:obj:`iterable`) : iterable object containing items which can be interpreted as text

    Returns:
        `iterable` : sorted iterable
    """
    def _chunk(text):
        # digit runs become ints so they compare numerically, not lexically
        return int(text) if text.isdigit() else text

    def _natural_key(item):
        return [_chunk(piece) for piece in re.split('([0-9]+)', item)]

    return sorted(l, key=_natural_key)
-
|
| 643 |
-
def normalize_matrix(matrix):
    """
    Normalize ratio values between ``[0, 1]`` using the following function:

    .. math::
        f(x) = 1 - \\frac{1}{1 + x}

    .. image:: _static/normalization_function_plot.PNG
        :scale: 100 %
        :align: center

    Args:
        matrix (:obj:`numpy.array`) : matrix of ratio values

    Returns:
        ``numpy.array`` : matrix of normalized ratio values between ``[0, 1]``
    """
    # elementwise 1 - 1/(1+x); works on scalars and numpy arrays alike
    reciprocal = 1 / (1 + matrix)
    return 1 - reciprocal
-
|
| 663 |
-
def denormalize_matrix(matrix):
    """
    Reverse the normalization of a matrix to set all valid normalized values back to their original ratio values using the following function:

    .. math::

        f^{-1}(x) = \\frac{1}{1 - g(x)} - 1 &\\quad \\mbox{where} &\\quad g(x) = \\begin{cases} 0.98, & \\mbox{if } x > 1 \\\\ 0, & \\mbox{if } x < 0 \\\\ x & \\mbox{ otherwise} \\end{cases}

    We apply the function :math:`g(x)` to remove invalid values that could be in a predicted result and because :math:`f^{-1}(x)` blows up as we approach 1:

    .. image:: _static/denormalization_function_plot.PNG
        :scale: 100 %
        :align: center

    The input array is no longer modified: clipping is performed on a copy
    (the previous implementation clipped the caller's array in place).

    Args:
        matrix (:obj:`numpy.array`) : matrix of normalized ratio values

    Returns:
        ``numpy.array`` : matrix of ratio values
    """
    # fix: clip on a copy so the caller's array is not silently mutated
    clipped = np.array(matrix, copy=True)
    clipped[clipped > 1] = 0.98  # cap near 1 since f^-1 diverges as x -> 1
    clipped[clipped < 0] = 0     # negative normalized values are invalid; map to ratio 0
    return (1 / (1 - clipped)) - 1
-
|
| 688 |
-
def draw_heatmap(matrix, color_scale, ax=None, return_image=False):
    """
    Display ratio heatmap containing only strong signals (values > 1 or 0.98th quantile)

    Args:
        matrix (:obj:`numpy.array`) : ratio matrix to be displayed
        color_scale (:obj:`int`) : max ratio value to be considered strongest by color mapping; 0 selects an automatic scale
        ax (:obj:`matplotlib.axes.Axes`) : axes which will contain the heatmap. If None, new axes are created
        return_image (:obj:`bool`) : set to True to return the image obtained from drawing the heatmap with the generated color map

    Returns:
        ``numpy.array`` : if ``return_image`` is set to True, return the heatmap as an array
    """
    # Build 20 color-bin boundaries starting just above ratio 1 (values <= 1 map to white)
    if color_scale != 0:
        # fixed scale: 18 even steps up to color_scale, last break catches the matrix max
        breaks = np.append(np.arange(1.001, color_scale, (color_scale - 1.001) / 18), np.max(matrix))
    elif np.max(matrix) < 2:
        # weak matrix: spread the breaks across the whole observed range
        breaks = np.arange(1.001, np.max(matrix), (np.max(matrix) - 1.001) / 19)
    else:
        # automatic scale from the 95th percentile so outliers don't wash out the map
        step = (np.quantile(matrix, q=0.95) - 1) / 18
        up = np.quantile(matrix, q=0.95) + 0.011
        if up < 2:
            up = 2
            step = 0.999 / 18
        breaks = np.append(np.arange(1.001, up, step), np.max(matrix))

    n_bin = 20  # Discretizes the interpolation into bins
    # white -> red ramp, one entry per bin (except the implicit top bin)
    colors = ["#FFFFFF", "#FFE4E4", "#FFD7D7", "#FFC9C9", "#FFBCBC", "#FFAEAE", "#FFA1A1", "#FF9494", "#FF8686",
              "#FF7979", "#FF6B6B", "#FF5E5E", "#FF5151", "#FF4343", "#FF3636", "#FF2828", "#FF1B1B", "#FF0D0D",
              "#FF0000"]
    cmap_name = 'my_list'
    # Create the colormap
    cm = matplotlib.colors.LinearSegmentedColormap.from_list(
        cmap_name, colors, N=n_bin)
    norm = matplotlib.colors.BoundaryNorm(breaks, 20)
    # Fewer bins will result in "coarser" colomap interpolation
    if ax is None:
        _, ax = plt.subplots()
    img = ax.imshow(matrix, cmap=cm, norm=norm, interpolation='nearest')
    if return_image:
        plt.close()  # discard the figure; only the pixel data is wanted
        return img.get_array()
-
|
| 731 |
-
def get_heatmap(matrix, color_scale):
    """
    Build an RGBA heatmap array for a ratio matrix without drawing any figure.

    Same white-to-red binning as ``draw_heatmap`` but the automatic scale uses the
    0.98 quantile, and the alpha channel is set so only values > 1.2 are opaque.

    Args:
        matrix (:obj:`numpy.array`) : ratio matrix to be rendered
        color_scale (:obj:`int`) : max ratio value to be considered strongest by color mapping; 0 selects an automatic scale

    Returns:
        ``numpy.array`` : RGBA array (same height/width as ``matrix``)
    """
    if color_scale != 0:
        # fixed scale: 18 even steps up to color_scale, last break catches the matrix max
        breaks = np.append(np.arange(1.001, color_scale, (color_scale - 1.001) / 18), np.max(matrix))
    elif np.max(matrix) < 2:
        # weak matrix: spread the breaks across the whole observed range
        breaks = np.arange(1.001, np.max(matrix), (np.max(matrix) - 1.001) / 19)
    else:
        # automatic scale from the 98th percentile so outliers don't wash out the map
        step = (np.quantile(matrix, q=0.98) - 1) / 18
        up = np.quantile(matrix, q=0.98) + 0.011
        if up < 2:
            up = 2
            step = 0.999 / 18
        breaks = np.append(np.arange(1.001, up, step), np.max(matrix))

    n_bin = 20  # Discretizes the interpolation into bins
    colors = ["#FFFFFF", "#FFE4E4", "#FFD7D7", "#FFC9C9", "#FFBCBC", "#FFAEAE", "#FFA1A1", "#FF9494", "#FF8686",
              "#FF7979", "#FF6B6B", "#FF5E5E", "#FF5151", "#FF4343", "#FF3636", "#FF2828", "#FF1B1B", "#FF0D0D",
              "#FF0000"]
    cmap_name = 'my_list'
    # Create the colormap
    cm = matplotlib.colors.LinearSegmentedColormap.from_list(
        cmap_name, colors, N=n_bin)
    norm = matplotlib.colors.BoundaryNorm(breaks, 20)
    # Fewer bins will result in "coarser" colomap interpolation
    m = matplotlib.cm.ScalarMappable(norm=norm, cmap=cm)
    heatmap = m.to_rgba(matrix)
    # make weak signals fully transparent: alpha = 1 only where ratio > 1.2
    mask = matrix > 1.2
    heatmap[..., -1] = np.ones_like(mask) * mask
    return heatmap
-
|
| 761 |
-
def save_images_to_video(output_name, out_dir):
    """
    Saves all training visualization images to a video file

    Args:
        output_name (:obj:`str`) : filename for the saved video file
        out_dir (:obj:`str`) : directory where the ``.avi`` file is written
    """
    # NOTE(review): frames are always read from the hard-coded local 'images' folder,
    # not from out_dir -- confirm this is intentional
    image_folder = 'images'
    video_name = out_dir + output_name + '.avi'

    images = [img for img in sorted(os.listdir(image_folder)) if img.endswith(".png")]
    frame = cv2.imread(os.path.join(image_folder, images[0]))  # first frame fixes the video dimensions
    height, width, layers = frame.shape

    video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 29.94, (width, height))

    for image in images:
        video.write(cv2.imread(os.path.join(image_folder, image)))

    # hold the final frame for 150 extra frames (~5 s at ~29.94 fps)
    last_frame = cv2.imread(os.path.join(image_folder, images[-1]))
    for _ in range(150):
        video.write(last_frame)

    cv2.destroyAllWindows()
    video.release()
-
|
| 788 |
-
def get_model_memory_usage(batch_size, model):
    """
    Estimates the amount of memory required to train the model using the current batch size.

    Sums per-sample activation sizes across all layer outputs plus the parameter
    counts, scaled by the backend float width.

    Args:
        batch_size (:obj:`int`) : number of training samples in each batch
        model (:obj:`keras.models.Model`) : uncompiled Keras model to be trained

    Returns:
        ``float`` : estimated memory usage in GB
    """
    # NOTE(review): K is assumed to be the Keras backend module imported elsewhere
    # in this file -- confirm
    # total scalar count across all layer outputs (None = batch dim, skipped)
    shapes_mem_count = 0
    for l in model.layers:
        single_layer_mem = 1
        for s in l.output_shape:
            if s is None:
                continue
            single_layer_mem *= s
        shapes_mem_count += single_layer_mem

    trainable_count = np.sum([K.count_params(p) for p in set(model.trainable_weights)])
    non_trainable_count = np.sum([K.count_params(p) for p in set(model.non_trainable_weights)])

    # bytes per scalar from the backend float precision (default float32 = 4 bytes)
    number_size = 4.0
    if K.floatx() == 'float16':
        number_size = 2.0
    if K.floatx() == 'float64':
        number_size = 8.0

    total_memory = number_size*(batch_size*shapes_mem_count + trainable_count + non_trainable_count)
    gbytes = np.round(total_memory / (1024.0 ** 3), 3)
    return gbytes
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|