Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import os,uuid,pysam
|
| 3 |
+
import time,gdown
|
| 4 |
+
import pyBigWig,pickle
|
| 5 |
+
from scipy.sparse import csr_matrix
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
def atac_bwtonpz(atac_file):
    """Convert a bigWig signal track into a pickled dict of sparse vectors.

    For every chromosome reported by the bigWig file, the name is mapped to a
    key: ``'chrX'`` becomes ``'X'``; otherwise the text after the first three
    characters must parse as an integer (``'chr7'`` -> ``7``). Chromosomes
    whose name does not parse are skipped. The per-base signal is expanded to
    a dense float32 vector, truncated to a whole multiple of 1000 positions
    and stored as a ``scipy.sparse.csr_matrix``.

    The dict is pickled to ``ATAC/atac_<stem>.pickle``, where ``<stem>`` is
    the file name of ``atac_file`` without directory and extension. The
    ``ATAC`` directory must already exist.

    Args:
        atac_file: Path to the input bigWig file.

    Returns:
        None. The result is written to disk.
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            if chrom == 'chrX':
                key = 'X'
            else:
                try:
                    key = int(chrom[3:])
                except ValueError:
                    # Not an autosome-style name (e.g. chrY, chrM, scaffolds).
                    continue

            # Allocate float32 directly: same stored values as a float64
            # buffer cast afterwards, but without the extra full-size copy.
            dense = np.zeros(length, dtype='float32')
            # intervals() may yield None for a chromosome without entries
            # (see pyBigWig docs); treat that as "no signal".
            for start, end, value in bw.intervals(chrom) or ():
                dense[start:end] = value

            # Drop the tail so the length is a multiple of 1000 positions.
            seq_length = length // 1000 * 1000
            signals[key] = csr_matrix(dense[:seq_length])
    finally:
        bw.close()

    # Build the output name from the base name only; prefixing the full input
    # path would point into a non-existent 'ATAC/atac_<dir>/' directory when
    # the input itself lives in a directory.
    stem = os.path.splitext(os.path.basename(atac_file))[0]
    with open('ATAC/atac_' + stem + '.pickle', 'wb') as f:
        pickle.dump(signals, f)
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _run_checked(command, error_message):
    """Run an external command given as an argument list; raise gr.Error on failure."""
    import subprocess  # local import: not part of the file's top-level imports

    try:
        finished = subprocess.run(command)
    except OSError as err:
        # Typically the executable is missing from PATH.
        raise gr.Error(error_message) from err
    if finished.returncode != 0:
        raise gr.Error(error_message)


def process(bam_file, progress=gr.Progress()):
    """Download an ATAC-seq BAM file and convert it to a pickled signal file.

    Steps: purge old files from ``ATAC/``, download the BAM (``wget`` when the
    URL contains ``'dropbox'``, otherwise ``gdown``), index it with pysam,
    compute a 1-bp RPGC-normalised bigWig with ``bamCoverage``, and convert
    the bigWig with ``atac_bwtonpz``. The BAM, its index and the bigWig are
    removed afterwards, whether or not the pipeline succeeded.

    Args:
        bam_file: URL of the BAM file as typed by the user.
        progress: Gradio progress tracker used to report the current step.

    Returns:
        ``'ATAC/atac_<uuid>.pickle'``, the path where ``atac_bwtonpz`` is
        expected to write its output for this request.

    Raises:
        gr.Error: If no URL was given, the download failed, or
            ``bamCoverage`` failed.
    """
    if not os.path.exists('ATAC'):
        os.mkdir('ATAC')
    else:
        # Remove everything left by earlier runs except bundled examples.
        # NOTE(review): this also deletes files of requests still in flight
        # if several requests run concurrently - confirm that is acceptable.
        for name in os.listdir('ATAC/'):
            stale = os.path.join('ATAC/', name)
            if not name.startswith('example') and os.path.isfile(stale):
                os.remove(stale)

    if bam_file is None or not bam_file.strip():
        raise gr.Error('Please enter a Dropbox or Google Drive link to a BAM file.')
    bam_file = bam_file.strip()

    fid = str(uuid.uuid4())
    print(bam_file, fid)

    bam_path = 'ATAC/' + fid + '.bam'
    bigwig_path = 'ATAC/' + fid + '.bigWig'

    try:
        progress(0.2, desc="Downloading BAM file " + bam_file)
        if 'dropbox' in bam_file:
            # Argument list instead of a shell string: the URL comes from the
            # web form and must never be interpreted by a shell. '--' stops
            # wget from treating a URL starting with '-' as an option.
            _run_checked(
                ['wget', '-O', bam_path, '--', bam_file],
                'Failed to download the BAM file from ' + bam_file,
            )
        else:
            gdown.download(url=bam_file, fuzzy=True, output=bam_path)
        if not os.path.isfile(bam_path) or os.path.getsize(bam_path) == 0:
            raise gr.Error('Failed to download the BAM file from ' + bam_file)

        progress(0.4, desc="Index BAM file")
        time.sleep(0.1)
        pysam.index(bam_path)

        progress(0.6, desc="Convert BAM file to bigWig file")
        time.sleep(0.1)
        _run_checked(
            [
                'bamCoverage', '--bam', bam_path, '-o', bigwig_path,
                '--outFileFormat', 'bigwig', '--normalizeUsing', 'RPGC',
                '--effectiveGenomeSize', '2913022398', '--Offset', '1',
                '--binSize', '1', '--numberOfProcessors', '6',
                '--blackListFileName', 'data/black_list.bed',
            ],
            'Failed to convert the BAM file to bigWig (bamCoverage error).',
        )

        progress(0.8, desc="Process bigWig file")
        time.sleep(0.1)
        atac_bwtonpz(bigwig_path)
    finally:
        # Always drop the large intermediates, even when a step failed.
        for leftover in (bam_path + '.bai', bigwig_path, bam_path):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

    return 'ATAC/atac_' + fid + '.pickle'
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Page-wide CSS: slightly larger list items in the instruction bullets.
css='li {font-size: 15px;}'

# Single-page UI: header and links, usage instructions, "App 1" (BAM
# preprocessing, served by `process`) and "App 2" (the model runner, embedded
# from a separate Hugging Face Space).
with gr.Blocks(theme=gr.themes.Monochrome(),css=css) as app:
    gr.Markdown('<h1 style="text-align: center; font-size: 2.2em"> '
                'A computational tool to use ATAC-seq to impute epigenome, transcriptome, and high-resolution chromatin contact maps</h1>')

    gr.HTML('<div style="text-align: center;">'
            '<a href="https://github.com/zzh24zzh/EPCOT_gradio" style="margin-right: 20px;font-size: 18px; color:black;">[Code]</a>'
            '<a href="https://www.biorxiv.org/content/10.1101/2022.05.23.493129v2" style="font-size: 18px; color:black;">[Paper]</a>'
            '</div>')
    gr.Markdown('## Instructions:\n'
                '- Use the first app to process ATAC-seq data by uploading an ATAC-seq BAM file.\n'
                '- Execute the model by uploading the processed ATAC-seq file to the "Run Model" interface in the second app.\n'
                '- Visualize the prediction results by uploading the predicted files from the "Run Model" interface to the "Visualize Prediction Results" interface.\n')
    gr.Markdown('<hr>')
    # The heading previously lacked `style="`, so the font size was ignored.
    gr.Markdown('<h1 style="font-size: 2em;"> App 1: Process ATAC-seq data</h1>\n'
                # '<p style="font-size: 15px;">Hosted on a server from Liu Lab</p>\n'
                '<p style="font-size: 15px;"><strong>Notice:</strong> Downsample the BAM file before upload if the size is too large</p>')
    with gr.Row():
        with gr.Column():
            inp=gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
            )
            btn2=gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
    # Submit runs the preprocessing pipeline and offers the result for download.
    btn2.click(fn=process, inputs=inp, outputs=out)

    gr.Markdown('<hr>')
    gr.Markdown('<h1 style="font-size: 2em;"> App 2: Run models and visualize prediction results</h1>\n'
                '<p style="font-size: 16px;">'
                'Hosted on <a href="https://huggingface.co/spaces/drjieliu/EPCOT" style="font-size: 16px;">Hugging Face Space</a> </p>')
    with gr.Column():
        gr.HTML('<iframe src="https://drjieliu-epcot.hf.space" '
                'border="none" width="100%" height=1200></iframe>')

# Queueing is needed for gr.Progress updates; `.queue()` replaces the
# deprecated `enable_queue=True` launch argument.
app.queue()
app.launch(debug=True)
|