| import gradio as gr | |
| import os,uuid,pysam | |
| import time,gdown | |
| import pyBigWig,pickle | |
| from scipy.sparse import csr_matrix | |
| import numpy as np | |
def atac_bwtonpz(atac_file):
    """Convert a bigWig signal track into a pickled dict of sparse vectors.

    For every chromosome in the bigWig file a dense per-base signal is
    built from the file's intervals, truncated to a multiple of 1000
    positions and stored as a float32 ``csr_matrix`` (a single row).

    Chromosome names are mapped to dictionary keys as follows:
    ``'chrX'`` becomes ``'X'``; any other name whose text after the first
    three characters parses as an integer becomes that ``int``; everything
    else is skipped.

    The resulting dict is pickled to ``atac_file`` with every occurrence of
    ``'bigWig'`` in the path replaced by ``'pickle'``.

    Args:
        atac_file: Path to the input bigWig file.
    """
    signals = {}
    bw = pyBigWig.open(atac_file)
    try:
        for chrom, length in bw.chroms().items():
            if chrom == 'chrX':
                chrom_key = 'X'
            else:
                try:
                    chrom_key = int(chrom[3:])
                except ValueError:
                    # Not a numbered chromosome (e.g. chrY, chrM, scaffolds).
                    continue

            # Allocate float32 directly: the values end up as float32 anyway
            # and this avoids a transient float64 copy of a whole chromosome.
            dense = np.zeros(length, dtype=np.float32)

            # pyBigWig reports a chromosome without any intervals as None
            # rather than an empty sequence; treat that as "all zeros".
            for start, end, value in bw.intervals(chrom) or ():
                dense[start:end] = value

            # Drop the tail so the length is an exact multiple of 1000.
            seq_length = length // 1000 * 1000
            signals[chrom_key] = csr_matrix(dense[:seq_length])
    finally:
        # Release the file handle even if reading a chromosome failed.
        bw.close()

    with open(atac_file.replace('bigWig', 'pickle'), 'wb') as f:
        pickle.dump(signals, f)
def process(bam_file, progress=gr.Progress()):
    """Download an ATAC-seq BAM file and convert it to a pickled signal file.

    Steps: download the BAM (``wget`` when the URL contains ``'dropbox'``,
    ``gdown`` otherwise), index it with pysam, convert it to a bigWig with
    ``bamCoverage``, convert the bigWig with ``atac_bwtonpz`` and delete
    the intermediate files.

    Before starting, every file in ``ATAC/`` whose name does not start with
    ``'example'`` is removed.

    Args:
        bam_file: URL of the BAM file as typed by the user.
        progress: Gradio progress tracker used to report the current step.

    Returns:
        Path of the generated pickle file, ``ATAC/<uuid>.pickle``.

    Raises:
        gr.Error: If no URL was given, the download produced no data, or
            ``bamCoverage`` failed.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    # Validate first so that an empty submit does not wipe earlier results.
    if not bam_file or not bam_file.strip():
        raise gr.Error('Please provide a Dropbox or Google Drive link to a BAM file.')
    bam_file = bam_file.strip()

    if not os.path.exists('ATAC'):
        os.mkdir('ATAC')
    else:
        for f in os.listdir('ATAC/'):
            if not f.startswith('example'):
                os.remove(os.path.join('ATAC/', f))

    fid = str(uuid.uuid4())
    print(bam_file, fid)
    bam_path = 'ATAC/' + fid + '.bam'
    bigwig_path = 'ATAC/' + fid + '.bigWig'
    pickle_path = 'ATAC/' + fid + '.pickle'

    try:
        progress(0.2, desc="Downloading BAM file " + bam_file)
        if 'dropbox' in bam_file:
            # The URL is untrusted user input: pass it as a single argv
            # element (no shell) and after "--" so it can never be parsed
            # as a shell command or as a wget option.
            subprocess.run(['wget', '-O', bam_path, '--', bam_file])
        else:
            gdown.download(url=bam_file, fuzzy=True, output=bam_path)
        # wget -O creates an empty file even when the download fails.
        if not os.path.exists(bam_path) or os.path.getsize(bam_path) == 0:
            raise gr.Error('Failed to download the BAM file. Please check that '
                           'the link is correct and publicly accessible.')

        progress(0.4, desc="Indexing BAM file")
        time.sleep(0.1)
        pysam.index(bam_path)

        progress(0.6, desc="Converting BAM file to BigWig file (Please note that processing time may be lengthy)")
        time.sleep(0.1)
        result = subprocess.run([
            'bamCoverage', '--bam', bam_path, '-o', bigwig_path,
            '--outFileFormat', 'bigwig', '--normalizeUsing', 'RPGC',
            '--effectiveGenomeSize', '2913022398', '--Offset', '1',
            '--binSize', '1', '--numberOfProcessors', '6',
            '--blackListFileName', 'data/black_list.bed',
        ])
        if result.returncode != 0 or not os.path.exists(bigwig_path):
            raise gr.Error('Failed to convert the BAM file to a bigWig file.')

        progress(0.8, desc="Processing bigWig file")
        time.sleep(0.1)
        atac_bwtonpz(bigwig_path)
    finally:
        # Remove intermediates on success and on failure alike so that a
        # failed run does not leave large files behind.
        for leftover in (bam_path + '.bai', bigwig_path, bam_path):
            if os.path.exists(leftover):
                os.remove(leftover)

    return pickle_path
# Custom CSS for list items in the page (the rule must be closed with "}").
css = "li {font-size: 16px; --font: 'Quicksand', 'ui-sans-serif', 'system-ui', sans-serif}"

# Build the UI: a URL textbox and submit button on the left, the
# downloadable result file on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as app:
    gr.HTML('<p>For faster inference without waiting in queue, you may duplicate the space. <a href="https://huggingface.co/spaces/drjieliu/epcot_app?duplicate=true">'
            '<img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>')
    with gr.Row():
        with gr.Column():
            inp = gr.Textbox(
                label="URL to ATAC-seq BAM",
                info='Only Dropbox and Google Drive file links are accepted (set "Anyone with the link")',
                lines=1,
                placeholder='e.g. https://drive.google.com/file/d/xxxxx/view?usp=sharing'
            )
            btn2 = gr.Button("Submit")
        with gr.Column():
            out = gr.File(label='Download the processed file')
    # Run the conversion when the button is pressed and offer the result
    # file for download.
    btn2.click(fn=process, inputs=inp, outputs=out)

# One job at a time, at most five waiting. Calling .queue() already enables
# queuing, so the deprecated launch(enable_queue=True) flag is not needed.
app.queue(concurrency_count=1, max_size=5).launch(debug=True)