bernardo-de-almeida committed on
Commit
9ed59c5
·
1 Parent(s): 2452e69

feat: add functional tracks pipeline notebook

Browse files
notebooks_pipelines/NTv3_650M_pos_hg38_chr19_6700000_6831072.gff3 DELETED
@@ -1,107 +0,0 @@
1
- ##gff-version 3
2
- # model: InstaDeepAI/NTv3_650M_pos
3
- # window: chr19:6700000-6831072 (hg38); predictions on central 37.5%: chr19:6740960-6790112
4
- chr19 NTv3_HMM intron 6740961 6740995 0.975 . . ID=INTRON_1;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
5
- chr19 NTv3_HMM start_codon 6740996 6741013 0.355 . . ID=START_CODON_2;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
6
- chr19 NTv3_HMM exon 6741014 6741124 0.673 . . ID=EXON_3;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
7
- chr19 NTv3_HMM splice_donor_site 6741125 6741125 0.857 . . ID=SPLICE_DONOR_4;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
8
- chr19 NTv3_HMM intron 6741126 6741224 0.974 . . ID=INTRON_5;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
9
- chr19 NTv3_HMM splice_acceptor_site 6741225 6741225 0.930 . . ID=SPLICE_ACCEPTOR_6;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
10
- chr19 NTv3_HMM exon 6741226 6741280 0.693 . . ID=EXON_7;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
11
- chr19 NTv3_HMM splice_donor_site 6741281 6741281 0.837 . . ID=SPLICE_DONOR_8;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
12
- chr19 NTv3_HMM intron 6741282 6742966 0.959 . . ID=INTRON_9;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
13
- chr19 NTv3_HMM splice_acceptor_site 6742967 6742967 0.958 . . ID=SPLICE_ACCEPTOR_10;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
14
- chr19 NTv3_HMM exon 6742968 6743113 0.841 . . ID=EXON_11;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
15
- chr19 NTv3_HMM splice_donor_site 6743114 6743114 0.779 . . ID=SPLICE_DONOR_12;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
16
- chr19 NTv3_HMM intron 6743115 6743193 0.963 . . ID=INTRON_13;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
17
- chr19 NTv3_HMM splice_acceptor_site 6743194 6743194 0.910 . . ID=SPLICE_ACCEPTOR_14;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
18
- chr19 NTv3_HMM exon 6743195 6743255 0.845 . . ID=EXON_15;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
19
- chr19 NTv3_HMM splice_donor_site 6743256 6743256 0.782 . . ID=SPLICE_DONOR_16;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
20
- chr19 NTv3_HMM intron 6743257 6743493 0.970 . . ID=INTRON_17;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
21
- chr19 NTv3_HMM splice_acceptor_site 6743494 6743494 0.780 . . ID=SPLICE_ACCEPTOR_18;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
22
- chr19 NTv3_HMM exon 6743495 6743597 0.876 . . ID=EXON_19;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
23
- chr19 NTv3_HMM splice_donor_site 6743598 6743598 0.856 . . ID=SPLICE_DONOR_20;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
24
- chr19 NTv3_HMM intron 6743599 6743707 0.951 . . ID=INTRON_21;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
25
- chr19 NTv3_HMM splice_acceptor_site 6743708 6743708 0.856 . . ID=SPLICE_ACCEPTOR_22;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
26
- chr19 NTv3_HMM exon 6743709 6743835 0.812 . . ID=EXON_23;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
27
- chr19 NTv3_HMM splice_donor_site 6743836 6743836 0.887 . . ID=SPLICE_DONOR_24;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
28
- chr19 NTv3_HMM intron 6743837 6744553 0.989 . . ID=INTRON_25;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
29
- chr19 NTv3_HMM splice_acceptor_site 6744554 6744554 0.972 . . ID=SPLICE_ACCEPTOR_26;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
30
- chr19 NTv3_HMM exon 6744555 6744700 0.977 . . ID=EXON_27;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
31
- chr19 NTv3_HMM intron 6744701 6744799 0.972 . . ID=INTRON_28;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
32
- chr19 NTv3_HMM splice_acceptor_site 6744800 6744800 0.954 . . ID=SPLICE_ACCEPTOR_29;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
33
- chr19 NTv3_HMM exon 6744801 6744993 0.977 . . ID=EXON_30;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
34
- chr19 NTv3_HMM splice_donor_site 6744994 6744994 0.886 . . ID=SPLICE_DONOR_31;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
35
- chr19 NTv3_HMM intron 6744995 6746451 0.979 . . ID=INTRON_32;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
36
- chr19 NTv3_HMM splice_acceptor_site 6746452 6746452 0.938 . . ID=SPLICE_ACCEPTOR_33;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
37
- chr19 NTv3_HMM exon 6746453 6746560 0.840 . . ID=EXON_34;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
38
- chr19 NTv3_HMM splice_donor_site 6746561 6746561 0.947 . . ID=SPLICE_DONOR_35;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
39
- chr19 NTv3_HMM intron 6746562 6749933 0.973 . . ID=INTRON_36;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
40
- chr19 NTv3_HMM splice_acceptor_site 6749934 6749934 0.693 . . ID=SPLICE_ACCEPTOR_37;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
41
- chr19 NTv3_HMM exon 6749935 6750065 0.918 . . ID=EXON_38;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
42
- chr19 NTv3_HMM splice_donor_site 6750066 6750066 0.783 . . ID=SPLICE_DONOR_39;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
43
- chr19 NTv3_HMM intron 6750067 6750291 0.955 . . ID=INTRON_40;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
44
- chr19 NTv3_HMM splice_acceptor_site 6750292 6750292 0.960 . . ID=SPLICE_ACCEPTOR_41;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
45
- chr19 NTv3_HMM exon 6750293 6750430 0.959 . . ID=EXON_42;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
46
- chr19 NTv3_HMM splice_donor_site 6750431 6750431 0.723 . . ID=SPLICE_DONOR_43;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
47
- chr19 NTv3_HMM intron 6750432 6750511 0.939 . . ID=INTRON_44;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
48
- chr19 NTv3_HMM splice_acceptor_site 6750512 6750512 0.750 . . ID=SPLICE_ACCEPTOR_45;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
49
- chr19 NTv3_HMM exon 6750513 6750632 0.902 . . ID=EXON_46;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
50
- chr19 NTv3_HMM splice_donor_site 6750633 6750633 0.917 . . ID=SPLICE_DONOR_47;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
51
- chr19 NTv3_HMM intron 6750634 6751062 0.961 . . ID=INTRON_48;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
52
- chr19 NTv3_HMM splice_acceptor_site 6751063 6751063 0.694 . . ID=SPLICE_ACCEPTOR_49;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
53
- chr19 NTv3_HMM exon 6751064 6751199 0.558 . . ID=EXON_50;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
54
- chr19 NTv3_HMM stop_codon 6751200 6751212 0.332 . . ID=STOP_CODON_51;Name=STOP_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=220,20,60
55
- chr19 NTv3_HMM three_prime_UTR 6751213 6751488 0.965 + . ID=UTR3_PLUS_52;Name=UTR3_PLUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=34,139,34
56
- chr19 NTv3_HMM polyA_signal 6751489 6751507 0.355 . . ID=POLYA_SIGNAL_53;Name=POLYA_SIGNAL;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=139,69,19
57
- chr19 NTv3_HMM start_codon 6751508 6752169 0.002 . . ID=START_CODON_54;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
58
- chr19 NTv3_HMM polyA_signal 6752170 6752187 0.432 . . ID=POLYA_SIGNAL_55;Name=POLYA_SIGNAL;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=139,69,19
59
- chr19 NTv3_HMM three_prime_UTR 6752188 6752571 0.839 - . ID=UTR3_MINUS_56;Name=UTR3_MINUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=34,139,34
60
- chr19 NTv3_HMM stop_codon 6752572 6752752 0.136 . . ID=STOP_CODON_57;Name=STOP_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=220,20,60
61
- chr19 NTv3_HMM splice_acceptor_site 6752753 6752753 0.798 . . ID=SPLICE_ACCEPTOR_58;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
62
- chr19 NTv3_HMM intron 6752754 6753455 0.910 . . ID=INTRON_59;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
63
- chr19 NTv3_HMM splice_donor_site 6753456 6753456 0.766 . . ID=SPLICE_DONOR_60;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
64
- chr19 NTv3_HMM exon 6753457 6753640 0.953 . . ID=EXON_61;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
65
- chr19 NTv3_HMM splice_acceptor_site 6753641 6753641 0.939 . . ID=SPLICE_ACCEPTOR_62;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
66
- chr19 NTv3_HMM intron 6753642 6754051 0.985 . . ID=INTRON_63;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
67
- chr19 NTv3_HMM splice_donor_site 6754052 6754052 0.844 . . ID=SPLICE_DONOR_64;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
68
- chr19 NTv3_HMM exon 6754053 6754161 0.908 . . ID=EXON_65;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
69
- chr19 NTv3_HMM splice_acceptor_site 6754162 6754163 0.633 . . ID=SPLICE_ACCEPTOR_66;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
70
- chr19 NTv3_HMM intron 6754164 6754250 0.962 . . ID=INTRON_67;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
71
- chr19 NTv3_HMM splice_donor_site 6754251 6754251 0.875 . . ID=SPLICE_DONOR_68;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
72
- chr19 NTv3_HMM exon 6754252 6754424 0.965 . . ID=EXON_69;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
73
- chr19 NTv3_HMM splice_acceptor_site 6754425 6754425 0.791 . . ID=SPLICE_ACCEPTOR_70;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
74
- chr19 NTv3_HMM intron 6754426 6754615 0.975 . . ID=INTRON_71;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
75
- chr19 NTv3_HMM splice_donor_site 6754616 6754616 0.953 . . ID=SPLICE_DONOR_72;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
76
- chr19 NTv3_HMM exon 6754617 6754730 0.731 . . ID=EXON_73;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
77
- chr19 NTv3_HMM splice_acceptor_site 6754731 6754731 0.822 . . ID=SPLICE_ACCEPTOR_74;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
78
- chr19 NTv3_HMM intron 6754732 6754830 0.975 . . ID=INTRON_75;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
79
- chr19 NTv3_HMM splice_donor_site 6754831 6754831 0.944 . . ID=SPLICE_DONOR_76;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
80
- chr19 NTv3_HMM exon 6754832 6755314 0.757 . . ID=EXON_77;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
81
- chr19 NTv3_HMM splice_acceptor_site 6755315 6755315 0.713 . . ID=SPLICE_ACCEPTOR_78;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
82
- chr19 NTv3_HMM intron 6755316 6759593 0.988 . . ID=INTRON_79;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
83
- chr19 NTv3_HMM splice_donor_site 6759594 6759594 0.928 . . ID=SPLICE_DONOR_80;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
84
- chr19 NTv3_HMM exon 6759595 6759669 0.840 . . ID=EXON_81;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
85
- chr19 NTv3_HMM splice_acceptor_site 6759670 6759670 0.901 . . ID=SPLICE_ACCEPTOR_82;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
86
- chr19 NTv3_HMM intron 6759671 6760637 0.985 . . ID=INTRON_83;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
87
- chr19 NTv3_HMM splice_donor_site 6760638 6760638 0.928 . . ID=SPLICE_DONOR_84;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
88
- chr19 NTv3_HMM exon 6760639 6760985 0.748 . . ID=EXON_85;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
89
- chr19 NTv3_HMM splice_acceptor_site 6760986 6760987 0.603 . . ID=SPLICE_ACCEPTOR_86;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
90
- chr19 NTv3_HMM intron 6760988 6763679 0.984 . . ID=INTRON_87;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
91
- chr19 NTv3_HMM splice_donor_site 6763680 6763680 0.759 . . ID=SPLICE_DONOR_88;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
92
- chr19 NTv3_HMM exon 6763681 6763732 0.663 . . ID=EXON_89;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
93
- chr19 NTv3_HMM five_prime_UTR 6763733 6763815 0.840 - . ID=UTR5_MINUS_90;Name=UTR5_MINUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,128,0
94
- chr19 NTv3_HMM splice_acceptor_site 6763816 6763816 0.869 . . ID=SPLICE_ACCEPTOR_91;Name=SPLICE_ACCEPTOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
95
- chr19 NTv3_HMM intron 6763817 6767386 0.976 . . ID=INTRON_92;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
96
- chr19 NTv3_HMM splice_donor_site 6767387 6767387 0.902 . . ID=SPLICE_DONOR_93;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
97
- chr19 NTv3_HMM start_codon 6767388 6767411 0.051 . . ID=START_CODON_94;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
98
- chr19 NTv3_HMM five_prime_UTR 6767412 6767514 0.578 - . ID=UTR5_MINUS_95;Name=UTR5_MINUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,128,0
99
- chr19 NTv3_HMM start_codon 6767515 6769347 0.009 . . ID=START_CODON_96;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
100
- chr19 NTv3_HMM TF_binding_site 6769348 6769521 0.506 . . ID=CTCF_97;Name=CTCF;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=148,0,211
101
- chr19 NTv3_HMM start_codon 6769522 6772696 0.002 . . ID=START_CODON_98;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
102
- chr19 NTv3_HMM five_prime_UTR 6772697 6772806 0.885 + . ID=UTR5_PLUS_99;Name=UTR5_PLUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,128,0
103
- chr19 NTv3_HMM start_codon 6772807 6772810 0.694 . . ID=START_CODON_100;Name=START_CODON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,191,255
104
- chr19 NTv3_HMM five_prime_UTR 6772811 6772922 0.748 + . ID=UTR5_PLUS_101;Name=UTR5_PLUS;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,128,0
105
- chr19 NTv3_HMM exon 6772923 6773010 0.635 . . ID=EXON_102;Name=EXON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,255
106
- chr19 NTv3_HMM splice_donor_site 6773011 6773011 0.884 . . ID=SPLICE_DONOR_103;Name=SPLICE_DONOR;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072;color=0,0,0
107
- chr19 NTv3_HMM intron 6773012 6790112 0.972 . . ID=INTRON_104;Name=INTRON;model=InstaDeepAI/NTv3_650M_pos;assembly=hg38;window=chr19:6700000-6831072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
notebooks_pipelines/bigwig_outputs/HepG2_CTCF.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa369433dcd408740311d0a0c5209fbb8889402a82fdad222cc11413dcaf1f1a
3
- size 380493
 
 
 
 
notebooks_pipelines/bigwig_outputs/HepG2_DNAse.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a59a1a90a9337b0862c27f4bc492e72f7d78ad0d3ffa5fdf7a981faca8d55cc
3
- size 387370
 
 
 
 
notebooks_pipelines/bigwig_outputs/HepG2_H3k4me3.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:349d8edea3908f828dbb1946d9ab16f7220b61a5922d6a7b75ac4fa55b5f359a
3
- size 381439
 
 
 
 
notebooks_pipelines/bigwig_outputs/HepG2_RNA_seq.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a116151b3b1550916f49093ca128326338911ee070481480e994a0baa1b00d4f
3
- size 381391
 
 
 
 
notebooks_pipelines/bigwig_outputs/K562_CTCF.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cf84025d20d7ec59efbf02e7f0d20d67e3bb9564ffe3538e2397c0a46a576aea
3
- size 379394
 
 
 
 
notebooks_pipelines/bigwig_outputs/K562_DNAse.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8333fc13cfabe984cc303c575c2e65da20908240f8c733658b5b309a4191cb07
3
- size 381686
 
 
 
 
notebooks_pipelines/bigwig_outputs/K562_H3k4me3.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:074971cc028eab364a6cb7642bcc0cf70603de6f3f3f425c600b3b6f90699f32
3
- size 383184
 
 
 
 
notebooks_pipelines/bigwig_outputs/K562_RNA_seq.bw DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e4118a761250629de70de55c935b38f53a0c0eb6dc2156dcb632335c1ef7f42
3
- size 380637
 
 
 
 
tabs/annotation.html CHANGED
@@ -2,8 +2,13 @@
2
  <h2>🧬 NTv3 Post-Trained Genome Annotation</h2>
3
  <p>This notebook demonstrates how to use the NTv3 post-trained model to perform genome annotation directly from a DNA sequence. It relies on a pipeline that applies a Hidden Markov Model (HMM) to the per-base probabilities returned by NTv3, converting them into a coherent gene model that respects biological constraints and valid transitions between genomic elements.</p>
4
  <p>The pipeline abstracts away all the underlying steps: running inference with the model, retrieving and processing the predicted probabilities, and applying the HMM to generate a consistent annotation. It returns a ready-to-use GFF file that can be visualized in any genome browser for the sequence of interest.</p>
5
- <p>If you're interested in exploring the intermediate probabilities, please refer to the <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks/01_tracks_prediction.ipynb" target="_blank" rel="noopener">track-prediction notebook</a>. These probabilities can be useful for assessing model confidence and identifying potentially interesting biological regions. This notebook focuses on the higher-level task of producing gene annotations directly from raw DNA.</p>
6
  <p><strong>📝 Note for Google Colab users:</strong> This notebook is compatible with Colab! For faster inference, make sure to enable GPU: Runtime → Change runtime type → GPU (T4 or better recommended).</p>
 
 
 
 
 
7
  </div>
8
 
9
  <div class="grid">
@@ -41,11 +46,11 @@ print(f"Original sequence length: {len(seq)}")
41
  # Crop to multiple of 128 (the pipeline will crop again, but this is a no-op once divisible)
42
  seq = seq[:int(len(seq) // 128) * 128]
43
  print(f"Cropped sequence length: {len(seq)}, {len(seq) / 128} transformer tokens")</code></pre></div>
44
- <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
45
- <strong>Example output:</strong><br>
46
  Original sequence length: 131072<br>
47
  Cropped sequence length: 131072, 1024.0 transformer tokens
48
- </p>
49
  </div>
50
 
51
  <div class="card" style="grid-column: span 12;">
@@ -72,6 +77,14 @@ start_time = time.time()
72
  gff_text = ntv3_gff(inputs)
73
  end_time = time.time()
74
  print(f"Inference + decoding time: {end_time - start_time:.2f} seconds")</code></pre></div>
 
 
 
 
 
 
 
 
75
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
76
  The pipeline performs all the necessary steps: running inference with the model, retrieving and processing the predicted probabilities, and applying the HMM to generate a consistent annotation.
77
  </p>
@@ -88,9 +101,10 @@ with open(output_filename, "w") as output_file:
88
  output_file.write(gff_text)
89
 
90
  print(f"Saved GFF file to {output_filename}")</code></pre></div>
91
- <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
92
- <strong>Example output:</strong> Saved GFF file to NTv3_650M_pos_hg38_chr19_6700000_6831072.gff3
93
- </p>
 
94
  </div>
95
 
96
  <div class="card" style="grid-column: span 12;">
 
2
  <h2>🧬 NTv3 Post-Trained Genome Annotation</h2>
3
  <p>This notebook demonstrates how to use the NTv3 post-trained model to perform genome annotation directly from a DNA sequence. It relies on a pipeline that applies a Hidden Markov Model (HMM) to the per-base probabilities returned by NTv3, converting them into a coherent gene model that respects biological constraints and valid transitions between genomic elements.</p>
4
  <p>The pipeline abstracts away all the underlying steps: running inference with the model, retrieving and processing the predicted probabilities, and applying the HMM to generate a consistent annotation. It returns a ready-to-use GFF file that can be visualized in any genome browser for the sequence of interest.</p>
5
+ <p>If you're interested in exploring the intermediate probabilities, please refer to the <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_tutorials/01_tracks_prediction.ipynb" target="_blank" rel="noopener">track-prediction notebook</a>. These probabilities can be useful for assessing model confidence and identifying potentially interesting biological regions. This notebook focuses on the higher-level task of producing gene annotations directly from raw DNA.</p>
6
  <p><strong>📝 Note for Google Colab users:</strong> This notebook is compatible with Colab! For faster inference, make sure to enable GPU: Runtime → Change runtime type → GPU (T4 or better recommended).</p>
7
+ <p>
8
+ <strong>🔗 Quick links:</strong><br>
9
+ • <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/02_genome_annotation.ipynb" target="_blank" rel="noopener">View notebook on Hugging Face</a> (includes "Open in Colab" button)<br>
10
+ • <a href="https://colab.research.google.com/github/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/02_genome_annotation.ipynb" target="_blank" rel="noopener">Open directly in Google Colab</a>
11
+ </p>
12
  </div>
13
 
14
  <div class="grid">
 
46
  # Crop to multiple of 128 (the pipeline will crop again, but this is a no-op once divisible)
47
  seq = seq[:int(len(seq) // 128) * 128]
48
  print(f"Cropped sequence length: {len(seq)}, {len(seq) / 128} transformer tokens")</code></pre></div>
49
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6;">
50
+ <strong style="color: var(--muted);">Output:</strong><br>
51
  Original sequence length: 131072<br>
52
  Cropped sequence length: 131072, 1024.0 transformer tokens
53
+ </div>
54
  </div>
55
 
56
  <div class="card" style="grid-column: span 12;">
 
77
  gff_text = ntv3_gff(inputs)
78
  end_time = time.time()
79
  print(f"Inference + decoding time: {end_time - start_time:.2f} seconds")</code></pre></div>
80
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6;">
81
+ <strong style="color: var(--muted);">Output:</strong><br>
82
+ A new version of the following files was downloaded from https://huggingface.co/InstaDeepAI/NTv3_650M_pos:<br>
83
+ - ntv3_gff_pipeline.py<br>
84
+ . Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.<br>
85
+ Device set to use cpu<br>
86
+ Inference + decoding time: 53.09 seconds
87
+ </div>
88
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
89
  The pipeline performs all the necessary steps: running inference with the model, retrieving and processing the predicted probabilities, and applying the HMM to generate a consistent annotation.
90
  </p>
 
101
  output_file.write(gff_text)
102
 
103
  print(f"Saved GFF file to {output_filename}")</code></pre></div>
104
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6;">
105
+ <strong style="color: var(--muted);">Output:</strong><br>
106
+ Saved GFF file to NTv3_650M_pos_hg38_chr19_6700000_6831072.gff3
107
+ </div>
108
  </div>
109
 
110
  <div class="card" style="grid-column: span 12;">
tabs/functional_tracks.html CHANGED
@@ -4,6 +4,11 @@
4
  <p>The pipeline abstracts away all the underlying steps: running inference with the model and plotting the predictions per track.</p>
5
  <p>If you're interested in exploring the intermediate probabilities, please refer to the <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_tutorials/01_tracks_prediction.ipynb" target="_blank" rel="noopener">track-prediction notebook</a>.</p>
6
  <p><strong>📝 Note for Google Colab users:</strong> This notebook is compatible with Colab! For faster inference, make sure to enable GPU: Runtime → Change runtime type → GPU (T4 or better recommended).</p>
 
 
 
 
 
7
  </div>
8
 
9
  <div class="grid">
@@ -47,11 +52,11 @@ print(f"Original sequence length: {len(seq)}")
47
  # Crop to multiple of 128 (the pipeline will crop again, but this is a no-op once divisible)
48
  seq = seq[:int(len(seq) // 128) * 128]
49
  print(f"Cropped sequence length: {len(seq)}, {len(seq) / 128} transformer tokens")</code></pre></div>
50
- <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
51
- <strong>Example output:</strong><br>
52
  Original sequence length: 131072<br>
53
  Cropped sequence length: 131072, 1024.0 transformer tokens
54
- </p>
55
  </div>
56
 
57
  <div class="card" style="grid-column: span 12;">
@@ -90,6 +95,15 @@ ntv3_predictions = ntv3_tracks(
90
  end_time = time.time()
91
 
92
  print(f"Inference + decoding time: {end_time - start_time:.2f} seconds")</code></pre></div>
 
 
 
 
 
 
 
 
 
93
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
94
  The pipeline performs all the necessary steps: running inference with the model and plotting the predictions for the specified tracks and genomic elements.
95
  </p>
@@ -166,7 +180,27 @@ for i, (display_name, track_id, track_idx) in enumerate(track_data_list):
166
 
167
  print(f" Saved {i + 1}/{len(track_data_list)}: {display_name} ({track_clean_name}.bw)")
168
 
169
- print(f"\n✅ Successfully saved {len(track_data_list)} BigWig files to '{output_dir}/'")</code></pre></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
171
  This saves each selected functional track as a separate BigWig file that can be visualized in genome browsers. The files are saved with user-friendly display names (e.g., "K562_RNA_seq.bw").
172
  </p>
 
4
  <p>The pipeline abstracts away all the underlying steps: running inference with the model and plotting the predictions per track.</p>
5
  <p>If you're interested in exploring the intermediate probabilities, please refer to the <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_tutorials/01_tracks_prediction.ipynb" target="_blank" rel="noopener">track-prediction notebook</a>.</p>
6
  <p><strong>📝 Note for Google Colab users:</strong> This notebook is compatible with Colab! For faster inference, make sure to enable GPU: Runtime → Change runtime type → GPU (T4 or better recommended).</p>
7
+ <p>
8
+ <strong>🔗 Quick links:</strong><br>
9
+ • <a href="https://huggingface.co/spaces/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/01_functional_track_prediction.ipynb" target="_blank" rel="noopener">View notebook on Hugging Face</a> (includes "Open in Colab" button)<br>
10
+ • <a href="https://colab.research.google.com/github/InstaDeepAI/ntv3/blob/main/notebooks_pipelines/01_functional_track_prediction.ipynb" target="_blank" rel="noopener">Open directly in Google Colab</a>
11
+ </p>
12
  </div>
13
 
14
  <div class="grid">
 
52
  # Crop to multiple of 128 (the pipeline will crop again, but this is a no-op once divisible)
53
  seq = seq[:int(len(seq) // 128) * 128]
54
  print(f"Cropped sequence length: {len(seq)}, {len(seq) / 128} transformer tokens")</code></pre></div>
55
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6;">
56
+ <strong style="color: var(--muted);">Output:</strong><br>
57
  Original sequence length: 131072<br>
58
  Cropped sequence length: 131072, 1024.0 transformer tokens
59
+ </div>
60
  </div>
61
 
62
  <div class="card" style="grid-column: span 12;">
 
95
  end_time = time.time()
96
 
97
  print(f"Inference + decoding time: {end_time - start_time:.2f} seconds")</code></pre></div>
98
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6;">
99
+ <strong style="color: var(--muted);">Output:</strong><br>
100
+ Device set to use cpu<br>
101
+ Running on device: cpu<br>
102
+ Inference + decoding time: 38.32 seconds
103
+ </div>
104
+ <div style="margin-top: 20px;">
105
+ <img src="assets/output_tracks.png" alt="Output tracks plot" style="width: 100%; height: auto; border-radius: 12px; border: 1px solid var(--border);" />
106
+ </div>
107
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
108
  The pipeline performs all the necessary steps: running inference with the model and plotting the predictions for the specified tracks and genomic elements.
109
  </p>
 
180
 
181
  print(f" Saved {i + 1}/{len(track_data_list)}: {display_name} ({track_clean_name}.bw)")
182
 
183
+ print(f"\n✅ Successfully saved {len(track_data_list)} BigWig files to '{output_dir}/'")
184
+ print(f" Files: {', '.join([name.replace(' ', '_').replace('/', '_').replace('-', '_') for name, _, _ in track_data_list])}")</code></pre></div>
185
+ <div style="margin-top: 15px; padding: 12px 16px; background: rgba(0, 0, 0, 0.4); border: 1px solid var(--border); border-radius: 8px; font-family: var(--mono); font-size: 12px; color: rgba(255, 255, 255, 0.85); line-height: 1.6; white-space: pre-wrap;">
186
+ <strong style="color: var(--muted);">Output:</strong><br>Found 8 tracks to save from tracks_to_plot
187
+ Input region: chr19:6700000-6831072 (length: 131,072 bp)
188
+ Prediction region: chr19:6740960-6790112 (length: 49,152 bp)
189
+ Number of positions: 49152
190
+
191
+ Saving BigWig files to 'bigwig_outputs/' directory...
192
+ Saved 1/8: K562 RNA-seq (K562_RNA_seq.bw)
193
+ Saved 2/8: K562 DNAse (K562_DNAse.bw)
194
+ Saved 3/8: K562 H3k4me3 (K562_H3k4me3.bw)
195
+ Saved 4/8: K562 CTCF (K562_CTCF.bw)
196
+ Saved 5/8: HepG2 RNA-seq (HepG2_RNA_seq.bw)
197
+ Saved 6/8: HepG2 DNAse (HepG2_DNAse.bw)
198
+ Saved 7/8: HepG2 H3k4me3 (HepG2_H3k4me3.bw)
199
+ Saved 8/8: HepG2 CTCF (HepG2_CTCF.bw)
200
+
201
+ ✅ Successfully saved 8 BigWig files to 'bigwig_outputs/'
202
+ Files: K562_RNA_seq, K562_DNAse, K562_H3k4me3, K562_CTCF, HepG2_RNA_seq, HepG2_DNAse, HepG2_H3k4me3, HepG2_CTCF
203
+ </div>
204
  <p style="margin-top: 15px; color: var(--muted); font-size: 13px;">
205
  This saves each selected functional track as a separate BigWig file that can be visualized in genome browsers. The files are saved with user-friendly display names (e.g., "K562_RNA_seq.bw").
206
  </p>