astirn committed on
Commit
e43faaa
·
1 Parent(s): 97f8ff7

very close

Browse files
Files changed (2) hide show
  1. app.py +58 -86
  2. tiger.py +5 -5
app.py CHANGED
@@ -29,23 +29,14 @@ def mode_change_callback():
29
  st.session_state.disable_off_target_checkbox = False
30
 
31
 
32
- def entry_method_change_callback():
33
- if st.session_state.entry_method == ENTRY_METHODS['manual']:
34
- st.session_state.manual_entry_disabled = False
35
- st.session_state.fasta_entry_disabled = True
36
- elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
37
- st.session_state.manual_entry_disabled = True
38
- st.session_state.fasta_entry_disabled = False
39
-
40
-
41
- def process_input():
42
 
43
  # initialize transcript DataFrame
44
- st.session_state.transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])
45
 
46
  # manual entry
47
  if st.session_state.entry_method == ENTRY_METHODS['manual']:
48
- st.session_state.transcripts = pd.DataFrame({
49
  tiger.ID_COL: ['ManualEntry'],
50
  tiger.SEQ_COL: [st.session_state.manual_entry]
51
  })
@@ -56,33 +47,38 @@ def process_input():
56
  fasta_path = st.session_state.fasta_entry.name
57
  with open(fasta_path, 'w') as f:
58
  f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
59
- st.session_state.transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
60
 
61
  # make sure all transcripts have unique identifiers
62
- if st.session_state.transcripts.index.has_duplicates:
63
  with TRANSCRIPT_ENTRY:
64
  st.write("Duplicate transcript ID's detected in fasta file")
65
  return
66
 
67
- # make sure all transcripts satisfy length requirements
68
- too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)
69
- if any(too_short):
70
- with TRANSCRIPT_ENTRY:
71
- st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
72
- return
73
-
74
  # convert to upper case as used by tokenizer
75
- st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
76
 
77
  # make sure all transcripts only contain nucleotides A, C, G, T, and wildcard N
78
- valid = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))
79
  if not all(valid):
80
  with TRANSCRIPT_ENTRY:
81
  st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
82
  return
83
 
84
- # everything looks good, so run the model
85
- st.session_state.run = True
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
88
  if __name__ == '__main__':
@@ -91,18 +87,14 @@ if __name__ == '__main__':
91
  if 'mode' not in st.session_state:
92
  st.session_state.mode = tiger.RUN_MODES['all']
93
  st.session_state.disable_off_target_checkbox = True
94
- if 'off_targets_checked' not in st.session_state:
95
- st.session_state.off_targets_checked = False
96
  if 'entry_method' not in st.session_state:
97
  st.session_state.entry_method = ENTRY_METHODS['manual']
98
- st.session_state.manual_entry_disabled = False
99
- st.session_state.fasta_entry_disabled = True
100
  if 'run' not in st.session_state:
101
  st.session_state.run = False
102
- if 'results_ready' not in st.session_state:
103
- st.session_state.results_ready = False
104
- st.session_state.on_target = pd.DataFrame()
105
- st.session_state.off_target = pd.DataFrame()
106
 
107
  # title and documentation
108
  with DOCUMENTATION:
@@ -131,65 +123,45 @@ if __name__ == '__main__':
131
  label='How would you like to provide transcripts of interest?',
132
  options=ENTRY_METHODS.values(),
133
  key='entry_method',
134
- on_change=entry_method_change_callback
135
- )
136
- st.text_input(
137
- label='Enter a target transcript:',
138
- key='manual_entry',
139
- placeholder='Upper or lower case',
140
- disabled=st.session_state.manual_entry_disabled
141
- )
142
- st.file_uploader(
143
- label='Upload a fasta file:',
144
- key='fasta_entry',
145
- disabled=st.session_state.fasta_entry_disabled
146
  )
147
- st.button(label='Get predictions!', on_click=process_input)
 
 
 
 
 
 
 
 
 
 
148
 
 
149
  with RUNTIME:
150
- # check and clear run signal and existing results
151
- if st.session_state.run:
152
- st.session_state.run = False
153
- st.session_state.results_ready = False
154
-
155
- # run model and signal results are ready
156
- st.session_state.off_targets_checked = st.session_state.check_off_targets
157
- st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
158
- transcripts=st.session_state.transcripts,
159
- mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
160
- status=st.empty(),
161
- progress_bar=st.progress(0),
162
- check_off_targets=st.session_state.off_targets_checked
163
- )
164
- st.session_state.results_ready = True
165
 
 
166
  with RESULTS:
167
- on_target_results = st.empty()
168
- off_target_results = st.empty()
169
 
170
- # display results if they are ready
171
- if st.session_state.results_ready:
172
- with on_target_results:
173
- st.write('On-target predictions:', st.session_state.on_target)
 
 
 
 
 
 
 
 
 
 
174
  st.download_button(
175
- label='Download on-target predictions',
176
- data=convert_df(st.session_state.on_target),
177
- file_name='on_target.csv',
178
  mime='text/csv'
179
  )
180
- with off_target_results:
181
- if st.session_state.off_targets_checked and len(st.session_state.off_target) > 0:
182
- st.write('Off-target predictions:', st.session_state.off_target)
183
- st.download_button(
184
- label='Download off-target predictions',
185
- data=convert_df(st.session_state.off_target),
186
- file_name='off_target.csv',
187
- mime='text/csv'
188
- )
189
- elif st.session_state.off_targets_checked and len(st.session_state.off_target) == 0:
190
- st.write('We did not find any off-target effects!')
191
-
192
- # otherwise, clear our results
193
- else:
194
- on_target_results.empty()
195
- off_target_results.empty()
 
29
  st.session_state.disable_off_target_checkbox = False
30
 
31
 
32
+ def run():
 
 
 
 
 
 
 
 
 
33
 
34
  # initialize transcript DataFrame
35
+ transcripts = pd.DataFrame(columns=[tiger.ID_COL, tiger.SEQ_COL])
36
 
37
  # manual entry
38
  if st.session_state.entry_method == ENTRY_METHODS['manual']:
39
+ transcripts = pd.DataFrame({
40
  tiger.ID_COL: ['ManualEntry'],
41
  tiger.SEQ_COL: [st.session_state.manual_entry]
42
  })
 
47
  fasta_path = st.session_state.fasta_entry.name
48
  with open(fasta_path, 'w') as f:
49
  f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
50
+ transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
51
 
52
  # make sure all transcripts have unique identifiers
53
+ if transcripts.index.has_duplicates:
54
  with TRANSCRIPT_ENTRY:
55
  st.write("Duplicate transcript ID's detected in fasta file")
56
  return
57
 
 
 
 
 
 
 
 
58
  # convert to upper case as used by tokenizer
59
+ transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
60
 
61
  # make sure all transcripts only contain nucleotides A, C, G, T, and wildcard N
62
+ valid = transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))
63
  if not all(valid):
64
  with TRANSCRIPT_ENTRY:
65
  st.write('Transcript(s) must only contain upper or lower case A, C, G, and Ts')
66
  return
67
 
68
+ # make sure all transcripts satisfy length requirements
69
+ too_short = transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)
70
+ if any(too_short):
71
+ with TRANSCRIPT_ENTRY:
72
+ st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
73
+ return
74
+
75
+ # run model
76
+ st.session_state.on_target, st.session_state.off_target = tiger.tiger_exhibit(
77
+ transcripts=transcripts,
78
+ mode={v: k for k, v in tiger.RUN_MODES.items()}[st.session_state.mode],
79
+ # status=RUNTIME,
80
+ check_off_targets=st.session_state.check_off_targets
81
+ )
82
 
83
 
84
  if __name__ == '__main__':
 
87
  if 'mode' not in st.session_state:
88
  st.session_state.mode = tiger.RUN_MODES['all']
89
  st.session_state.disable_off_target_checkbox = True
 
 
90
  if 'entry_method' not in st.session_state:
91
  st.session_state.entry_method = ENTRY_METHODS['manual']
 
 
92
  if 'run' not in st.session_state:
93
  st.session_state.run = False
94
+ if 'on_target' not in st.session_state:
95
+ st.session_state.on_target = None
96
+ if 'off_target' not in st.session_state:
97
+ st.session_state.off_target = None
98
 
99
  # title and documentation
100
  with DOCUMENTATION:
 
123
  label='How would you like to provide transcripts of interest?',
124
  options=ENTRY_METHODS.values(),
125
  key='entry_method',
 
 
 
 
 
 
 
 
 
 
 
 
126
  )
127
+ if st.session_state.entry_method == ENTRY_METHODS['manual']:
128
+ st.text_input(
129
+ label='Enter a target transcript:',
130
+ key='manual_entry',
131
+ placeholder='Upper or lower case',
132
+ )
133
+ elif st.session_state.entry_method == ENTRY_METHODS['fasta']:
134
+ st.file_uploader(
135
+ label='Upload a fasta file:',
136
+ key='fasta_entry',
137
+ )
138
 
139
+ # runtime
140
  with RUNTIME:
141
+ st.button(label='Get predictions!', on_click=run)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
+ # results
144
  with RESULTS:
 
 
145
 
146
+ # on-target results
147
+ if st.session_state.on_target is not None:
148
+ st.write('On-target predictions:', st.session_state.on_target)
149
+ st.download_button(
150
+ label='Download on-target predictions',
151
+ data=convert_df(st.session_state.on_target),
152
+ file_name='on_target.csv',
153
+ mime='text/csv'
154
+ )
155
+
156
+ # off-target results
157
+ if st.session_state.off_target is not None:
158
+ if len(st.session_state.off_target) > 0:
159
+ st.write('Off-target predictions:', st.session_state.off_target)
160
  st.download_button(
161
+ label='Download off-target predictions',
162
+ data=convert_df(st.session_state.off_target),
163
+ file_name='off_target.csv',
164
  mime='text/csv'
165
  )
166
+ else:
167
+ st.write('We did not find any off-target effects!')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tiger.py CHANGED
@@ -204,7 +204,7 @@ def top_guides_per_transcript(predictions: pd.DataFrame):
204
  return top_guides.reset_index(drop=True)
205
 
206
 
207
- def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):
208
 
209
  # load reference transcripts
210
  reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
@@ -262,9 +262,9 @@ def find_off_targets(top_guides: pd.DataFrame, status=None, progress_bar=None):
262
  update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
263
  print('\r' + update_text, end='')
264
  if status is not None:
265
- status.text(update_text)
266
- if progress_bar is not None:
267
- progress_bar.progress(percent_complete / 100)
268
  print('')
269
 
270
  return off_targets
@@ -301,7 +301,7 @@ def tiger_exhibit(transcripts: pd.DataFrame, mode: str, check_off_targets: bool,
301
  on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)
302
 
303
  # initialize other outputs
304
- off_target_predictions = pd.DataFrame()
305
 
306
  if mode == 'all' and not check_off_targets:
307
  pass # nothing to do!
 
204
  return top_guides.reset_index(drop=True)
205
 
206
 
207
+ def find_off_targets(top_guides: pd.DataFrame, status=None):
208
 
209
  # load reference transcripts
210
  reference_transcripts = load_transcripts([os.path.join('transcripts', f) for f in REFERENCE_TRANSCRIPTS])
 
262
  update_text = 'Scanning for off-targets: {:.2f}%'.format(percent_complete)
263
  print('\r' + update_text, end='')
264
  if status is not None:
265
+ with status:
266
+ st.text(update_text)
267
+ st.progress(percent_complete / 100)
268
  print('')
269
 
270
  return off_targets
 
301
  on_target_predictions = get_on_target_predictions(transcripts, tiger, status, progress_bar)
302
 
303
  # initialize other outputs
304
+ off_target_predictions = None
305
 
306
  if mode == 'all' and not check_off_targets:
307
  pass # nothing to do!