astirn committed on
Commit
5dac9ad
·
1 Parent(s): f230aaf

support for U nucleotides

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -58,7 +58,7 @@ def initiate_run():
58
  os.remove(fasta_path)
59
 
60
  # convert to upper case as used by tokenizer
61
- transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
62
 
63
  # ensure all transcripts have unique identifiers
64
  if transcripts.index.has_duplicates:
@@ -66,7 +66,7 @@ def initiate_run():
66
 
67
  # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
68
  elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
69
- st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts'
70
 
71
  # ensure all transcripts satisfy length requirements
72
  elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
 
58
  os.remove(fasta_path)
59
 
60
  # convert to upper case as used by tokenizer
61
+ transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))
62
 
63
  # ensure all transcripts have unique identifiers
64
  if transcripts.index.has_duplicates:
 
66
 
67
  # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
68
  elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
69
+ st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'
70
 
71
  # ensure all transcripts satisfy length requirements
72
  elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):