Spaces:

Knowles-Lab
/

tiger

Running on CPU Upgrade

astirn committed on Jul 20, 2023

Commit

5dac9ad

1 Parent(s): f230aaf

support for U nucleotides

Files changed (1) hide show

app.py CHANGED Viewed

@@ -58,7 +58,7 @@ def initiate_run():
             os.remove(fasta_path)
     # convert to upper case as used by tokenizer
-    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
     # ensure all transcripts have unique identifiers
     if transcripts.index.has_duplicates:
@@ -66,7 +66,7 @@ def initiate_run():
     # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
     elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
-        st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts'
     # ensure all transcripts satisfy length requirements
     elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):

             os.remove(fasta_path)
     # convert to upper case and map U to T, as used by tokenizer
+    transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))
     # ensure all transcripts have unique identifiers
     if transcripts.index.has_duplicates:
     # ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
     elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
+        st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'
     # ensure all transcripts satisfy length requirements
     elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):