Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
support for U nucleotides
Browse files
app.py
CHANGED
|
@@ -58,7 +58,7 @@ def initiate_run():
|
|
| 58 |
os.remove(fasta_path)
|
| 59 |
|
| 60 |
# convert to upper case as used by tokenizer
|
| 61 |
-
transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
|
| 62 |
|
| 63 |
# ensure all transcripts have unique identifiers
|
| 64 |
if transcripts.index.has_duplicates:
|
|
@@ -66,7 +66,7 @@ def initiate_run():
|
|
| 66 |
|
| 67 |
# ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
|
| 68 |
elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
|
| 69 |
-
st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts'
|
| 70 |
|
| 71 |
# ensure all transcripts satisfy length requirements
|
| 72 |
elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
|
|
|
|
| 58 |
os.remove(fasta_path)
|
| 59 |
|
| 60 |
# convert to upper case as used by tokenizer and replace U nucleotides with T
|
| 61 |
+
transcripts[tiger.SEQ_COL] = transcripts[tiger.SEQ_COL].apply(lambda s: s.upper().replace('U', 'T'))
|
| 62 |
|
| 63 |
# ensure all transcripts have unique identifiers
|
| 64 |
if transcripts.index.has_duplicates:
|
|
|
|
| 66 |
|
| 67 |
# ensure all transcripts only contain nucleotides A, C, G, T, and wildcard N
|
| 68 |
elif not all(transcripts[tiger.SEQ_COL].apply(lambda s: set(s).issubset(tiger.NUCLEOTIDE_TOKENS.keys()))):
|
| 69 |
+
st.session_state.input_error = 'Transcript(s) must only contain upper or lower case A, C, G, and Ts or Us'
|
| 70 |
|
| 71 |
# ensure all transcripts satisfy length requirements
|
| 72 |
elif any(transcripts[tiger.SEQ_COL].apply(lambda s: len(s) < tiger.TARGET_LEN)):
|