Spaces:

Knowles-Lab
/

tiger

Sleeping

astirn committed on Jul 6, 2023

Commit

66b2911

1 Parent(s): 08495a9

progress

Files changed (1) hide show

app.py CHANGED Viewed

@@ -55,25 +55,28 @@ def process_input():
             fasta_path = st.session_state.fasta_entry.name
             with open(fasta_path, 'w') as f:
                 f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
-            df = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
-            if df.index.has_duplicates:
-                with TRANSCRIPT_ENTRY:
-                    st.write("Duplicate transcript ID's detected in fasta file")
-            else:
-                st.session_state.transcripts = df
-    # convert to upper case as used by tokenizer
-    st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
     # make sure all transcripts satisfy length requirements
     too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s)) < tiger.TARGET_LEN
     if any(too_short):
         with TRANSCRIPT_ENTRY:
             st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
     # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
     # elif src_seq is not None:
     #     st.write('Invalid input!')
     print(st.session_state.transcripts)
     # everything looks good to run the model

             fasta_path = st.session_state.fasta_entry.name
             with open(fasta_path, 'w') as f:
                 f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
+            st.session_state.transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
+    # make sure all transcripts have unique identifiers
+    if st.session_state.transcripts.index.has_duplicates:
+        with TRANSCRIPT_ENTRY:
+            st.write("Duplicate transcript ID's detected in fasta file")
+        return
     # make sure all transcripts satisfy length requirements
     too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s)) < tiger.TARGET_LEN
     if any(too_short):
         with TRANSCRIPT_ENTRY:
             st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
+        return
+    # convert to upper case as used by tokenizer
+    st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
     # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
     # elif src_seq is not None:
     #     st.write('Invalid input!')
     print(st.session_state.transcripts)
     # everything looks good to run the model