astirn committed on
Commit
66b2911
·
1 Parent(s): 08495a9
Files changed (1) hide show
  1. app.py +11 -8
app.py CHANGED
@@ -55,25 +55,28 @@ def process_input():
55
  fasta_path = st.session_state.fasta_entry.name
56
  with open(fasta_path, 'w') as f:
57
  f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
58
- df = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
59
- if df.index.has_duplicates:
60
- with TRANSCRIPT_ENTRY:
61
- st.write("Duplicate transcript ID's detected in fasta file")
62
- else:
63
- st.session_state.transcripts = df
64
 
65
- # convert to upper case as used by tokenizer
66
- st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
 
 
 
67
 
68
  # make sure all transcripts satisfy length requirements
69
  too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s)) < tiger.TARGET_LEN
70
  if any(too_short):
71
  with TRANSCRIPT_ENTRY:
72
  st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
 
 
 
 
73
 
74
  # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
75
  # elif src_seq is not None:
76
  # st.write('Invalid input!')
 
77
  print(st.session_state.transcripts)
78
 
79
  # everything looks good to run the model
 
55
  fasta_path = st.session_state.fasta_entry.name
56
  with open(fasta_path, 'w') as f:
57
  f.write(st.session_state.fasta_entry.getvalue().decode('utf-8'))
58
+ st.session_state.transcripts = tiger.load_transcripts([fasta_path], enforce_unique_ids=False)
 
 
 
 
 
59
 
60
+ # make sure all transcripts have unique identifiers
61
+ if st.session_state.transcripts.index.has_duplicates:
62
+ with TRANSCRIPT_ENTRY:
63
+ st.write("Duplicate transcript ID's detected in fasta file")
64
+ return
65
 
66
  # make sure all transcripts satisfy length requirements
67
  too_short = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: len(s)) < tiger.TARGET_LEN
68
  if any(too_short):
69
  with TRANSCRIPT_ENTRY:
70
  st.write('Transcript(s) must be at least {:d} bases.'.format(tiger.TARGET_LEN))
71
+ return
72
+
73
+ # convert to upper case as used by tokenizer
74
+ st.session_state.transcripts[tiger.SEQ_COL] = st.session_state.transcripts[tiger.SEQ_COL].apply(lambda s: s.upper())
75
 
76
  # if src_seq and all([True if nt.upper() in NUCLEOTIDE_TOKENS.keys() else False for nt in src_seq]):
77
  # elif src_seq is not None:
78
  # st.write('Invalid input!')
79
+
80
  print(st.session_state.transcripts)
81
 
82
  # everything looks good to run the model