saketh11 committed on
Commit
6d5dff4
·
1 Parent(s): 9c0f711

Add debug logging to track DNA optimization flow

Browse files

- Add logging for protein translation steps
- Track sequence lengths and content through optimization
- Debug why DNA input produces identical output sequences
- This will help identify whether the issue is in the translation, the model, or the comparison

Files changed (1) hide show
  1. app.py +22 -1
app.py CHANGED
@@ -595,6 +595,12 @@ def run_optimization(protein: str, organism: str, use_post_processing: bool = Fa
595
  st.session_state.optimization_running = True
596
  st.session_state.post_processed_results = None
597
 
 
 
 
 
 
 
598
  try:
599
  # Use the exact same method that achieved best results in evaluation
600
  result = predict_dna_sequence(
@@ -606,6 +612,11 @@ def run_optimization(protein: str, organism: str, use_post_processing: bool = Fa
606
  match_protein=True,
607
  )
608
 
 
 
 
 
 
609
  # Check GC content and auto-correct if out of optimal range
610
  _res = result[0] if isinstance(result, list) else result
611
  initial_gc = get_GC_content(_res.predicted_dna)
@@ -867,7 +878,12 @@ def single_sequence_optimization():
867
  if st.button("πŸš€ Optimize Sequence", type="primary", use_container_width=True):
868
  st.session_state.results = None
869
  if st.session_state.sequence_type == "dna":
870
- protein_sequence = translate_dna_to_protein(str(st.session_state.sequence_clean))
 
 
 
 
 
871
  run_optimization(protein_sequence, str(st.session_state.organism), use_post_processing)
872
  else:
873
  run_optimization(str(st.session_state.sequence_clean), str(st.session_state.organism), use_post_processing)
@@ -950,11 +966,16 @@ def display_optimization_results(result, organism, original_sequence, sequence_t
950
  with imp_col1:
951
  if input_metrics.get('gc_content') and optimized_metrics.get('gc_content'):
952
  gc_change = optimized_metrics['gc_content'] - input_metrics['gc_content']
 
953
  st.metric("GC Content", f"{optimized_metrics['gc_content']:.1f}%", delta=f"{gc_change:+.1f}%")
954
 
955
  with imp_col2:
956
  if input_metrics.get('cai') and optimized_metrics.get('cai'):
957
  cai_change = optimized_metrics['cai'] - input_metrics['cai']
 
 
 
 
958
  st.metric("CAI Score", f"{optimized_metrics['cai']:.3f}", delta=f"{cai_change:+.3f}")
959
 
960
  with imp_col3:
 
595
  st.session_state.optimization_running = True
596
  st.session_state.post_processed_results = None
597
 
598
+ # Debug logging
599
+ print(f"πŸ” DEBUG: Starting optimization")
600
+ print(f"πŸ” DEBUG: Protein length: {len(protein)}")
601
+ print(f"πŸ” DEBUG: Protein (first 50): {protein[:50]}...")
602
+ print(f"πŸ” DEBUG: Organism: {organism}")
603
+
604
  try:
605
  # Use the exact same method that achieved best results in evaluation
606
  result = predict_dna_sequence(
 
612
  match_protein=True,
613
  )
614
 
615
+ # Debug logging for result
616
+ _res = result[0] if isinstance(result, list) else result
617
+ print(f"πŸ” DEBUG: Model returned DNA length: {len(_res.predicted_dna)}")
618
+ print(f"πŸ” DEBUG: Model returned DNA (first 50): {_res.predicted_dna[:50]}...")
619
+
620
  # Check GC content and auto-correct if out of optimal range
621
  _res = result[0] if isinstance(result, list) else result
622
  initial_gc = get_GC_content(_res.predicted_dna)
 
878
  if st.button("πŸš€ Optimize Sequence", type="primary", use_container_width=True):
879
  st.session_state.results = None
880
  if st.session_state.sequence_type == "dna":
881
+ original_dna = str(st.session_state.sequence_clean)
882
+ protein_sequence = translate_dna_to_protein(original_dna)
883
+ print(f"πŸ” DEBUG: Original DNA length: {len(original_dna)}")
884
+ print(f"πŸ” DEBUG: Translated protein length: {len(protein_sequence)}")
885
+ print(f"πŸ” DEBUG: Original DNA (first 50): {original_dna[:50]}...")
886
+ print(f"πŸ” DEBUG: Translated protein (first 30): {protein_sequence[:30]}...")
887
  run_optimization(protein_sequence, str(st.session_state.organism), use_post_processing)
888
  else:
889
  run_optimization(str(st.session_state.sequence_clean), str(st.session_state.organism), use_post_processing)
 
966
  with imp_col1:
967
  if input_metrics.get('gc_content') and optimized_metrics.get('gc_content'):
968
  gc_change = optimized_metrics['gc_content'] - input_metrics['gc_content']
969
+ print(f"πŸ” DEBUG: GC change: {input_metrics['gc_content']:.3f} -> {optimized_metrics['gc_content']:.3f} (Ξ”{gc_change:+.3f})")
970
  st.metric("GC Content", f"{optimized_metrics['gc_content']:.1f}%", delta=f"{gc_change:+.1f}%")
971
 
972
  with imp_col2:
973
  if input_metrics.get('cai') and optimized_metrics.get('cai'):
974
  cai_change = optimized_metrics['cai'] - input_metrics['cai']
975
+ print(f"πŸ” DEBUG: CAI change: {input_metrics['cai']:.6f} -> {optimized_metrics['cai']:.6f} (Ξ”{cai_change:+.6f})")
976
+ print(f"πŸ” DEBUG: Are sequences identical? {original_sequence == result.predicted_dna}")
977
+ if hasattr(result, 'predicted_dna') and len(original_sequence) > 0:
978
+ print(f"πŸ” DEBUG: Sequence lengths - Original: {len(original_sequence)}, Optimized: {len(result.predicted_dna)}")
979
  st.metric("CAI Score", f"{optimized_metrics['cai']:.3f}", delta=f"{cai_change:+.3f}")
980
 
981
  with imp_col3: