Spaces:
Running
Running
Add debug logging to track DNA optimization flow
Browse files
- Add logging for protein translation steps
- Track sequence lengths and content through optimization
- Debug why DNA input produces identical output sequences
- This will help identify whether the issue is in translation, the model, or the comparison
app.py
CHANGED
|
@@ -595,6 +595,12 @@ def run_optimization(protein: str, organism: str, use_post_processing: bool = Fa
|
|
| 595 |
st.session_state.optimization_running = True
|
| 596 |
st.session_state.post_processed_results = None
|
| 597 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
try:
|
| 599 |
# Use the exact same method that achieved best results in evaluation
|
| 600 |
result = predict_dna_sequence(
|
|
@@ -606,6 +612,11 @@ def run_optimization(protein: str, organism: str, use_post_processing: bool = Fa
|
|
| 606 |
match_protein=True,
|
| 607 |
)
|
| 608 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 609 |
# Check GC content and auto-correct if out of optimal range
|
| 610 |
_res = result[0] if isinstance(result, list) else result
|
| 611 |
initial_gc = get_GC_content(_res.predicted_dna)
|
|
@@ -867,7 +878,12 @@ def single_sequence_optimization():
|
|
| 867 |
if st.button("π Optimize Sequence", type="primary", use_container_width=True):
|
| 868 |
st.session_state.results = None
|
| 869 |
if st.session_state.sequence_type == "dna":
|
| 870 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 871 |
run_optimization(protein_sequence, str(st.session_state.organism), use_post_processing)
|
| 872 |
else:
|
| 873 |
run_optimization(str(st.session_state.sequence_clean), str(st.session_state.organism), use_post_processing)
|
|
@@ -950,11 +966,16 @@ def display_optimization_results(result, organism, original_sequence, sequence_t
|
|
| 950 |
with imp_col1:
|
| 951 |
if input_metrics.get('gc_content') and optimized_metrics.get('gc_content'):
|
| 952 |
gc_change = optimized_metrics['gc_content'] - input_metrics['gc_content']
|
|
|
|
| 953 |
st.metric("GC Content", f"{optimized_metrics['gc_content']:.1f}%", delta=f"{gc_change:+.1f}%")
|
| 954 |
|
| 955 |
with imp_col2:
|
| 956 |
if input_metrics.get('cai') and optimized_metrics.get('cai'):
|
| 957 |
cai_change = optimized_metrics['cai'] - input_metrics['cai']
|
|
|
|
|
|
|
|
|
|
|
|
|
| 958 |
st.metric("CAI Score", f"{optimized_metrics['cai']:.3f}", delta=f"{cai_change:+.3f}")
|
| 959 |
|
| 960 |
with imp_col3:
|
|
|
|
| 595 |
st.session_state.optimization_running = True
|
| 596 |
st.session_state.post_processed_results = None
|
| 597 |
|
| 598 |
+
# Debug logging
|
| 599 |
+
print(f"π DEBUG: Starting optimization")
|
| 600 |
+
print(f"π DEBUG: Protein length: {len(protein)}")
|
| 601 |
+
print(f"π DEBUG: Protein (first 50): {protein[:50]}...")
|
| 602 |
+
print(f"π DEBUG: Organism: {organism}")
|
| 603 |
+
|
| 604 |
try:
|
| 605 |
# Use the exact same method that achieved best results in evaluation
|
| 606 |
result = predict_dna_sequence(
|
|
|
|
| 612 |
match_protein=True,
|
| 613 |
)
|
| 614 |
|
| 615 |
+
# Debug logging for result
|
| 616 |
+
_res = result[0] if isinstance(result, list) else result
|
| 617 |
+
print(f"π DEBUG: Model returned DNA length: {len(_res.predicted_dna)}")
|
| 618 |
+
print(f"π DEBUG: Model returned DNA (first 50): {_res.predicted_dna[:50]}...")
|
| 619 |
+
|
| 620 |
# Check GC content and auto-correct if out of optimal range
|
| 621 |
_res = result[0] if isinstance(result, list) else result
|
| 622 |
initial_gc = get_GC_content(_res.predicted_dna)
|
|
|
|
| 878 |
if st.button("π Optimize Sequence", type="primary", use_container_width=True):
|
| 879 |
st.session_state.results = None
|
| 880 |
if st.session_state.sequence_type == "dna":
|
| 881 |
+
original_dna = str(st.session_state.sequence_clean)
|
| 882 |
+
protein_sequence = translate_dna_to_protein(original_dna)
|
| 883 |
+
print(f"π DEBUG: Original DNA length: {len(original_dna)}")
|
| 884 |
+
print(f"π DEBUG: Translated protein length: {len(protein_sequence)}")
|
| 885 |
+
print(f"π DEBUG: Original DNA (first 50): {original_dna[:50]}...")
|
| 886 |
+
print(f"π DEBUG: Translated protein (first 30): {protein_sequence[:30]}...")
|
| 887 |
run_optimization(protein_sequence, str(st.session_state.organism), use_post_processing)
|
| 888 |
else:
|
| 889 |
run_optimization(str(st.session_state.sequence_clean), str(st.session_state.organism), use_post_processing)
|
|
|
|
| 966 |
with imp_col1:
|
| 967 |
if input_metrics.get('gc_content') and optimized_metrics.get('gc_content'):
|
| 968 |
gc_change = optimized_metrics['gc_content'] - input_metrics['gc_content']
|
| 969 |
+
print(f"π DEBUG: GC change: {input_metrics['gc_content']:.3f} -> {optimized_metrics['gc_content']:.3f} (Δ{gc_change:+.3f})")
|
| 970 |
st.metric("GC Content", f"{optimized_metrics['gc_content']:.1f}%", delta=f"{gc_change:+.1f}%")
|
| 971 |
|
| 972 |
with imp_col2:
|
| 973 |
if input_metrics.get('cai') and optimized_metrics.get('cai'):
|
| 974 |
cai_change = optimized_metrics['cai'] - input_metrics['cai']
|
| 975 |
+
print(f"π DEBUG: CAI change: {input_metrics['cai']:.6f} -> {optimized_metrics['cai']:.6f} (Δ{cai_change:+.6f})")
|
| 976 |
+
print(f"π DEBUG: Are sequences identical? {original_sequence == result.predicted_dna}")
|
| 977 |
+
if hasattr(result, 'predicted_dna') and len(original_sequence) > 0:
|
| 978 |
+
print(f"π DEBUG: Sequence lengths - Original: {len(original_sequence)}, Optimized: {len(result.predicted_dna)}")
|
| 979 |
st.metric("CAI Score", f"{optimized_metrics['cai']:.3f}", delta=f"{cai_change:+.3f}")
|
| 980 |
|
| 981 |
with imp_col3:
|