File size: 17,984 Bytes
8715adc
 
 
 
 
 
 
 
 
 
 
 
c8c1410
 
 
 
8715adc
 
 
a18b22a
654a357
 
a18b22a
 
654a357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
654a357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
654a357
 
 
 
 
 
 
 
 
8715adc
654a357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
654a357
 
 
8715adc
654a357
 
 
 
 
 
 
 
8715adc
654a357
 
8715adc
a18b22a
654a357
 
a18b22a
654a357
 
 
 
 
 
 
 
 
8715adc
 
654a357
 
a18b22a
654a357
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a18b22a
 
654a357
a18b22a
654a357
a18b22a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a18b22a
 
 
 
8715adc
 
 
 
 
 
 
a18b22a
 
 
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a18b22a
 
 
8715adc
 
 
88d8634
 
 
8715adc
 
 
 
 
 
 
 
 
 
f868a9f
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8c1410
 
 
 
 
8715adc
 
 
 
 
 
 
 
 
 
 
c8c1410
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
 
 
 
 
c8c1410
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7595b98
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a18b22a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8715adc
 
 
88d8634
 
 
8715adc
88d8634
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a18b22a
8715adc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
"""
Codon Optimizer - Gradio Web Application
Based on GenScript GenSmart algorithm (Patent WO2020024917A1)

A multi-objective codon optimization tool using NSGA-III algorithm.
"""

import gradio as gr
from typing import Optional
import textwrap

from codon_tables import get_organism_list, CODON_TO_AA, AA_TO_CODONS
from indices import (
    sequence_to_codons, codons_to_protein, calculate_cai, calculate_gc_content,
    mRNAStructureAnalyzer, SEQFOLD_AVAILABLE
)
from optimizer import optimize_sequence, CodonOptimizer, SimpleOptimizer


# Restriction enzymes organized by type
# Only 6-cutters and above (6bp+ recognition sequences)
#
# The table maps an enzyme name to its recognition sequence. Values may
# contain ambiguity letters besides A/C/G/T (N, R, Y, W, M, K appear below),
# and several names map to the same sequence (e.g. XhoI/TliI,
# SmaI/XmaI/TspMI, NheI/BmtI).
# NOTE(review): this file passes the strings to the optimizer unchanged;
# whether ambiguity letters are expanded or matched literally there is not
# visible from here -- confirm.

# Type II: Orthodox restriction enzymes that cut within or near recognition sequence
TYPE_II_ENZYMES = {
    # Mostly plain 6 bp sites; degenerate and N-spaced (interrupted) sites are
    # mixed in alphabetically, and 8-cutters are marked inline.
    "AatII": "GACGTC",
    "AccI": "GTMKAC",      # Degenerate: GT(A/C)(G/T)AC
    "AclI": "AACGTT",
    "AfeI": "AGCGCT",
    "AflII": "CTTAAG",
    "AgeI": "ACCGGT",
    "AhdI": "GACNNNNNGTC",
    "AleI": "CACNNNNGTG",
    "ApaI": "GGGCCC",
    "ApaLI": "GTGCAC",
    "AscI": "GGCGCGCC",    # 8-cutter
    "AseI": "ATTAAT",
    "AsiSI": "GCGATCGC",   # 8-cutter
    "AvrII": "CCTAGG",
    "BamHI": "GGATCC",
    "BclI": "TGATCA",
    "BglII": "AGATCT",
    "BlpI": "GCTNAGC",
    "BmtI": "GCTAGC",
    "BsiWI": "CGTACG",
    "BspEI": "TCCGGA",
    "BspHI": "TCATGA",
    "BsrGI": "TGTACA",
    "BssHII": "GCGCGC",
    "BstAPI": "GCANNNNNTGC",
    "BstBI": "TTCGAA",
    "BstEII": "GGTNACC",
    "BstXI": "CCANNNNNNTGG",
    "BstZ17I": "GTATAC",
    "Cac8I": "GCNNGC",
    "ClaI": "ATCGAT",
    "DraI": "TTTAAA",
    "DraIII": "CACNNNGTG",
    "EagI": "CGGCCG",
    "EcoRI": "GAATTC",
    "EcoRV": "GATATC",
    "FseI": "GGCCGGCC",    # 8-cutter
    "FspI": "TGCGCA",
    "HincII": "GTYRAC",
    "HindIII": "AAGCTT",
    "HpaI": "GTTAAC",
    "KasI": "GGCGCC",
    "KpnI": "GGTACC",
    "MfeI": "CAATTG",
    "MluI": "ACGCGT",
    "MscI": "TGGCCA",
    "MspA1I": "CMGCKG",
    "NaeI": "GCCGGC",
    "NarI": "GGCGCC",
    "NcoI": "CCATGG",
    "NdeI": "CATATG",
    "NgoMIV": "GCCGGC",
    "NheI": "GCTAGC",
    "NotI": "GCGGCCGC",    # 8-cutter
    "NruI": "TCGCGA",
    "NsiI": "ATGCAT",
    "NspI": "RCATGY",
    "PacI": "TTAATTAA",    # 8-cutter
    "PciI": "ACATGT",
    "PflMI": "CCANNNNNTGG",
    "PmeI": "GTTTAAAC",    # 8-cutter
    "PmlI": "CACGTG",
    "PpuMI": "RGGWCCY",
    "PshAI": "GACNNNNGTC",
    "PsiI": "TTATAA",
    "PspOMI": "GGGCCC",
    "PstI": "CTGCAG",
    "PvuI": "CGATCG",
    "PvuII": "CAGCTG",
    "RsrII": "CGGWCCG",
    "SacI": "GAGCTC",
    "SacII": "CCGCGG",
    "SalI": "GTCGAC",
    "SbfI": "CCTGCAGG",    # 8-cutter
    "ScaI": "AGTACT",
    "SexAI": "ACCWGGT",
    "SfiI": "GGCCNNNNNGGCC",
    "SfoI": "GGCGCC",
    "SgrAI": "CRCCGGYG",   # 8-cutter
    "SmaI": "CCCGGG",
    "SmlI": "CTYRAG",
    "SnaBI": "TACGTA",
    "SpeI": "ACTAGT",
    "SphI": "GCATGC",
    "SrfI": "GCCCGGGC",    # 8-cutter
    "SspI": "AATATT",
    "StuI": "AGGCCT",
    "SwaI": "ATTTAAAT",    # 8-cutter
    "TliI": "CTCGAG",
    "TspMI": "CCCGGG",
    "Tth111I": "GACNNNGTC",
    "XbaI": "TCTAGA",
    "XcmI": "CCANNNNNNNNNTGG",
    "XhoI": "CTCGAG",
    "XmaI": "CCCGGG",
    "ZraI": "GACGTC",
}

# Type IIS: Cut outside recognition sequence (6bp+ only)
# Used in Golden Gate, MoClo, and other scarless cloning methods
# Maps enzyme name -> recognition sequence; names that share a sequence are
# listed separately so each can be picked by name in the UI.
TYPE_IIS_ENZYMES = {
    "AarI": "CACCTGC",     # 7bp
    "BbsI": "GAAGAC",      # 6bp - Golden Gate alternative
    "BfuAI": "ACCTGC",     # 6bp
    "BpiI": "GAAGAC",      # 6bp - BbsI isoschizomer
    "BsaI": "GGTCTC",      # 6bp - Golden Gate standard
    "BsaI-HFv2": "GGTCTC", # 6bp - High-fidelity BsaI
    "BseRI": "GAGGAG",     # 6bp
    "BsmBI": "CGTCTC",     # 6bp - MoClo standard
    "BspMI": "ACCTGC",     # 6bp
    "BtgZI": "GCGATG",     # 6bp
    "BtsI": "GCAGTG",      # 6bp
    "BspQI": "GCTCTTC",    # 7bp - SapI isoschizomer
    "Esp3I": "CGTCTC",     # 6bp - BsmBI isoschizomer
    "LguI": "GCTCTTC",     # 7bp - SapI isoschizomer
    "PaqCI": "CACCTGC",    # 7bp - AarI isoschizomer
    "SapI": "GCTCTTC",     # 7bp - Used in SapTrap
}

# Type III: Require two recognition sites in inverse orientation (6bp+ only)
TYPE_III_ENZYMES = {
    "EcoP15I": "CAGCAG",   # 6bp
}

# Combine all for backward compatibility
# Flat name -> sequence lookup used by get_excluded_sites. With this merge
# order a later table would win on a duplicate name.
COMMON_RESTRICTION_SITES = {**TYPE_II_ENZYMES, **TYPE_IIS_ENZYMES, **TYPE_III_ENZYMES}

# Labeled enzyme choices for the UI
def get_enzyme_choices():
    """Return display labels for every known enzyme.

    Each label has the form ``"[<category>] <enzyme>"``. Categories are
    emitted in the order Type II, Type IIS, Type III, and enzyme names are
    sorted alphabetically within each category.
    """
    categories = (
        ("Type II", TYPE_II_ENZYMES),
        ("Type IIS", TYPE_IIS_ENZYMES),
        ("Type III", TYPE_III_ENZYMES),
    )
    return [
        f"[{category}] {enzyme}"
        for category, table in categories
        for enzyme in sorted(table)
    ]

def parse_enzyme_name(labeled_name: str) -> str:
    """Strip the ``"[Category] "`` prefix from a UI label.

    The label is split on ``"] "`` and the second piece is returned; a label
    without that separator is returned unchanged.
    """
    pieces = labeled_name.split("] ")
    return pieces[1] if len(pieces) > 1 else labeled_name


def parse_sequence(sequence: str) -> tuple:
    """
    Parse and validate input sequence.

    FASTA header lines (lines whose first non-blank character is ``>``) are
    dropped so a pasted FASTA record does not leak header text into the
    sequence. The remaining text is upper-cased and every non-letter
    character (whitespace, digits, punctuation) is removed.

    Type detection:
    - only A/T/G/C and shorter than 30 letters -> treated as protein
    - only A/T/G/C and 30 letters or more      -> treated as DNA
    - other letters, all valid amino acids     -> protein
    - anything else                            -> error

    Returns: (cleaned_sequence, is_protein, error_message)
        On success error_message is None; on failure the first two items
        are None and error_message describes the problem.
    """
    if not sequence or not sequence.strip():
        return None, None, "Please enter a sequence"

    # Skip FASTA headers before cleaning; otherwise their letters would be
    # kept as residues once the non-letter characters are filtered out.
    residue_lines = [
        line for line in sequence.splitlines()
        if not line.lstrip().startswith(">")
    ]
    cleaned = ''.join(c for c in ''.join(residue_lines).upper() if c.isalpha())

    if not cleaned:
        return None, None, "No valid characters found in sequence"

    dna_chars = set('ATGC')
    protein_chars = set('ACDEFGHIKLMNPQRSTVWY')
    unique_chars = set(cleaned)

    if unique_chars <= dna_chars:
        # A/T/G/C are also valid amino-acid letters, so this is ambiguous.
        # Short inputs are assumed to be peptides; anything of 30+ letters is
        # treated as DNA whether or not its length is a multiple of three.
        is_protein = len(cleaned) < 30
        return cleaned, is_protein, None

    if unique_chars <= protein_chars:
        return cleaned, True, None  # Protein

    invalid = unique_chars - protein_chars
    return None, None, f"Invalid characters found: {invalid}"


def format_sequence(sequence: str, line_length: int = 60) -> str:
    """Break *sequence* into newline-separated lines for display.

    Wrapping is delegated to ``textwrap.wrap`` with ``line_length`` as the
    maximum width; an empty sequence yields an empty string.
    """
    wrapped_lines = textwrap.wrap(sequence, width=line_length)
    return '\n'.join(wrapped_lines)


def get_excluded_sites(site_names: list) -> list:
    """Translate selected enzyme labels into recognition sequences.

    Each entry may be a bare enzyme name or a UI label such as
    ``"[Type II] EcoRI"``. Names missing from COMMON_RESTRICTION_SITES are
    skipped; ``None`` or an empty list gives an empty result. Input order is
    kept and duplicates are not removed.
    """
    enzyme_names = (parse_enzyme_name(label) for label in site_names or [])
    return [
        COMMON_RESTRICTION_SITES[enzyme]
        for enzyme in enzyme_names
        if enzyme in COMMON_RESTRICTION_SITES
    ]


def optimize_codon(
    input_sequence: str,
    sequence_type: str,
    organism: str,
    type_ii_sites: list,
    type_iis_sites: list,
    type_iii_sites: list,
    optimization_quality: str,
) -> tuple:
    """
    Main optimization function for Gradio interface.

    Args:
        input_sequence: Raw text from the input box (protein or DNA).
        sequence_type: "Auto-detect" uses the type guessed by parse_sequence;
            "Protein" forces protein; any other value is treated as DNA.
        organism: Target organism name, passed through to optimize_sequence.
        type_ii_sites: Selected Type II enzyme labels (may be None).
        type_iis_sites: Selected Type IIS enzyme labels (may be None).
        type_iii_sites: Selected Type III enzyme labels (may be None).
        optimization_quality: "Fast", "Standard" or "Thorough"; any other
            value falls back to "standard".

    Returns:
        A 5-tuple of strings
        (input_info, optimized_dna, protein_seq, metrics_text, error).
        If the input cannot be parsed, the parse error is returned in the
        first slot and the rest are empty. If optimization or formatting
        raises, the first slot is "Error: <message>" and the last slot holds
        the bare message.
    """
    # Parse sequence
    cleaned, auto_is_protein, error = parse_sequence(input_sequence)

    if error:
        return error, "", "", "", ""

    # Determine sequence type
    if sequence_type == "Auto-detect":
        is_protein = auto_is_protein
    else:
        is_protein = (sequence_type == "Protein")

    # Combine all excluded restriction sites
    all_excluded_sites = (type_ii_sites or []) + (type_iis_sites or []) + (type_iii_sites or [])
    excluded = get_excluded_sites(all_excluded_sites)

    # Map quality
    quality_map = {
        "Fast": "fast",
        "Standard": "standard",
        "Thorough": "thorough",
    }
    quality = quality_map.get(optimization_quality, "standard")

    try:
        # Run optimization
        result = optimize_sequence(
            sequence=cleaned,
            organism=organism,
            is_protein=is_protein,
            excluded_sites=excluded,
            use_nsga3=False,  # Use fast hill-climbing optimizer
            quality=quality
        )

        # Format outputs
        input_info = f"""**Input Analysis:**
- Detected as: {'Protein' if is_protein else 'DNA'}
- {'Amino acids' if is_protein else 'Nucleotides'}: {len(cleaned)}
- Target organism: {organism}
"""

        optimized_dna = format_sequence(result['optimized_dna'])

        protein_seq = format_sequence(result['protein'])

        metrics = result['metrics']

        # Perform mRNA structure analysis
        mrna_analyzer = mRNAStructureAnalyzer()
        mrna_analysis = mrna_analyzer.analyze(result['optimized_dna'])

        metrics_text = f"""**Optimization Metrics:**

| Metric | Value |
|--------|-------|
| Codon Adaptation Index (CAI) | {metrics['cai']:.3f} |
| Harmony Index | {metrics['harmony_index']:.3f} |
| Codon Context Index | {metrics['context_index']:.3f} |
| Outlier Index | {metrics['outlier_index']:.3f} |
| GC Content | {metrics['gc_content']:.1f}% |
| Sequence Length | {metrics['length_bp']} bp ({metrics['length_aa']} aa) |

"""

        # Add mRNA structure metrics if available
        if mrna_analysis['available']:
            # -30 kcal/mol is the same cut-off quoted in the interpretation
            # notes appended below.
            five_prime_status = "Good" if mrna_analysis['five_prime_mfe'] > -30 else "Warning: stable structure"
            metrics_text += f"""**mRNA Secondary Structure:**

| Metric | Value | Status |
|--------|-------|--------|
| 5' Region MFE (50 nt) | {mrna_analysis['five_prime_mfe']:.1f} kcal/mol | {five_prime_status} |
| Full Sequence MFE | {mrna_analysis['full_mfe']:.1f} kcal/mol | - |
| Hairpins Detected | {mrna_analysis['hairpin_count']} | {"None" if mrna_analysis['hairpin_count'] == 0 else "Review recommended"} |

"""
        else:
            metrics_text += "*mRNA structure analysis not available (seqfold not installed)*\n\n"

        metrics_text += """**Interpretation:**
- CAI: Higher is better (1.0 = perfect match to highly expressed genes)
- Harmony Index: Higher is better (codon usage match)
- Context Index: Higher is better (codon pair optimization)
- Outlier Index: Lower is better (fewer adverse features)
- GC Content: Optimal range is 40-60%
- 5' MFE: > -30 kcal/mol recommended (less stable = better translation initiation)
"""

        # Generate codon comparison if input was DNA
        if not is_protein:
            original_codons = sequence_to_codons(cleaned)
            optimized_codons = result['codons']

            # Position-wise comparison; zip stops at the shorter list.
            changes = sum(
                1
                for orig, opt in zip(original_codons, optimized_codons)
                if orig != opt
            )

            # Guard the percentage so an empty codon list cannot raise
            # ZeroDivisionError and discard an otherwise finished result.
            total_codons = len(original_codons)
            percent_changed = 100 * changes / total_codons if total_codons else 0.0

            comparison = f"\n**Codon Changes:** {changes} of {total_codons} codons modified ({percent_changed:.1f}%)"
            metrics_text += comparison

        return input_info, optimized_dna, protein_seq, metrics_text, ""

    except Exception as e:
        # UI boundary: report the failure as text instead of letting the
        # exception propagate out of the event handler.
        return f"Error: {str(e)}", "", "", "", str(e)


def download_fasta(optimized_dna: str, organism: str) -> str:
    """Generate FASTA format for download.

    Args:
        optimized_dna: DNA sequence, possibly wrapped over several lines.
        organism: Organism name used in the header; spaces become
            underscores and parentheses are removed.

    Returns:
        A FASTA record with the header
        ``>Optimized_sequence|<organism>|<length>bp`` followed by the
        sequence wrapped at 60 characters, or an empty string when there is
        no sequence.
    """
    if not optimized_dna:
        return ""

    # Remove every kind of whitespace (including "\r" and tabs), so the
    # reported length counts bases only and wrapping cannot break on
    # leftover whitespace.
    clean_dna = ''.join(optimized_dna.split())
    if not clean_dna:
        return ""

    organism_short = organism.replace(' ', '_').replace('(', '').replace(')', '')

    fasta = f">Optimized_sequence|{organism_short}|{len(clean_dna)}bp\n"
    fasta += '\n'.join(textwrap.wrap(clean_dna, 60))
    return fasta


# Example sequences
# Loaded into the input box by the "Load GFP Protein" / "Load GFP DNA" buttons;
# the button handlers strip the embedded newlines before inserting the text.
EXAMPLE_PROTEIN = """MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTL
VTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIFFKDDGNYKTRAEVKFEGDTLV
NRIELKGIDFKEDGNILGHKLEYNYNSHNVYIMADKQKNGIKVNFKIRHNIEDGSVQLAD
HYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK"""

# Single-line DNA example; the sequence ends with a TAA codon.
EXAMPLE_DNA = """ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTCTCTTATGGTGTTCAATGCTTTTCAAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCTGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCATGGCAGACAAACAAAAGAATGGAATCAAAGTTAACTTCAAAATTAGACACAACATTGAAGATGGAAGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTGCCCTTTCGAAAGATCCCAACGAAAAGAGAGACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAA"""


# Build Gradio interface
# Layout: an intro banner, then one row with two equal columns (inputs and
# options on the left, results on the right), then an "About" footer.
# The Blocks object is bound to `demo`, which the __main__ guard launches.
with gr.Blocks(
    title="Codon Optimizer",
    theme=gr.themes.Soft(),
) as demo:

    # NOTE(review): the text below advertises NSGA-III, but optimize_codon
    # calls optimize_sequence with use_nsga3=False -- confirm the wording.
    gr.Markdown("""
    # Codon Optimizer
    `joeyisgoed/codon-optimizer`

    Multi-objective codon optimization tool based on the GenScript GenSmart algorithm.
    Uses NSGA-III genetic algorithm to optimize for:
    - **Harmony Index**: Match codon usage to highly-expressed genes
    - **Codon Context Index**: Optimize codon pair preferences
    - **Outlier Index**: Minimize adverse sequence features

    Enter a protein or DNA sequence below to optimize it for your target expression host.
    """)

    with gr.Row():
        # Left column: sequence input and optimization options
        with gr.Column(scale=1):
            input_sequence = gr.Textbox(
                label="Input Sequence",
                placeholder="Paste your protein or DNA sequence here...",
                lines=8,
                max_lines=20,
            )

            with gr.Row():
                # These three strings are the values optimize_codon compares against
                sequence_type = gr.Radio(
                    choices=["Auto-detect", "Protein", "DNA"],
                    value="Auto-detect",
                    label="Sequence Type",
                )

                organism = gr.Dropdown(
                    choices=get_organism_list(),
                    value="Escherichia coli K12",
                    label="Target Organism",
                )

            # One checkbox group per enzyme category; labels carry a
            # "[Type ...] " prefix that parse_enzyme_name strips again.
            with gr.Accordion("Exclude Restriction Sites", open=False):
                gr.Markdown("*Select restriction enzyme sites to avoid in the optimized sequence*")

                with gr.Accordion("Type II Enzymes (Standard)", open=False):
                    type_ii_sites = gr.CheckboxGroup(
                        choices=[f"[Type II] {name}" for name in sorted(TYPE_II_ENZYMES.keys())],
                        label="Type II",
                        info="Orthodox enzymes that cut within recognition sequence",
                    )

                # Only this inner accordion starts expanded
                with gr.Accordion("Type IIS Enzymes (Golden Gate/MoClo)", open=True):
                    type_iis_sites = gr.CheckboxGroup(
                        choices=[f"[Type IIS] {name}" for name in sorted(TYPE_IIS_ENZYMES.keys())],
                        label="Type IIS",
                        info="Cut outside recognition site - used for scarless cloning",
                    )

                with gr.Accordion("Type III Enzymes", open=False):
                    type_iii_sites = gr.CheckboxGroup(
                        choices=[f"[Type III] {name}" for name in sorted(TYPE_III_ENZYMES.keys())],
                        label="Type III",
                        info="Require two recognition sites in inverse orientation",
                    )

            # Choices must match the keys of quality_map in optimize_codon
            optimization_quality = gr.Radio(
                choices=[
                    "Fast",
                    "Standard",
                    "Thorough",
                ],
                value="Fast",  # Default to fast for better UX
                label="Optimization Quality",
            )

            with gr.Row():
                optimize_btn = gr.Button("Optimize Sequence", variant="primary", size="lg")
                clear_btn = gr.Button("Clear", size="lg")

            with gr.Accordion("Example Sequences", open=False):
                example_protein_btn = gr.Button("Load GFP Protein")
                example_dna_btn = gr.Button("Load GFP DNA")

        # Right column: results
        with gr.Column(scale=1):
            input_info = gr.Markdown(label="Input Analysis")

            optimized_dna = gr.Textbox(
                label="Optimized DNA Sequence",
                lines=8,
                max_lines=20,
                show_copy_button=True,
            )

            protein_output = gr.Textbox(
                label="Protein Sequence",
                lines=4,
                max_lines=10,
                show_copy_button=True,
            )

            metrics_output = gr.Markdown(label="Optimization Metrics")

            # Created hidden; optimize_codon also puts its error text in the
            # first output (input_info), which is where the user sees it.
            error_output = gr.Textbox(label="Errors", visible=False)

    # Event handlers
    # The order of `outputs` matches the 5-tuple returned by optimize_codon.
    optimize_btn.click(
        fn=optimize_codon,
        inputs=[input_sequence, sequence_type, organism, type_ii_sites, type_iis_sites, type_iii_sites, optimization_quality],
        outputs=[input_info, optimized_dna, protein_output, metrics_output, error_output],
    )

    # Resets the five result components only; the input box and the option
    # widgets are not listed in `outputs` and so keep their values.
    clear_btn.click(
        fn=lambda: ("", "", "", "", ""),
        outputs=[input_info, optimized_dna, protein_output, metrics_output, error_output],
    )

    # Example loaders: drop the newlines embedded in the constants
    example_protein_btn.click(
        fn=lambda: EXAMPLE_PROTEIN.replace('\n', ''),
        outputs=[input_sequence],
    )

    example_dna_btn.click(
        fn=lambda: EXAMPLE_DNA.replace('\n', ''),
        outputs=[input_sequence],
    )

    # NOTE(review): the host count and the NSGA-III claim below are
    # hard-coded text; the dropdown is filled from get_organism_list() and
    # the handler passes use_nsga3=False -- confirm both statements.
    gr.Markdown("""
    ---
    ### About

    This tool implements a codon optimization algorithm inspired by GenScript's GenSmart system
    ([Patent WO2020024917A1](https://patents.google.com/patent/WO2020024917A1/en)).

    **Features:**
    - Multi-objective optimization using NSGA-III algorithm
    - Support for 10 common expression hosts
    - Optional restriction site exclusion
    - Comprehensive optimization metrics

    **References:**
    - GenScript GenSmart Codon Optimization
    - NSGA-III: Deb & Jain (2014)
    - Codon Adaptation Index (CAI): Sharp & Li (1987)
    """)


# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()