import os
import cas9att
import cas9attvcf
#import cas12
import cas12lstm
import cas12lstmvcf
import pandas as pd
import streamlit as st
import plotly.graph_objs as go
from pathlib import Path
import zipfile
import io
#import gtracks
#import subprocess
import cyvcf2



# Paths of the pretrained weight files handed to the prediction backends.
cas9att_path = 'cas9_model/Cas9_MultiHeadAttention_weights.h5'
cas12lstm_path = 'cas12_model/BiLSTM_Cpf1_weights.h5'

# CRISPR systems offered in the selector below.
CRISPR_MODELS = ['Cas9', 'Cas12']

# Page header: render the bundled markdown (raw HTML allowed), then a rule.
st.markdown(Path('crisprTool.md').read_text(), unsafe_allow_html=True)
st.divider()

# The chosen system decides which prediction workflow runs further down.
selected_model = st.selectbox(
    label='Select CRISPR system:',
    options=CRISPR_MODELS,
    key='selected_model',
)


@st.cache_data
def parse_gene_annotations(file_path):
    """Build a gene-symbol -> Ensembl-gene-ID mapping from a tab-delimited file.

    The first line of the file is treated as the header and must contain the
    columns ``'Approved symbol'`` and ``'Ensembl gene ID'``. The file is
    decoded as GBK, and bytes that cannot be decoded are ignored.

    Args:
        file_path: Path of the tab-delimited annotation file.

    Returns:
        dict mapping each value of the 'Approved symbol' column to the value
        of the 'Ensembl gene ID' column on the same row. Rows that are too
        short to contain both columns are skipped; if a symbol occurs more
        than once, the last row wins.

    Raises:
        ValueError: If either required column is missing from the header.
    """
    gene_dict = {}
    with open(file_path, 'r', encoding='gbk', errors='ignore') as file:
        # Only strip on the right: a full strip() would also eat leading tabs,
        # so an empty first column would shift every field one position left
        # and the indices computed below would point at the wrong columns.
        headers = file.readline().rstrip().split('\t')
        symbol_idx = headers.index('Approved symbol')
        ensembl_idx = headers.index('Ensembl gene ID')
        required_len = max(symbol_idx, ensembl_idx) + 1

        for line in file:
            values = line.rstrip().split('\t')
            # Trailing empty columns are dropped by rstrip(); rows that end up
            # without both fields are skipped rather than indexed out of range.
            if len(values) >= required_len:
                gene_dict[values[symbol_idx]] = values[ensembl_idx]
    return gene_dict

# Symbol -> Ensembl gene ID lookup, loaded from the bundled HUGO annotation export.
gene_annotations = parse_gene_annotations('Human_genes_HUGO_02242024_annotation.txt')
# Gene symbols offered as options in the gene selectbox.
gene_symbol_list = list(gene_annotations)


# ---------------------------------------------------------------------------
# On-target prediction workflow.
#
# Cas9 and Cas12, in both "Normal" and "Mutation" mode, follow the same steps:
# pick a gene, run the backend's process_gene(), show the ranked table,
# optionally plot the top guides, and offer the exported files as a ZIP.
# The per-model / per-mode differences live in the tables below.
# ---------------------------------------------------------------------------

TARGET_NORMAL = 'Normal'
TARGET_MUTATION = 'Mutation related to MDA-MB-231'

# Variant calls handed to the mutation-aware backends.
MUTATION_VCF_PATH = 'SRR25934512.filter.snps.indels.vcf.gz'

# Column names assigned to the prediction rows in each mode.
NORMAL_COLUMNS = ["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon",
                  "Target", "gRNA", "Prediction"]
MUTATION_COLUMNS = ["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript",
                    "Exon", "Target", "gRNA", "Prediction", "Is Mutation"]
PREDICTION_INDEX = 8  # position of "Prediction" in both column layouts
TOP_N_TABLE = 10      # rows kept in st.session_state['on_target_results']
TOP_N_PLOT = 5        # guides drawn on the overview plot

# Plot geometry.
EXON_BASE = 0         # Base position for exons on the Y axis
EXON_HEIGHT = 0.02    # How 'tall' the exon markers should appear
VERTICAL_GAP = 0.2    # Gap between different ranks
MAX_STRAND_Y = 0.1    # Y value of the best-ranked '+' strand guide
MIN_STRAND_Y = -0.1   # Y value of the best-ranked '-' strand guide

MODEL_SETTINGS = {
    'Cas9': {
        'nuclease': 'SpCas9',
        'weights_path': cas9att_path,
        'normal_backend': cas9att,
        'mutation_backend': cas9attvcf,
    },
    'Cas12': {
        'nuclease': 'AsCas12a',
        'weights_path': cas12lstm_path,
        'normal_backend': cas12lstm,
        'mutation_backend': cas12lstmvcf,
    },
}


def clean_up_old_files(gene_symbol):
    """Delete the export files previously written for ``gene_symbol``.

    Removes the GenBank, BED and CSV files named after the gene symbol from
    the current working directory; files that do not exist are ignored.
    """
    for suffix in ('_crispr_targets.gb', '_crispr_targets.bed', '_crispr_predictions.csv'):
        try:
            os.remove(f"{gene_symbol}{suffix}")
        except FileNotFoundError:
            pass


def _is_forward_strand(strand):
    """Return True when ``strand`` is the string '1' or '+'."""
    return strand in ('1', '+')


def _show_run_summary(gene_symbol, ensembl_id, nuclease):
    """Render the three-column Genome / Gene / Nuclease summary."""
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("**Genome**")
        st.markdown("Homo sapiens")
    with col2:
        st.markdown("**Gene**")
        st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
    with col3:
        st.markdown("**Nuclease**")
        st.markdown(nuclease)


def _show_results_table(rows, columns):
    """Display ``rows`` as a table and return the DataFrame, or None on failure.

    A ValueError from DataFrame construction (for example a row whose length
    does not match ``columns``) is reported in the UI and the offending rows
    are printed to stdout for debugging.
    """
    try:
        df_full = pd.DataFrame(rows, columns=columns)
    except ValueError as e:
        st.error(f"DataFrame creation error: {e}")
        print(rows)
        return None
    st.dataframe(df_full)
    return df_full


def _build_top_guides_figure(top_predictions, exons):
    """Build the Plotly overview of exons and the best-ranked guides.

    Args:
        top_predictions: Rows in the "Normal" 9-column layout, best first;
            only the first ``TOP_N_PLOT`` are drawn.
        exons: Iterable of mappings with 'start' and 'end' entries.

    Returns:
        The populated ``plotly.graph_objs.Figure``.
    """
    fig = go.Figure()

    # Exons are drawn as thin bars sitting on the X axis.
    for exon in exons:
        exon_start, exon_end = exon['start'], exon['end']
        fig.add_trace(go.Bar(
            x=[(exon_start + exon_end) / 2],
            y=[EXON_HEIGHT],
            width=[exon_end - exon_start],
            base=EXON_BASE,
            marker_color='rgba(128, 0, 128, 0.5)',
            name='Exon'
        ))

    for rank, prediction in enumerate(top_predictions[:TOP_N_PLOT], start=1):
        chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
        midpoint = (int(start) + int(end)) / 2
        forward = _is_forward_strand(strand)

        # Rank 1 sits at the strand's reference line; each further rank is
        # offset by VERTICAL_GAP towards the opposite side.
        offset = (rank - 1) * VERTICAL_GAP
        y_value = MAX_STRAND_Y - offset if forward else MIN_STRAND_Y + offset

        fig.add_trace(go.Scatter(
            x=[midpoint],
            y=[y_value],
            mode='markers+text',
            marker=dict(symbol='triangle-up' if forward else 'triangle-down', size=12),
            text=f"Rank: {rank}",
            hoverinfo='text',
            hovertext=(
                f"Rank: {rank}<br>Chromosome: {chrom}<br>Target Sequence: {target}"
                f"<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}"
                f"<br>Strand: {'+' if forward else '-'}"
                f"<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}"
            ),
        ))

    fig.update_layout(
        title='Top 5 gRNA Sequences by Prediction Score',
        xaxis_title='Genomic Position',
        yaxis_title='Strand',
        yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
        showlegend=False,
        hovermode='x unified',
    )
    return fig


def _offer_zip_download(backend, df_full, gene_sequence, gene_symbol):
    """Write the GenBank and CSV exports and offer them as one ZIP download.

    The files are written to the working directory through ``backend``'s
    ``generate_genbank_file_from_df`` / ``create_csv_from_df`` and then packed
    into an in-memory ZIP archive.
    """
    genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
    csv_file_path = f"{gene_symbol}_crispr_predictions.csv"

    backend.generate_genbank_file_from_df(df_full, gene_sequence, gene_symbol, genbank_file_path)
    backend.create_csv_from_df(df_full, csv_file_path)

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        zip_file.write(genbank_file_path)
        zip_file.write(csv_file_path)

    # getvalue() returns the whole buffer regardless of the cursor position.
    st.download_button(
        label="Download GenBank and CSV files as ZIP",
        data=zip_buffer.getvalue(),
        file_name=f"{gene_symbol}_files.zip",
        mime="application/zip"
    )


if selected_model in MODEL_SETTINGS:
    model_settings = MODEL_SETTINGS[selected_model]

    # A radio button guarantees exactly one target mode is active.
    target_selection = st.radio(
        "Select either Normal or Mutation related to MDA-MB-231:",
        (TARGET_NORMAL, TARGET_MUTATION),
        key='target_selection'
    )
    use_mutations = target_selection == TARGET_MUTATION
    backend = model_settings['mutation_backend' if use_mutations else 'normal_backend']

    if 'current_gene_symbol' not in st.session_state:
        st.session_state['current_gene_symbol'] = ""

    # Gene symbol entry with autocomplete-like feature
    gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
                               format_func=lambda x: x if x else "")

    # When the user switches to a different gene, drop the previous gene's
    # export files and remember the new symbol.
    if gene_symbol and gene_symbol != st.session_state['current_gene_symbol']:
        if st.session_state['current_gene_symbol']:
            clean_up_old_files(st.session_state['current_gene_symbol'])
        st.session_state['current_gene_symbol'] = gene_symbol

    predict_button = st.button(f'Go {selected_model} on-target prediction!')

    if 'exons' not in st.session_state:
        st.session_state['exons'] = []

    if predict_button and gene_symbol:
        with st.spinner('Predicting... Please wait'):
            if use_mutations:
                # Open the VCF only when a prediction is actually requested,
                # and release the handle as soon as the results are in memory.
                vcf_reader = cyvcf2.VCF(MUTATION_VCF_PATH)
                try:
                    predictions, gene_sequence, exons = backend.process_gene(
                        gene_symbol, vcf_reader, model_settings['weights_path'])
                    predictions_all = sorted(predictions, key=lambda x: x[PREDICTION_INDEX], reverse=True)
                finally:
                    vcf_reader.close()
            else:
                predictions, gene_sequence, exons = backend.process_gene(
                    gene_symbol, model_settings['weights_path'])
                predictions_all = sorted(predictions, key=lambda x: x[PREDICTION_INDEX], reverse=True)

            sorted_predictions = predictions_all[:TOP_N_TABLE]
            # The full result list keeps its historical, mode-specific key.
            results_key = 'full_results' if use_mutations else 'on_target_results_all'
            st.session_state[results_key] = predictions_all
            st.session_state['on_target_results'] = sorted_predictions
            st.session_state['gene_sequence'] = gene_sequence
            st.session_state['exons'] = exons

        # Notify the user once the process is completed successfully.
        st.success('Prediction completed!')
        st.session_state['prediction_made'] = True

        if sorted_predictions:
            ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')
            _show_run_summary(gene_symbol, ensembl_id, model_settings['nuclease'])

            df_full = _show_results_table(
                predictions_all, MUTATION_COLUMNS if use_mutations else NORMAL_COLUMNS)

            # The overview plot needs start/end positions, which only the
            # "Normal" row layout provides.
            if not use_mutations:
                st.plotly_chart(_build_top_guides_figure(sorted_predictions, exons))

            # Skip the exports when the table could not be built; there is no
            # DataFrame to write in that case.
            # NOTE(review): results are rendered only in the run triggered by
            # the predict button, so a rerun (e.g. after clicking download)
            # clears them — confirm whether that is intended.
            if df_full is not None and gene_sequence:
                _offer_zip_download(backend, df_full, gene_sequence, gene_symbol)