Spaces:

NiniCat
/

CRISPRTool

Runtime error

App Files Files Community

supercat666 committed on May 19, 2024

Commit

fc8ed8c

1 Parent(s): fd276e2

fix app

Browse files

Files changed (2) hide show

app.py +246 -151
cas12lstmvcf.py +68 -8

app.py CHANGED Viewed

@@ -5,6 +5,7 @@ import cas9attvcf
 import cas9off
 import cas12
 import cas12lstm
 import pandas as pd
 import streamlit as st
 import plotly.graph_objs as go
@@ -26,7 +27,7 @@ CRISPR_MODELS = ['Cas9', 'Cas12', 'Cas13d']
 selected_model = st.selectbox('Select CRISPR model:', CRISPR_MODELS, key='selected_model')
 cas9att_path = 'cas9_model/Cas9_MultiHeadAttention_weights.h5'
-cas12_path = 'cas12_model/BiLSTM_Cpf1_weights.h5'
 #plot functions
 def generate_coolbox_plot(bigwig_path, region, output_image_path):
@@ -331,7 +332,7 @@ if selected_model == 'Cas9':
         # Process predictions
         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
-                predictions, gene_sequence, exons = cas9attvcf.process_gene(gene_symbol, cas9att_path)
                 full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
                 sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
                 st.session_state['full_results'] = full_predictions
@@ -489,6 +490,11 @@ if selected_model == 'Cas9':
                 st.experimental_rerun()
 elif selected_model == 'Cas12':
     # Gene symbol entry with autocomplete-like feature
     gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
                                format_func=lambda x: x if x else "")
@@ -497,159 +503,248 @@ elif selected_model == 'Cas12':
     if 'current_gene_symbol' not in st.session_state:
         st.session_state['current_gene_symbol'] = ""
-    # Prediction button
-    predict_button = st.button('Predict on-target')
-    # Function to clean up old files
-    def clean_up_old_files(gene_symbol):
-        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
-        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
-        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-        for path in [genbank_file_path, bed_file_path, csv_file_path]:
-            if os.path.exists(path):
-                os.remove(path)
-    # Clean up files if a new gene symbol is entered
-    if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
-        clean_up_old_files(st.session_state['current_gene_symbol'])
-    # Process predictions
-    if predict_button and gene_symbol:
-        with st.spinner('Predicting... Please wait'):
-            predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas9att_path)
-            sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
-            st.session_state['on_target_results'] = sorted_predictions
-            st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
-            st.session_state['exons'] = exons  # Store exon data
-        # Notify the user once the process is completed successfully.
-        st.success('Prediction completed!')
-        st.session_state['prediction_made'] = True
-        if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
-            ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
-            col1, col2, col3 = st.columns(3)
-            with col1:
-                st.markdown("**Genome**")
-                st.markdown("Homo sapiens")
-            with col2:
-                st.markdown("**Gene**")
-                st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
-            with col3:
-                st.markdown("**Nuclease**")
-                st.markdown("SpCas9")
-            # Include "Target" in the DataFrame's columns
-            try:
-                df = pd.DataFrame(st.session_state['on_target_results'],
-                                  columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon", "Target",
-                                           "gRNA", "Prediction"])
-                st.dataframe(df)
-            except ValueError as e:
-                st.error(f"DataFrame creation error: {e}")
-                # Optionally print or log the problematic data for debugging:
-                print(st.session_state['on_target_results'])
-            # Initialize Plotly figure
-            fig = go.Figure()
-            EXON_BASE = 0  # Base position for exons and CDS on the Y axis
-            EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear
-            # Plot Exons as small markers on the X-axis
-            for exon in st.session_state['exons']:
-                exon_start, exon_end = exon['start'], exon['end']
-                fig.add_trace(go.Bar(
-                    x=[(exon_start + exon_end) / 2],
-                    y=[EXON_HEIGHT],
-                    width=[exon_end - exon_start],
-                    base=EXON_BASE,
-                    marker_color='rgba(128, 0, 128, 0.5)',
-                    name='Exon'
-                ))
-            VERTICAL_GAP = 0.2  # Gap between different ranks
-            # Define max and min Y values based on strand and rank
-            MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
-            MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results
-            # Iterate over top 5 sorted predictions to create the plot
-            for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
-                chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
-                midpoint = (int(start) + int(end)) / 2
-                # Vertical position based on rank, modified by strand
-                y_value = (MAX_STRAND_Y - (i - 1) * VERTICAL_GAP) if strand == '1' or strand == '+' else (
-                        MIN_STRAND_Y + (i - 1) * VERTICAL_GAP)
-                fig.add_trace(go.Scatter(
-                    x=[midpoint],
-                    y=[y_value],
-                    mode='markers+text',
-                    marker=dict(symbol='triangle-up' if strand == '1' or strand == '+' else 'triangle-down',
-                                size=12),
-                    text=f"Rank: {i}",  # Text label
-                    hoverinfo='text',
-                    hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' or strand == '+' else '-'}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
-                ))
-            # Update layout for clarity and interaction
-            fig.update_layout(
-                title='Top 5 gRNA Sequences by Prediction Score',
-                xaxis_title='Genomic Position',
-                yaxis_title='Strand',
-                yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
-                showlegend=False,
-                hovermode='x unified',
-            )
-            # Display the plot
-            st.plotly_chart(fig)
-            if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
-                gene_symbol = st.session_state['current_gene_symbol']
-                gene_sequence = st.session_state['gene_sequence']
-                # Define file paths
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                 csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
-                plot_image_path = f"{gene_symbol}_gtracks_plot.png"
-                # Generate files
-                cas12lstm.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
-                cas12lstm.create_bed_file_from_df(df, bed_file_path)
-                cas12lstm.create_csv_from_df(df, csv_file_path)
-                # Prepare an in-memory buffer for the ZIP file
-                zip_buffer = io.BytesIO()
-                with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
-                    # For each file, add it to the ZIP file
-                    zip_file.write(genbank_file_path)
-                    zip_file.write(bed_file_path)
-                    zip_file.write(csv_file_path)
-                # Important: move the cursor to the beginning of the BytesIO buffer before reading it
-                zip_buffer.seek(0)
-                # Specify the region you want to visualize
-                min_start = df['Start Pos'].min()
-                max_end = df['End Pos'].max()
-                chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
-                region = f"{chromosome}:{min_start}-{max_end}"
-                # Generate the pyGenomeTracks plot
-                gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
-                subprocess.run(gtracks_command, shell=True)
-                st.image(plot_image_path)
-                # Display the download button for the ZIP file
-                st.download_button(
-                    label="Download GenBank, BED, CSV files as ZIP",
-                    data=zip_buffer.getvalue(),
-                    file_name=f"{gene_symbol}_files.zip",
-                    mime="application/zip"
-                )
 elif selected_model == 'Cas13d':
         ENTRY_METHODS = dict(

 import cas9off
 import cas12
 import cas12lstm
+import cas12lstmvcf
 import pandas as pd
 import streamlit as st
 import plotly.graph_objs as go
 selected_model = st.selectbox('Select CRISPR model:', CRISPR_MODELS, key='selected_model')
 cas9att_path = 'cas9_model/Cas9_MultiHeadAttention_weights.h5'
+cas12lstm_path = 'cas12_model/BiLSTM_Cpf1_weights.h5'
 #plot functions
 def generate_coolbox_plot(bigwig_path, region, output_image_path):
         # Process predictions
         if predict_button and gene_symbol:
             with st.spinner('Predicting... Please wait'):
+                predictions, gene_sequence, exons = cas9attvcf.process_gene(gene_symbol, vcf_reader, cas9att_path)
                 full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
                 sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
                 st.session_state['full_results'] = full_predictions
                 st.experimental_rerun()
 elif selected_model == 'Cas12':
+    cas12target_selection = st.radio(
+        "Select either mutation or not:",
+        ('regular', 'mutation'),
+        key='cas12target_selection'
+    )
     # Gene symbol entry with autocomplete-like feature
     gene_symbol = st.selectbox('Enter a Gene Symbol:', [''] + gene_symbol_list, key='gene_symbol',
                                format_func=lambda x: x if x else "")
     if 'current_gene_symbol' not in st.session_state:
         st.session_state['current_gene_symbol'] = ""
+        if cas12target_selection == 'regular':
+            # Prediction button
+            predict_button = st.button('Predict on-target')
+            # Function to clean up old files
+            def clean_up_old_files(gene_symbol):
                 genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
                 bed_file_path = f"{gene_symbol}_crispr_targets.bed"
                 csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                for path in [genbank_file_path, bed_file_path, csv_file_path]:
+                    if os.path.exists(path):
+                        os.remove(path)
+            # Clean up files if a new gene symbol is entered
+            if st.session_state['current_gene_symbol'] and gene_symbol != st.session_state['current_gene_symbol']:
+                clean_up_old_files(st.session_state['current_gene_symbol'])
+            # Process predictions
+            if predict_button and gene_symbol:
+                with st.spinner('Predicting... Please wait'):
+                    predictions, gene_sequence, exons = cas12lstm.process_gene(gene_symbol, cas12lstm_path)
+                    sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
+                    st.session_state['on_target_results'] = sorted_predictions
+                    st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
+                    st.session_state['exons'] = exons  # Store exon data
+                # Notify the user once the process is completed successfully.
+                st.success('Prediction completed!')
+                st.session_state['prediction_made'] = True
+                if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
+                    ensembl_id = gene_annotations.get(gene_symbol, 'Unknown')  # Get Ensembl ID or default to 'Unknown'
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        st.markdown("**Genome**")
+                        st.markdown("Homo sapiens")
+                    with col2:
+                        st.markdown("**Gene**")
+                        st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
+                    with col3:
+                        st.markdown("**Nuclease**")
+                        st.markdown("SpCas9")
+                    # Include "Target" in the DataFrame's columns
+                    try:
+                        df = pd.DataFrame(st.session_state['on_target_results'],
+                                          columns=["Chr", "Start Pos", "End Pos", "Strand", "Transcript", "Exon",
+                                                   "Target",
+                                                   "gRNA", "Prediction"])
+                        st.dataframe(df)
+                    except ValueError as e:
+                        st.error(f"DataFrame creation error: {e}")
+                        # Optionally print or log the problematic data for debugging:
+                        print(st.session_state['on_target_results'])
+                    # Initialize Plotly figure
+                    fig = go.Figure()
+                    EXON_BASE = 0  # Base position for exons and CDS on the Y axis
+                    EXON_HEIGHT = 0.02  # How 'tall' the exon markers should appear
+                    # Plot Exons as small markers on the X-axis
+                    for exon in st.session_state['exons']:
+                        exon_start, exon_end = exon['start'], exon['end']
+                        fig.add_trace(go.Bar(
+                            x=[(exon_start + exon_end) / 2],
+                            y=[EXON_HEIGHT],
+                            width=[exon_end - exon_start],
+                            base=EXON_BASE,
+                            marker_color='rgba(128, 0, 128, 0.5)',
+                            name='Exon'
+                        ))
+                    VERTICAL_GAP = 0.2  # Gap between different ranks
+                    # Define max and min Y values based on strand and rank
+                    MAX_STRAND_Y = 0.1  # Maximum Y value for positive strand results
+                    MIN_STRAND_Y = -0.1  # Minimum Y value for negative strand results
+                    # Iterate over top 5 sorted predictions to create the plot
+                    for i, prediction in enumerate(st.session_state['on_target_results'][:5], start=1):  # Only top 5
+                        chrom, start, end, strand, transcript, exon, target, gRNA, prediction_score = prediction
+                        midpoint = (int(start) + int(end)) / 2
+                        # Vertical position based on rank, modified by strand
+                        y_value = (MAX_STRAND_Y - (i - 1) * VERTICAL_GAP) if strand == '1' or strand == '+' else (
+                                MIN_STRAND_Y + (i - 1) * VERTICAL_GAP)
+                        fig.add_trace(go.Scatter(
+                            x=[midpoint],
+                            y=[y_value],
+                            mode='markers+text',
+                            marker=dict(symbol='triangle-up' if strand == '1' or strand == '+' else 'triangle-down',
+                                        size=12),
+                            text=f"Rank: {i}",  # Text label
+                            hoverinfo='text',
+                            hovertext=f"Rank: {i}<br>Chromosome: {chrom}<br>Target Sequence: {target}<br>gRNA: {gRNA}<br>Start: {start}<br>End: {end}<br>Strand: {'+' if strand == '1' or strand == '+' else '-'}<br>Transcript: {transcript}<br>Prediction: {prediction_score:.4f}",
+                        ))
+                    # Update layout for clarity and interaction
+                    fig.update_layout(
+                        title='Top 5 gRNA Sequences by Prediction Score',
+                        xaxis_title='Genomic Position',
+                        yaxis_title='Strand',
+                        yaxis=dict(tickvals=[MAX_STRAND_Y, MIN_STRAND_Y], ticktext=['+', '-']),
+                        showlegend=False,
+                        hovermode='x unified',
+                    )
+                    # Display the plot
+                    st.plotly_chart(fig)
+                    if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
+                        gene_symbol = st.session_state['current_gene_symbol']
+                        gene_sequence = st.session_state['gene_sequence']
+                        # Define file paths
+                        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+                        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                        plot_image_path = f"{gene_symbol}_gtracks_plot.png"
+                        # Generate files
+                        cas12lstm.generate_genbank_file_from_df(df, gene_sequence, gene_symbol, genbank_file_path)
+                        cas12lstm.create_bed_file_from_df(df, bed_file_path)
+                        cas12lstm.create_csv_from_df(df, csv_file_path)
+                        # Prepare an in-memory buffer for the ZIP file
+                        zip_buffer = io.BytesIO()
+                        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                            # For each file, add it to the ZIP file
+                            zip_file.write(genbank_file_path)
+                            zip_file.write(bed_file_path)
+                            zip_file.write(csv_file_path)
+                        # Important: move the cursor to the beginning of the BytesIO buffer before reading it
+                        zip_buffer.seek(0)
+                        # Specify the region you want to visualize
+                        min_start = df['Start Pos'].min()
+                        max_end = df['End Pos'].max()
+                        chromosome = df['Chr'].mode()[0]  # Assumes most common chromosome is the target
+                        region = f"{chromosome}:{min_start}-{max_end}"
+                        # Generate the pyGenomeTracks plot
+                        gtracks_command = f"gtracks {region} {bed_file_path} {plot_image_path}"
+                        subprocess.run(gtracks_command, shell=True)
+                        st.image(plot_image_path)
+                        # Display the download button for the ZIP file
+                        st.download_button(
+                            label="Download GenBank, BED, CSV files as ZIP",
+                            data=zip_buffer.getvalue(),
+                            file_name=f"{gene_symbol}_files.zip",
+                            mime="application/zip"
+                        )
+        elif cas12target_selection == 'mutation':
+            # Prediction button
+            predict_button = st.button('Predict on-target')
+            vcf_reader = cyvcf2.VCF('SRR25934512.filter.snps.indels.vcf.gz')
+            if 'exons' not in st.session_state:
+                st.session_state['exons'] = []
+            # Process predictions
+            if predict_button and gene_symbol:
+                with st.spinner('Predicting... Please wait'):
+                    predictions, gene_sequence, exons = cas12lstmvcf.process_gene(gene_symbol, vcf_reader,
+                                                                                    cas12lstm_path)
+                    full_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)
+                    sorted_predictions = sorted(predictions, key=lambda x: x[8], reverse=True)[:10]
+                    st.session_state['full_results'] = full_predictions
+                    st.session_state['on_target_results'] = sorted_predictions
+                    st.session_state['gene_sequence'] = gene_sequence  # Save gene sequence in session state
+                    st.session_state['exons'] = exons  # Store exon data
+                # Notify the user once the process is completed successfully.
+                st.success('Prediction completed!')
+                st.session_state['prediction_made'] = True
+                if 'on_target_results' in st.session_state and st.session_state['on_target_results']:
+                    ensembl_id = gene_annotations.get(gene_symbol,
+                                                          'Unknown')  # Get Ensembl ID or default to 'Unknown'
+                    col1, col2, col3 = st.columns(3)
+                    with col1:
+                        st.markdown("**Genome**")
+                        st.markdown("Homo sapiens")
+                    with col2:
+                        st.markdown("**Gene**")
+                        st.markdown(f"{gene_symbol} : {ensembl_id} (primary)")
+                    with col3:
+                        st.markdown("**Nuclease**")
+                        st.markdown("SpCas9")
+                    # Include "Target" in the DataFrame's columns
+                    try:
+                        df = pd.DataFrame(st.session_state['on_target_results'],
+                                            columns=["Gene Symbol", "Chr", "Strand", "Target Start", "Transcript",
+                                                    "Exon",
+                                                    "Target",
+                                                    "gRNA", "Prediction", "Is Mutation"])
+                        df_full = pd.DataFrame(st.session_state['full_results'],
+                                                columns=["Gene Symbol", "Chr", "Strand", "Target Start",
+                                                        "Transcript",
+                                                        "Exon", "Target",
+                                                        "gRNA", "Prediction", "Is Mutation"])
+                        st.dataframe(df)
+                    except ValueError as e:
+                        st.error(f"DataFrame creation error: {e}")
+                        # Optionally print or log the problematic data for debugging:
+                        print(st.session_state['on_target_results'])
+                    if 'gene_sequence' in st.session_state and st.session_state['gene_sequence']:
+                        gene_symbol = st.session_state['current_gene_symbol']
+                        gene_sequence = st.session_state['gene_sequence']
+                        # Define file paths
+                        genbank_file_path = f"{gene_symbol}_crispr_targets.gb"
+                        bed_file_path = f"{gene_symbol}_crispr_targets.bed"
+                        csv_file_path = f"{gene_symbol}_crispr_predictions.csv"
+                        plot_image_path = f"{gene_symbol}_gtracks_plot.png"
+                        # Generate files
+                        cas12lstmvcf.generate_genbank_file_from_df(df_full, gene_sequence, gene_symbol,
+                                                                  genbank_file_path)
+                        cas12lstmvcf.create_bed_file_from_df(df_full, bed_file_path)
+                        cas12lstmvcf.create_csv_from_df(df_full, csv_file_path)
+                        # Prepare an in-memory buffer for the ZIP file
+                        zip_buffer = io.BytesIO()
+                        with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+                            # For each file, add it to the ZIP file
+                            zip_file.write(genbank_file_path)
+                            zip_file.write(bed_file_path)
+                            zip_file.write(csv_file_path)
+                        # Display the download button for the ZIP file
+                        st.download_button(
+                            label="Download GenBank, BED, CSV files as ZIP",
+                            data=zip_buffer.getvalue(),
+                            file_name=f"{gene_symbol}_files.zip",
+                            mime="application/zip"
+                        )
 elif selected_model == 'Cas13d':
         ENTRY_METHODS = dict(

cas12lstmvcf.py CHANGED Viewed

@@ -8,6 +8,10 @@ from keras.metrics import MeanSquaredError
 import pandas as pd
 import numpy as np
 import requests
 from functools import reduce
@@ -278,14 +282,70 @@ def process_gene(gene_symbol, vcf_reader, model_path):
                     print(f"Failed to retrieve gene sequence for exon {exon_id}.")
     else:
         print("Failed to retrieve transcripts.")
-    output = []
-    for result in results:
-        for item in result:
-            output.append(item)
-    # Sort results based on prediction score (assuming score is at the 8th index)
-    sorted_results = sorted(output, key=lambda x: x[8], reverse=True)
     # Return the sorted output, combined gene sequences, and all exons
-    return sorted_results, all_gene_sequences, all_exons

 import pandas as pd
 import numpy as np
+from Bio import SeqIO
+from Bio.SeqRecord import SeqRecord
+from Bio.SeqFeature import SeqFeature, FeatureLocation
+from Bio.Seq import Seq
 import requests
 from functools import reduce
                     print(f"Failed to retrieve gene sequence for exon {exon_id}.")
     else:
         print("Failed to retrieve transcripts.")
     # Return the sorted output, combined gene sequences, and all exons
+    return results, all_gene_sequences, all_exons
def create_genbank_features(data):
    """Build GenBank ``misc_feature`` entries for predicted Cas12 targets.

    Rows are read positionally and must follow the column order that app.py
    assigns to the mutation-mode results frame:

        0 Gene Symbol, 1 Chr, 2 Strand, 3 Target Start, 4 Transcript,
        5 Exon, 6 Target, 7 gRNA, 8 Prediction, 9 Is Mutation

    Args:
        data: A pandas DataFrame or a list of row sequences in the order above.

    Returns:
        A list of ``SeqFeature`` objects, one per row whose start position and
        target length could be determined. Rows that fail are reported with
        ``print`` and skipped.

    Raises:
        TypeError: If ``data`` is neither a list nor a pandas DataFrame.
    """
    # Normalise the input to a list of positional rows.
    if isinstance(data, pd.DataFrame):
        rows = data.values.tolist()
    elif isinstance(data, list):
        rows = data
    else:
        raise TypeError("Data should be either a list or a pandas DataFrame.")

    features = []
    for row in rows:
        try:
            # Index 3 is "Target Start"; index 1 is the chromosome and must
            # not be interpreted as a coordinate.
            start = int(row[3])
            # The end is derived from the target sequence length (index 6).
            end = start + len(row[6])
        except (TypeError, ValueError) as e:
            print(f"Error converting target start to int: {row[3]} - {e}")
            continue
        # Index 2 is "Strand". Accept '1', 1 and '+' as the forward strand so
        # the result does not depend on how the strand was serialised.
        strand = 1 if str(row[2]) in ('1', '+') else -1
        location = FeatureLocation(start=start, end=end, strand=strand)
        is_mutation = 'Yes' if row[9] else 'No'
        feature = SeqFeature(location=location, type="misc_feature", qualifiers={
            'label': row[7],  # gRNA sequence is used as the feature label
            'note': f"Prediction: {row[8]}, Mutation: {is_mutation}"
        })
        features.append(feature)
    return features
def generate_genbank_file_from_df(df, gene_sequence, gene_symbol, output_path):
    """Write a GenBank record annotating the predicted targets for a gene.

    Args:
        df: Prediction rows handed to ``create_genbank_features``.
        gene_sequence: Sequence for the record; anything that is not already
            a ``str`` is converted with ``str()``.
            NOTE(review): if callers pass a list of sequences, ``str()`` yields
            the list's repr rather than concatenated bases — confirm upstream.
        gene_symbol: Used as the record id, name and in the description.
        output_path: Destination path for the GenBank file.
    """
    # Seq needs plain text, so coerce anything else up front.
    sequence_text = gene_sequence if isinstance(gene_sequence, str) else str(gene_sequence)
    target_features = create_genbank_features(df)
    record = SeqRecord(
        Seq(sequence_text),
        id=gene_symbol,
        name=gene_symbol,
        description=f'CRISPR Cas12 predicted targets for {gene_symbol}',
        features=target_features,
    )
    # The molecule type annotation is set before writing in GenBank format.
    record.annotations["molecule_type"] = "DNA"
    SeqIO.write(record, output_path, "genbank")
def create_bed_file_from_df(df, output_path):
    """Write predicted targets to a tab-separated, BED-style file.

    Each row of ``df`` becomes one line with seven fields: chromosome, start,
    end, gRNA (in the name position), prediction score, strand and a
    ``Yes``/``No`` mutation flag. The mutation flag is an extra field appended
    after the six BED columns written here.

    Args:
        df: DataFrame providing the columns "Chr", "Target Start", "Target",
            "Strand", "gRNA", "Prediction" and "Is Mutation".
        output_path: Destination file path; opened in write mode.
    """
    # Strand may arrive as the string '1', the integer 1, or '+'; all of them
    # denote the forward strand.
    plus_strand_values = ('1', '+')

    lines = []
    for _, row in df.iterrows():
        chrom = row["Chr"]
        start = int(row["Target Start"])
        # The end is derived from the target sequence length.
        end = start + len(row["Target"])
        strand = '+' if str(row["Strand"]) in plus_strand_values else '-'
        gRNA = row["gRNA"]
        score = str(row["Prediction"])
        is_mutation = 'Yes' if row["Is Mutation"] else 'No'
        lines.append(f"{chrom}\t{start}\t{end}\t{gRNA}\t{score}\t{strand}\t{is_mutation}\n")

    with open(output_path, 'w') as bed_file:
        bed_file.writelines(lines)
def create_csv_from_df(df, output_path):
    """Save ``df`` as a CSV file at ``output_path`` without the index column."""
    df.to_csv(path_or_buf=output_path, index=False)