Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import libraries
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import pandas as pd
|
| 4 |
+
from Bio import SeqIO
|
| 5 |
+
import altair as alt
|
| 6 |
+
import numpy as np
|
| 7 |
+
|
| 8 |
+
# Function to parse GenBank file
|
| 9 |
+
def parse_genbank(file):
    """Parse a single-record GenBank file and summarise its contents.

    Args:
        file: A path, a text-mode handle, or a binary file-like object
            (for example the object returned by ``st.file_uploader``).

    Returns:
        A 5-tuple ``(organism, gene_info, cds_info, gc_content, length)``:

        * organism -- the record's ``organism`` annotation, or ``'N/A'``
          when the record has none.
        * gene_info -- one dict per ``gene`` feature with the keys
          ``'Gene'``, ``'Length'`` and ``'Location'``.
        * cds_info -- one dict per ``CDS`` feature with the keys
          ``'Gene'``, ``'Protein'``, ``'Length'`` and ``'Location'``.
        * gc_content -- percentage of G/C bases (case-insensitive);
          ``0.0`` for an empty sequence.
        * length -- ``len(record.seq)``.

    Raises:
        ValueError: Propagated from ``SeqIO.read``, which (per the
            Biopython documentation) requires exactly one record.
    """
    import io

    # Biopython's GenBank parser expects a text-mode handle, whereas
    # uploads typically arrive as binary streams; decode those up front.
    handle = file
    if hasattr(file, "read") and isinstance(file.read(0), bytes):
        handle = io.StringIO(file.read().decode("utf-8", errors="replace"))

    record = SeqIO.read(handle, "genbank")
    organism = record.annotations.get('organism', 'N/A')

    gene_features = [f for f in record.features if f.type == "gene"]
    cds_features = [f for f in record.features if f.type == "CDS"]

    sequence = record.seq
    sequence_length = len(sequence)
    if sequence_length:
        # Count both cases so soft-masked (lower-case) sequences are included.
        gc_count = sum(sequence.count(base) for base in ('G', 'C', 'g', 'c'))
        gc_content = gc_count / sequence_length * 100
    else:
        gc_content = 0.0

    gene_info = [
        {
            'Gene': feature.qualifiers.get('gene', ['N/A'])[0],
            'Length': len(feature),
            'Location': str(feature.location),
        }
        for feature in gene_features
    ]
    cds_info = [
        {
            'Gene': feature.qualifiers.get('gene', ['N/A'])[0],
            'Protein': feature.qualifiers.get('translation', ['N/A'])[0],
            'Length': len(feature),
            'Location': str(feature.location),
        }
        for feature in cds_features
    ]
    return organism, gene_info, cds_info, gc_content, sequence_length
|
| 30 |
+
|
| 31 |
+
# Page setup
|
| 32 |
+
st.set_page_config(page_title="Genomic Data Dashboard", page_icon="🧬", layout="wide")

# Upload GenBank file
uploaded_file = st.file_uploader("Upload a GenBank file", type=['gb', 'gbk'])
if uploaded_file is None:
    st.warning("Please upload a GenBank file.")
    st.stop()

try:
    organism, gene_info, cds_info, gc_content, sequence_length = parse_genbank(uploaded_file)
except ValueError as exc:
    # SeqIO.read raises ValueError when the file does not hold exactly one
    # record; show that to the user instead of a raw traceback.
    st.error(f"Could not parse the GenBank file: {exc}")
    st.stop()

# Explicit columns keep the 'Gene' lookups below valid even when the record
# has no gene or CDS features (an empty list would give a column-less frame).
gene_df = pd.DataFrame(gene_info, columns=['Gene', 'Length', 'Location'])
cds_df = pd.DataFrame(cds_info, columns=['Gene', 'Protein', 'Length', 'Location'])

# Sidebar information
with st.sidebar:
    st.title('Genomic Data Dashboard')
    st.write(f'Organism: {organism}')
    # You can add more interactive widgets here as needed

# Main content
col1, col2 = st.columns(2)

with col1:
    st.markdown('### General Information')
    st.write(f'**Organism:** {organism}')
    st.write(f'**Sequence Length:** {sequence_length} bp')
    st.write(f'**GC Content:** {gc_content:.2f}%')
    st.write(f'**Number of Genes:** {len(gene_df)}')
    st.write(f'**Number of Coding Sequences (CDS):** {len(cds_df)}')

with col2:
    st.markdown('### Genes and Proteins')
    if gene_df.empty:
        st.info('No gene features were found in this record.')
    else:
        gene_selected = st.selectbox('Select a gene to view details:', options=gene_df['Gene'])
        if gene_selected:
            selected_gene = gene_df[gene_df['Gene'] == gene_selected]
            if not selected_gene.empty:
                st.write(f"**Gene Details:** {selected_gene.to_dict('records')[0]}")
            selected_cds = cds_df[cds_df['Gene'] == gene_selected]
            if not selected_cds.empty:
                st.write(f"**CDS Details:** {selected_cds.to_dict('records')[0]}")

# Display data tables (optional)
with st.expander("View All Genes"):
    st.dataframe(gene_df)
with st.expander("View All Coding Sequences"):
    st.dataframe(cds_df)
|
| 77 |
+
|
| 78 |
+
# You can extend the app with more functionalities like visualizations,
|
| 79 |
+
# k-mer analysis, or other genomic metrics based on your requirements.
|
| 80 |
+
|