yashm committed on
Commit
51b8fa9
·
verified ·
1 Parent(s): 4d627b3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries
2
+ import streamlit as st
3
+ import pandas as pd
4
+ from Bio import SeqIO
5
+ import altair as alt
6
+ import numpy as np
7
+
8
+ # Function to parse GenBank file
9
def parse_genbank(file):
    """Parse a single-record GenBank file into summary data for the dashboard.

    Args:
        file: A filesystem path, a text-mode handle, or a binary file-like
            object (such as the upload object handed over by
            ``st.file_uploader``). Binary content is decoded to text before
            parsing.

    Returns:
        A 5-tuple ``(organism, gene_info, cds_info, gc_content,
        sequence_length)``:

        * ``organism`` -- the record's ``organism`` annotation, or ``'N/A'``
          when the annotation is absent.
        * ``gene_info`` -- one dict per ``gene`` feature with the keys
          ``'Gene'``, ``'Length'`` and ``'Location'``.
        * ``cds_info`` -- one dict per ``CDS`` feature with the keys
          ``'Gene'``, ``'Protein'``, ``'Length'`` and ``'Location'``.
        * ``gc_content`` -- percentage of ``G`` + ``C`` in the sequence
          (``0.0`` for a zero-length sequence).
        * ``sequence_length`` -- ``len(record.seq)``.

    Raises:
        ValueError: Propagated from ``SeqIO.read``; per the Biopython docs
            this happens when the input does not hold exactly one record.
    """
    import io  # local import so the block does not rely on file-level edits

    # The GenBank parser wants text, but uploads arrive as bytes. Anything
    # readable that is not already a text stream is read once and re-wrapped.
    handle = file
    if hasattr(file, "read") and not isinstance(file, io.TextIOBase):
        payload = file.read()
        if isinstance(payload, (bytes, bytearray)):
            payload = bytes(payload).decode("utf-8", errors="replace")
        handle = io.StringIO(payload)

    record = SeqIO.read(handle, "genbank")
    organism = record.annotations.get('organism', 'N/A')

    gene_features = [feat for feat in record.features if feat.type == "gene"]
    cds_features = [feat for feat in record.features if feat.type == "CDS"]

    sequence_length = len(record.seq)
    if sequence_length:
        gc_total = record.seq.count('G') + record.seq.count('C')
        gc_content = gc_total / sequence_length * 100
    else:
        # Avoid ZeroDivisionError on an empty sequence.
        gc_content = 0.0

    gene_info = [
        {
            'Gene': feat.qualifiers.get('gene', ['N/A'])[0],
            'Length': len(feat),
            'Location': str(feat.location),
        }
        for feat in gene_features
    ]
    cds_info = [
        {
            'Gene': feat.qualifiers.get('gene', ['N/A'])[0],
            'Protein': feat.qualifiers.get('translation', ['N/A'])[0],
            'Length': len(feat),
            'Location': str(feat.location),
        }
        for feat in cds_features
    ]
    return organism, gene_info, cds_info, gc_content, sequence_length
30
+
31
+ # Page setup
32
st.set_page_config(page_title="Genomic Data Dashboard", page_icon="🧬", layout="wide")

# Upload GenBank file
uploaded_file = st.file_uploader("Upload a GenBank file", type=['gb', 'gbk'])

# Guard clause: nothing below can run without an upload.
if uploaded_file is None:
    st.warning("Please upload a GenBank file.")
    st.stop()

# Report parse problems in the page instead of crashing with a traceback.
try:
    organism, gene_info, cds_info, gc_content, sequence_length = parse_genbank(uploaded_file)
except (ValueError, KeyError) as exc:
    st.error(f"Could not parse the uploaded GenBank file: {exc}")
    st.stop()

# One row per gene / CDS feature; both frames are used by the sections below.
gene_df = pd.DataFrame(gene_info)
cds_df = pd.DataFrame(cds_info)
43
+
44
+ # Sidebar information
45
# Sidebar: dashboard title plus the organism read from the uploaded record.
st.sidebar.title('Genomic Data Dashboard')
st.sidebar.write(f'Organism: {organism}')
48
+ # You can add more interactive widgets here as needed
49
+
50
+ # Main content
51
# Two side-by-side columns: summary on the left, gene details on the right.
col1, col2 = st.columns(2)

with col1:
    st.markdown('### General Information')
    # Build every summary line first, then emit them in order.
    summary_lines = (
        f'**Organism:** {organism}',
        f'**Sequence Length:** {sequence_length} bp',
        f'**GC Content:** {gc_content:.2f}%',
        f'**Number of Genes:** {len(gene_df)}',
        f'**Number of Coding Sequences (CDS):** {len(cds_df)}',
    )
    for summary_line in summary_lines:
        st.write(summary_line)
60
+
61
with col2:
    st.markdown('### Genes and Proteins')
    if gene_df.empty:
        # A frame built from an empty list has no 'Gene' column, so indexing
        # it would raise KeyError; tell the user instead.
        st.info('No gene features were found in this record.')
    else:
        gene_selected = st.selectbox('Select a gene to view details:', options=gene_df['Gene'])
        if gene_selected:
            selected_gene = gene_df[gene_df['Gene'] == gene_selected]
            if not selected_gene.empty:
                st.write(f"**Gene Details:** {selected_gene.to_dict('records')[0]}")
            # Same guard for the CDS frame: it has no columns when the record
            # has no CDS features.
            if not cds_df.empty:
                selected_cds = cds_df[cds_df['Gene'] == gene_selected]
                if not selected_cds.empty:
                    st.write(f"**CDS Details:** {selected_cds.to_dict('records')[0]}")
71
+
72
+ # Display data tables (optional)
73
# One collapsible section per table, rendered in the listed order.
table_sections = (
    ("View All Genes", gene_df),
    ("View All Coding Sequences", cds_df),
)
for section_label, section_frame in table_sections:
    with st.expander(section_label):
        st.dataframe(section_frame)
77
+
78
+ # You can extend the app with more functionalities like visualizations,
79
+ # k-mer analysis, or other genomic metrics based on your requirements.
80
+