GenSeq / app.py
Accelernate's picture
Update app.py
ddb223f verified
raw
history blame
2.55 kB
import html

import numpy as np
import streamlit as st
from Bio.Seq import Seq
def calculate_gc_content(seq):
    """Return the percentage of G and C bases in ``seq``.

    Counting is case-insensitive, so lowercase input (for example
    soft-masked genomic sequence) is treated the same as uppercase.

    Args:
        seq: A sequence supporting ``len()`` and ``count()`` with single
            character arguments, such as a ``str``.

    Returns:
        The GC percentage in the range 0-100, or ``0`` for an empty sequence.
    """
    total_count = len(seq)
    if total_count == 0:
        # Avoid dividing by zero; an empty sequence has no GC content.
        return 0
    gc_count = (
        seq.count("G") + seq.count("C") + seq.count("g") + seq.count("c")
    )
    return (gc_count / total_count) * 100
def find_potential_regulatory_regions(seq, window_size=50, gc_threshold=60):
    """Locate stretches of ``seq`` covered by GC-rich sliding windows.

    A window of ``window_size`` bases is slid along the sequence one base at
    a time. Consecutive windows whose GC percentage is strictly greater than
    ``gc_threshold`` are grouped into one region spanning from the start of
    the first such window to the end of the last one.

    Args:
        seq: Iterable sequence of single-character bases supporting
            ``len()`` (for example a ``str``). G/C are matched
            case-insensitively.
        window_size: Number of bases per window.
        gc_threshold: GC percentage (0-100) a window must exceed.

    Returns:
        A list of ``(start, end)`` tuples using 0-based, end-exclusive
        coordinates. Empty when the sequence is shorter than one window or
        ``window_size`` is not positive.
    """
    seq_len = len(seq)
    if window_size <= 0 or seq_len < window_size:
        # No complete window can be evaluated.
        return []

    gc_bases = {"G", "C", "g", "c"}
    gc_flags = [1 if base in gc_bases else 0 for base in seq]

    regulatory_regions = []
    start = None  # start index of the region currently being extended
    gc_count = sum(gc_flags[:window_size])
    for i in range(seq_len - window_size + 1):
        if i > 0:
            # Slide by one base: add the base entering the window and drop
            # the one leaving it instead of recounting the whole window.
            gc_count += gc_flags[i + window_size - 1] - gc_flags[i - 1]

        if (gc_count / window_size) * 100 > gc_threshold:
            if start is None:
                start = i
        elif start is not None:
            # Window i failed, so the last GC-rich window began at i - 1 and
            # ends (exclusively) at i - 1 + window_size.
            regulatory_regions.append((start, i - 1 + window_size))
            start = None

    if start is not None:
        # The final window was GC-rich; it ends at the end of the sequence.
        regulatory_regions.append((start, seq_len))
    return regulatory_regions
def analyze_dark_matter(sequence):
    """Compute basic statistics and candidate regulatory features of a DNA string.

    Args:
        sequence: The DNA sequence as text. Case is ignored.

    Returns:
        A tuple ``(length, gc_content, tata_box, caat_box, regulatory_regions)``:
        the sequence length, the overall GC percentage, the number of
        ``TATAAA`` and ``CCAAT`` motif occurrences as reported by
        ``Seq.count``, and the GC-rich regions returned by
        ``find_potential_regulatory_regions``.
    """
    # Normalise to uppercase so lowercase (e.g. soft-masked) input is analysed
    # the same way. For ASCII sequence text this keeps the length unchanged,
    # so region coordinates still index into the caller's original string.
    seq = Seq(sequence.upper())

    # Basic statistics
    length = len(seq)
    gc_content = calculate_gc_content(seq)

    # Look for common regulatory motifs
    tata_box = seq.count("TATAAA")
    caat_box = seq.count("CCAAT")

    # Find potential regulatory regions based on GC content
    regulatory_regions = find_potential_regulatory_regions(seq)

    return length, gc_content, tata_box, caat_box, regulatory_regions
# Streamlit app
def _merge_regions(regions, limit):
    """Clip regions to ``[0, limit]`` and merge overlapping or touching ones."""
    merged = []
    for start, end in sorted(regions):
        start, end = max(start, 0), min(end, limit)
        if start >= end:
            continue
        if merged and start <= merged[-1][1]:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
        else:
            merged.append((start, end))
    return merged


def _render_highlighted_sequence(sequence, regions):
    """Return HTML for ``sequence`` with each region wrapped in a yellow span."""
    pieces = []
    cursor = 0
    for start, end in _merge_regions(regions, len(sequence)):
        # The text is user input rendered with unsafe_allow_html=True, so it
        # must be escaped before being placed into the markup.
        pieces.append(html.escape(sequence[cursor:start]))
        pieces.append(
            "<span style='background-color: yellow'>"
            f"{html.escape(sequence[start:end])}</span>"
        )
        cursor = end
    pieces.append(html.escape(sequence[cursor:]))
    return "".join(pieces)


st.title("Genomic Dark Matter Analyzer")

sequence = st.text_area("Paste your DNA sequence here", height=150)

if st.button("Analyze"):
    # Pasted sequences usually contain line breaks or spaces; remove them so
    # they are not counted as bases and so the reported coordinates match the
    # text that is highlighted below.
    cleaned_sequence = "".join(sequence.split())

    if cleaned_sequence:
        length, gc_content, tata_box, caat_box, regulatory_regions = (
            analyze_dark_matter(cleaned_sequence)
        )

        st.write(f"Sequence Length: {length}")
        st.write(f"Overall GC Content: {gc_content:.2f}%")
        st.write(f"TATA Box motifs: {tata_box}")
        st.write(f"CAAT Box motifs: {caat_box}")

        st.subheader("Potential Regulatory Regions (based on GC content):")
        for start, end in regulatory_regions:
            st.write(f"Region from base {start} to {end}")

        # Visualize the sequence with highlighted regions
        st.markdown(
            _render_highlighted_sequence(cleaned_sequence, regulatory_regions),
            unsafe_allow_html=True,
        )
    else:
        st.write("Please enter a DNA sequence.")