import html

import numpy as np
import streamlit as st
from Bio.Seq import Seq
|
|
def calculate_gc_content(seq):
    """Return the percentage of G and C bases in *seq*.

    Counting is case-insensitive, so soft-masked (lower-case) DNA is
    handled the same way as upper-case DNA.

    Args:
        seq: A DNA sequence; anything with ``len()`` whose ``str()`` is the
            plain sequence text (a ``str`` or a Biopython ``Seq``).

    Returns:
        The GC percentage as a float in the range 0-100, or ``0`` when the
        sequence is empty.
    """
    total_count = len(seq)
    if total_count == 0:
        # Avoid dividing by zero on empty input.
        return 0

    # Normalise case once so 'g'/'c' are counted together with 'G'/'C'.
    bases = str(seq).upper()
    gc_count = bases.count("G") + bases.count("C")
    return (gc_count / total_count) * 100
|
|
def find_potential_regulatory_regions(seq, window_size=50, gc_threshold=60):
    """Locate stretches of *seq* covered by GC-rich sliding windows.

    A window of ``window_size`` bases is slid along the sequence one base at
    a time. Consecutive windows whose GC percentage is strictly greater than
    ``gc_threshold`` are merged into one region spanning from the start of
    the first such window to the end of the last one.

    Args:
        seq: DNA sequence (``str`` or Biopython ``Seq``). G/C are counted
            case-insensitively.
        window_size: Number of bases per window.
        gc_threshold: GC percentage (0-100) a window must exceed.

    Returns:
        A list of ``(start, end)`` tuples using 0-based, end-exclusive
        coordinates. The list is empty when the sequence is shorter than one
        window or ``window_size`` is not positive.
    """
    seq_len = len(seq)
    n_windows = seq_len - window_size + 1
    if window_size <= 0 or n_windows <= 0:
        return []

    gc_letters = frozenset("GCgc")
    is_gc = [base in gc_letters for base in seq]

    # Running GC count: each step drops the base leaving the window and adds
    # the one entering it, so the scan is O(n) instead of O(n * window_size).
    gc_count = sum(is_gc[:window_size])

    regulatory_regions = []
    start = None  # index of the first window of the region currently open
    for i in range(n_windows):
        if i:
            gc_count += is_gc[i + window_size - 1] - is_gc[i - 1]
        gc_rich = (gc_count / window_size) * 100 > gc_threshold

        if gc_rich and start is None:
            start = i
        elif not gc_rich and start is not None:
            # Window i failed, so the last GC-rich window began at i - 1 and
            # its exclusive end is (i - 1) + window_size.
            regulatory_regions.append((start, i - 1 + window_size))
            start = None

    if start is not None:
        # The region runs through the final window, which ends at seq_len.
        regulatory_regions.append((start, seq_len))

    return regulatory_regions
|
|
def analyze_dark_matter(sequence):
    """Compute summary statistics for a DNA sequence.

    The sequence is upper-cased before analysis so that lower-case
    (soft-masked) input yields the same motif counts and GC figures as
    upper-case input. Upper-casing keeps the length unchanged, so reported
    coordinates still index into the caller's original text.

    Args:
        sequence: The DNA sequence as text.

    Returns:
        A 5-tuple ``(length, gc_content, tata_box, caat_box,
        regulatory_regions)``:

        * ``length`` - number of characters in the sequence.
        * ``gc_content`` - overall GC percentage from
          ``calculate_gc_content``.
        * ``tata_box`` - count of the exact motif ``TATAAA``.
        * ``caat_box`` - count of the exact motif ``CCAAT``.
        * ``regulatory_regions`` - result of
          ``find_potential_regulatory_regions`` with its default settings.

        Only the given strand is searched; the reverse complement is not
        examined.
    """
    seq = Seq(sequence.upper())

    # Basic composition.
    length = len(seq)
    gc_content = calculate_gc_content(seq)

    # Exact-match promoter motif counts.
    tata_box = seq.count("TATAAA")
    caat_box = seq.count("CCAAT")

    # GC-rich stretches as candidate regulatory regions.
    regulatory_regions = find_potential_regulatory_regions(seq)

    return length, gc_content, tata_box, caat_box, regulatory_regions
|
|
| |
# --- Streamlit page: input box, analysis summary and highlighted sequence ---
st.title("Genomic Dark Matter Analyzer")

sequence = st.text_area("Paste your DNA sequence here", height=150)

if st.button("Analyze"):
    # Pasted sequences usually carry line breaks and spaces. Remove them so
    # they are not counted as bases and cannot split a motif in two.
    cleaned_sequence = "".join(sequence.split())

    if cleaned_sequence:
        length, gc_content, tata_box, caat_box, regulatory_regions = analyze_dark_matter(
            cleaned_sequence
        )

        st.write(f"Sequence Length: {length}")
        st.write(f"Overall GC Content: {gc_content:.2f}%")
        st.write(f"TATA Box motifs: {tata_box}")
        st.write(f"CAAT Box motifs: {caat_box}")

        st.subheader("Potential Regulatory Regions (based on GC content):")
        if not regulatory_regions:
            st.write("No GC-rich regions found.")
        for start, end in regulatory_regions:
            st.write(f"Region from base {start} to {end}")

        # Mark every position covered by at least one region. A boolean mask
        # means overlapping regions cannot wrap the same base twice.
        seq_len = len(cleaned_sequence)
        in_region = [False] * seq_len
        for start, end in regulatory_regions:
            stop = min(end, seq_len)
            in_region[start:stop] = [True] * (stop - start)

        # Emit one chunk per contiguous run of equal mask value, wrapping the
        # highlighted runs in a single <span> each.
        parts = []
        run_start = 0
        for i in range(1, seq_len + 1):
            if i == seq_len or in_region[i] != in_region[run_start]:
                # The text is user input rendered as raw HTML, so escape it.
                chunk = html.escape(cleaned_sequence[run_start:i])
                if in_region[run_start]:
                    chunk = f"<span style='background-color: yellow'>{chunk}</span>"
                parts.append(chunk)
                run_start = i

        st.markdown("".join(parts), unsafe_allow_html=True)
    else:
        st.write("Please enter a DNA sequence.")