Spaces:

WANDSAI
/

GenSeq

Sleeping

App Files Files Community

GenSeq / app.py

Accelernate

Update app.py

ddb223f verified almost 2 years ago

raw

history blame

2.55 kB

	import html

	import numpy as np
	import streamlit as st
	from Bio.Seq import Seq

	def calculate_gc_content(seq):
	    """Return the GC content of ``seq`` as a percentage between 0 and 100.

	    ``seq`` may be any sequence type offering ``len()`` and ``count()``
	    (a ``str`` or a Biopython ``Seq``). G and C are counted in both upper
	    and lower case so that lower-case (e.g. soft-masked) input is not
	    reported as 0% GC. An empty sequence yields 0.
	    """
	    total_count = len(seq)
	    if total_count == 0:
	        # Avoid dividing by zero; an empty sequence has no GC content.
	        return 0.0
	    # Counting each spelling separately avoids copying the sequence just
	    # to normalise its case.
	    gc_count = sum(seq.count(base) for base in "GCgc")
	    return (gc_count / total_count) * 100

	def find_potential_regulatory_regions(seq, window_size=50, gc_threshold=60):
	    """Locate stretches of ``seq`` covered by GC-rich sliding windows.

	    A window of ``window_size`` bases is slid along ``seq`` one base at a
	    time. Consecutive windows whose GC percentage is strictly greater than
	    ``gc_threshold`` are merged into one region.

	    Args:
	        seq: Sequence to scan (``str`` or Biopython ``Seq``); G/C are
	            recognised in either case.
	        window_size: Number of bases per window; must be at least 1.
	        gc_threshold: GC percentage (0-100) a window must exceed.

	    Returns:
	        A list of ``(start, end)`` tuples in 0-based coordinates with an
	        exclusive ``end``: the span from the first base of the first
	        qualifying window to the last base of the last qualifying window.
	        Empty when ``seq`` is shorter than ``window_size``.

	    Raises:
	        ValueError: If ``window_size`` is smaller than 1.
	    """
	    if window_size < 1:
	        raise ValueError("window_size must be at least 1")
	    seq_len = len(seq)
	    if seq_len < window_size:
	        return []
	    gc_bases = frozenset("GCgc")
	    is_gc = [base in gc_bases for base in seq]
	    # Running GC count of the current window: each step adds the base that
	    # enters on the right and drops the one that leaves on the left, so the
	    # scan is linear instead of recounting every window from scratch.
	    gc_count = sum(is_gc[:window_size])
	    regulatory_regions = []
	    region_start = None
	    for i in range(seq_len - window_size + 1):
	        if i:
	            gc_count += is_gc[i + window_size - 1] - is_gc[i - 1]
	        gc_rich = (gc_count / window_size) * 100 > gc_threshold
	        if gc_rich and region_start is None:
	            region_start = i
	        elif not gc_rich and region_start is not None:
	            # The last qualifying window started at i - 1, so the region's
	            # exclusive end is (i - 1) + window_size.
	            regulatory_regions.append((region_start, i + window_size - 1))
	            region_start = None
	    if region_start is not None:
	        # Still inside a region at the final window, which ends at seq_len.
	        regulatory_regions.append((region_start, seq_len))
	    return regulatory_regions

	def analyze_dark_matter(sequence):
	    """Compute summary statistics for a DNA sequence.

	    Args:
	        sequence: The DNA sequence as text.

	    Returns:
	        A tuple ``(length, gc_content, tata_box, caat_box,
	        regulatory_regions)``: the sequence length, the overall GC
	        percentage, the number of ``TATAAA`` and ``CCAAT`` occurrences in
	        the sequence as given, and the ``(start, end)`` regions reported by
	        ``find_potential_regulatory_regions`` with its default settings.
	    """
	    # Normalise case so lower-case input is analysed like upper-case input.
	    # Upper-casing keeps the length unchanged for ordinary (ASCII) sequence
	    # text, so region coordinates still index the caller's string.
	    seq = Seq(str(sequence).upper())

	    # Basic statistics
	    length = len(seq)
	    gc_content = calculate_gc_content(seq)

	    # Look for common regulatory motifs
	    tata_box = seq.count("TATAAA")
	    caat_box = seq.count("CCAAT")

	    # Find potential regulatory regions based on GC content
	    regulatory_regions = find_potential_regulatory_regions(seq)

	    return length, gc_content, tata_box, caat_box, regulatory_regions

	def _highlight_regions_html(sequence, regions):
	    """Return ``sequence`` as HTML with the bases inside ``regions`` highlighted.

	    ``regions`` holds ``(start, end)`` pairs with an exclusive ``end``.
	    Overlapping or touching regions are merged so each highlighted stretch
	    becomes a single ``<span>``, and all sequence text is HTML-escaped
	    because it is user input rendered with ``unsafe_allow_html``.
	    """
	    merged = []
	    for start, end in sorted(regions):
	        end = min(end, len(sequence))
	        if end <= start:
	            continue
	        if merged and start <= merged[-1][1]:
	            merged[-1][1] = max(merged[-1][1], end)
	        else:
	            merged.append([start, end])

	    parts = []
	    cursor = 0
	    for start, end in merged:
	        parts.append(html.escape(sequence[cursor:start]))
	        parts.append(
	            "<span style='background-color: yellow'>"
	            f"{html.escape(sequence[start:end])}</span>"
	        )
	        cursor = end
	    parts.append(html.escape(sequence[cursor:]))
	    return "".join(parts)


	# Streamlit app
	st.title("Genomic Dark Matter Analyzer")

	sequence = st.text_area("Paste your DNA sequence here", height=150)

	if st.button("Analyze"):
	    # Pasted sequences usually contain line breaks or spaces; remove them so
	    # the length, GC content and region coordinates refer to bases only and
	    # the highlighted output uses the same coordinates as the analysis.
	    cleaned_sequence = "".join((sequence or "").split())
	    if cleaned_sequence:
	        length, gc_content, tata_box, caat_box, regulatory_regions = analyze_dark_matter(cleaned_sequence)

	        st.write(f"Sequence Length: {length}")
	        st.write(f"Overall GC Content: {gc_content:.2f}%")
	        st.write(f"TATA Box motifs: {tata_box}")
	        st.write(f"CAAT Box motifs: {caat_box}")

	        st.subheader("Potential Regulatory Regions (based on GC content):")
	        for start, end in regulatory_regions:
	            st.write(f"Region from base {start} to {end}")

	        # Visualize the sequence with highlighted regions
	        st.markdown(
	            _highlight_regions_html(cleaned_sequence, regulatory_regions),
	            unsafe_allow_html=True,
	        )
	    else:
	        st.write("Please enter a DNA sequence.")