Spaces:

WANDSAI
/

GenSeq

Sleeping

App Files Community

Accelernate committed on Jul 12, 2024

Commit

28578a5

verified ·

1 Parent(s): a7ea9ce

Create app.py

Browse files

Files changed (1) hide show

app.py +75 -0

app.py ADDED Viewed

	@@ -0,0 +1,75 @@

+import html
+
+import numpy as np
+import streamlit as st
+from Bio import SeqIO
+from Bio.Seq import Seq
+from hmmlearn import hmm
+# Function to encode DNA sequence
+def encode_sequence(seq):
+    """Encode a DNA string as an integer array of base codes.
+
+    Bases map to A=0, C=1, G=2, T=3. Matching is case-insensitive, so
+    lowercase (soft-masked) bases are encoded like their uppercase forms.
+    Every other character (whitespace, ``N``, ambiguity codes, ...) is
+    skipped, so the result can be shorter than ``seq``.
+
+    Args:
+        seq: The sequence text; anything accepted by ``str()``.
+
+    Returns:
+        A 1-D integer ``numpy`` array with one code per recognised base.
+        Input without any A/C/G/T yields an empty *integer* array.
+    """
+    encoding = {'A': 0, 'C': 1, 'G': 2, 'T': 3}
+    codes = [encoding[base] for base in str(seq).upper() if base in encoding]
+    # Explicit dtype: np.array([]) would default to float64, which is not a
+    # valid symbol array for a discrete-emission model.
+    return np.array(codes, dtype=int)
+# Simple HMM model (this is a placeholder and would need proper training)
+# Two hidden states over the four base codes produced by encode_sequence
+# (columns are A, C, G, T): state 0 emits all bases uniformly, state 1 is
+# mildly GC-rich. Both states are "sticky" (0.7 chance of staying put).
+#
+# One symbol per position is a *categorical* emission model. hmmlearn >= 0.3
+# changed MultinomialHMM to model count vectors instead, so CategoricalHMM is
+# the right class there; older releases without it fall back to
+# MultinomialHMM, which had the categorical behaviour at the time.
+_DiscreteHMM = getattr(hmm, "CategoricalHMM", hmm.MultinomialHMM)
+model = _DiscreteHMM(n_components=2, random_state=42)
+model.startprob_ = np.array([0.5, 0.5])
+model.transmat_ = np.array([[0.7, 0.3],
+                            [0.3, 0.7]])
+model.emissionprob_ = np.array([[0.25, 0.25, 0.25, 0.25],
+                                [0.20, 0.30, 0.30, 0.20]])
+def analyze_dark_matter(sequence):
+    """Compute basic statistics and HMM-predicted regions for a DNA sequence.
+
+    Args:
+        sequence: The DNA sequence as text. It may contain characters that
+            are not bases (line breaks, ``N``, ...); those are ignored by the
+            HMM but still count towards ``length``.
+
+    Returns:
+        A tuple ``(length, gc_content, tata_box, caat_box, regulatory_regions)``:
+
+        * ``length`` - number of characters in ``sequence``.
+        * ``gc_content`` - percentage (0-100) of characters that are G or C,
+          case-insensitive; ``0.0`` for empty input.
+        * ``tata_box`` / ``caat_box`` - non-overlapping, case-insensitive
+          counts of ``TATAAA`` and ``CCAAT``.
+        * ``regulatory_regions`` - list of ``(start, end)`` pairs, ``end``
+          exclusive, expressed as positions in ``sequence`` itself, covering
+          each run of positions the HMM assigns to state 1.
+    """
+    text = str(sequence)
+    upper_text = text.upper()
+    # Basic statistics
+    length = len(text)
+    # GC percentage is computed directly: Bio.SeqIO provides no GC() helper,
+    # so the previous SeqIO.GC(...) call raised AttributeError.
+    gc_count = upper_text.count("G") + upper_text.count("C")
+    gc_content = 100.0 * gc_count / length if length else 0.0
+    # Look for common regulatory motifs
+    tata_box = upper_text.count("TATAAA")
+    caat_box = upper_text.count("CCAAT")
+    # HMM analysis
+    encoded_seq = encode_sequence(text)
+    regulatory_regions = []
+    if len(encoded_seq) == 0:
+        # Nothing to decode; the HMM cannot handle a zero-length sequence.
+        return length, gc_content, tata_box, caat_box, regulatory_regions
+    # encode_sequence drops characters it does not recognise, so HMM indices
+    # refer to the filtered sequence. Probe each distinct character once to
+    # learn which ones are kept, then record where kept characters sit in the
+    # original text so regions can be reported in input coordinates.
+    accepted = {ch for ch in set(text) if len(encode_sequence(ch)) == 1}
+    positions = [i for i, ch in enumerate(text) if ch in accepted]
+    if len(positions) != len(encoded_seq):
+        # Defensive fallback: keep filtered-sequence coordinates if the
+        # per-character probe disagrees with the full encoding.
+        positions = list(range(len(encoded_seq)))
+    logprob, hidden_states = model.decode(encoded_seq.reshape(-1, 1))
+    current_start = None
+    for i, state in enumerate(hidden_states):
+        if state == 1 and current_start is None:
+            current_start = i
+        elif state == 0 and current_start is not None:
+            # Region ends just after the last state-1 base (index i - 1).
+            regulatory_regions.append((positions[current_start], positions[i - 1] + 1))
+            current_start = None
+    if current_start is not None:
+        # Sequence ended while still inside a state-1 run.
+        regulatory_regions.append((positions[current_start], positions[-1] + 1))
+    return length, gc_content, tata_box, caat_box, regulatory_regions
+# Streamlit app
+# Flow: read a pasted sequence, run analyze_dark_matter on it when the button
+# is pressed, print the statistics, then echo the sequence with the predicted
+# regions highlighted.
+st.title("Genomic Dark Matter Analyzer")
+sequence = st.text_area("Paste your DNA sequence here", height=150)
+if st.button("Analyze"):
+    # Whitespace-only input is treated the same as no input.
+    if sequence and sequence.strip():
+        length, gc_content, tata_box, caat_box, regulatory_regions = analyze_dark_matter(sequence)
+        st.write(f"Sequence Length: {length}")
+        st.write(f"GC Content: {gc_content:.2f}%")
+        st.write(f"TATA Box motifs: {tata_box}")
+        st.write(f"CAAT Box motifs: {caat_box}")
+        st.subheader("Potential Regulatory Regions (based on HMM):")
+        for start, end in regulatory_regions:
+            st.write(f"Region from base {start} to {end}")
+        # Visualize the sequence with highlighted regions.
+        # The text is rendered with unsafe_allow_html=True, so every piece of
+        # user input is HTML-escaped first; only the <span> tags added here
+        # are emitted as markup. Each region gets a single <span> instead of
+        # one per character to keep the generated HTML small.
+        total = len(sequence)
+        pieces = []
+        cursor = 0
+        for start, end in regulatory_regions:
+            # Clamp to the text and to what has already been emitted.
+            start = max(cursor, min(start, total))
+            end = min(end, total)
+            if end <= start:
+                continue
+            pieces.append(html.escape(sequence[cursor:start]))
+            pieces.append(
+                "<span style='background-color: yellow'>"
+                f"{html.escape(sequence[start:end])}</span>"
+            )
+            cursor = end
+        pieces.append(html.escape(sequence[cursor:]))
+        st.markdown("".join(pieces), unsafe_allow_html=True)
+    else:
+        st.write("Please enter a DNA sequence.")