jithAI / src /app.py
persadian's picture
init
758380a verified
# app.py
import base64
import html
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# ---- Configuration ----
# Model identifiers handed to load_model() by the Analysis and
# Sequence Generator tabs respectively.
MODEL_R1: str = "deepseek-ai/DeepSeek-R1-0528"
MODEL_V3: str = "deepseek-ai/DeepSeek-V3-0324"

# Display name interpolated into the page header and footer.
APP_NAME: str = "JithAI"

# Colour palette (hex "#RRGGBB") interpolated into the custom CSS and
# the inline styles of rendered results.
PRIMARY_COLOR: str = "#6366F1"    # indigo, gradient start
SECONDARY_COLOR: str = "#8B5CF6"  # violet, gradient end / borders
BG_COLOR: str = "#0F172A"         # dark blue page background
TEXT_COLOR: str = "#E2E8F0"       # light gray body text
ACCENT_COLOR: str = "#06D6A0"     # teal accent
# ---- Custom CSS ----
def _hex_to_rgba(hex_color: str, alpha: float) -> str:
    """Return a CSS ``rgba(r, g, b, a)`` value for a ``#RRGGBB`` colour.

    Args:
        hex_color: Six-digit hex colour, with or without a leading ``#``.
        alpha: Opacity written verbatim as the fourth rgba component.

    Returns:
        A string such as ``"rgba(99, 102, 241, 0.3)"``.

    Raises:
        ValueError: If a colour channel is not valid hexadecimal.
    """
    digits = hex_color.lstrip("#")
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"


# Inject the global stylesheet. Literal CSS braces are doubled because the
# sheet is an f-string; single braces interpolate the palette constants.
# The ``.highlight`` gradient is built with _hex_to_rgba(): formatting a
# tuple directly would emit "rgba(r, g, b), 0.3)", which is not valid CSS.
st.markdown(f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');
* {{
font-family: 'Inter', sans-serif;
}}
body {{
background-color: {BG_COLOR};
color: {TEXT_COLOR};
}}
.stApp {{
background: linear-gradient(135deg, {BG_COLOR}, #1E293B);
background-size: 400% 400%;
animation: gradientBG 15s ease infinite;
}}
@keyframes gradientBG {{
0% {{ background-position: 0% 50%; }}
50% {{ background-position: 100% 50%; }}
100% {{ background-position: 0% 50%; }}
}}
.header {{
color: white;
text-align: center;
padding: 1rem 0;
background: rgba(30, 41, 59, 0.7);
border-radius: 16px;
backdrop-filter: blur(10px);
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
border: 1px solid rgba(99, 102, 241, 0.3);
margin-bottom: 2rem;
}}
.stButton>button {{
background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR});
color: white !important;
border: none;
border-radius: 12px;
padding: 12px 28px;
font-weight: 600;
transition: all 0.3s ease;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}}
.stButton>button:hover {{
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
}}
.stTextArea textarea {{
background-color: rgba(30, 41, 59, 0.7) !important;
color: {TEXT_COLOR} !important;
border: 1px solid {SECONDARY_COLOR} !important;
border-radius: 12px;
padding: 15px !important;
}}
.result-box {{
background: rgba(30, 41, 59, 0.7);
border-radius: 16px;
padding: 25px;
margin-top: 20px;
backdrop-filter: blur(5px);
border: 1px solid rgba(139, 92, 246, 0.2);
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
}}
.model-card {{
background: rgba(15, 23, 42, 0.8);
border-radius: 12px;
padding: 20px;
margin-bottom: 20px;
border-left: 4px solid {ACCENT_COLOR};
}}
.footer {{
text-align: center;
margin-top: 40px;
color: #94A3B8;
font-size: 0.9rem;
}}
.highlight {{
background: linear-gradient(120deg, {_hex_to_rgba(PRIMARY_COLOR, 0.3)}, {_hex_to_rgba(SECONDARY_COLOR, 0.3)});
padding: 2px 6px;
border-radius: 4px;
font-weight: 600;
}}
.tab-content {{
padding: 20px 0;
}}
.stProgress > div > div > div {{
background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR}) !important;
}}
</style>
""", unsafe_allow_html=True)
# ---- App Header ----
# Title banner: the app name plus a fixed tagline, wrapped in a div that
# carries the "header" CSS class.
header_html = f"""
<div class="header">
<h1>{APP_NAME}</h1>
<p>Advanced Protein Sequence Analysis with DeepSeek AI</p>
</div>
"""
st.markdown(header_html, unsafe_allow_html=True)
# ---- Model Loading ----
@st.cache_resource(show_spinner=False)
def load_model(model_name):
    """Build a text-generation pipeline for the given model identifier.

    The tokenizer is loaded first, then the causal-LM weights with
    ``device_map="auto"``; both are wrapped in a ``transformers`` pipeline.
    The function is decorated with ``st.cache_resource`` (spinner disabled).

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        The ``"text-generation"`` pipeline wrapping the model and tokenizer.
    """
    tok = AutoTokenizer.from_pretrained(model_name)
    causal_lm = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    generator = pipeline("text-generation", model=causal_lm, tokenizer=tok)
    return generator
# Initialize session state
# Each key is seeded only when it is missing, so values already stored in
# the session are left untouched.
_SESSION_DEFAULTS = {
    "r1_model": None,           # pipeline slot for MODEL_R1
    "v3_model": None,           # pipeline slot for MODEL_V3
    "current_tab": "Analysis",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# ---- Model Cards ----
# Two static description cards rendered side by side, one per model.
# NOTE(review): the "52.8B" / "32.4B" figures mirror the "0528" / "0324"
# suffixes of the model names, which look like date tags rather than
# parameter counts -- verify against the published model cards.
r1_card_html = """
<div class="model-card">
<h3>DeepSeek-R1-0528</h3>
<p>Advanced 52.8B parameter model for precise protein analysis and functional predictions</p>
<p><span class="highlight">Specialized</span> in protein sequence interpretation</p>
</div>
"""
v3_card_html = """
<div class="model-card">
<h3>DeepSeek-V3-0324</h3>
<p>Cutting-edge 32.4B parameter model for generative protein design and sequence optimization</p>
<p><span class="highlight">Optimized</span> for protein engineering tasks</p>
</div>
"""
with st.container():
    # Left column gets the R1 card, right column the V3 card.
    for card_column, card_html in zip(st.columns(2), (r1_card_html, v3_card_html)):
        with card_column:
            st.markdown(card_html, unsafe_allow_html=True)
# ---- Tab Navigation ----
# A horizontal radio group acts as the tab switcher; ``current_tab`` holds
# the selected entry and drives the if/elif chain under "Tab Content".
tabs = ["Analysis", "Sequence Generator", "Protein Explorer"]
current_tab = st.radio(
    # Streamlit discourages an empty label (it logs a warning and hurts
    # accessibility); give the widget a real label and keep it hidden
    # with label_visibility instead.
    "Section",
    tabs,
    index=0,
    horizontal=True,
    label_visibility="collapsed",
)
# ---- Input Section ----
# Sequence input; the Analysis tab reads ``protein_seq`` when building
# its prompt.
protein_seq = st.text_area(
    "Enter Protein Sequence:",
    height=180,
    placeholder="MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCA...",
    help="Enter amino acid sequence in single-letter code",
)
# ---- Tab Content ----
# Analysis tab: builds an instruction prompt around the entered sequence,
# runs it through the R1 pipeline and renders the completion as HTML.
if current_tab == "Analysis":
    st.markdown("### Protein Analysis")
    analysis_prompt = st.text_input(
        "Analysis Focus (optional):",
        placeholder="e.g., Identify potential binding sites, analyze structural motifs",
        help="Specify what you want to analyze in the protein sequence",
    )
    if st.button("Analyze with DeepSeek-R1", use_container_width=True):
        # Treat whitespace-only input the same as no input.
        if not protein_seq or not protein_seq.strip():
            st.warning("Please input a protein sequence")
        else:
            with st.spinner("Initializing DeepSeek-R1 model..."):
                # Load the pipeline once per session; later clicks reuse it.
                if st.session_state.r1_model is None:
                    st.session_state.r1_model = load_model(MODEL_R1)
            with st.spinner("Analyzing protein structure..."):
                # The optional focus line is empty when no focus was typed.
                focus_line = f"Focus: {analysis_prompt}" if analysis_prompt else ""
                prompt = f"""
[INST] You are an expert bioinformatician specializing in protein analysis.
Analyze the following protein sequence and provide detailed insights:
Protein Sequence:
{protein_seq}
{focus_line}
Provide your analysis in the following format:
1. Structural characteristics
2. Potential functional domains
3. Binding site predictions
4. Stability and solubility assessment
5. Potential modifications for optimization
[/INST]
"""
                progress_bar = st.progress(0)
                result_container = st.empty()
                # Cosmetic progress animation: it runs before generation
                # starts and does not track the model's actual progress.
                for percent in range(1, 101):
                    time.sleep(0.02)
                    progress_bar.progress(percent)
                    if percent % 20 == 0:
                        # Simulate intermediate results
                        intermediate = f"Analysis in progress... {percent}% complete"
                        result_container.markdown(f"""
<div class="result-box">
<p>{intermediate}</p>
</div>
""", unsafe_allow_html=True)
                # Generate actual response
                response = st.session_state.r1_model(
                    prompt,
                    max_new_tokens=800,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                )
                # Keep only the text after the last "[/INST]" marker.
                analysis = response[0]['generated_text'].split('[/INST]')[-1].strip()
                # The text is injected into raw HTML below, so escape it
                # first: stray "<", ">" or "&" in the model output must not
                # be interpreted as markup.
                safe_analysis = html.escape(analysis)
                # Emphasise numbered headings such as "1. Structural ...".
                formatted_analysis = re.sub(
                    r'(\d+\.\s+[^\n]+)',
                    r'<br><span style="color:#8B5CF6; font-weight:600">\1</span><br>',
                    safe_analysis,
                )
            progress_bar.empty()
            # Write into the placeholder so the final result replaces the
            # interim "in progress" box instead of appearing below it.
            result_container.markdown(f"""
<div class="result-box">
<h3>Analysis Results</h3>
<div style="line-height: 1.8; margin-top: 15px;">
{formatted_analysis}
</div>
</div>
""", unsafe_allow_html=True)
elif current_tab == "Sequence Generator":
st.markdown("### Protein Sequence Generation")
design_goal = st.text_input(
"Design Goal:",
placeholder="e.g., Create a thermostable enzyme for DNA repair",
help="Describe the protein you want to generate"
)
if st.button("Generate with DeepSeek-V3", use_container_width=True):
if not design_goal:
st.warning("Please enter a design goal")
else:
with st.spinner("Initializing DeepSeek-V3 model..."):
if not st.session_state.v3_model:
st.session_state.v3_model = load_model(MODEL_V3)
with st.spinner("Designing optimized protein sequence..."):
prompt = f"""
[INST] You are an AI protein engineer. Design a novel protein sequence based on the following requirements:
Design Goal: {design_goal}
Provide:
1. A novel protein sequence (60-80 amino acids)
2. Brief explanation of key features
3. Potential applications
[/INST]
"""
progress_bar = st.progress(0)
result_container = st.empty()
for i in range(1, 101):
time.sleep(0.02)
progress_bar.progress(i)
response = st.session_state.v3_model(
prompt,
max_new_tokens=400,
temperature=0.8,
do_sample=True,
top_p=0.95,
)
# Extract the generated text
generation = response[0]['generated_text'].split('[/INST]')[-1].strip()
# Extract the protein sequence using regex
sequence_match = re.search(r'([A-Z]{60,})', generation)
sequence = sequence_match.group(1) if sequence_match else "Sequence not found"
# Highlight the sequence in the response
highlighted_generation = generation.replace(
sequence,
f'<span style="background: rgba{tuple(int(ACCENT_COLOR.lstrip("#")[i:i+2], 16) for i in (0, 2, 4)}, 0.3); padding: 3px; border-radius: 4px; font-family: monospace;">{sequence}</span>'
)
progress_bar.empty()
st.markdown(f"""
<div class="result-box">
<h3>Generated Protein</h3>
<div style="line-height: 1.8; margin-top: 15px;">
{highlighted_generation}
</div>
</div>
""", unsafe_allow_html=True)
# Sequence visualization
st.markdown("### Sequence Visualization")
fig, ax = plt.subplots(figsize=(10, 1.5))
ax.text(0.5, 0.5, sequence,
fontfamily='monospace',
fontsize=9,
ha='center',
va='center')
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.axis('off')
st.pyplot(fig, use_container_width=True)
elif current_tab == "Protein Explorer":
st.markdown("### Protein Structure Explorer")
st.info("This module provides interactive visualization of protein structures")
# Protein structure visualization placeholder
st.image("https://cdn.rcsb.org/images/structures/1mbn/1mbn_assembly-1.jpeg",
caption="Protein Structure Visualization",
use_column_width=True)
col1, col2 = st.columns(2)
with col1:
st.selectbox("Visualization Style", ["Cartoon", "Surface", "Ribbon", "Ball & Stick"])
with col2:
st.selectbox("Color Scheme", ["By Element", "By Chain", "By Residue Type", "Hydrophobicity"])
st.slider("Rotation", 0, 360, 45)
st.button("Render Structure", use_container_width=True)
# ---- Footer ----
# Horizontal rule followed by the version line and a research-use notice,
# wrapped in a div that carries the "footer" CSS class.
st.markdown("---")
footer_html = f"""
<div class="footer">
<p>{APP_NAME} v1.2 | Advanced Protein Analysis Platform</p>
<p style="font-size: 0.85rem; margin-top: 10px;">
Powered by DeepSeek-R1 and DeepSeek-V3 models | This tool is for research purposes only
</p>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)