# app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import matplotlib.pyplot as plt
import numpy as np
import time
import re
import base64
# ---- Configuration ----
# Model identifiers handed to load_model(), which forwards them to the
# transformers `from_pretrained` loaders.
MODEL_R1 = "deepseek-ai/DeepSeek-R1-0528"
MODEL_V3 = "deepseek-ai/DeepSeek-V3-0324"
# Branding / theme constants.
# NOTE(review): none of the names below are referenced by the code visible in
# this file; presumably they are interpolated into the CSS and header
# templates -- TODO confirm.
APP_NAME = "JithAI"
PRIMARY_COLOR = "#6366F1" # Modern indigo
SECONDARY_COLOR = "#8B5CF6" # Vibrant violet
BG_COLOR = "#0F172A" # Deep space blue
TEXT_COLOR = "#E2E8F0" # Light gray text
ACCENT_COLOR = "#06D6A0" # Teal accent
# ---- Custom CSS ----
# NOTE(review): the template passed here contains only a newline, so this call
# emits no visible content. The f-string prefix and unsafe_allow_html=True
# suggest a <style> block was intended -- TODO confirm against the original.
st.markdown(f"""
""", unsafe_allow_html=True)
# ---- App Header ----
# NOTE(review): same situation as the CSS call above -- the header template is
# empty in this copy of the file; verify what markup belongs here.
st.markdown(f"""
""", unsafe_allow_html=True)
# ---- Model Loading ----
@st.cache_resource(show_spinner=False)
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name``.

    The tokenizer and the causal-LM weights are both loaded with
    ``from_pretrained``; the model is loaded with ``device_map="auto"``.
    The function is wrapped in ``st.cache_resource`` with Streamlit's own
    spinner disabled, so callers are expected to show their own.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        The object returned by ``pipeline("text-generation", ...)``.
    """
    text_tokenizer = AutoTokenizer.from_pretrained(model_name)
    causal_lm = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
    )
    return pipeline(
        "text-generation",
        model=causal_lm,
        tokenizer=text_tokenizer,
    )
# Initialize session state
# Each key is seeded only when absent, so values written during an earlier
# rerun of the script are left untouched.
_SESSION_DEFAULTS = (
    ('r1_model', None),
    ('v3_model', None),
    ('current_tab', "Analysis"),
)
for _state_key, _state_default in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
# ---- Model Cards ----
# Card copy for the two side-by-side columns, in left-to-right order.
_R1_CARD_TEXT = (
    "\n"
    "DeepSeek-R1-0528\n"
    "Advanced 52.8B parameter model for precise protein analysis and functional predictions\n"
    "Specialized in protein sequence interpretation\n"
)
_V3_CARD_TEXT = (
    "\n"
    "DeepSeek-V3-0324\n"
    "Cutting-edge 32.4B parameter model for generative protein design and sequence optimization\n"
    "Optimized for protein engineering tasks\n"
)
with st.container():
    col1, col2 = st.columns(2)
    # Render one card per column.
    for _card_column, _card_text in zip((col1, col2), (_R1_CARD_TEXT, _V3_CARD_TEXT)):
        with _card_column:
            st.markdown(_card_text, unsafe_allow_html=True)
# ---- Tab Navigation ----
# Horizontal radio group used as a tab bar; the selected entry decides which
# section of the page is rendered further down.
tabs = ["Analysis", "Sequence Generator", "Protein Explorer"]
# Streamlit asks that widget labels never be empty (it warns, and may reject
# them in future). The label is therefore a real string that stays hidden
# through label_visibility="collapsed".
current_tab = st.radio(
    "Navigation",
    tabs,
    index=0,
    horizontal=True,
    label_visibility="collapsed",
)
# ---- Input Section ----
# Multi-line box for the sequence; its value is read by the Analysis tab.
_SEQUENCE_PLACEHOLDER = (
    "MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCA..."
)
protein_seq = st.text_area(
    label="Enter Protein Sequence:",
    placeholder=_SEQUENCE_PLACEHOLDER,
    help="Enter amino acid sequence in single-letter code",
    height=180,
)
# ---- Tab Content ----
def _ensure_model(state_key, model_name):
    """Return the pipeline kept under ``state_key``, loading it on first use.

    Args:
        state_key: Name of the ``st.session_state`` slot holding the pipeline
            (``None`` until the model has been loaded).
        model_name: Identifier forwarded to ``load_model`` when the slot is
            still empty.

    Returns:
        The text-generation pipeline stored in the session-state slot.
    """
    # Compare against None explicitly rather than relying on the truthiness
    # of an arbitrary pipeline object.
    if st.session_state[state_key] is None:
        st.session_state[state_key] = load_model(model_name)
    return st.session_state[state_key]


def _completion_text(pipeline_output):
    """Return the model's answer from a text-generation pipeline result.

    Takes the first candidate's ``generated_text``, discards everything up to
    and including the last ``[/INST]`` marker, and strips surrounding
    whitespace.
    """
    return pipeline_output[0]['generated_text'].split('[/INST]')[-1].strip()


if current_tab == "Analysis":
    st.markdown("### Protein Analysis")
    analysis_prompt = st.text_input(
        "Analysis Focus (optional):",
        placeholder="e.g., Identify potential binding sites, analyze structural motifs",
        help="Specify what you want to analyze in the protein sequence"
    )
    if st.button("Analyze with DeepSeek-R1", use_container_width=True):
        # Whitespace-only input counts as empty.
        if not protein_seq.strip():
            st.warning("Please input a protein sequence")
        else:
            with st.spinner("Initializing DeepSeek-R1 model..."):
                r1_pipeline = _ensure_model('r1_model', MODEL_R1)
            # The spinner covers the real inference time; no simulated
            # progress is shown because the pipeline call reports none.
            with st.spinner("Analyzing protein structure..."):
                focus_line = f"Focus: {analysis_prompt}" if analysis_prompt else ""
                prompt = (
                    "\n"
                    "[INST] You are an expert bioinformatician specializing in protein analysis.\n"
                    "Analyze the following protein sequence and provide detailed insights:\n"
                    "Protein Sequence:\n"
                    f"{protein_seq}\n"
                    f"{focus_line}\n"
                    "Provide your analysis in the following format:\n"
                    "1. Structural characteristics\n"
                    "2. Potential functional domains\n"
                    "3. Binding site predictions\n"
                    "4. Stability and solubility assessment\n"
                    "5. Potential modifications for optimization\n"
                    "[/INST]\n"
                )
                response = r1_pipeline(
                    prompt,
                    max_new_tokens=800,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                )
            analysis = _completion_text(response)
            # Put every numbered heading ("1. ...") on its own line so it is
            # not merged into the surrounding paragraph by Markdown.
            formatted_analysis = re.sub(
                r'(\d+\.\s+[^\n]+)',
                r'\n\1\n',
                analysis
            )
            # Model output is rendered as plain Markdown (no
            # unsafe_allow_html): nothing here needs raw HTML, and generated
            # text should not be injected into the page as markup.
            st.markdown(f"\nAnalysis Results\n{formatted_analysis}\n")
elif current_tab == "Sequence Generator":
    st.markdown("### Protein Sequence Generation")
    design_goal = st.text_input(
        "Design Goal:",
        placeholder="e.g., Create a thermostable enzyme for DNA repair",
        help="Describe the protein you want to generate"
    )
    if st.button("Generate with DeepSeek-V3", use_container_width=True):
        # Whitespace-only input counts as empty.
        if not design_goal.strip():
            st.warning("Please enter a design goal")
        else:
            with st.spinner("Initializing DeepSeek-V3 model..."):
                v3_pipeline = _ensure_model('v3_model', MODEL_V3)
            with st.spinner("Designing optimized protein sequence..."):
                prompt = (
                    "\n"
                    "[INST] You are an AI protein engineer. Design a novel protein sequence based on the following requirements:\n"
                    f"Design Goal: {design_goal}\n"
                    "Provide:\n"
                    "1. A novel protein sequence (60-80 amino acids)\n"
                    "2. Brief explanation of key features\n"
                    "3. Potential applications\n"
                    "[/INST]\n"
                )
                response = v3_pipeline(
                    prompt,
                    max_new_tokens=400,
                    temperature=0.8,
                    do_sample=True,
                    top_p=0.95,
                )
            generation = _completion_text(response)
            # The first run of 60 or more capital letters is taken to be the
            # designed sequence.
            sequence_match = re.search(r'([A-Z]{60,})', generation)
            sequence = sequence_match.group(1) if sequence_match else None
            if sequence is None:
                highlighted_generation = generation
            else:
                # Highlight the sequence as inline code within the response.
                highlighted_generation = generation.replace(
                    sequence,
                    f'`{sequence}`'
                )
            # Plain Markdown rendering: generated text is not treated as HTML.
            st.markdown(f"\nGenerated Protein\n{highlighted_generation}\n")
            # Sequence visualization
            st.markdown("### Sequence Visualization")
            if sequence is None:
                # Report the miss instead of plotting a placeholder string as
                # if it were a sequence.
                st.warning("Sequence not found")
            else:
                fig, ax = plt.subplots(figsize=(10, 1.5))
                ax.text(0.5, 0.5, sequence,
                        fontfamily='monospace',
                        fontsize=9,
                        ha='center',
                        va='center')
                ax.set_xlim(0, 1)
                ax.set_ylim(0, 1)
                ax.axis('off')
                st.pyplot(fig, use_container_width=True)
                # Release the figure; pyplot keeps every figure alive until
                # it is closed, so reruns would otherwise accumulate them.
                plt.close(fig)
elif current_tab == "Protein Explorer":
    st.markdown("### Protein Structure Explorer")
    st.info("This module provides interactive visualization of protein structures")
    # Protein structure visualization placeholder
    # NOTE(review): use_column_width is reported as deprecated by recent
    # Streamlit releases -- confirm the minimum supported version before
    # switching parameters.
    st.image("https://cdn.rcsb.org/images/structures/1mbn/1mbn_assembly-1.jpeg",
             caption="Protein Structure Visualization",
             use_column_width=True)
    col1, col2 = st.columns(2)
    # The widgets below are rendered but their values are not read anywhere
    # in this section.
    with col1:
        st.selectbox("Visualization Style", ["Cartoon", "Surface", "Ribbon", "Ball & Stick"])
    with col2:
        st.selectbox("Color Scheme", ["By Element", "By Chain", "By Residue Type", "Hydrophobicity"])
    st.slider("Rotation", 0, 360, 45)
    st.button("Render Structure", use_container_width=True)
# ---- Footer ----
# Markdown thematic break separating the page body from the footer.
st.markdown("---")
# NOTE(review): the footer template below contains only a newline in this
# copy of the file, so nothing visible is emitted -- confirm the intended
# markup.
st.markdown(f"""
""", unsafe_allow_html=True)