import base64
import html
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


# Hugging Face Hub identifiers of the two checkpoints the app can load.
MODEL_R1 = "deepseek-ai/DeepSeek-R1-0528"
MODEL_V3 = "deepseek-ai/DeepSeek-V3-0324"

# Display name interpolated into the page header and footer.
APP_NAME = "JithAI"

# Theme palette as "#RRGGBB" hex strings; interpolated into the injected
# stylesheet and into inline styles of the rendered results.
PRIMARY_COLOR = "#6366F1"
SECONDARY_COLOR = "#8B5CF6"
BG_COLOR = "#0F172A"
TEXT_COLOR = "#E2E8F0"
ACCENT_COLOR = "#06D6A0"


def _hex_to_rgba(hex_color, alpha):
    """Return a CSS ``rgba(r, g, b, a)`` value for a ``#RRGGBB`` colour.

    Formatting a tuple directly into the stylesheet yields
    ``rgba(99, 102, 241), 0.3)`` -- the parenthesis closes before the alpha
    channel -- so the components are written out explicitly instead.
    """
    digits = hex_color.lstrip("#")
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"


# Global stylesheet for the page. Literal CSS braces are doubled because the
# string is an f-string; single braces interpolate the theme constants.
st.markdown(f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');

* {{
    font-family: 'Inter', sans-serif;
}}

body {{
    background-color: {BG_COLOR};
    color: {TEXT_COLOR};
}}

.stApp {{
    background: linear-gradient(135deg, {BG_COLOR}, #1E293B);
    background-size: 400% 400%;
    animation: gradientBG 15s ease infinite;
}}

@keyframes gradientBG {{
    0% {{ background-position: 0% 50%; }}
    50% {{ background-position: 100% 50%; }}
    100% {{ background-position: 0% 50%; }}
}}

.header {{
    color: white;
    text-align: center;
    padding: 1rem 0;
    background: rgba(30, 41, 59, 0.7);
    border-radius: 16px;
    backdrop-filter: blur(10px);
    box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(99, 102, 241, 0.3);
    margin-bottom: 2rem;
}}

.stButton>button {{
    background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR});
    color: white !important;
    border: none;
    border-radius: 12px;
    padding: 12px 28px;
    font-weight: 600;
    transition: all 0.3s ease;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}}

.stButton>button:hover {{
    transform: translateY(-2px);
    box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
}}

.stTextArea textarea {{
    background-color: rgba(30, 41, 59, 0.7) !important;
    color: {TEXT_COLOR} !important;
    border: 1px solid {SECONDARY_COLOR} !important;
    border-radius: 12px;
    padding: 15px !important;
}}

.result-box {{
    background: rgba(30, 41, 59, 0.7);
    border-radius: 16px;
    padding: 25px;
    margin-top: 20px;
    backdrop-filter: blur(5px);
    border: 1px solid rgba(139, 92, 246, 0.2);
    box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
}}

.model-card {{
    background: rgba(15, 23, 42, 0.8);
    border-radius: 12px;
    padding: 20px;
    margin-bottom: 20px;
    border-left: 4px solid {ACCENT_COLOR};
}}

.footer {{
    text-align: center;
    margin-top: 40px;
    color: #94A3B8;
    font-size: 0.9rem;
}}

.highlight {{
    background: linear-gradient(120deg, {_hex_to_rgba(PRIMARY_COLOR, 0.3)}, {_hex_to_rgba(SECONDARY_COLOR, 0.3)});
    padding: 2px 6px;
    border-radius: 4px;
    font-weight: 600;
}}

.tab-content {{
    padding: 20px 0;
}}

.stProgress > div > div > div {{
    background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR}) !important;
}}
</style>
""", unsafe_allow_html=True)


# Page banner; the "header" class is styled by the injected stylesheet.
st.markdown(f"""
<div class="header">
<h1>{APP_NAME}</h1>
<p>Advanced Protein Sequence Analysis with DeepSeek AI</p>
</div>
""", unsafe_allow_html=True)


@st.cache_resource(show_spinner=False)
def load_model(model_name: str):
    """Build a text-generation pipeline for ``model_name``.

    Loads the tokenizer and the causal-LM weights for the given checkpoint
    identifier (weights are placed with ``device_map="auto"``) and wraps both
    in a ``transformers`` ``"text-generation"`` pipeline.

    The function is decorated with ``st.cache_resource`` with Streamlit's own
    spinner disabled; callers show their own spinner around the call.

    Args:
        model_name: Checkpoint identifier passed to ``from_pretrained``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


# Seed per-session state on first run only; existing values (for example an
# already loaded pipeline) must survive Streamlit's script reruns.
_SESSION_DEFAULTS = {
    "r1_model": None,
    "v3_model": None,
    "current_tab": "Analysis",
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default


def _model_card_html(title, description, keyword, focus):
    """Return the markup for one model card (styled by ``.model-card``)."""
    return (
        '<div class="model-card">\n'
        f"<h3>{title}</h3>\n"
        f"<p>{description}</p>\n"
        f'<p><span class="highlight">{keyword}</span> {focus}</p>\n'
        "</div>"
    )


# One entry per card: (title, description, highlighted keyword, rest of line).
# NOTE(review): the "52.8B" / "32.4B" figures mirror the 0528 / 0324 suffixes
# of the checkpoint names -- confirm them against the published model cards.
_MODEL_CARDS = (
    (
        "DeepSeek-R1-0528",
        "Advanced 52.8B parameter model for precise protein analysis and functional predictions",
        "Specialized",
        "in protein sequence interpretation",
    ),
    (
        "DeepSeek-V3-0324",
        "Cutting-edge 32.4B parameter model for generative protein design and sequence optimization",
        "Optimized",
        "for protein engineering tasks",
    ),
)

# Two side-by-side cards, one per column.
with st.container():
    for _card_column, _card in zip(st.columns(2), _MODEL_CARDS):
        with _card_column:
            st.markdown(_model_card_html(*_card), unsafe_allow_html=True)


# Horizontal radio used as a tab strip. The label is hidden visually via
# label_visibility but must not be empty: Streamlit asks for a real label for
# accessibility and may reject empty ones in future releases.
tabs = ["Analysis", "Sequence Generator", "Protein Explorer"]
current_tab = st.radio(
    "Section",
    tabs,
    index=0,
    horizontal=True,
    label_visibility="collapsed",
)

# Sequence input is rendered above every tab; only the Analysis tab reads it.
protein_seq = st.text_area(
    "Enter Protein Sequence:",
    height=180,
    placeholder="MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCA...",
    help="Enter amino acid sequence in single-letter code",
)


def _build_analysis_prompt(sequence, focus):
    """Return the instruction prompt for the analysis model.

    Args:
        sequence: Protein sequence text entered by the user.
        focus: Optional free-text focus; an empty value leaves the line blank.
    """
    focus_line = f"Focus: {focus}" if focus else ""
    return (
        "\n"
        "[INST] You are an expert bioinformatician specializing in protein analysis.\n"
        "Analyze the following protein sequence and provide detailed insights:\n"
        "\n"
        "Protein Sequence:\n"
        f"{sequence}\n"
        "\n"
        f"{focus_line}\n"
        "\n"
        "Provide your analysis in the following format:\n"
        "1. Structural characteristics\n"
        "2. Potential functional domains\n"
        "3. Binding site predictions\n"
        "4. Stability and solubility assessment\n"
        "5. Potential modifications for optimization\n"
        "[/INST]\n"
    )


def _analysis_to_html(analysis):
    """Convert raw model text into markup safe to embed in the result box.

    The text is HTML-escaped first because it is rendered with
    ``unsafe_allow_html=True`` and derives from user input. Numbered lines
    ("1. ...") are then wrapped in a coloured span, and newlines become
    ``<br>`` so blank lines in the output cannot split the surrounding div.
    """
    # quote=False: the text lands in element content, never in an attribute.
    safe_text = html.escape(analysis, quote=False)
    with_headings = re.sub(
        r'(\d+\.\s+[^\n]+)',
        r'<br><span style="color:#8B5CF6; font-weight:600">\1</span><br>',
        safe_text,
    )
    return with_headings.replace("\n", "<br>")


if current_tab == "Analysis":
    st.markdown("### Protein Analysis")
    analysis_prompt = st.text_input(
        "Analysis Focus (optional):",
        placeholder="e.g., Identify potential binding sites, analyze structural motifs",
        help="Specify what you want to analyze in the protein sequence",
    )

    if st.button("Analyze with DeepSeek-R1", use_container_width=True):
        # Whitespace-only input is treated the same as no input.
        cleaned_sequence = protein_seq.strip() if protein_seq else ""
        if not cleaned_sequence:
            st.warning("Please input a protein sequence")
        else:
            with st.spinner("Initializing DeepSeek-R1 model..."):
                # Explicit None check: the slot is seeded with None and the
                # pipeline object's truthiness is not something to rely on.
                if st.session_state.r1_model is None:
                    st.session_state.r1_model = load_model(MODEL_R1)

            # The spinner is the only progress indicator: generation is a
            # single blocking call, so a percentage bar would be fabricated.
            with st.spinner("Analyzing protein structure..."):
                response = st.session_state.r1_model(
                    _build_analysis_prompt(cleaned_sequence, analysis_prompt),
                    max_new_tokens=800,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                )

            # The pipeline output is split on the closing tag so only the text
            # after the prompt's "[/INST]" is kept.
            analysis = response[0]['generated_text'].split('[/INST]')[-1].strip()

            st.markdown(
                '<div class="result-box">'
                "<h3>Analysis Results</h3>"
                '<div style="line-height: 1.8; margin-top: 15px;">'
                f"{_analysis_to_html(analysis)}"
                "</div>"
                "</div>",
                unsafe_allow_html=True,
            )


def _build_design_prompt(goal):
    """Return the instruction prompt for the sequence-design model."""
    return (
        "\n"
        "[INST] You are an AI protein engineer. Design a novel protein sequence based on the following requirements:\n"
        "\n"
        f"Design Goal: {goal}\n"
        "\n"
        "Provide:\n"
        "1. A novel protein sequence (60-80 amino acids)\n"
        "2. Brief explanation of key features\n"
        "3. Potential applications\n"
        "[/INST]\n"
    )


def _generation_to_html(generation, sequence):
    """Escape the model text and highlight ``sequence`` inside it.

    Args:
        generation: Raw text produced by the model.
        sequence: The extracted sequence, or ``None`` when none was found.

    Returns:
        Markup safe to embed in the result box. The text is escaped because
        it is rendered with ``unsafe_allow_html=True``; newlines become
        ``<br>`` so blank lines cannot split the surrounding div.
    """
    # quote=False: the text lands in element content, never in an attribute.
    safe_text = html.escape(generation, quote=False)
    if sequence:
        # Components are formatted one by one; interpolating a tuple would
        # produce "rgba(6, 214, 160), 0.3)", which is not valid CSS.
        accent_digits = ACCENT_COLOR.lstrip("#")
        red, green, blue = (int(accent_digits[i:i + 2], 16) for i in (0, 2, 4))
        highlighted = (
            f'<span style="background: rgba({red}, {green}, {blue}, 0.3); '
            'padding: 3px; border-radius: 4px; font-family: monospace;">'
            f"{sequence}</span>"
        )
        # The sequence is upper-case letters only, so escaping left it intact.
        safe_text = safe_text.replace(sequence, highlighted)
    return safe_text.replace("\n", "<br>")


# Independent `if` rather than `elif`: the tab values are mutually exclusive,
# so this branch does not depend on the preceding one.
if current_tab == "Sequence Generator":
    st.markdown("### Protein Sequence Generation")
    design_goal = st.text_input(
        "Design Goal:",
        placeholder="e.g., Create a thermostable enzyme for DNA repair",
        help="Describe the protein you want to generate",
    )

    if st.button("Generate with DeepSeek-V3", use_container_width=True):
        # Whitespace-only input is treated the same as no input.
        cleaned_goal = design_goal.strip() if design_goal else ""
        if not cleaned_goal:
            st.warning("Please enter a design goal")
        else:
            with st.spinner("Initializing DeepSeek-V3 model..."):
                if st.session_state.v3_model is None:
                    st.session_state.v3_model = load_model(MODEL_V3)

            # The spinner is the only progress indicator: generation is a
            # single blocking call, so a percentage bar would be fabricated.
            with st.spinner("Designing optimized protein sequence..."):
                response = st.session_state.v3_model(
                    _build_design_prompt(cleaned_goal),
                    max_new_tokens=400,
                    temperature=0.8,
                    do_sample=True,
                    top_p=0.95,
                )

            # Keep only the text after the prompt's closing "[/INST]" tag.
            generation = response[0]['generated_text'].split('[/INST]')[-1].strip()

            # First run of 60+ capital letters is taken as the sequence.
            sequence_match = re.search(r'([A-Z]{60,})', generation)
            sequence = sequence_match.group(1) if sequence_match else None

            st.markdown(
                '<div class="result-box">'
                "<h3>Generated Protein</h3>"
                '<div style="line-height: 1.8; margin-top: 15px;">'
                f"{_generation_to_html(generation, sequence)}"
                "</div>"
                "</div>",
                unsafe_allow_html=True,
            )

            st.markdown("### Sequence Visualization")
            if sequence is None:
                # Report the miss instead of plotting a placeholder string.
                st.info("Sequence not found")
            else:
                fig, ax = plt.subplots(figsize=(10, 1.5))
                ax.text(
                    0.5,
                    0.5,
                    sequence,
                    fontfamily='monospace',
                    fontsize=9,
                    ha='center',
                    va='center',
                )
                ax.set_xlim(0, 1)
                ax.set_ylim(0, 1)
                ax.axis('off')
                st.pyplot(fig, use_container_width=True)
                # pyplot keeps every figure alive until closed; release it so
                # repeated runs in one session do not accumulate figures.
                plt.close(fig)


# Independent `if` rather than `elif`: the tab values are mutually exclusive,
# so this branch does not depend on the preceding ones.
if current_tab == "Protein Explorer":
    st.markdown("### Protein Structure Explorer")
    st.info("This module provides interactive visualization of protein structures")

    # Static image fetched by URL.
    # NOTE(review): newer Streamlit releases deprecate `use_column_width` on
    # st.image -- confirm the pinned Streamlit version before switching.
    st.image(
        "https://cdn.rcsb.org/images/structures/1mbn/1mbn_assembly-1.jpeg",
        caption="Protein Structure Visualization",
        use_column_width=True,
    )

    # The return values of the controls below are discarded, so changing them
    # has no effect on the image above.
    style_column, scheme_column = st.columns(2)
    with style_column:
        st.selectbox("Visualization Style", ["Cartoon", "Surface", "Ribbon", "Ball & Stick"])
    with scheme_column:
        st.selectbox("Color Scheme", ["By Element", "By Chain", "By Residue Type", "Hydrophobicity"])

    st.slider("Rotation", 0, 360, 45)
    st.button("Render Structure", use_container_width=True)


# Horizontal rule followed by the page footer (styled by ".footer").
st.markdown("---")
st.markdown(f"""
<div class="footer">
<p>{APP_NAME} v1.2 | Advanced Protein Analysis Platform</p>
<p style="font-size: 0.85rem; margin-top: 10px;">
Powered by DeepSeek-R1 and DeepSeek-V3 models | This tool is for research purposes only
</p>
</div>
""", unsafe_allow_html=True)