# app.py
"""Streamlit front end that sends protein-sequence prompts to DeepSeek pipelines.

The page renders a header, two model cards, a tab selector and a shared
sequence input, then dispatches to one of three tab renderers:

* "Analysis"            -> prompts MODEL_R1 with the entered sequence.
* "Sequence Generator"  -> prompts MODEL_V3 with a free-text design goal.
* "Protein Explorer"    -> static image plus display-only controls.
"""

import base64
import html
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# ---- Configuration ----
MODEL_R1 = "deepseek-ai/DeepSeek-R1-0528"
MODEL_V3 = "deepseek-ai/DeepSeek-V3-0324"
APP_NAME = "JithAI"
PRIMARY_COLOR = "#6366F1"  # Modern indigo
SECONDARY_COLOR = "#8B5CF6"  # Vibrant violet
BG_COLOR = "#0F172A"  # Deep space blue
TEXT_COLOR = "#E2E8F0"  # Light gray text
ACCENT_COLOR = "#06D6A0"  # Teal accent

# Compiled once at import time instead of on every button press.
# Matches a numbered item such as "1. Structural characteristics".
_NUMBERED_ITEM = re.compile(r'(\d+\.\s+[^\n]+)')
# Matches a run of at least 60 consecutive capital letters.
_SEQUENCE_RUN = re.compile(r'([A-Z]{60,})')

# Marker that closes the instruction part of both prompts.
_PROMPT_END = "[/INST]"


# ---- Model Loading ----
@st.cache_resource(show_spinner=False)
def load_model(model_name):
    """Build a text-generation pipeline for ``model_name``.

    The result is cached by ``st.cache_resource``, so repeated calls with
    the same name reuse the already-built pipeline.

    Args:
        model_name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``transformers`` "text-generation" pipeline.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


def _get_pipeline(state_key, model_name):
    """Return the pipeline stored under ``state_key``, loading it on first use."""
    if st.session_state.get(state_key) is None:
        st.session_state[state_key] = load_model(model_name)
    return st.session_state[state_key]


def _extract_completion(response):
    """Return the text after the last ``[/INST]`` marker of the first result."""
    return response[0]['generated_text'].split(_PROMPT_END)[-1].strip()


def _simulate_progress(progress_bar, status_box=None):
    """Step ``progress_bar`` from 1 to 100 with a 0.02 s pause per step.

    This is purely cosmetic: it runs before the model is called and does not
    reflect real work. When ``status_box`` is given, a status line is written
    into it at every multiple of 20.
    """
    for percent in range(1, 101):
        time.sleep(0.02)
        progress_bar.progress(percent)
        if status_box is not None and percent % 20 == 0:
            # Simulate intermediate results
            intermediate = f"Analysis in progress... {percent}% complete"
            status_box.markdown(
                f"\n\n{intermediate}\n\n",
                unsafe_allow_html=True,
            )


def _render_analysis_tab(protein_seq):
    """Render the "Analysis" tab and run MODEL_R1 on ``protein_seq`` on demand."""
    st.markdown("### Protein Analysis")
    analysis_prompt = st.text_input(
        "Analysis Focus (optional):",
        placeholder="e.g., Identify potential binding sites, analyze structural motifs",
        help="Specify what you want to analyze in the protein sequence",
    )

    if not st.button("Analyze with DeepSeek-R1", use_container_width=True):
        return
    # Whitespace-only input is treated as missing, not sent to the model.
    if not (protein_seq or "").strip():
        st.warning("Please input a protein sequence")
        return

    with st.spinner("Initializing DeepSeek-R1 model..."):
        generator = _get_pipeline("r1_model", MODEL_R1)

    with st.spinner("Analyzing protein structure..."):
        focus_clause = f"Focus: {analysis_prompt}" if analysis_prompt else ""
        # The prompt text (including its single-space separators) is kept
        # exactly as it was; it is only split across lines for readability.
        prompt = (
            " [INST] You are an expert bioinformatician specializing in protein analysis. "
            "Analyze the following protein sequence and provide detailed insights: "
            f"Protein Sequence: {protein_seq} {focus_clause} "
            "Provide your analysis in the following format: "
            "1. Structural characteristics "
            "2. Potential functional domains "
            "3. Binding site predictions "
            "4. Stability and solubility assessment "
            "5. Potential modifications for optimization [/INST] "
        )

        progress_bar = st.progress(0)
        status_box = st.empty()
        _simulate_progress(progress_bar, status_box)

        # Generate actual response
        response = generator(
            prompt,
            max_new_tokens=800,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
        )
        analysis = _extract_completion(response)

        # The text is rendered with unsafe_allow_html=True, so neutralise any
        # markup coming back from the model (which may echo user input).
        safe_analysis = html.escape(analysis, quote=False)

        # Put every numbered item on its own line.
        formatted_analysis = _NUMBERED_ITEM.sub("\n\\1\n", safe_analysis)

        progress_bar.empty()
        # Clear the status line too; otherwise "100% complete" stays on screen
        # above the final results.
        status_box.empty()

    st.markdown(
        f"\n\nAnalysis Results\n\n{formatted_analysis}\n",
        unsafe_allow_html=True,
    )


def _render_generator_tab():
    """Render the "Sequence Generator" tab and run MODEL_V3 on demand."""
    st.markdown("### Protein Sequence Generation")
    design_goal = st.text_input(
        "Design Goal:",
        placeholder="e.g., Create a thermostable enzyme for DNA repair",
        help="Describe the protein you want to generate",
    )

    if not st.button("Generate with DeepSeek-V3", use_container_width=True):
        return
    # Whitespace-only input is treated as missing, not sent to the model.
    if not (design_goal or "").strip():
        st.warning("Please enter a design goal")
        return

    with st.spinner("Initializing DeepSeek-V3 model..."):
        generator = _get_pipeline("v3_model", MODEL_V3)

    with st.spinner("Designing optimized protein sequence..."):
        # Prompt text kept exactly as it was; only the source layout changed.
        prompt = (
            " [INST] You are an AI protein engineer. "
            "Design a novel protein sequence based on the following requirements: "
            f"Design Goal: {design_goal} "
            "Provide: "
            "1. A novel protein sequence (60-80 amino acids) "
            "2. Brief explanation of key features "
            "3. Potential applications [/INST] "
        )

        progress_bar = st.progress(0)
        _simulate_progress(progress_bar)

        response = generator(
            prompt,
            max_new_tokens=400,
            temperature=0.8,
            do_sample=True,
            top_p=0.95,
        )
        generation = _extract_completion(response)

        # First run of 60+ capital letters is taken to be the sequence.
        sequence_match = _SEQUENCE_RUN.search(generation)
        sequence = sequence_match.group(1) if sequence_match else "Sequence not found"

        # Rendered with unsafe_allow_html=True, so escape model output first.
        # Escaping never alters a run of capital letters, so the sequence found
        # above can still be located in the escaped text.
        safe_generation = html.escape(generation, quote=False)

        # NOTE(review): the replacement wraps the sequence in nothing, so this
        # is currently a no-op; the highlight markup appears to be missing.
        highlighted_generation = safe_generation.replace(sequence, f'{sequence}')

        progress_bar.empty()

    st.markdown(
        f"\n\nGenerated Protein\n\n{highlighted_generation}\n",
        unsafe_allow_html=True,
    )

    # Sequence visualization
    st.markdown("### Sequence Visualization")
    fig, ax = plt.subplots(figsize=(10, 1.5))
    ax.text(
        0.5,
        0.5,
        sequence,
        fontfamily='monospace',
        fontsize=9,
        ha='center',
        va='center',
    )
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')
    st.pyplot(fig, use_container_width=True)
    # Release the figure; pyplot otherwise keeps every figure created by
    # plt.subplots alive, one more per generation.
    plt.close(fig)


def _render_explorer_tab():
    """Render the "Protein Explorer" tab: a fixed image and display-only controls."""
    st.markdown("### Protein Structure Explorer")
    st.info("This module provides interactive visualization of protein structures")

    # Protein structure visualization placeholder
    # NOTE(review): newer Streamlit releases reportedly deprecate
    # `use_column_width` in favour of `use_container_width` for st.image;
    # confirm against the pinned Streamlit version before switching.
    st.image(
        "https://cdn.rcsb.org/images/structures/1mbn/1mbn_assembly-1.jpeg",
        caption="Protein Structure Visualization",
        use_column_width=True,
    )

    # The values chosen in these widgets are not read anywhere yet.
    style_col, color_col = st.columns(2)
    with style_col:
        st.selectbox("Visualization Style", ["Cartoon", "Surface", "Ribbon", "Ball & Stick"])
    with color_col:
        st.selectbox("Color Scheme", ["By Element", "By Chain", "By Residue Type", "Hydrophobicity"])

    st.slider("Rotation", 0, 360, 45)
    st.button("Render Structure", use_container_width=True)


# ---- Custom CSS ----
# NOTE(review): this call currently injects a single space; the style rules
# that would use the colour constants above appear to be missing.
st.markdown(" ", unsafe_allow_html=True)

# ---- App Header ----
st.markdown(
    f"\n\n{APP_NAME}\n\nAdvanced Protein Sequence Analysis with DeepSeek AI\n\n",
    unsafe_allow_html=True,
)

# Initialize session state
for _state_key, _initial in (
    ("r1_model", None),
    ("v3_model", None),
    ("current_tab", "Analysis"),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial

# ---- Model Cards ----
# NOTE(review): the "52.8B" / "32.4B" figures below look derived from the
# checkpoint suffixes (-0528 / -0324) rather than real parameter counts;
# verify against the published model cards.
with st.container():
    col1, col2 = st.columns(2)
    with col1:
        st.markdown(
            "\n\nDeepSeek-R1-0528\n\n"
            "Advanced 52.8B parameter model for precise protein analysis and functional predictions\n\n"
            "Specialized in protein sequence interpretation\n\n",
            unsafe_allow_html=True,
        )
    with col2:
        st.markdown(
            "\n\nDeepSeek-V3-0324\n\n"
            "Cutting-edge 32.4B parameter model for generative protein design and sequence optimization\n\n"
            "Optimized for protein engineering tasks\n\n",
            unsafe_allow_html=True,
        )

# ---- Tab Navigation ----
tabs = ["Analysis", "Sequence Generator", "Protein Explorer"]
# A real label is supplied (and kept hidden via label_visibility) because
# Streamlit discourages empty widget labels for accessibility reasons.
current_tab = st.radio(
    "Navigation",
    tabs,
    index=0,
    horizontal=True,
    label_visibility="collapsed",
)
# Keep the stored tab in sync with the selector.
st.session_state.current_tab = current_tab

# ---- Input Section ----
protein_seq = st.text_area(
    "Enter Protein Sequence:",
    height=180,
    placeholder="MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCA...",
    help="Enter amino acid sequence in single-letter code",
)

# ---- Tab Content ----
if current_tab == "Analysis":
    _render_analysis_tab(protein_seq)
elif current_tab == "Sequence Generator":
    _render_generator_tab()
elif current_tab == "Protein Explorer":
    _render_explorer_tab()

# ---- Footer ----
st.markdown("---")
# NOTE(review): like the CSS block, the footer payload is a single space.
st.markdown(" ", unsafe_allow_html=True)