persadian committed on
Commit
758380a
·
verified ·
1 Parent(s): 720c8ae
Files changed (1) hide show
  1. src/app.py +371 -0
src/app.py ADDED
@@ -0,0 +1,371 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
import base64
import html
import re
import time

import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
9
+
10
# ---- Configuration ----
# Application name shown in the header and footer.
APP_NAME = "JithAI"

# Hugging Face model identifiers handed to ``load_model``.
MODEL_R1 = "deepseek-ai/DeepSeek-R1-0528"
MODEL_V3 = "deepseek-ai/DeepSeek-V3-0324"

# Theme palette; these hex values are interpolated into the injected CSS.
BG_COLOR = "#0F172A"  # Deep space blue
TEXT_COLOR = "#E2E8F0"  # Light gray text
PRIMARY_COLOR = "#6366F1"  # Modern indigo
SECONDARY_COLOR = "#8B5CF6"  # Vibrant violet
ACCENT_COLOR = "#06D6A0"  # Teal accent
19
+
20
# ---- Custom CSS ----
def _css_rgba(hex_color, alpha):
    """Return a CSS ``rgba(r, g, b, a)`` value for a ``#RRGGBB`` colour.

    Args:
        hex_color: Colour as ``#RRGGBB`` (the leading ``#`` is optional).
        alpha: Opacity component written verbatim as the fourth argument.

    Returns:
        A string such as ``rgba(99, 102, 241, 0.3)``.
    """
    digits = hex_color.lstrip("#")
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"


# The highlight colours are computed outside the f-string for two reasons:
#  * interpolating ``tuple(...)`` renders "(r, g, b)" with its own closing
#    parenthesis, so appending ", 0.3)" produced the malformed value
#    "rgba(r, g, b), 0.3)";
#  * a '#' inside an f-string expression is a SyntaxError before Python 3.12.
_HIGHLIGHT_FROM = _css_rgba(PRIMARY_COLOR, 0.3)
_HIGHLIGHT_TO = _css_rgba(SECONDARY_COLOR, 0.3)

# Inject the global stylesheet. Doubled braces are literal CSS braces; single
# braces interpolate the palette constants defined above.
st.markdown(f"""
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&display=swap');

* {{
font-family: 'Inter', sans-serif;
}}

body {{
background-color: {BG_COLOR};
color: {TEXT_COLOR};
}}

.stApp {{
background: linear-gradient(135deg, {BG_COLOR}, #1E293B);
background-size: 400% 400%;
animation: gradientBG 15s ease infinite;
}}

@keyframes gradientBG {{
0% {{ background-position: 0% 50%; }}
50% {{ background-position: 100% 50%; }}
100% {{ background-position: 0% 50%; }}
}}

.header {{
color: white;
text-align: center;
padding: 1rem 0;
background: rgba(30, 41, 59, 0.7);
border-radius: 16px;
backdrop-filter: blur(10px);
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
border: 1px solid rgba(99, 102, 241, 0.3);
margin-bottom: 2rem;
}}

.stButton>button {{
background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR});
color: white !important;
border: none;
border-radius: 12px;
padding: 12px 28px;
font-weight: 600;
transition: all 0.3s ease;
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}}

.stButton>button:hover {{
transform: translateY(-2px);
box-shadow: 0 6px 12px rgba(0, 0, 0, 0.15);
}}

.stTextArea textarea {{
background-color: rgba(30, 41, 59, 0.7) !important;
color: {TEXT_COLOR} !important;
border: 1px solid {SECONDARY_COLOR} !important;
border-radius: 12px;
padding: 15px !important;
}}

.result-box {{
background: rgba(30, 41, 59, 0.7);
border-radius: 16px;
padding: 25px;
margin-top: 20px;
backdrop-filter: blur(5px);
border: 1px solid rgba(139, 92, 246, 0.2);
box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
}}

.model-card {{
background: rgba(15, 23, 42, 0.8);
border-radius: 12px;
padding: 20px;
margin-bottom: 20px;
border-left: 4px solid {ACCENT_COLOR};
}}

.footer {{
text-align: center;
margin-top: 40px;
color: #94A3B8;
font-size: 0.9rem;
}}

.highlight {{
background: linear-gradient(120deg, {_HIGHLIGHT_FROM}, {_HIGHLIGHT_TO});
padding: 2px 6px;
border-radius: 4px;
font-weight: 600;
}}

.tab-content {{
padding: 20px 0;
}}

.stProgress > div > div > div {{
background: linear-gradient(to right, {PRIMARY_COLOR}, {SECONDARY_COLOR}) !important;
}}
</style>
""", unsafe_allow_html=True)
123
+
124
# ---- App Header ----
# Banner at the top of the page; its look comes from the ``.header`` CSS rule.
header_html = f"""
<div class="header">
<h1>{APP_NAME}</h1>
<p>Advanced Protein Sequence Analysis with DeepSeek AI</p>
</div>
"""
st.markdown(header_html, unsafe_allow_html=True)
131
+
132
# ---- Model Loading ----
@st.cache_resource(show_spinner=False)
def load_model(model_name):
    """Build a ``text-generation`` pipeline for the given checkpoint.

    The tokenizer is loaded first, then the causal-LM weights (with
    ``device_map="auto"``), and both are wrapped in a ``transformers``
    pipeline. The function is decorated with ``st.cache_resource`` with the
    decorator's own spinner disabled; callers show their own spinner.

    Args:
        model_name: Identifier passed to both ``from_pretrained`` calls.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    # Keyword arguments are evaluated left to right, so the tokenizer is still
    # fetched before the (much larger) model weights.
    return pipeline(
        "text-generation",
        tokenizer=AutoTokenizer.from_pretrained(model_name),
        model=AutoModelForCausalLM.from_pretrained(model_name, device_map="auto"),
    )
138
+
139
# Initialize session state
# Each key is seeded only when absent, so values already stored in
# ``st.session_state`` are left untouched.
_SESSION_DEFAULTS = (
    ("r1_model", None),
    ("v3_model", None),
    ("current_tab", "Analysis"),
)
for _state_key, _state_default in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default
146
+
147
# ---- Model Cards ----
# Two side-by-side description cards, styled by the ``.model-card`` CSS rule.
# The parameter counts previously shown ("52.8B" / "32.4B") mirrored the
# release-date suffixes of the checkpoint names (0528 / 0324) rather than the
# model sizes; both checkpoints are published as 671B-parameter
# mixture-of-experts models.
# NOTE(review): the "Specialized"/"Optimized" protein claims are marketing copy
# for general-purpose models — confirm the wording with the product owner.
with st.container():
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("""
<div class="model-card">
<h3>DeepSeek-R1-0528</h3>
<p>Advanced 671B parameter model for precise protein analysis and functional predictions</p>
<p><span class="highlight">Specialized</span> in protein sequence interpretation</p>
</div>
""", unsafe_allow_html=True)

    with col2:
        st.markdown("""
<div class="model-card">
<h3>DeepSeek-V3-0324</h3>
<p>Cutting-edge 671B parameter model for generative protein design and sequence optimization</p>
<p><span class="highlight">Optimized</span> for protein engineering tasks</p>
</div>
""", unsafe_allow_html=True)
168
+
169
# ---- Tab Navigation ----
# A horizontal radio group acts as the tab bar; the selected label drives the
# if/elif chain that renders the tab content.
tabs = ["Analysis", "Sequence Generator", "Protein Explorer"]
# Streamlit requires a non-empty label for accessibility (empty labels are
# documented as unsupported and may raise in future releases); it stays
# hidden through label_visibility="collapsed".
current_tab = st.radio(
    "Section",
    tabs,
    index=0,
    horizontal=True,
    label_visibility="collapsed",
)
172
+
173
# ---- Input Section ----
# Free-text sequence box rendered above every tab; the Analysis tab reads
# ``protein_seq`` when its button is pressed.
_SEQUENCE_PLACEHOLDER = (
    "MTAIIKEIVSRNKRRYQEDGFDLDLTYIYPNIIAMGFPAERLEGVYRNNIDDVVRFLDSKHKNHYKIYNLCA..."
)
protein_seq = st.text_area(
    "Enter Protein Sequence:",
    placeholder=_SEQUENCE_PLACEHOLDER,
    help="Enter amino acid sequence in single-letter code",
    height=180,
)
180
+
181
# ---- Tab Content ----
# Numbered headings ("1. Structural characteristics") in the model's answer.
_HEADING_RE = re.compile(r'(\d+\.\s+[^\n]+)')


def _rgba_from_hex(hex_color, alpha):
    """Return a CSS ``rgba(r, g, b, a)`` value for a ``#RRGGBB`` colour."""
    digits = hex_color.lstrip("#")
    red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f"rgba({red}, {green}, {blue}, {alpha})"


def _completion_text(response):
    """Return the text that follows the last ``[/INST]`` marker, stripped."""
    return response[0]['generated_text'].split('[/INST]')[-1].strip()


def _result_box(title, body_html):
    """Wrap ``body_html`` (already escaped/safe markup) in the result card."""
    return (
        '<div class="result-box">\n'
        f'<h3>{title}</h3>\n'
        '<div style="line-height: 1.8; margin-top: 15px;">\n'
        f'{body_html}\n'
        '</div>\n'
        '</div>'
    )


def _analysis_prompt(sequence, focus):
    """Build the instruction prompt for the analysis tab."""
    focus_line = f"Focus: {focus}" if focus else ""
    return f"""
[INST] You are an expert bioinformatician specializing in protein analysis.
Analyze the following protein sequence and provide detailed insights:

Protein Sequence:
{sequence}

{focus_line}

Provide your analysis in the following format:
1. Structural characteristics
2. Potential functional domains
3. Binding site predictions
4. Stability and solubility assessment
5. Potential modifications for optimization
[/INST]
"""


def _design_prompt(goal):
    """Build the instruction prompt for the sequence-generator tab."""
    return f"""
[INST] You are an AI protein engineer. Design a novel protein sequence based on the following requirements:

Design Goal: {goal}

Provide:
1. A novel protein sequence (60-80 amino acids)
2. Brief explanation of key features
3. Potential applications
[/INST]
"""


def _format_analysis_html(analysis):
    """Escape model output and colour its numbered headings.

    The text is escaped first because it is rendered with
    ``unsafe_allow_html=True``; only the heading ``<span>`` added here is
    real markup.
    """
    safe_text = html.escape(analysis, quote=False)
    return _HEADING_RE.sub(
        r'<br><span style="color:#8B5CF6; font-weight:600">\1</span><br>',
        safe_text,
    )


def _highlight_sequence(generation, sequence):
    """Escape model output and wrap every occurrence of ``sequence``.

    ``sequence`` is either ``None`` (nothing to highlight) or a run of
    upper-case letters, so escaping the surrounding text cannot alter it.
    """
    safe_text = html.escape(generation, quote=False)
    if sequence is None:
        return safe_text
    # Built with an explicit helper: interpolating ``tuple(...)`` directly
    # produced the malformed CSS value "rgba(r, g, b), 0.3)".
    background = _rgba_from_hex(ACCENT_COLOR, 0.3)
    span = (
        f'<span style="background: {background}; padding: 3px; '
        f'border-radius: 4px; font-family: monospace;">{sequence}</span>'
    )
    return safe_text.replace(sequence, span)


if current_tab == "Analysis":
    st.markdown("### Protein Analysis")
    analysis_prompt = st.text_input(
        "Analysis Focus (optional):",
        placeholder="e.g., Identify potential binding sites, analyze structural motifs",
        help="Specify what you want to analyze in the protein sequence"
    )

    if st.button("Analyze with DeepSeek-R1", use_container_width=True):
        # Whitespace-only input is treated as empty.
        sequence_input = (protein_seq or "").strip()
        if not sequence_input:
            st.warning("Please input a protein sequence")
        else:
            with st.spinner("Initializing DeepSeek-R1 model..."):
                if st.session_state.r1_model is None:
                    st.session_state.r1_model = load_model(MODEL_R1)

            # The spinner is the only progress indicator: the former simulated
            # progress bar slept for 2 s before generation even started and
            # left a stale "100% complete" box above the real result.
            with st.spinner("Analyzing protein structure..."):
                response = st.session_state.r1_model(
                    _analysis_prompt(sequence_input, analysis_prompt),
                    max_new_tokens=800,
                    temperature=0.7,
                    do_sample=True,
                    top_p=0.9,
                )

            analysis = _completion_text(response)
            st.markdown(
                _result_box("Analysis Results", _format_analysis_html(analysis)),
                unsafe_allow_html=True,
            )

elif current_tab == "Sequence Generator":
    st.markdown("### Protein Sequence Generation")
    design_goal = st.text_input(
        "Design Goal:",
        placeholder="e.g., Create a thermostable enzyme for DNA repair",
        help="Describe the protein you want to generate"
    )

    if st.button("Generate with DeepSeek-V3", use_container_width=True):
        goal = (design_goal or "").strip()
        if not goal:
            st.warning("Please enter a design goal")
        else:
            with st.spinner("Initializing DeepSeek-V3 model..."):
                if st.session_state.v3_model is None:
                    st.session_state.v3_model = load_model(MODEL_V3)

            with st.spinner("Designing optimized protein sequence..."):
                response = st.session_state.v3_model(
                    _design_prompt(goal),
                    max_new_tokens=400,
                    temperature=0.8,
                    do_sample=True,
                    top_p=0.95,
                )

            generation = _completion_text(response)

            # First run of 60+ capital letters is taken to be the sequence.
            sequence_match = re.search(r'([A-Z]{60,})', generation)
            sequence = sequence_match.group(1) if sequence_match else None

            st.markdown(
                _result_box(
                    "Generated Protein",
                    _highlight_sequence(generation, sequence),
                ),
                unsafe_allow_html=True,
            )

            if sequence is None:
                # Previously the sentinel text "Sequence not found" was drawn
                # in the figure as though it were a sequence.
                st.warning("Sequence not found")
            else:
                # Sequence visualization
                st.markdown("### Sequence Visualization")
                fig, ax = plt.subplots(figsize=(10, 1.5))
                ax.text(0.5, 0.5, sequence,
                        fontfamily='monospace',
                        fontsize=9,
                        ha='center',
                        va='center')
                ax.set_xlim(0, 1)
                ax.set_ylim(0, 1)
                ax.axis('off')
                st.pyplot(fig, use_container_width=True)
                # Release the figure; pyplot keeps every open figure alive
                # across Streamlit reruns otherwise.
                plt.close(fig)

elif current_tab == "Protein Explorer":
    st.markdown("### Protein Structure Explorer")
    st.info("This module provides interactive visualization of protein structures")

    # Protein structure visualization placeholder
    # NOTE(review): use_column_width is reported as deprecated in newer
    # Streamlit releases — confirm the target version before changing it.
    st.image("https://cdn.rcsb.org/images/structures/1mbn/1mbn_assembly-1.jpeg",
             caption="Protein Structure Visualization",
             use_column_width=True)

    # The controls below are placeholders: their return values are not read
    # anywhere in this block, so they do not change the image above.
    col1, col2 = st.columns(2)
    with col1:
        st.selectbox("Visualization Style", ["Cartoon", "Surface", "Ribbon", "Ball & Stick"])
    with col2:
        st.selectbox("Color Scheme", ["By Element", "By Chain", "By Residue Type", "Hydrophobicity"])

    st.slider("Rotation", 0, 360, 45)
    st.button("Render Structure", use_container_width=True)
361
+
362
# ---- Footer ----
# Horizontal rule followed by the version/disclaimer lines, styled by the
# ``.footer`` CSS rule.
st.markdown("---")
footer_html = f"""
<div class="footer">
<p>{APP_NAME} v1.2 | Advanced Protein Analysis Platform</p>
<p style="font-size: 0.85rem; margin-top: 10px;">
Powered by DeepSeek-R1 and DeepSeek-V3 models | This tool is for research purposes only
</p>
</div>
"""
st.markdown(footer_html, unsafe_allow_html=True)