julse committed on
Commit
fd627cb
·
verified ·
1 Parent(s): d8f86a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -19
app.py CHANGED
@@ -10,6 +10,28 @@ from Bio import SeqIO
10
  import torch
11
  from sklearn.preprocessing import OneHotEncoder
12
  import plotly.graph_objects as go
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
 
15
  class RPContactPredictor:
@@ -76,6 +98,7 @@ class RPContactPredictor:
76
  return sequences
77
 
78
  def predict_contact(self, rna_seq, protein_seq):
 
79
  """Predict RNA-protein contact matrix"""
80
  # Encode sequences
81
  rna_oh = self.one_hot_encode(rna_seq, alpha='ACGU')
@@ -125,7 +148,7 @@ def create_heatmap(contact_matrix, rna_labels, protein_labels, rna_name, protein
125
  },
126
  xaxis_title=f"Residues ({protein_name}, {len(protein_labels)} AA)",
127
  yaxis_title=f"Nucleotides ({rna_name}, {len(rna_labels)} nt)",
128
- width=max(ratio*300,600),
129
  height=300,
130
  font=dict(size=12)
131
  )
@@ -168,7 +191,7 @@ def create_download_files(contact_matrix, rna_labels, protein_labels, rna_name,
168
  def process_prediction(fasta_file, rna_sequence, protein_sequence, input_method):
169
  """Process prediction request and return initial results"""
170
  if not fasta_file and not (rna_sequence and protein_sequence):
171
- return "❌ Please upload a FASTA file or enter RNA and protein sequences",None, None, None, None, None, None
172
 
173
  try:
174
  # Process input
@@ -187,9 +210,9 @@ def process_prediction(fasta_file, rna_sequence, protein_sequence, input_method)
187
 
188
  # Validate sequences
189
  if len(set(rna_seq) - set('ACGU')) > 0:
190
- return f"❌ RNA sequence contains invalid characters: {set(rna_seq) - set('ACGU')}",None, None, None, None, None, None
191
  if len(set(protein_seq) - set('GAVLIFWYDNEKQMSTCPHR')) > 0:
192
- return f"❌ Protein sequence contains invalid characters: {set(protein_seq) - set('GAVLIFWYDNEKQMSTCPHR')}",None, None, None, None, None, None
193
 
194
  # Run contact prediction
195
  contact_matrix = predictor.predict_contact(rna_seq, protein_seq)
@@ -245,6 +268,7 @@ def process_prediction(fasta_file, rna_sequence, protein_sequence, input_method)
245
  except Exception as e:
246
  return f"❌ Prediction failed: {str(e)}", None, None, None, None, None, None
247
 
 
248
  def update_results_with_threshold(Threshold, result_state):
249
  """Update heatmap and contact table based on Threshold threshold"""
250
  if result_state is None:
@@ -270,7 +294,6 @@ def update_results_with_threshold(Threshold, result_state):
270
  # Create contact pairs info
271
  contact_info = f"📊 Found {len(contact_pairs)} contacts (Probability ≥ {Threshold:.3f})"
272
 
273
-
274
  return heatmap, contact_pairs, contact_info
275
 
276
 
@@ -300,6 +323,8 @@ def load_example_data(fasta_input, rna_input, protein_input):
300
  return gr.update(value="Upload FASTA File")
301
  else:
302
  return gr.update(value="Enter Sequences Directly")
 
 
303
  def create_interface():
304
  """Create Gradio interface with threshold control"""
305
  custom_css = """
@@ -328,17 +353,17 @@ def create_interface():
328
  border: 1px solid #ddd;
329
  border-radius: 4px;
330
  }
331
-
332
  .scrollable-plot > div {
333
  min-width: max-content; /* 保持原始宽度 */
334
  }
335
  """
336
 
337
  with gr.Blocks(title="RNA-Protein Contact Prediction Tool",
338
- theme=gr.themes.Soft(primary_hue="blue", secondary_hue="teal"),css=None) as app:
339
  gr.Markdown("""
340
  <center>
341
-
342
  # 🧬 RPcontact: RNA-Protein Contact Prediction
343
  **Direct Nucleotide–Residue Contact Prediction from Primary Sequences** [Paper](https://www.biorxiv.org/content/10.1101/2025.06.02.657171v1.full)| [Code](https://github.com/JulseJiang/RPcontact) | [Demo](https://huggingface.co/spaces/julse/RPcontact)
344
  </center>
@@ -347,7 +372,7 @@ def create_interface():
347
  >This tool leveraging **ERNIE-RNA** for RNA and **ESM-2** for protein modeling, the method provides high-resolution insights into RNA-protein interactions at the atomic level.
348
  <br>Current Demo (auROC 0.779 on VL-49) is optimized for limited CPU environments using efficient one-hot encoding<br>
349
  Advanced Model (auROC 0.845 on VL-49), the embedding-based approach will be released upon paper publication ([contact us](mailto:jiangjh2023@shanghaitech.edu.cn) for early access)
350
-
351
  """)
352
  with gr.Tab("🔬 Contact Prediction"):
353
  with gr.Row():
@@ -364,7 +389,7 @@ def create_interface():
364
  label="FASTA File",
365
  file_types=['.fasta', '.fa', '.txt'],
366
  type='binary',
367
- visible = False
368
  )
369
 
370
  rna_input = gr.Textbox(
@@ -390,19 +415,15 @@ def create_interface():
390
  outputs=[input_method],
391
  label="📋 Example Data (click to load)",
392
  run_on_click=True,
393
- fn = load_example_data
394
  )
395
 
396
-
397
-
398
  # Submit button at the bottom of input column
399
  predict_btn = gr.Button("🚀 Run Prediction", variant="primary", size="lg")
400
 
401
  # Status output
402
  status_output = gr.Textbox(label="Prediction Status", lines=5)
403
 
404
-
405
-
406
  with gr.Column(scale=2):
407
  # Results section - initially hidden
408
  gr.Markdown("""
@@ -524,7 +545,7 @@ def create_interface():
524
  ## ⚡ Performance Guidelines
525
 
526
  - **Processing Time**: Scales quadratically with sequence length
527
-
528
  ### Quality Considerations
529
  - Higher probabilities indicate more confident predictions
530
  - Consider biological context when interpreting results
@@ -578,13 +599,13 @@ def create_interface():
578
  - Cross-validated on diverse RNA-protein complex datasets
579
  - Performance metrics available in the original publication
580
  - Benchmarked against existing prediction methods
581
-
582
  ### 📊 Difference between current demo and final model
583
  | Model Type | Checkpoint File | auROC (VL-49) | LLM embeddings |
584
  |---------------------|---------------------------|---------------|-------------------|
585
  | OH + RP_Emb (final) | `model_roc_0_38=0.845.pt` | 0.845 | ✓ |
586
  | OH (demo) | `model_roc_0_56=0.779.pt` | 0.779 | ✗ |
587
-
588
  ## 📚 Citation & Contact
589
 
590
  If you use this tool in your research, please cite:
@@ -593,7 +614,7 @@ def create_interface():
593
 
594
  ### Contact Information
595
  For technical issues, feature requests, or collaboration inquiries, please contact the development team.
596
-
597
  - **Primary Contact**: Jiuhong Jiang
598
  - **Email**: jiangjh2023@shanghaitech.edu.cn
599
  - **Institution**: ShanghaiTech University, Shanghai, China
 
10
  import torch
11
  from sklearn.preprocessing import OneHotEncoder
12
  import plotly.graph_objects as go
13
+ import requests
14
+ import time
15
+
16
+ last_ping_time = 0
17
+ def smart_wake_up():
18
+ global last_ping_time
19
+ current_time = time.time()
20
+ # 每 40 分钟(2400秒)触发一次,Space 默认通常是 48 小时或更短时间休眠
21
+ if current_time - last_ping_time > 2400:
22
+ wake_up_space_b()
23
+ last_ping_time = current_time
24
+
25
+
26
+ def wake_up_space_b():
27
+ try:
28
+ # 替换为 Space B 的直接访问链接
29
+ url = "https://huggingface.co/spaces/julse/maotao"
30
+ # 使用 head 请求或 get 请求,超时时间设短一点以免影响 A 的速度
31
+ requests.get(url, timeout=5)
32
+ print("Successfully pinged Space B")
33
+ except Exception as e:
34
+ print(f"Failed to wake up Space B: {e}")
35
 
36
 
37
  class RPContactPredictor:
 
98
  return sequences
99
 
100
  def predict_contact(self, rna_seq, protein_seq):
101
+ smart_wake_up()
102
  """Predict RNA-protein contact matrix"""
103
  # Encode sequences
104
  rna_oh = self.one_hot_encode(rna_seq, alpha='ACGU')
 
148
  },
149
  xaxis_title=f"Residues ({protein_name}, {len(protein_labels)} AA)",
150
  yaxis_title=f"Nucleotides ({rna_name}, {len(rna_labels)} nt)",
151
+ width=max(ratio * 300, 600),
152
  height=300,
153
  font=dict(size=12)
154
  )
 
191
  def process_prediction(fasta_file, rna_sequence, protein_sequence, input_method):
192
  """Process prediction request and return initial results"""
193
  if not fasta_file and not (rna_sequence and protein_sequence):
194
+ return "❌ Please upload a FASTA file or enter RNA and protein sequences", None, None, None, None, None, None
195
 
196
  try:
197
  # Process input
 
210
 
211
  # Validate sequences
212
  if len(set(rna_seq) - set('ACGU')) > 0:
213
+ return f"❌ RNA sequence contains invalid characters: {set(rna_seq) - set('ACGU')}", None, None, None, None, None, None
214
  if len(set(protein_seq) - set('GAVLIFWYDNEKQMSTCPHR')) > 0:
215
+ return f"❌ Protein sequence contains invalid characters: {set(protein_seq) - set('GAVLIFWYDNEKQMSTCPHR')}", None, None, None, None, None, None
216
 
217
  # Run contact prediction
218
  contact_matrix = predictor.predict_contact(rna_seq, protein_seq)
 
268
  except Exception as e:
269
  return f"❌ Prediction failed: {str(e)}", None, None, None, None, None, None
270
 
271
+
272
  def update_results_with_threshold(Threshold, result_state):
273
  """Update heatmap and contact table based on Threshold threshold"""
274
  if result_state is None:
 
294
  # Create contact pairs info
295
  contact_info = f"📊 Found {len(contact_pairs)} contacts (Probability ≥ {Threshold:.3f})"
296
 
 
297
  return heatmap, contact_pairs, contact_info
298
 
299
 
 
323
  return gr.update(value="Upload FASTA File")
324
  else:
325
  return gr.update(value="Enter Sequences Directly")
326
+
327
+
328
  def create_interface():
329
  """Create Gradio interface with threshold control"""
330
  custom_css = """
 
353
  border: 1px solid #ddd;
354
  border-radius: 4px;
355
  }
356
+
357
  .scrollable-plot > div {
358
  min-width: max-content; /* 保持原始宽度 */
359
  }
360
  """
361
 
362
  with gr.Blocks(title="RNA-Protein Contact Prediction Tool",
363
+ theme=gr.themes.Soft(primary_hue="blue", secondary_hue="teal"), css=None) as app:
364
  gr.Markdown("""
365
  <center>
366
+
367
  # 🧬 RPcontact: RNA-Protein Contact Prediction
368
  **Direct Nucleotide–Residue Contact Prediction from Primary Sequences** [Paper](https://www.biorxiv.org/content/10.1101/2025.06.02.657171v1.full)| [Code](https://github.com/JulseJiang/RPcontact) | [Demo](https://huggingface.co/spaces/julse/RPcontact)
369
  </center>
 
372
  >This tool leveraging **ERNIE-RNA** for RNA and **ESM-2** for protein modeling, the method provides high-resolution insights into RNA-protein interactions at the atomic level.
373
  <br>Current Demo (auROC 0.779 on VL-49) is optimized for limited CPU environments using efficient one-hot encoding<br>
374
  Advanced Model (auROC 0.845 on VL-49), the embedding-based approach will be released upon paper publication ([contact us](mailto:jiangjh2023@shanghaitech.edu.cn) for early access)
375
+
376
  """)
377
  with gr.Tab("🔬 Contact Prediction"):
378
  with gr.Row():
 
389
  label="FASTA File",
390
  file_types=['.fasta', '.fa', '.txt'],
391
  type='binary',
392
+ visible=False
393
  )
394
 
395
  rna_input = gr.Textbox(
 
415
  outputs=[input_method],
416
  label="📋 Example Data (click to load)",
417
  run_on_click=True,
418
+ fn=load_example_data
419
  )
420
 
 
 
421
  # Submit button at the bottom of input column
422
  predict_btn = gr.Button("🚀 Run Prediction", variant="primary", size="lg")
423
 
424
  # Status output
425
  status_output = gr.Textbox(label="Prediction Status", lines=5)
426
 
 
 
427
  with gr.Column(scale=2):
428
  # Results section - initially hidden
429
  gr.Markdown("""
 
545
  ## ⚡ Performance Guidelines
546
 
547
  - **Processing Time**: Scales quadratically with sequence length
548
+
549
  ### Quality Considerations
550
  - Higher probabilities indicate more confident predictions
551
  - Consider biological context when interpreting results
 
599
  - Cross-validated on diverse RNA-protein complex datasets
600
  - Performance metrics available in the original publication
601
  - Benchmarked against existing prediction methods
602
+
603
  ### 📊 Difference between current demo and final model
604
  | Model Type | Checkpoint File | auROC (VL-49) | LLM embeddings |
605
  |---------------------|---------------------------|---------------|-------------------|
606
  | OH + RP_Emb (final) | `model_roc_0_38=0.845.pt` | 0.845 | ✓ |
607
  | OH (demo) | `model_roc_0_56=0.779.pt` | 0.779 | ✗ |
608
+
609
  ## 📚 Citation & Contact
610
 
611
  If you use this tool in your research, please cite:
 
614
 
615
  ### Contact Information
616
  For technical issues, feature requests, or collaboration inquiries, please contact the development team.
617
+
618
  - **Primary Contact**: Jiuhong Jiang
619
  - **Email**: jiangjh2023@shanghaitech.edu.cn
620
  - **Institution**: ShanghaiTech University, Shanghai, China