yinuozhang commited on
Commit
82cd634
·
1 Parent(s): 4e0581e

upload model path

Browse files
Files changed (2) hide show
  1. app.py +215 -33
  2. description.md +10 -15
app.py CHANGED
@@ -12,7 +12,74 @@ import json
12
  import time
13
  from typing import List, Dict, Any, Tuple, Optional
14
 
15
- # Try to import RDKit for SMILES support
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  try:
17
  from rdkit import Chem
18
  from rdkit.Chem import Descriptors, AllChem
@@ -357,14 +424,13 @@ class PeptideCNN(nn.Module):
357
  # ==================== Data Management ====================
358
 
359
  class TrainingDataManager:
360
- """Manage training data statistics and distributions"""
361
- def __init__(self, data_dir="training_data"):
362
- self.data_dir = Path(__file__).resolve().parent / data_dir
363
  self.data_dir.mkdir(exist_ok=True)
364
  self.statistics = self.load_statistics()
365
 
366
  def _load_half_life_csv(self):
367
- csv_path = self.data_dir / "half_life_smiles.csv"
368
  if not csv_path.exists():
369
  return None
370
  try:
@@ -397,8 +463,8 @@ class TrainingDataManager:
397
  Returns: {'values': y, 'unit': 'Class (0=neg, 1=pos)', 'kind': 'binary', 'n_pos': int, 'n_neg': int}
398
  or None if missing.
399
  """
400
- pos_path = self.data_dir / f"{prefix}-positive.npz"
401
- neg_path = self.data_dir / f"{prefix}-negative.npz"
402
  if not pos_path.exists() or not neg_path.exists():
403
  return None
404
  try:
@@ -420,6 +486,68 @@ class TrainingDataManager:
420
  except Exception as e:
421
  print(f"[TrainingDataManager] binary load error for '{prefix}': {e}")
422
  return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
423
 
424
  def load_statistics(self):
425
  """Load pre-computed statistics for each property"""
@@ -472,7 +600,7 @@ class TrainingDataManager:
472
  # Overlay real half-life
473
  hl = self._load_half_life_csv()
474
  if hl is not None:
475
- stats["half_life"].update(hl)
476
 
477
  # Overlay real solubility from sol-* (binary)
478
  sol = self._load_binary_pair("sol")
@@ -487,6 +615,14 @@ class TrainingDataManager:
487
  hemo = self._load_binary_pair("hemo")
488
  if hemo is not None:
489
  stats["hemolysis"].update(hemo)
 
 
 
 
 
 
 
 
490
 
491
  return stats
492
 
@@ -520,16 +656,39 @@ class TrainingDataManager:
520
  # continuous
521
  fig = go.Figure()
522
  fig.add_trace(go.Histogram(x=vals, nbinsx=50, name="Training Data"))
 
 
523
  if "threshold" in s and s["threshold"] is not None:
524
  fig.add_vline(
525
- x=s["threshold"], line_dash="dash", line_color="red",
526
- annotation_text=f"Threshold: {s['threshold']:.3f}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  )
 
 
528
  if current_value is not None:
529
  fig.add_vline(
530
- x=current_value, line_dash="solid", line_color="green", line_width=3,
531
- annotation_text=f"Your Result: {current_value:.3f}"
 
 
 
532
  )
 
533
  fig.update_layout(
534
  title=f"{property_name.replace('_', ' ').title()} Distribution",
535
  xaxis_title=s.get("unit", ""),
@@ -539,6 +698,7 @@ class TrainingDataManager:
539
  )
540
  return fig
541
 
 
542
  def get_property_info(self, property_name):
543
  if property_name not in self.statistics:
544
  return None
@@ -606,7 +766,7 @@ class UnifiedPeptidePredictor:
606
  self.model_configs = self.get_model_configs()
607
 
608
  # Data manager
609
- self.data_manager = TrainingDataManager()
610
  self._protein_cache = {}
611
  # Load models
612
  self.load_all_models()
@@ -617,7 +777,7 @@ class UnifiedPeptidePredictor:
617
  'hemolysis_seq': {
618
  'type': 'xgboost',
619
  'input': 'sequence',
620
- 'path': 'best_model_hemolysis.json',
621
  'inverse_score': False,
622
  'unit': 'Probability',
623
  'display_name': '🩸 Hemolysis',
@@ -627,7 +787,7 @@ class UnifiedPeptidePredictor:
627
  'hemolysis_smiles': {
628
  'type': 'xgboost',
629
  'input': 'smiles',
630
- 'path': 'hemolysis-xgboost_smiles.json',
631
  'inverse_score': False,
632
  'unit': 'Probability',
633
  'display_name': '🩸 Hemolysis',
@@ -637,7 +797,7 @@ class UnifiedPeptidePredictor:
637
  'solubility_seq': {
638
  'type': 'xgboost',
639
  'input': 'sequence',
640
- 'path': 'best_model_solubility.json',
641
  'unit': 'Probability',
642
  'display_name': 'πŸ’§ Solubility',
643
  'positive_label': 'Soluble',
@@ -646,7 +806,7 @@ class UnifiedPeptidePredictor:
646
  'solubility_smiles': {
647
  'type': 'xgboost',
648
  'input': 'smiles',
649
- 'path': 'solubility-xgboost_smiles.json',
650
  'unit': 'Probability',
651
  'display_name': 'πŸ’§ Solubility',
652
  'positive_label': 'Soluble',
@@ -655,7 +815,7 @@ class UnifiedPeptidePredictor:
655
  'permeability_smiles': {
656
  'type': 'xgboost',
657
  'input': 'smiles',
658
- 'path': 'permeability-xgboost_smiles.json',
659
  'unit': 'Probability',
660
  'display_name': 'πŸͺ£ Permeability',
661
  'positive_label': 'Permeable',
@@ -664,7 +824,7 @@ class UnifiedPeptidePredictor:
664
  'half_life_seq': {
665
  'type': 'pytorch_cnn',
666
  'input': 'sequence',
667
- 'path': 'best_model_half_life.pth',
668
  'transform': lambda x: 10**x,
669
  'unit': 'hours',
670
  'display_name': '⏱️ Half-life',
@@ -674,7 +834,7 @@ class UnifiedPeptidePredictor:
674
  'nonfouling_seq': {
675
  'type': 'xgboost',
676
  'input': 'sequence',
677
- 'path': 'best_model_nonfouling.json',
678
  'unit': 'Probability',
679
  'display_name': 'πŸ‘― Non-Fouling',
680
  'positive_label': 'Non-toxic',
@@ -683,7 +843,7 @@ class UnifiedPeptidePredictor:
683
  'nonfouling_smiles': {
684
  'type': 'xgboost',
685
  'input': 'smiles',
686
- 'path': 'nonfouling-xgboost_smiles.json',
687
  'unit': 'Probability',
688
  'display_name': 'πŸ‘― Non-Fouling',
689
  'positive_label': 'Stable',
@@ -692,14 +852,14 @@ class UnifiedPeptidePredictor:
692
  'binding_affinity': {
693
  'type': 'binding',
694
  'input': 'dual_sequence',
695
- 'path': 'binding_affinity_unpooled.pt',
696
  'unit': 'Probability',
697
  'display_name': 'πŸ”— Binding Affinity'
698
  },
699
  'binding_affinity_smiles': {
700
  'type': 'binding_smiles',
701
  'input': 'sequence+smiles',
702
- 'path': 'binding-affinity_smiles.pt',
703
  'unit': 'Probability',
704
  'display_name': 'πŸ”— Binding Affinity (SMILES)'
705
  },
@@ -927,7 +1087,7 @@ def initialize():
927
  """Initialize the predictor"""
928
  global predictor
929
  if predictor is None:
930
- predictor = UnifiedPeptidePredictor(model_dir="models")
931
  return predictor
932
 
933
 
@@ -1175,7 +1335,21 @@ def load_example(example_name):
1175
  return examples[example_name][0], ""
1176
  return "", ""
1177
 
 
 
 
 
 
 
 
1178
 
 
 
 
 
 
 
 
1179
  # ==================== Gradio App ====================
1180
 
1181
  custom_css = """
@@ -1202,7 +1376,18 @@ h1 {
1202
  text-align: center;
1203
  margin-bottom: 10px !important;
1204
  }
1205
-
 
 
 
 
 
 
 
 
 
 
 
1206
  table {
1207
  font-size: 14px !important;
1208
  }
@@ -1217,8 +1402,8 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo")) as de
1217
  # Header
1218
  gr.Markdown(
1219
  """
1220
- # β˜„οΈ PeptiVerse
1221
- ### Peptide Property Predictions
1222
  """
1223
  )
1224
 
@@ -1319,7 +1504,6 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo")) as de
1319
  # Results Section
1320
  with gr.Group():
1321
  gr.Markdown("### πŸ“Š Results")
1322
- gr.Markdown("*Click on property names to view distribution plots*")
1323
 
1324
  results_df = gr.Dataframe(
1325
  headers=["Sequence", "Property", "Prediction", "Value", "Unit"],
@@ -1340,8 +1524,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo")) as de
1340
  """
1341
  ---
1342
  <div style='text-align: center; color: #6b7280;'>
1343
- <p>Models: ESM2-650M embeddings + XGBoost/CNN classifiers</p>
1344
- <p style='font-size: 0.9em;'>Click on property names in results to view training data distributions</p>
1345
  </div>
1346
  """
1347
  )
@@ -1349,7 +1532,7 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo")) as de
1349
  # Event Handlers
1350
  def update_visibility(binding_checked):
1351
  return gr.update(visible=binding_checked)
1352
-
1353
  binding_affinity.change(
1354
  update_visibility,
1355
  inputs=[binding_affinity],
@@ -1357,11 +1540,10 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="indigo")) as de
1357
  )
1358
 
1359
  example_dropdown.change(
1360
- load_example,
1361
  inputs=[example_dropdown],
1362
  outputs=[input_text, protein_seq]
1363
  )
1364
-
1365
  predict_btn.click(
1366
  predict_properties,
1367
  inputs=[
 
12
  import time
13
  from typing import List, Dict, Any, Tuple, Optional
14
 
15
+ from huggingface_hub import snapshot_download
16
+ from pathlib import Path
17
+ import os
18
+
19
def pick_assets_root() -> Path:
    """Return a directory in which models, data and caches can be stored.

    Candidates are tried in this order; the first one that can be created
    (or already exists) is returned:

    1. ``$HF_ASSETS_DIR`` -- an explicit manual override. It is honoured
       before any auto-detection so that it really overrides; if it cannot
       be created the error propagates instead of silently falling back.
    2. ``/home/user/assets`` when the process appears to run on Hugging Face
       Spaces (``SPACE_ID`` is set or ``/home/user`` exists).
    3. ``~/assets``, ``./assets`` and ``/tmp/assets``, in that order.

    Returns:
        Path: the selected assets directory.

    Raises:
        OSError: if ``HF_ASSETS_DIR`` is set but cannot be created.
        RuntimeError: if none of the automatic candidates can be created.
    """
    # Allow manual override -- checked first, otherwise it could never take
    # effect on hosts where the Spaces location is available.
    override = os.environ.get("HF_ASSETS_DIR")
    if override:
        root = Path(override)
        root.mkdir(parents=True, exist_ok=True)
        return root

    candidates = []

    # HF Spaces container uses /home/user; detect via SPACE_ID or existence
    spaces_root = Path("/home/user/assets")
    if os.environ.get("SPACE_ID") or spaces_root.parent.exists():
        candidates.append(spaces_root)

    # Local fallbacks
    candidates.extend(
        [Path.home() / "assets", Path.cwd() / "assets", Path("/tmp/assets")]
    )

    for candidate in candidates:
        try:
            candidate.mkdir(parents=True, exist_ok=True)
        except OSError:
            # Not creatable here (permissions, read-only FS, ...): try the next.
            continue
        return candidate
    raise RuntimeError("No writable assets directory found.")
43
+
44
# Root directory for every downloaded or cached artifact, resolved at import.
ASSETS = pick_assets_root()

# Put all caches on the same writable disk. A variable that is already set in
# the environment keeps its value; the default directory is created either way.
_CACHE_DIR_DEFAULTS = (
    ("HF_HOME", ASSETS / "hf"),
    ("HUGGINGFACE_HUB_CACHE", ASSETS / "hf" / "cache"),
    ("TRANSFORMERS_CACHE", ASSETS / "transformers"),
    ("HF_DATASETS_CACHE", ASSETS / "hf" / "datasets"),
    ("XDG_CACHE_HOME", ASSETS / "xdg"),
    ("TMPDIR", ASSETS / "tmp"),
)
for _env_name, _default_dir in _CACHE_DIR_DEFAULTS:
    os.environ.setdefault(_env_name, str(_default_dir))
    _default_dir.mkdir(parents=True, exist_ok=True)

# Local download targets for model weights and training data.
ASSETS_MODELS = ASSETS / "models"
ASSETS_MODELS.mkdir(parents=True, exist_ok=True)
ASSETS_DATA = ASSETS / "training_data"
ASSETS_DATA.mkdir(parents=True, exist_ok=True)

# Hub repository ids used by fetch_models_and_data; both name the same repo.
MODEL_REPO = "ChatterjeeLab/Classifier_Weight"
DATASET_REPO = "ChatterjeeLab/Classifier_Weight"
63
+
64
def fetch_models_and_data():
    """Download model files and training data into the local assets folders.

    Two ``snapshot_download`` calls are issued, in order:

    * ``MODEL_REPO`` -> ``ASSETS_MODELS``, restricted to files under
      ``models/`` with weight/config extensions.
    * ``DATASET_REPO`` -> ``ASSETS_DATA``, restricted to files under
      ``training_data/``.

    Because the patterns keep the repository sub-folder, downloaded files end
    up in ``ASSETS_MODELS/models/...`` and ``ASSETS_DATA/training_data/...``.
    """
    downloads = (
        (
            MODEL_REPO,
            ASSETS_MODELS,
            [
                "models/*.pt", "models/*.pth", "models/*.ckpt", "models/*.safetensors",
                "models/*.json", "models/*.yaml", "models/*.yml",
            ],
        ),
        (
            # repo_type is deliberately not passed for the data download either.
            DATASET_REPO,
            ASSETS_DATA,
            ["training_data/*.csv", "training_data/*.npz", "training_data/*.md"],
        ),
    )
    for repo_id, target_dir, patterns in downloads:
        snapshot_download(
            repo_id=repo_id,
            local_dir=str(target_dir),
            local_dir_use_symlinks=True,
            allow_patterns=patterns,
        )
80
+
81
+ fetch_models_and_data()
82
+
83
  try:
84
  from rdkit import Chem
85
  from rdkit.Chem import Descriptors, AllChem
 
424
  # ==================== Data Management ====================
425
 
426
  class TrainingDataManager:
427
+ def __init__(self, data_dir=ASSETS_DATA):
428
+ self.data_dir = Path(data_dir)
 
429
  self.data_dir.mkdir(exist_ok=True)
430
  self.statistics = self.load_statistics()
431
 
432
  def _load_half_life_csv(self):
433
+ csv_path = self.data_dir / "training_data/half_life_smiles.csv"
434
  if not csv_path.exists():
435
  return None
436
  try:
 
463
  Returns: {'values': y, 'unit': 'Class (0=neg, 1=pos)', 'kind': 'binary', 'n_pos': int, 'n_neg': int}
464
  or None if missing.
465
  """
466
+ pos_path = self.data_dir / f"training_data/{prefix}-positive.npz"
467
+ neg_path = self.data_dir / f"training_data/{prefix}-negative.npz"
468
  if not pos_path.exists() or not neg_path.exists():
469
  return None
470
  try:
 
486
  except Exception as e:
487
  print(f"[TrainingDataManager] binary load error for '{prefix}': {e}")
488
  return None
489
+
490
def _load_binding_affinity_csv(self):
    """Load raw binding-affinity values from ``training_data/c-binding.csv``.

    The ``affinity`` column is coerced to numeric and NaNs are dropped so the
    values can be plotted; no filtering or clipping is applied. (The original
    author describes the values as pKd/pKi-like, i.e. on a -log scale.)

    Returns:
        dict | None: an overlay for the property statistics with the keys
        ``values`` (NumPy array), ``description``, ``unit``, ``threshold``
        (tight), ``threshold_secondary`` (weak), ``kind`` and
        ``download_link``. ``None`` is returned when the file is missing,
        when no numeric value remains, or when reading fails (the error is
        printed, not raised).
    """
    csv_path = self.data_dir / "training_data/c-binding.csv"
    if not csv_path.exists():
        return None
    try:
        df = pd.read_csv(csv_path)
        if "affinity" not in df.columns:
            # Caught below and reported like any other load error.
            raise ValueError("CSV must contain an 'affinity' column.")

        vals = pd.to_numeric(df["affinity"], errors="coerce").dropna().to_numpy()
        if len(vals) == 0:
            return None

        return {
            "values": vals,
            # En dash restored; the literal was previously mis-encoded.
            "description": "Protein\u2013ligand binding affinity normalized",
            "unit": "score",
            "threshold": 7.5,            # main threshold (tight)
            "threshold_secondary": 6.0,  # weak threshold
            "kind": "continuous",
            "download_link": str(csv_path),
        }
    except Exception as e:
        # Best-effort loader: a broken CSV must not prevent the app starting.
        print(f"[TrainingDataManager] binding-affinity load error: {e}")
        return None
519
+
520
def _load_permeability_pampa_csv(self):
    """Load PAMPA permeability values from ``training_data/nc-CPP-processed.csv``.

    Only the ``PAMPA`` column is read; it is coerced to numeric and NaNs are
    dropped. No filtering or clipping is applied. The display threshold is
    the median of the loaded values.

    Returns:
        dict | None: overlay with the keys ``values``, ``description``,
        ``unit``, ``threshold``, ``kind`` and ``download_link``; ``None``
        when the file is missing, no numeric value remains, or reading fails
        (the error is printed, not raised).
    """
    source = self.data_dir / "training_data/nc-CPP-processed.csv"
    if not source.exists():
        return None
    try:
        table = pd.read_csv(source)
        if "PAMPA" not in table.columns:
            # Caught below and reported like any other load error.
            raise ValueError("CSV must contain a 'PAMPA' column.")

        numeric = pd.to_numeric(table["PAMPA"], errors="coerce")
        pampa = numeric.dropna().to_numpy()
        if pampa.size == 0:
            return None

        # Median of the data serves as the plotted threshold.
        median_threshold = float(np.median(pampa))
        overlay = {
            "values": pampa,
            "description": "Cell membrane permeability measurements",
            "unit": "log Peff",
            "threshold": median_threshold,
            "kind": "continuous",
            "download_link": str(source),
        }
        return overlay
    except Exception as err:
        print(f"[TrainingDataManager] permeability PAMPA load error: {err}")
        return None
551
 
552
  def load_statistics(self):
553
  """Load pre-computed statistics for each property"""
 
600
  # Overlay real half-life
601
  hl = self._load_half_life_csv()
602
  if hl is not None:
603
+ stats["half_life (smiles)"].update(hl)
604
 
605
  # Overlay real solubility from sol-* (binary)
606
  sol = self._load_binary_pair("sol")
 
615
  hemo = self._load_binary_pair("hemo")
616
  if hemo is not None:
617
  stats["hemolysis"].update(hemo)
618
+
619
+ ba = self._load_binding_affinity_csv()
620
+ if ba is not None:
621
+ stats["binding_affinity"].update(ba)
622
+
623
+ pampa = self._load_permeability_pampa_csv()
624
+ if pampa is not None:
625
+ stats["permeability"].update(pampa)
626
 
627
  return stats
628
 
 
656
  # continuous
657
  fig = go.Figure()
658
  fig.add_trace(go.Histogram(x=vals, nbinsx=50, name="Training Data"))
659
+
660
+ # Primary threshold (if any)
661
  if "threshold" in s and s["threshold"] is not None:
662
  fig.add_vline(
663
+ x=float(s["threshold"]),
664
+ line_dash="dash",
665
+ line_color="purple" if property_name == "binding_affinity" else "red",
666
+ annotation_text=(
667
+ "Tight threshold: {:.3f}".format(float(s["threshold"]))
668
+ if property_name == "binding_affinity"
669
+ else "Threshold: {:.3f}".format(float(s["threshold"]))
670
+ ),
671
+ )
672
+
673
+ # Secondary threshold for binding (weak)
674
+ if property_name == "binding_affinity" and "threshold_secondary" in s and s["threshold_secondary"] is not None:
675
+ fig.add_vline(
676
+ x=float(s["threshold_secondary"]),
677
+ line_dash="dash",
678
+ line_color="orange",
679
+ annotation_text="Weak threshold: {:.3f}".format(float(s["threshold_secondary"])),
680
  )
681
+
682
+ # Current value
683
  if current_value is not None:
684
  fig.add_vline(
685
+ x=float(current_value),
686
+ line_dash="solid",
687
+ line_color="green",
688
+ line_width=3,
689
+ annotation_text=f"Your Result: {float(current_value):.3f}",
690
  )
691
+
692
  fig.update_layout(
693
  title=f"{property_name.replace('_', ' ').title()} Distribution",
694
  xaxis_title=s.get("unit", ""),
 
698
  )
699
  return fig
700
 
701
+
702
  def get_property_info(self, property_name):
703
  if property_name not in self.statistics:
704
  return None
 
766
  self.model_configs = self.get_model_configs()
767
 
768
  # Data manager
769
+ self.data_manager = TrainingDataManager(data_dir=ASSETS_DATA)
770
  self._protein_cache = {}
771
  # Load models
772
  self.load_all_models()
 
777
  'hemolysis_seq': {
778
  'type': 'xgboost',
779
  'input': 'sequence',
780
+ 'path': 'models/best_model_hemolysis.json',
781
  'inverse_score': False,
782
  'unit': 'Probability',
783
  'display_name': '🩸 Hemolysis',
 
787
  'hemolysis_smiles': {
788
  'type': 'xgboost',
789
  'input': 'smiles',
790
+ 'path': 'models/hemolysis-xgboost_smiles.json',
791
  'inverse_score': False,
792
  'unit': 'Probability',
793
  'display_name': '🩸 Hemolysis',
 
797
  'solubility_seq': {
798
  'type': 'xgboost',
799
  'input': 'sequence',
800
+ 'path': 'models/best_model_solubility.json',
801
  'unit': 'Probability',
802
  'display_name': 'πŸ’§ Solubility',
803
  'positive_label': 'Soluble',
 
806
  'solubility_smiles': {
807
  'type': 'xgboost',
808
  'input': 'smiles',
809
+ 'path': 'models/solubility-xgboost_smiles.json',
810
  'unit': 'Probability',
811
  'display_name': 'πŸ’§ Solubility',
812
  'positive_label': 'Soluble',
 
815
  'permeability_smiles': {
816
  'type': 'xgboost',
817
  'input': 'smiles',
818
+ 'path': 'models/permeability-xgboost_smiles.json',
819
  'unit': 'Probability',
820
  'display_name': 'πŸͺ£ Permeability',
821
  'positive_label': 'Permeable',
 
824
  'half_life_seq': {
825
  'type': 'pytorch_cnn',
826
  'input': 'sequence',
827
+ 'path': 'models/best_model_half_life.pth',
828
  'transform': lambda x: 10**x,
829
  'unit': 'hours',
830
  'display_name': '⏱️ Half-life',
 
834
  'nonfouling_seq': {
835
  'type': 'xgboost',
836
  'input': 'sequence',
837
+ 'path': 'models/best_model_nonfouling.json',
838
  'unit': 'Probability',
839
  'display_name': 'πŸ‘― Non-Fouling',
840
  'positive_label': 'Non-toxic',
 
843
  'nonfouling_smiles': {
844
  'type': 'xgboost',
845
  'input': 'smiles',
846
+ 'path': 'models/nonfouling-xgboost_smiles.json',
847
  'unit': 'Probability',
848
  'display_name': 'πŸ‘― Non-Fouling',
849
  'positive_label': 'Stable',
 
852
  'binding_affinity': {
853
  'type': 'binding',
854
  'input': 'dual_sequence',
855
+ 'path': 'models/binding_affinity_unpooled.pt',
856
  'unit': 'Probability',
857
  'display_name': 'πŸ”— Binding Affinity'
858
  },
859
  'binding_affinity_smiles': {
860
  'type': 'binding_smiles',
861
  'input': 'sequence+smiles',
862
+ 'path': 'models/binding-affinity_smiles.pt',
863
  'unit': 'Probability',
864
  'display_name': 'πŸ”— Binding Affinity (SMILES)'
865
  },
 
1087
  """Initialize the predictor"""
1088
  global predictor
1089
  if predictor is None:
1090
+ predictor = UnifiedPeptidePredictor(model_dir=ASSETS_MODELS)
1091
  return predictor
1092
 
1093
 
 
1335
  return examples[example_name][0], ""
1336
  return "", ""
1337
 
1338
def on_example_change(name: str):
    """Build the component updates for a newly selected example.

    Args:
        name: the example name chosen in the dropdown.

    Returns:
        tuple: two ``gr.update`` objects. The first sets the binder text; the
        second sets the protein text and makes that component visible only
        when ``name`` is ``"Protein-Peptide"``.
    """
    binder_text, protein_text = load_example(name)
    input_update = gr.update(value=binder_text)
    protein_update = gr.update(
        value=protein_text,
        visible=(name == "Protein-Peptide"),
    )
    return input_update, protein_update
1345
 
1346
def on_example_load(name: str):
    """Return the same component updates as ``on_example_change``.

    This handler used to be a line-for-line copy of ``on_example_change``;
    it now delegates so the two cannot drift apart.

    Args:
        name: the example name to load.

    Returns:
        tuple: two ``gr.update`` objects -- the binder text update and the
        protein text/visibility update (visible only for
        ``"Protein-Peptide"``).
    """
    return on_example_change(name)
1353
  # ==================== Gradio App ====================
1354
 
1355
  custom_css = """
 
1376
  text-align: center;
1377
  margin-bottom: 10px !important;
1378
  }
1379
+ h3 {
1380
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
1381
+ -webkit-background-clip: text;
1382
+ -webkit-text-fill-color: transparent;
1383
+ text-align: center !important;
1384
+ font-size: 1.3em !important;
1385
+ margin-top: -5px !important;
1386
+ }
1387
+ .gr-form {
1388
+ border-radius: 12px !important;
1389
+ border-color: #e5e7eb !important;
1390
+ }
1391
  table {
1392
  font-size: 14px !important;
1393
  }
 
1402
  # Header
1403
  gr.Markdown(
1404
  """
1405
+ # 🌐 PeptiVerse
1406
+ ### \t Peptide Property Predictions
1407
  """
1408
  )
1409
 
 
1504
  # Results Section
1505
  with gr.Group():
1506
  gr.Markdown("### πŸ“Š Results")
 
1507
 
1508
  results_df = gr.Dataframe(
1509
  headers=["Sequence", "Property", "Prediction", "Value", "Unit"],
 
1524
  """
1525
  ---
1526
  <div style='text-align: center; color: #6b7280;'>
1527
+ <p>Please Cite Us.</p>
 
1528
  </div>
1529
  """
1530
  )
 
1532
  # Event Handlers
1533
  def update_visibility(binding_checked):
1534
  return gr.update(visible=binding_checked)
1535
+
1536
  binding_affinity.change(
1537
  update_visibility,
1538
  inputs=[binding_affinity],
 
1540
  )
1541
 
1542
  example_dropdown.change(
1543
+ on_example_change,
1544
  inputs=[example_dropdown],
1545
  outputs=[input_text, protein_seq]
1546
  )
 
1547
  predict_btn.click(
1548
  predict_properties,
1549
  inputs=[
description.md CHANGED
@@ -5,29 +5,29 @@
5
  Our models are trained on curated datasets from multiple sources:
6
 
7
  #### Hemolysis Dataset
8
- - **Primary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
9
- - **Secondary Source:** the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv3)
10
  - **Size:** 9,316 peptides, with 19.6% being positive (hemolytic) and 80.4% being negative (nonhemolytic)
11
  - **Description:** Probability of peptide disrupting red blood cell membranes.
12
  - **Download:** [hemolysis_training_data.csv](#)
13
 
14
  #### Solubility Dataset
15
- - **Primary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
16
- - **Secondary Source:** PROSO-II
17
  - **Size:** 18,453 sequences, with 47.6% being labeled as positives and 52.4% being labeled as negatives
18
  - **Description:** Probability of peptide remaining dissolved in aqueous conditions.
19
  - **Download:** [solubility_training_data.csv](#)
20
 
21
  #### Non-Fouling Dataset
22
- - **Primary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
23
- - **Secondary Source:** [Classifying antimicrobial and multifunctional peptides with Bayesian network models](https://doi.org/10.1002/pep2.24079)
24
  - **Size:** 3,600 positive, 13,585 negative
25
  - **Description:** A nonfouling peptide resists nonspecific interactions and protein adsorption.
26
  - **Download:** [solubility_training_data.csv](#)
27
 
28
  #### Permeability Dataset
29
- - **Primary Source:** [PepLand](https://arxiv.org/abs/2311.04419)
30
- - **Secondary Source:** CycPeptMPDB
31
  - **Size:** 1,162 positive and 1,162 negative canonical samples (compiled from 22 cell-penetrating peptide databases and the literature on existing cell-penetrating peptide prediction models); CycPeptMPDB provides an additional 7,334 cyclic peptides
32
  - **Description:** Probability of peptide penetrating the cell membrane.
33
  - **Download:** [binding_affinity_training_data.csv](#)
@@ -57,14 +57,9 @@ Our models are trained on curated datasets from multiple sources:
57
 
58
  If you use this tool, please cite:
59
  ```
60
- @article{peptiprop2024,
61
- title={PeptiProp: Unified Platform for Peptide Property Prediction},
62
- author={Your Name et al.},
63
- journal={Journal Name},
64
- year={2024}
65
- }
66
  ```
67
 
68
  ### Contact
69
 
70
- For questions or collaborations: [contact@example.com](mailto:contact@example.com)
 
5
  Our models are trained on curated datasets from multiple sources:
6
 
7
  #### Hemolysis Dataset
8
+ - **Primary Source:** [the Database of Antimicrobial Activity and Structure of Peptides (DBAASPv3)](https://academic.oup.com/nar/article-abstract/49/D1/D288/5957160)
9
+ - **Secondary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
10
  - **Size:** 9,316 peptides, with 19.6% being positive (hemolytic) and 80.4% being negative (nonhemolytic)
11
  - **Description:** Probability of peptide disrupting red blood cell membranes.
12
  - **Download:** [hemolysis_training_data.csv](#)
13
 
14
  #### Solubility Dataset
15
+ - **Primary Source:** [PROSO-II](https://febs.onlinelibrary.wiley.com/doi/abs/10.1111/j.1742-4658.2012.08603.x)
16
+ - **Secondary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
17
  - **Size:** 18,453 sequences, with 47.6% being labeled as positives and 52.4% being labeled as negatives
18
  - **Description:** Probability of peptide remaining dissolved in aqueous conditions.
19
  - **Download:** [solubility_training_data.csv](#)
20
 
21
  #### Non-Fouling Dataset
22
+ - **Primary Source:** [Classifying antimicrobial and multifunctional peptides with Bayesian network models](https://doi.org/10.1002/pep2.24079)
23
+ - **Secondary Source:** [peptideBERT](https://pubs.acs.org/doi/abs/10.1021/acs.jpclett.3c02398)
24
  - **Size:** 3,600 positive, 13,585 negative
25
  - **Description:** A nonfouling peptide resists nonspecific interactions and protein adsorption.
26
  - **Download:** [nonfouling_training_data.csv](#)
27
 
28
  #### Permeability Dataset
29
+ - **Primary Source:** [CycPeptMPDB](https://pubs.acs.org/doi/abs/10.1021/acs.jcim.2c01573), [PAMPA](https://doi.org/10.1517/17425255.1.2.325)
30
+ - **Secondary Source:** [PepLand](https://arxiv.org/abs/2311.04419)
31
  - **Size:** 1,162 positive and 1,162 negative canonical samples (compiled from 22 cell-penetrating peptide databases and the literature on existing cell-penetrating peptide prediction models); CycPeptMPDB provides an additional 7,334 cyclic peptides
32
  - **Description:** Probability of peptide penetrating the cell membrane.
33
  - **Download:** [permeability_training_data.csv](#)
 
57
 
58
  If you use this tool, please cite:
59
  ```
60
+ place holder
 
 
 
 
 
61
  ```
62
 
63
  ### Contact
64
 
65
+ For questions or collaborations: [yzhang@u.duke.nus.edu](mailto:yzhang@u.duke.nus.edu)