vladimir.manuylov committed on
Commit
58ac1b4
·
1 Parent(s): f42fb15

added logging for free

Browse files
Files changed (1) hide show
  1. app.py +25 -2
app.py CHANGED
@@ -6,11 +6,13 @@ import gradio as gr
6
  import torch
7
  from torch.utils.data import DataLoader
8
  import lightning.pytorch as pl
9
- from protobind_diff.esm_inference import get_esm_embedding
10
  from protobind_diff.model import ModelGenerator
11
  from protobind_diff.data_loader import InferenceDataset
12
  from huggingface_hub import hf_hub_download
13
  import spaces
 
 
 
14
 
15
  # Hugging Face Hub details
16
  REPO_ID = "ai-gero/ProtoBind-Diff"
@@ -18,10 +20,11 @@ MODEL_FILENAME = "model.ckpt"
18
  TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
19
 
20
  @spaces.GPU(duration=120)
21
- def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
22
  """
23
  The main prediction function that runs the full pipeline.
24
  """
 
25
  if not protein_sequence:
26
  raise gr.Error("Protein sequence cannot be empty.")
27
  protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
@@ -58,6 +61,26 @@ def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
58
  return ",\n".join(unique_smiles)
59
 
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # --- GRADIO APP DEFINITION ---
62
 
63
  # Load models on app startup
 
6
  import torch
7
  from torch.utils.data import DataLoader
8
  import lightning.pytorch as pl
 
9
  from protobind_diff.model import ModelGenerator
10
  from protobind_diff.data_loader import InferenceDataset
11
  from huggingface_hub import hf_hub_download
12
  import spaces
13
+ from pathlib import Path
14
+ import uuid, json, hashlib
15
+ from huggingface_hub import CommitScheduler
16
 
17
  # Hugging Face Hub details
18
  REPO_ID = "ai-gero/ProtoBind-Diff"
 
20
  TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
21
 
22
  @spaces.GPU(duration=120)
23
+ def generate_smiles_for_sequence(protein_sequence: str, num_samples: int, request: Request):
24
  """
25
  The main prediction function that runs the full pipeline.
26
  """
27
+ log_run(request.client.host or "unknown", protein_sequence)
28
  if not protein_sequence:
29
  raise gr.Error("Protein sequence cannot be empty.")
30
  protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
 
61
  return ",\n".join(unique_smiles)
62
 
63
 
64
+ def log_run(client_ip: str, seq: str):
65
+ rec = {
66
+ "ts": datetime.utcnow().isoformat(timespec="seconds"),
67
+ "client": client_ip,
68
+ "seq_hash": hashlib.sha256(seq.encode()).hexdigest(),
69
+ "seq_len": len(seq)
70
+ }
71
+ (LOG_FOLDER / f"{uuid.uuid4()}.json").write_text(json.dumps(rec))
72
+
73
+
74
+ LOG_FOLDER = Path("usage_logs"); LOG_FOLDER.mkdir(exist_ok=True)
75
+
76
+ scheduler = CommitScheduler(
77
+ repo_id="ai-gero/protobind_usage",
78
+ repo_type="dataset",
79
+ folder_path=str(LOG_FOLDER),
80
+ every=300,
81
+ token=os.getenv("HF_TOKEN")
82
+ )
83
+
84
  # --- GRADIO APP DEFINITION ---
85
 
86
  # Load models on app startup