Spaces:
Running
on
Zero
Running
on
Zero
vladimir.manuylov
committed on
Commit
·
58ac1b4
1
Parent(s):
f42fb15
added logging for free
Browse files
app.py
CHANGED
|
@@ -6,11 +6,13 @@ import gradio as gr
|
|
| 6 |
import torch
|
| 7 |
from torch.utils.data import DataLoader
|
| 8 |
import lightning.pytorch as pl
|
| 9 |
-
from protobind_diff.esm_inference import get_esm_embedding
|
| 10 |
from protobind_diff.model import ModelGenerator
|
| 11 |
from protobind_diff.data_loader import InferenceDataset
|
| 12 |
from huggingface_hub import hf_hub_download
|
| 13 |
import spaces
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Hugging Face Hub details
|
| 16 |
REPO_ID = "ai-gero/ProtoBind-Diff"
|
|
@@ -18,10 +20,11 @@ MODEL_FILENAME = "model.ckpt"
|
|
| 18 |
TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
|
| 19 |
|
| 20 |
@spaces.GPU(duration=120)
|
| 21 |
-
def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
|
| 22 |
"""
|
| 23 |
The main prediction function that runs the full pipeline.
|
| 24 |
"""
|
|
|
|
| 25 |
if not protein_sequence:
|
| 26 |
raise gr.Error("Protein sequence cannot be empty.")
|
| 27 |
protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
|
|
@@ -58,6 +61,26 @@ def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
|
|
| 58 |
return ",\n".join(unique_smiles)
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# --- GRADIO APP DEFINITION ---
|
| 62 |
|
| 63 |
# Load models on app startup
|
|
|
|
import hashlib
import json
import os
import uuid
from datetime import datetime, timezone
from pathlib import Path

import torch
from torch.utils.data import DataLoader
import lightning.pytorch as pl
from huggingface_hub import hf_hub_download, CommitScheduler
import spaces

from protobind_diff.model import ModelGenerator
from protobind_diff.data_loader import InferenceDataset
|
| 16 |
|
| 17 |
# Hugging Face Hub details
|
| 18 |
REPO_ID = "ai-gero/ProtoBind-Diff"
|
|
|
|
| 20 |
TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
|
| 21 |
|
| 22 |
@spaces.GPU(duration=120)
|
| 23 |
+
def generate_smiles_for_sequence(protein_sequence: str, num_samples: int, request: Request):
|
| 24 |
"""
|
| 25 |
The main prediction function that runs the full pipeline.
|
| 26 |
"""
|
| 27 |
+
log_run(request.client.host or "unknown", protein_sequence)
|
| 28 |
if not protein_sequence:
|
| 29 |
raise gr.Error("Protein sequence cannot be empty.")
|
| 30 |
protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
|
|
|
|
| 61 |
return ",\n".join(unique_smiles)
|
| 62 |
|
| 63 |
|
| 64 |
+
def log_run(client_ip: str, seq: str) -> None:
    """Write a single anonymized usage record into LOG_FOLDER as a JSON file.

    The raw protein sequence is never stored: only its SHA-256 hash and
    length are logged, together with a UTC timestamp and the client address.
    CommitScheduler later syncs the folder to the usage dataset.

    Args:
        client_ip: Caller's IP address (or "unknown").
        seq: Raw protein sequence submitted by the user.
    """
    record = {
        # Timezone-aware UTC timestamp. The original datetime.utcnow() was
        # both deprecated and a NameError here (datetime was never imported).
        "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "client": client_ip,
        # Hash instead of the raw sequence -> no sensitive payload on disk.
        "seq_hash": hashlib.sha256(seq.encode()).hexdigest(),
        "seq_len": len(seq),
    }
    # uuid4 filename avoids collisions between concurrent requests.
    (LOG_FOLDER / f"{uuid.uuid4()}.json").write_text(json.dumps(record))
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Folder that log_run() writes per-request JSON records into.
LOG_FOLDER = Path("usage_logs")
LOG_FOLDER.mkdir(exist_ok=True)

# Periodically push accumulated usage logs to the usage dataset repo.
# NOTE(review): CommitScheduler's `every` parameter is expressed in
# *minutes*, so every=300 commits once every 5 hours — confirm this was
# not intended to be 300 seconds (i.e. every=5).
scheduler = CommitScheduler(
    repo_id="ai-gero/protobind_usage",
    repo_type="dataset",
    folder_path=str(LOG_FOLDER),
    every=300,
    # `os` was missing from the imports — os.getenv raised a NameError at
    # startup. Requires the HF_TOKEN Space secret; None falls back to the
    # cached login, if any.
    token=os.getenv("HF_TOKEN"),
)
|
| 83 |
+
|
| 84 |
# --- GRADIO APP DEFINITION ---
|
| 85 |
|
| 86 |
# Load models on app startup
|