Spaces:
Running
on
Zero
Running
on
Zero
vladimir.manuylov
committed on
Commit
·
58ac1b4
1
Parent(s):
f42fb15
added logging for free
Browse files
app.py
CHANGED
|
@@ -6,11 +6,13 @@ import gradio as gr
|
|
| 6 |
import torch
|
| 7 |
from torch.utils.data import DataLoader
|
| 8 |
import lightning.pytorch as pl
|
| 9 |
-
from protobind_diff.esm_inference import get_esm_embedding
|
| 10 |
from protobind_diff.model import ModelGenerator
|
| 11 |
from protobind_diff.data_loader import InferenceDataset
|
| 12 |
from huggingface_hub import hf_hub_download
|
| 13 |
import spaces
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Hugging Face Hub details
|
| 16 |
REPO_ID = "ai-gero/ProtoBind-Diff"
|
|
@@ -18,10 +20,11 @@ MODEL_FILENAME = "model.ckpt"
|
|
| 18 |
TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
|
| 19 |
|
| 20 |
@spaces.GPU(duration=120)
|
| 21 |
-
def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
|
| 22 |
"""
|
| 23 |
The main prediction function that runs the full pipeline.
|
| 24 |
"""
|
|
|
|
| 25 |
if not protein_sequence:
|
| 26 |
raise gr.Error("Protein sequence cannot be empty.")
|
| 27 |
protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
|
|
@@ -58,6 +61,26 @@ def generate_smiles_for_sequence(protein_sequence: str, num_samples: int):
|
|
| 58 |
return ",\n".join(unique_smiles)
|
| 59 |
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# --- GRADIO APP DEFINITION ---
|
| 62 |
|
| 63 |
# Load models on app startup
|
|
|
|
import hashlib
import json
import os
import uuid
from datetime import datetime, timezone
from pathlib import Path

import torch
from torch.utils.data import DataLoader
import lightning.pytorch as pl
from huggingface_hub import hf_hub_download, CommitScheduler
import spaces

from protobind_diff.model import ModelGenerator
from protobind_diff.data_loader import InferenceDataset
|
| 16 |
|
| 17 |
# Hugging Face Hub details
|
| 18 |
REPO_ID = "ai-gero/ProtoBind-Diff"
|
|
|
|
| 20 |
TOKENIZER_FILENAME = "tokenizer_smiles_diffusion.json"
|
| 21 |
|
| 22 |
@spaces.GPU(duration=120)
|
| 23 |
+
def generate_smiles_for_sequence(protein_sequence: str, num_samples: int, request: Request):
|
| 24 |
"""
|
| 25 |
The main prediction function that runs the full pipeline.
|
| 26 |
"""
|
| 27 |
+
log_run(request.client.host or "unknown", protein_sequence)
|
| 28 |
if not protein_sequence:
|
| 29 |
raise gr.Error("Protein sequence cannot be empty.")
|
| 30 |
protein_sequence = re.sub(r"[^A-Z]", "", protein_sequence.upper())
|
|
|
|
| 61 |
return ",\n".join(unique_smiles)
|
| 62 |
|
| 63 |
|
| 64 |
+
def log_run(client_ip: str, seq: str) -> None:
    """Write a single anonymized usage record into LOG_FOLDER as a JSON file.

    The raw protein sequence is never stored: only its SHA-256 hash and
    length are logged, together with a UTC timestamp and the client address.
    CommitScheduler later syncs the folder to the usage dataset.

    Args:
        client_ip: Caller's IP address (or "unknown").
        seq: Raw protein sequence submitted by the user.
    """
    record = {
        # Timezone-aware UTC timestamp. The original datetime.utcnow() was
        # both deprecated and a NameError here (datetime was never imported).
        "ts": datetime.now(timezone.utc).isoformat(timespec="seconds"),
        "client": client_ip,
        # Hash instead of the raw sequence -> no sensitive payload on disk.
        "seq_hash": hashlib.sha256(seq.encode()).hexdigest(),
        "seq_len": len(seq),
    }
    # uuid4 filename avoids collisions between concurrent requests.
    (LOG_FOLDER / f"{uuid.uuid4()}.json").write_text(json.dumps(record))
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Folder that log_run() writes per-request JSON records into.
LOG_FOLDER = Path("usage_logs")
LOG_FOLDER.mkdir(exist_ok=True)

# Periodically push accumulated usage logs to the usage dataset repo.
# NOTE(review): CommitScheduler's `every` parameter is expressed in
# *minutes*, so every=300 commits once every 5 hours — confirm this was
# not intended to be 300 seconds (i.e. every=5).
scheduler = CommitScheduler(
    repo_id="ai-gero/protobind_usage",
    repo_type="dataset",
    folder_path=str(LOG_FOLDER),
    every=300,
    # `os` was missing from the imports — os.getenv raised a NameError at
    # startup. Requires the HF_TOKEN Space secret; None falls back to the
    # cached login, if any.
    token=os.getenv("HF_TOKEN"),
)
|
| 83 |
+
|
| 84 |
# --- GRADIO APP DEFINITION ---
|
| 85 |
|
| 86 |
# Load models on app startup
|