brocks1234
/

dnabert2-langgraph-handler

Model card Files Files and versions

brocks1234 committed on Apr 22

Commit

e299aae

·

verified ·

1 Parent(s): c0a56cc

Update handler.py

Files changed (1) hide show

handler.py +4 -4

handler.py CHANGED Viewed

@@ -3,15 +3,14 @@ import torch
 from typing import Any, Dict, List
 from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
-# Force it at the OS level before any other imports happen
 os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
 class EndpointHandler:
     def __init__(self, path=""):
-        # We ignore 'path' to avoid the toolkit's broken local cache
         self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
-        # Explicitly load from the official repo, not the local folder
         self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
         self.model = AutoModelForMaskedLM.from_pretrained(
@@ -29,7 +28,7 @@ class EndpointHandler:
         if isinstance(inputs, list):
             inputs = inputs[0]
-        # Standard chunking for your 12.2kb APRIL promoter
         chunk_size = 1000
         stride = 500
         chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
@@ -45,5 +44,6 @@ class EndpointHandler:
                 chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
                 all_embeddings.append(chunk_emb)
         final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
         return final_embedding

 from typing import Any, Dict, List
 from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
+# Force the trust flag at the environment level
 os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
 class EndpointHandler:
     def __init__(self, path=""):
+        # We explicitly ignore 'path' and pull from the source
         self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
         self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
         self.model = AutoModelForMaskedLM.from_pretrained(
         if isinstance(inputs, list):
             inputs = inputs[0]
+        # 12.2kb APRIL promoter chunking logic
         chunk_size = 1000
         stride = 500
         chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
                 chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
                 all_embeddings.append(chunk_emb)
+        # Average the chunks for one representative vector
         final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
         return final_embedding