brocks1234
/

dnabert2-langgraph-handler

Model card Files Files and versions

brocks1234 committed on Apr 22

Commit

fd76be3

·

verified ·

1 Parent(s): e299aae

Update handler.py

Files changed (1) hide show

handler.py +4 -6

handler.py CHANGED Viewed

@@ -3,12 +3,12 @@ import torch
 from typing import Any, Dict, List
 from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
-# Force the trust flag at the environment level
 os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
 class EndpointHandler:
     def __init__(self, path=""):
-        # We explicitly ignore 'path' and pull from the source
         self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
         self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
@@ -28,7 +28,7 @@ class EndpointHandler:
         if isinstance(inputs, list):
             inputs = inputs[0]
-        # 12.2kb APRIL promoter chunking logic
         chunk_size = 1000
         stride = 500
         chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
@@ -44,6 +44,4 @@ class EndpointHandler:
                 chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
                 all_embeddings.append(chunk_emb)
-        # Average the chunks for one representative vector
-        final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
-        return final_embedding

 from typing import Any, Dict, List
 from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
+# Force the trust flag globally
 os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
 class EndpointHandler:
     def __init__(self, path=""):
+        # We ignore the local 'path' and pull fresh from the source
         self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
         self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
         if isinstance(inputs, list):
             inputs = inputs[0]
+        # 12.2kb APRIL promoter chunking
         chunk_size = 1000
         stride = 500
         chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
                 chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
                 all_embeddings.append(chunk_emb)
+        return torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()