brocks1234
/

dnabert2-langgraph-handler

Model card · Files and versions

xet

Community

brocks1234 committed 28 days ago

Commit

dd09c74

verified ·

1 Parent(s): 8fee4ab

Update handler.py

Browse files

Files changed (1) hide show

handler.py +14 -26

handler.py CHANGED Viewed

@@ -1,29 +1,15 @@
-import os
-# Force PyTorch to use its built-in stable attention and ignore custom kernels
-os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"
 import torch
 from typing import Any, Dict, List
-from transformers import AutoTokenizer, AutoModel, AutoConfig
 class EndpointHandler:
     def __init__(self, path=""):
-        self.model_id = "zhihan1996/DNABERT-2-117M"
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
-        # 1. Load config and EXPLICITLY set the attn_implementation to 'eager'
-        # 'eager' means 'plain PyTorch math' - no Triton, no Flash, just stability.
-        config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
-        config.use_flash_attn = False
-        # 2. Load Model with the 'eager' implementation if supported
-        self.model = AutoModel.from_pretrained(
-            self.model_id,
-            trust_remote_code=True,
-            config=config,
-            attn_implementation="eager"
-        )
         if torch.cuda.is_available():
             self.model = self.model.to("cuda")
@@ -34,21 +20,23 @@ class EndpointHandler:
         if isinstance(inputs, list):
             inputs = inputs[0]
         encoded_input = self.tokenizer(
             inputs,
-            return_tensors='pt',
-            padding=True,
             truncation=True,
-            max_length=512
         )
         if torch.cuda.is_available():
             encoded_input = {k: v.to("cuda") for k, v in encoded_input.items()}
         with torch.no_grad():
-            # 3. Use the inference mode context for extra stability
-            with torch.inference_mode():
-                outputs = self.model(**encoded_input)
-        embeddings = outputs[0][0].mean(dim=0).cpu().numpy().tolist()
         return embeddings

 import torch
 from typing import Any, Dict, List
+from transformers import AutoTokenizer, AutoModel
 class EndpointHandler:
     def __init__(self, path=""):
+        # We'll use the 'phulia' variant which is highly regarded for stability
+        self.model_id = "kuleshov-group/caduceus-phulia-16-soft"
+        # Load tokenizer and model
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
+        self.model = AutoModel.from_pretrained(self.model_id, trust_remote_code=True)
         if torch.cuda.is_available():
             self.model = self.model.to("cuda")
         if isinstance(inputs, list):
             inputs = inputs[0]
+        # Caduceus often performs better without excessive padding
         encoded_input = self.tokenizer(
             inputs,
+            return_tensors='pt',
             truncation=True,
+            max_length=2048 # Caduceus handles long sequences better than BERT
         )
         if torch.cuda.is_available():
             encoded_input = {k: v.to("cuda") for k, v in encoded_input.items()}
         with torch.no_grad():
+            outputs = self.model(**encoded_input)
+        # Caduceus (Mamba) outputs hidden states.
+        # We take the mean across the sequence length (dim 1)
+        # to get a fixed-size vector for your LangGraph logic.
+        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy().tolist()
         return embeddings