Update handler.py
Browse files
- handler.py +4 -4
handler.py
CHANGED
|
@@ -3,15 +3,14 @@ import torch
|
|
| 3 |
from typing import Any, Dict, List
|
| 4 |
from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
|
| 5 |
|
| 6 |
-
# Force
|
| 7 |
os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
|
| 8 |
|
| 9 |
class EndpointHandler:
|
| 10 |
def __init__(self, path=""):
|
| 11 |
-
# We ignore 'path'
|
| 12 |
self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
|
| 13 |
|
| 14 |
-
# Explicitly load from the official repo, not the local folder
|
| 15 |
self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
|
| 16 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
|
| 17 |
self.model = AutoModelForMaskedLM.from_pretrained(
|
|
@@ -29,7 +28,7 @@ class EndpointHandler:
|
|
| 29 |
if isinstance(inputs, list):
|
| 30 |
inputs = inputs[0]
|
| 31 |
|
| 32 |
-
#
|
| 33 |
chunk_size = 1000
|
| 34 |
stride = 500
|
| 35 |
chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
|
|
@@ -45,5 +44,6 @@ class EndpointHandler:
|
|
| 45 |
chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
|
| 46 |
all_embeddings.append(chunk_emb)
|
| 47 |
|
|
|
|
| 48 |
final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
|
| 49 |
return final_embedding
|
|
|
|
| 3 |
from typing import Any, Dict, List
|
| 4 |
from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
|
| 5 |
|
| 6 |
+
# Force the trust flag at the environment level
|
| 7 |
os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
|
| 8 |
|
| 9 |
class EndpointHandler:
|
| 10 |
def __init__(self, path=""):
|
| 11 |
+
# We explicitly ignore 'path' and pull from the source
|
| 12 |
self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
|
| 13 |
|
|
|
|
| 14 |
self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
|
| 15 |
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
|
| 16 |
self.model = AutoModelForMaskedLM.from_pretrained(
|
|
|
|
| 28 |
if isinstance(inputs, list):
|
| 29 |
inputs = inputs[0]
|
| 30 |
|
| 31 |
+
# 12.2kb APRIL promoter chunking logic
|
| 32 |
chunk_size = 1000
|
| 33 |
stride = 500
|
| 34 |
chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
|
|
|
|
| 44 |
chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
|
| 45 |
all_embeddings.append(chunk_emb)
|
| 46 |
|
| 47 |
+
# Average the chunks for one representative vector
|
| 48 |
final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
|
| 49 |
return final_embedding
|