brocks1234 committed on
Commit
e299aae
·
verified ·
1 Parent(s): c0a56cc

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +4 -4
handler.py CHANGED
@@ -3,15 +3,14 @@ import torch
3
  from typing import Any, Dict, List
4
  from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
5
 
6
- # Force it at the OS level before any other imports happen
7
  os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
8
 
9
  class EndpointHandler:
10
  def __init__(self, path=""):
11
- # We ignore 'path' to avoid the toolkit's broken local cache
12
  self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
13
 
14
- # Explicitly load from the official repo, not the local folder
15
  self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
16
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
17
  self.model = AutoModelForMaskedLM.from_pretrained(
@@ -29,7 +28,7 @@ class EndpointHandler:
29
  if isinstance(inputs, list):
30
  inputs = inputs[0]
31
 
32
- # Standard chunking for your 12.2kb APRIL promoter
33
  chunk_size = 1000
34
  stride = 500
35
  chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
@@ -45,5 +44,6 @@ class EndpointHandler:
45
  chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
46
  all_embeddings.append(chunk_emb)
47
 
 
48
  final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
49
  return final_embedding
 
3
  from typing import Any, Dict, List
4
  from transformers import AutoConfig, AutoTokenizer, AutoModelForMaskedLM
5
 
6
+ # Force the trust flag at the environment level
7
  os.environ["HF_HUB_TRUST_REMOTE_CODE"] = "True"
8
 
9
  class EndpointHandler:
10
  def __init__(self, path=""):
11
+ # We explicitly ignore 'path' and pull from the source
12
  self.model_id = "InstaDeepAI/nucleotide-transformer-v2-50m-multi-species"
13
 
 
14
  self.config = AutoConfig.from_pretrained(self.model_id, trust_remote_code=True)
15
  self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, trust_remote_code=True)
16
  self.model = AutoModelForMaskedLM.from_pretrained(
 
28
  if isinstance(inputs, list):
29
  inputs = inputs[0]
30
 
31
+ # 12.2kb APRIL promoter chunking logic
32
  chunk_size = 1000
33
  stride = 500
34
  chunks = [inputs[i:i + chunk_size] for i in range(0, len(inputs), stride)]
 
44
  chunk_emb = torch.mean(outputs.hidden_states[-1], dim=1).squeeze()
45
  all_embeddings.append(chunk_emb)
46
 
47
+ # Average the chunks for one representative vector
48
  final_embedding = torch.stack(all_embeddings).mean(dim=0).cpu().numpy().tolist()
49
  return final_embedding