TRAFICA: Multiple Tokenization (collection of 8 items)
Source code and other documentation can be found in the TRAFICA GitHub repository.
from transformers import AutoTokenizer, AutoConfig, AutoModelForSequenceClassification
# configuration dict for the different TRAFICA versions
config_dict = {
    'TRAFICA (BPE-1)': {
        'model_path': 'Allanxu/TRAFICA-BPE1',
        'tokenizer_path': 'Allanxu/TRAFICA-BPE1',
        'tokenization': 'BPE',
    },
    'TRAFICA (BPE-2)': {
        'model_path': 'Allanxu/TRAFICA-BPE2',
        'tokenizer_path': 'zhihan1996/DNABERT-2-117M',  # reuses the DNABERT-2 tokenizer
        'tokenization': 'BPE',
    },
    'TRAFICA (4-mer)': {
        'model_path': 'Allanxu/TRAFICA-4_mer',
        'tokenizer_path': 'Allanxu/TRAFICA-4_mer',
        'tokenization': '4_mer',
    },
    'TRAFICA (5-mer)': {
        'model_path': 'Allanxu/TRAFICA-5_mer',
        'tokenizer_path': 'Allanxu/TRAFICA-5_mer',
        'tokenization': '5_mer',
    },
    'TRAFICA (6-mer)': {
        'model_path': 'Allanxu/TRAFICA-6_mer',
        'tokenizer_path': 'Allanxu/TRAFICA-6_mer',
        'tokenization': '6_mer',
    },
    'TRAFICA (base-level)': {
        'model_path': 'Allanxu/TRAFICA-Base_level',
        'tokenizer_path': 'Allanxu/TRAFICA-Base_level',
        'tokenization': 'Base-level',
    },
}
# tokenizer
Tokenizer = AutoTokenizer.from_pretrained(config_dict['TRAFICA (base-level)']['tokenizer_path'], trust_remote_code=True)

# model
config = AutoConfig.from_pretrained(config_dict['TRAFICA (base-level)']['model_path'])
config.num_labels = 1  # single regression output: relative binding affinity
model = AutoModelForSequenceClassification.from_pretrained(config_dict['TRAFICA (base-level)']['model_path'], config=config, trust_remote_code=True)
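Since every entry in config_dict has the same layout, the three loading calls above can be wrapped in a small helper. This is a convenience sketch of our own, not part of the TRAFICA codebase; the name load_trafica is hypothetical.

def load_trafica(version, configs=config_dict):
    """Load the tokenizer and affinity-prediction model for one TRAFICA variant.

    `version` is any key of `config_dict`, e.g. 'TRAFICA (4-mer)'.
    """
    entry = configs[version]
    tokenizer = AutoTokenizer.from_pretrained(entry['tokenizer_path'], trust_remote_code=True)
    config = AutoConfig.from_pretrained(entry['model_path'])
    config.num_labels = 1  # single regression output, as above
    model = AutoModelForSequenceClassification.from_pretrained(
        entry['model_path'], config=config, trust_remote_code=True)
    return tokenizer, model

# e.g. Tokenizer, model = load_trafica('TRAFICA (6-mer)')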
from peft import PeftModel
import os
import torch

lora_path = '/<Path of fine-tuned LoRA>/Base-level/PRJEB3289/10000/ATF7_TGGGCG30NCGT' # example for TF ATF7

# LoRA adapter and affinity prediction head
state_dict = torch.load(os.path.join(lora_path, "predict_head_weights.pth"), weights_only=True)
model.classifier.load_state_dict(state_dict['PREDICT_HEAD'])
model = PeftModel.from_pretrained(model, os.path.join(lora_path, "lora_adapter"))
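If you do not need to swap adapters at runtime, the LoRA weights can optionally be folded into the base model with PEFT's merge_and_unload(). This step is not part of the snippet above, and whether it is worthwhile for your setup is your call.

# Optional: merge the LoRA weights into the base model so inference runs
# without the adapter indirection (standard PEFT API for LoRA adapters).
model = model.merge_and_unload()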
Download the fine-tuned LoRA adapters from this repository, unzip each file, and replace lora_path with your local path.
from util import piece_sequences  # Src/util.py in the TRAFICA repository
# Input construction
sequences = ['CCAGAAGACAACTTGTAGAAATAAGCAAAA', 'ATTGCGCCCCAGCCCCACACCCACACGCAT']
tokens_batch = piece_sequences(sequences, config_dict['TRAFICA (base-level)']['tokenization'])
# tokens_batch = ['C C A G A A G A C A A C T T G T A G A A A T A A G C A A A A', 'A T T G C G C C C C A G C C C C A C A C C C A C A C G C A T']
inputs = Tokenizer(tokens_batch, return_tensors="pt", padding=True)
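For reference, here is a minimal sketch of what piece_sequences plausibly does, inferred from the base-level example above; the overlapping k-mer splitting and the BPE pass-through are assumptions, and the authoritative implementation is Src/util.py in the TRAFICA repository.

def piece_sequences_sketch(sequences, tokenization):
    """Hypothetical re-implementation for illustration only."""
    pieced = []
    for seq in sequences:
        if tokenization == 'Base-level':
            pieces = list(seq)  # one token per base, as in the example above
        elif tokenization.endswith('_mer'):
            k = int(tokenization.split('_')[0])
            pieces = [seq[i:i + k] for i in range(len(seq) - k + 1)]  # assumed overlapping k-mers
        else:  # 'BPE': assume the tokenizer segments the raw sequence itself
            pieces = [seq]
        pieced.append(' '.join(pieces))
    return pieced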
# Prediction
model.eval()  # disable dropout for deterministic inference
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
print(f"Predicted relative affinities: {logits.flatten()}")