# Embedding utilities: LLM2Vec lyric encoding and PCA dimensionality reduction helpers.
from pathlib import Path
from src.utils.config_loader import PCA_MODEL
import joblib
import torch
## For Single Input
def load_pca_model(vectors, model_path=PCA_MODEL):
    """
    Transform input vectors with a pre-trained PCA model loaded from disk.

    Args:
        vectors: The input data to reduce with the fitted PCA model.
        model_path: File path of the serialized (joblib) PCA model.

    Returns:
        The PCA-transformed data.

    Note: The default path comes from the data_config.yml file (via
    PCA_MODEL) and can be overridden per call. Can be used for the
    main program.
    """
    pca_model = joblib.load(Path(model_path))
    return pca_model.transform(vectors)
def l2vec_single_train(l2v, lyrics):
    """
    Encode a single lyric string using the provided LLM2Vec model.

    Args:
        l2v: The LLM2Vec model for encoding lyrics.
        lyrics: A single lyric string to encode.

    Returns:
        vectors: NumPy array with the vector representation of the lyrics
        (leading axis is the batch dimension of size 1).
    """
    # Disable gradient tracking during inference, matching l2vec_train;
    # without it, encode() builds an unnecessary autograd graph and
    # wastes memory.
    with torch.no_grad():
        vectors = l2v.encode([lyrics]).detach().cpu().numpy()
    return vectors
# For Batch Processing
def l2vec_train(l2v, lyrics_list):
    """
    Encode a batch of lyric strings using the provided LLM2Vec model.

    Args:
        l2v: The LLM2Vec model used for encoding.
        lyrics_list: A list of lyric strings to encode.

    Returns:
        vectors: The encoded vector representations of the lyrics.

    Note: This function only encodes the lyrics and does not apply PCA
    reduction. The PCA reduction can be applied separately in the
    train.py module.
    """
    # Inference only — no gradients needed for the forward pass.
    with torch.no_grad():
        encoded = l2v.encode(lyrics_list)
    return encoded