# Build two embedding matrices from the RideSearch CSV and save them as
# float32 .npy files in the current working directory:
#   emb_text.npy - sentence embeddings of the `text_record` column
#   emb_num.npy  - standardized numeric feature columns
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler

CSV = 'RideSearch_dataset.csv'
df = pd.read_csv(CSV)

# --- Text embeddings -------------------------------------------------------
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
# NOTE(review): astype(str) turns missing values into the literal string
# 'nan', which is then embedded like any other text - confirm the column has
# no missing entries, or that this is acceptable.
te = model.encode(
    df['text_record'].astype(str).tolist(),
    batch_size=256,
    show_progress_bar=True,
    normalize_embeddings=True,
)
np.save('emb_text.npy', np.asarray(te, dtype='float32'))

# --- Numeric embeddings ----------------------------------------------------
NUM = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]
# Work on a copy so the sign flip below does not modify `df`.
X = df[NUM].copy()
# Flip the sign of the acceleration time before scaling.
# NOTE(review): presumably so a quicker car gets a larger value - confirm.
# `price_usd` is not flipped.
X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']

# StandardScaler's defaults standardize each column independently
# (subtract the column mean, divide by the column standard deviation).
Xs = StandardScaler().fit_transform(X.values.astype('float32'))
np.save('emb_num.npy', Xs.astype('float32'))

print('Saved emb_text.npy and emb_num.npy')