Spaces:
Sleeping
Sleeping
# Offline preprocessing script: reads the dataset CSV and writes two NumPy
# arrays to disk -- sentence embeddings of the text column ('emb_text.npy')
# and standardized numeric features ('emb_num.npy').
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import StandardScaler

CSV = 'RideSearch_dataset.csv'
df = pd.read_csv(CSV)

# ---- Text embeddings ------------------------------------------------------
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Every record is coerced to str before encoding, so non-string cells
# (including missing values) are encoded as their string representation.
text_records = df['text_record'].astype(str).tolist()
te = model.encode(
    text_records,
    batch_size=256,
    show_progress_bar=True,
    normalize_embeddings=True,
)
text_matrix = np.asarray(te, dtype='float32')
np.save('emb_text.npy', text_matrix)

# ---- Numeric features -----------------------------------------------------
NUM = [
    'horsepower',
    'zero_to_100_kmh_s',
    'seats',
    'cargo_liters',
    'price_usd',
    'popularity_score',
    'comfort_score',
    'reliability_score',
    'tech_score',
    'ownership_cost_score',
    'safety_rating',
]

# Work on a copy so the sign flip below does not modify `df`.
X = df[NUM].copy()

# The acceleration time is negated before scaling.
# NOTE(review): presumably so that a larger value means "quicker", in line
# with the other columns -- confirm against the consumer of emb_num.npy.
X['zero_to_100_kmh_s'] = -X['zero_to_100_kmh_s']

# Standardize each column; the scaler is fit on this same data and is not
# persisted, only the transformed matrix is saved.
raw_features = X.values.astype('float32')
scaler = StandardScaler()
Xs = scaler.fit_transform(raw_features)
np.save('emb_num.npy', Xs.astype('float32'))

print('Saved emb_text.npy and emb_num.npy')