olivebradshaw committed on
Commit
2bff51a
·
1 Parent(s): 6e3f415

update housing

Browse files
Files changed (1) hide show
  1. main_text.py +30 -6
main_text.py CHANGED
@@ -1,8 +1,32 @@
1
- pip install -U sentence-transformers
2
- from sentence_transformers import SentenceTransformer
3
 
4
- model = SentenceTransformer("osanseviero/full-sentence-distillroberta3")
5
- sentence = ['This framework generates embeddings for each input sentence']
 
6
 
7
- embedding = model.encode(sentence)
8
- print(embedding)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.model_selection import train_test_split
3
 
4
+ # Read the data
5
+ X_full = pd.read_csv('../input/train.csv', index_col='Id')
6
+ X_test_full = pd.read_csv('../input/test.csv', index_col='Id')
7
 
8
+ # Remove rows with missing target, separate target from predictors
9
+ X_full.dropna(axis=0, subset=['SalePrice'], inplace=True)
10
+ y = X_full.SalePrice
11
+ X_full.drop(['SalePrice'], axis=1, inplace=True)
12
+
13
+ # Break off validation set from training data
14
+ X_train_full, X_valid_full, y_train, y_valid = train_test_split(X_full, y,
15
+ train_size=0.8, test_size=0.2,
16
+ random_state=0)
17
+
18
+ # "Cardinality" means the number of unique values in a column
19
+ # Select categorical columns with relatively low cardinality (the cutoff of fewer than 10 unique values is convenient but arbitrary)
20
+ categorical_cols = [cname for cname in X_train_full.columns if
21
+ X_train_full[cname].nunique() < 10 and
22
+ X_train_full[cname].dtype == "object"]
23
+
24
+ # Select numerical columns
25
+ numerical_cols = [cname for cname in X_train_full.columns if
26
+ X_train_full[cname].dtype in ['int64', 'float64']]
27
+
28
+ # Keep selected columns only
29
+ my_cols = categorical_cols + numerical_cols
30
+ X_train = X_train_full[my_cols].copy()
31
+ X_valid = X_valid_full[my_cols].copy()
32
+ X_test = X_test_full[my_cols].copy()