VJyzCELERY
committed on
Commit
·
70cbebe
1
Parent(s):
21cbaed
Fixed some issue
Browse files
app.py
CHANGED
|
@@ -529,22 +529,20 @@ df_liked = df_liked.drop_duplicates(subset=['steamid', 'app_id'])
|
|
| 529 |
code_cell("""
|
| 530 |
vectorizer = TfidfVectorizer(max_df=0.7,min_df=3,stop_words=None,ngram_range=(1,2))
|
| 531 |
review_app_id_encoder = LabelEncoder()""")
|
| 532 |
-
|
| 533 |
-
|
| 534 |
-
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
p(f"""
|
| 538 |
-
Training : {train_df.shape}
|
| 539 |
-
Testing : {test_df.shape}
|
| 540 |
-
Validation : {val_df.shape}
|
| 541 |
-
""")
|
| 542 |
code_cell("""
|
| 543 |
train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
|
| 544 |
test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
|
| 545 |
""")
|
| 546 |
-
|
| 547 |
-
|
|
|
|
|
|
|
|
|
|
| 548 |
code_cell("""
|
| 549 |
X_train = vectorizer.fit_transform(train_df['cleaned_review'])
|
| 550 |
y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
|
|
|
|
| 529 |
code_cell("""
|
| 530 |
vectorizer = TfidfVectorizer(max_df=0.7,min_df=3,stop_words=None,ngram_range=(1,2))
|
| 531 |
review_app_id_encoder = LabelEncoder()""")
|
| 532 |
+
|
| 533 |
+
train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
|
| 534 |
+
test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
|
| 535 |
+
del df_temp
|
| 536 |
+
gc.collect()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 537 |
code_cell("""
|
| 538 |
train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
|
| 539 |
test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
|
| 540 |
""")
|
| 541 |
+
p(f"""
|
| 542 |
+
Training : {train_df.shape}
|
| 543 |
+
Testing : {test_df.shape}
|
| 544 |
+
Validation : {val_df.shape}
|
| 545 |
+
""")
|
| 546 |
code_cell("""
|
| 547 |
X_train = vectorizer.fit_transform(train_df['cleaned_review'])
|
| 548 |
y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
|