VJyzCELERY committed on
Commit
70cbebe
·
1 Parent(s): 21cbaed

Fixed some issues

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -529,22 +529,20 @@ df_liked = df_liked.drop_duplicates(subset=['steamid', 'app_id'])
529
  code_cell("""
530
  vectorizer = TfidfVectorizer(max_df=0.7,min_df=3,stop_words=None,ngram_range=(1,2))
531
  review_app_id_encoder = LabelEncoder()""")
532
- def get_data_split():
533
- train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
534
- test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
535
- del df_temp
536
- gc.collect()
537
- p(f"""
538
- Training : {train_df.shape}
539
- Testing : {test_df.shape}
540
- Validation : {val_df.shape}
541
- """)
542
  code_cell("""
543
  train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
544
  test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
545
  """)
546
- btn = gr.Button("View data split size :")
547
- btn.click(fn=get_data_split())
 
 
 
548
  code_cell("""
549
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
550
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])
 
529
  code_cell("""
530
  vectorizer = TfidfVectorizer(max_df=0.7,min_df=3,stop_words=None,ngram_range=(1,2))
531
  review_app_id_encoder = LabelEncoder()""")
532
+
533
+ train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
534
+ test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
535
+ del df_temp
536
+ gc.collect()
 
 
 
 
 
537
  code_cell("""
538
  train_df,df_temp = train_test_split(sampled,test_size=0.2,random_state=SEED,stratify=sampled['app_id'])
539
  test_df,val_df = train_test_split(df_temp,test_size=0.5,random_state=SEED,stratify=df_temp['app_id'])
540
  """)
541
+ p(f"""
542
+ Training : {train_df.shape}
543
+ Testing : {test_df.shape}
544
+ Validation : {val_df.shape}
545
+ """)
546
  code_cell("""
547
  X_train = vectorizer.fit_transform(train_df['cleaned_review'])
548
  y_train = review_app_id_encoder.fit_transform(train_df['app_id'])