KeegBarb committed on
Commit
251e900
·
verified ·
1 Parent(s): c9c8eaa

Update observatory_app.py

Browse files
Files changed (1) hide show
  1. observatory_app.py +6 -13
observatory_app.py CHANGED
@@ -9,9 +9,10 @@ from textblob import TextBlob
9
  from sklearn.linear_model import Ridge
10
  from sklearn.model_selection import train_test_split
11
 
12
- @st.cache_data
 
13
  def load_data():
14
- df = pd.read_csv("OnlineNewsPopularity.csv")
15
  df.columns = df.columns.str.strip()
16
 
17
  channel_cols = ['data_channel_is_lifestyle', 'data_channel_is_entertainment',
@@ -33,9 +34,8 @@ def load_data():
33
  df['day_of_week'] = df.apply(get_day, axis=1)
34
  return df[df['shares'] < 20000]
35
 
36
- df_display = load_data()
37
- df = pd.read_csv("OnlineNewsPopularity.csv")
38
- df.columns = df.columns.str.strip()
39
 
40
  df['log_shares'] = np.log1p(df['shares'])
41
 
@@ -43,18 +43,11 @@ feature_cols = [
43
  'n_tokens_content', 'num_imgs', 'global_sentiment_polarity',
44
  'global_subjectivity', 'title_sentiment_polarity', 'weekday_is_monday', 'weekday_is_tuesday', 'weekday_is_wednesday',
45
  'weekday_is_thursday', 'weekday_is_friday', 'weekday_is_saturday', 'weekday_is_sunday', 'n_tokens_title',
46
- 'num_videos', 'num_imgs', 'num_hrefs'
47
  ]
48
  x = df[feature_cols]
49
  y = df['log_shares']
50
 
51
- X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
52
- model = Ridge(alpha=1.0)
53
- model.fit(X_train, y_train)
54
-
55
- joblib.dump(model, 'popularity_model.pkl')
56
- joblib.dump(x.columns.tolist(), 'model_features.pkl')
57
-
58
  model = joblib.load('popularity_model.pkl')
59
  feature_columns = joblib.load('model_features.pkl')
60
 
 
9
  from sklearn.linear_model import Ridge
10
  from sklearn.model_selection import train_test_split
11
 
12
+ data_url = "https://raw.githubusercontent.com/KeeganBarbee/KeeganBarbee.github.io/refs/heads/main/OnlineNewsPopularity.csv"
13
+ @st.cache_data(ttl = 3600)
14
  def load_data():
15
+ df = pd.read_csv(data_url)
16
  df.columns = df.columns.str.strip()
17
 
18
  channel_cols = ['data_channel_is_lifestyle', 'data_channel_is_entertainment',
 
34
  df['day_of_week'] = df.apply(get_day, axis=1)
35
  return df[df['shares'] < 20000]
36
 
37
+ df = pd.read_csv(data_url)
38
+ df_display = df.copy()
 
39
 
40
  df['log_shares'] = np.log1p(df['shares'])
41
 
 
43
  'n_tokens_content', 'num_imgs', 'global_sentiment_polarity',
44
  'global_subjectivity', 'title_sentiment_polarity', 'weekday_is_monday', 'weekday_is_tuesday', 'weekday_is_wednesday',
45
  'weekday_is_thursday', 'weekday_is_friday', 'weekday_is_saturday', 'weekday_is_sunday', 'n_tokens_title',
46
+ 'num_videos', 'num_hrefs'
47
  ]
48
  x = df[feature_cols]
49
  y = df['log_shares']
50
 
 
 
 
 
 
 
 
51
  model = joblib.load('popularity_model.pkl')
52
  feature_columns = joblib.load('model_features.pkl')
53