Spaces:
Sleeping
Sleeping
Update observatory_app.py
Browse files- observatory_app.py +6 -13
observatory_app.py
CHANGED
|
@@ -9,9 +9,10 @@ from textblob import TextBlob
|
|
| 9 |
from sklearn.linear_model import Ridge
|
| 10 |
from sklearn.model_selection import train_test_split
|
| 11 |
|
| 12 |
-
|
|
|
|
| 13 |
def load_data():
|
| 14 |
-
df = pd.read_csv(
|
| 15 |
df.columns = df.columns.str.strip()
|
| 16 |
|
| 17 |
channel_cols = ['data_channel_is_lifestyle', 'data_channel_is_entertainment',
|
|
@@ -33,9 +34,8 @@ def load_data():
|
|
| 33 |
df['day_of_week'] = df.apply(get_day, axis=1)
|
| 34 |
return df[df['shares'] < 20000]
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
df.columns = df.columns.str.strip()
|
| 39 |
|
| 40 |
df['log_shares'] = np.log1p(df['shares'])
|
| 41 |
|
|
@@ -43,18 +43,11 @@ feature_cols = [
|
|
| 43 |
'n_tokens_content', 'num_imgs', 'global_sentiment_polarity',
|
| 44 |
'global_subjectivity', 'title_sentiment_polarity', 'weekday_is_monday', 'weekday_is_tuesday', 'weekday_is_wednesday',
|
| 45 |
'weekday_is_thursday', 'weekday_is_friday', 'weekday_is_saturday', 'weekday_is_sunday', 'n_tokens_title',
|
| 46 |
-
'num_videos', '
|
| 47 |
]
|
| 48 |
x = df[feature_cols]
|
| 49 |
y = df['log_shares']
|
| 50 |
|
| 51 |
-
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
|
| 52 |
-
model = Ridge(alpha=1.0)
|
| 53 |
-
model.fit(X_train, y_train)
|
| 54 |
-
|
| 55 |
-
joblib.dump(model, 'popularity_model.pkl')
|
| 56 |
-
joblib.dump(x.columns.tolist(), 'model_features.pkl')
|
| 57 |
-
|
| 58 |
model = joblib.load('popularity_model.pkl')
|
| 59 |
feature_columns = joblib.load('model_features.pkl')
|
| 60 |
|
|
|
|
| 9 |
from sklearn.linear_model import Ridge
|
| 10 |
from sklearn.model_selection import train_test_split
|
| 11 |
|
| 12 |
+
data_url = "https://raw.githubusercontent.com/KeeganBarbee/KeeganBarbee.github.io/refs/heads/main/OnlineNewsPopularity.csv"
|
| 13 |
+
@st.cache_data(ttl = 3600)
|
| 14 |
def load_data():
|
| 15 |
+
df = pd.read_csv(data_url)
|
| 16 |
df.columns = df.columns.str.strip()
|
| 17 |
|
| 18 |
channel_cols = ['data_channel_is_lifestyle', 'data_channel_is_entertainment',
|
|
|
|
| 34 |
df['day_of_week'] = df.apply(get_day, axis=1)
|
| 35 |
return df[df['shares'] < 20000]
|
| 36 |
|
| 37 |
+
df = pd.read_csv(data_url)
|
| 38 |
+
df_display = df.copy()
|
|
|
|
| 39 |
|
| 40 |
df['log_shares'] = np.log1p(df['shares'])
|
| 41 |
|
|
|
|
| 43 |
'n_tokens_content', 'num_imgs', 'global_sentiment_polarity',
|
| 44 |
'global_subjectivity', 'title_sentiment_polarity', 'weekday_is_monday', 'weekday_is_tuesday', 'weekday_is_wednesday',
|
| 45 |
'weekday_is_thursday', 'weekday_is_friday', 'weekday_is_saturday', 'weekday_is_sunday', 'n_tokens_title',
|
| 46 |
+
'num_videos', 'num_hrefs'
|
| 47 |
]
|
| 48 |
x = df[feature_cols]
|
| 49 |
y = df['log_shares']
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
model = joblib.load('popularity_model.pkl')
|
| 52 |
feature_columns = joblib.load('model_features.pkl')
|
| 53 |
|