Fred808 committed on
Commit
78be209
·
verified ·
1 Parent(s): f18bc94

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -0
app.py CHANGED
@@ -214,4 +214,83 @@ niche_trends = solved_df.groupby('content_type')['sentiment'].mean().sort_values
214
  logging.info("Top Performing Content Types by Sentiment:")
215
  print(niche_trends)
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  logging.info("Analysis complete!")
 
214
  logging.info("Top Performing Content Types by Sentiment:")
215
  print(niche_trends)
216
 
217
+ # Viral Potential of Posts
218
+ logging.info("Training model for viral potential prediction...")
219
+ viral_threshold = engagement_df['engagement_rate'].quantile(0.9)
220
+ engagement_df['viral'] = engagement_df['engagement_rate'].apply(lambda x: 1 if x >= viral_threshold else 0)
221
+ solved_df['viral'] = engagement_df['viral']
222
+
223
+ # Features for viral potential prediction
224
+ features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
225
+ X = solved_df[features]
226
+ y = solved_df['viral']
227
+
228
+ # Split data into training and testing sets
229
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
230
+
231
+ # Train a Random Forest Classifier
232
+ viral_model = RandomForestClassifier(random_state=42)
233
+ viral_model.fit(X_train, y_train)
234
+
235
+ # Evaluate the model
236
+ y_pred = viral_model.predict(X_test)
237
+ accuracy = accuracy_score(y_test, y_pred)
238
+ logging.info(f"Viral Potential Model Accuracy: {accuracy:.4f}")
239
+
240
+ # Feature importance
241
+ importance = viral_model.feature_importances_
242
+ for feature, score in zip(features, importance):
243
+ logging.info(f"Feature Importance - {feature}: {score:.4f}")
244
+
245
+ # Engagement Rate Predictions
246
+ logging.info("Training model for engagement rate prediction...")
247
+ features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded', 'posting_time_encoded']
248
+ X = solved_df[features]
249
+ y = engagement_df['engagement_rate']
250
+
251
+ # Split data into training and testing sets
252
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
253
+
254
+ # Train an XGBoost Regressor
255
+ engagement_model = XGBRegressor(random_state=42)
256
+ engagement_model.fit(X_train, y_train)
257
+
258
+ # Evaluate the model
259
+ y_pred = engagement_model.predict(X_test)
260
+ mae = mean_absolute_error(y_test, y_pred)
261
+ logging.info(f"Engagement Rate Prediction Model - MAE: {mae:.4f}")
262
+
263
+ # Feature importance
264
+ importance = engagement_model.feature_importances_
265
+ for feature, score in zip(features, importance):
266
+ logging.info(f"Feature Importance - {feature}: {score:.4f}")
267
+
268
+ # Which Type of Posts Yield Greater Results When Promoted
269
+ logging.info("Training model for promotion prediction...")
270
+ promotion_threshold = engagement_df['engagement_rate'].quantile(0.8)
271
+ engagement_df['promote'] = engagement_df['engagement_rate'].apply(lambda x: 1 if x >= promotion_threshold else 0)
272
+ solved_df['promote'] = engagement_df['promote']
273
+
274
+ # Features for promotion prediction
275
+ features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
276
+ X = solved_df[features]
277
+ y = solved_df['promote']
278
+
279
+ # Split data into training and testing sets
280
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
281
+
282
+ # Train a Logistic Regression Model
283
+ promotion_model = LogisticRegression(random_state=42)
284
+ promotion_model.fit(X_train, y_train)
285
+
286
+ # Evaluate the model
287
+ y_pred = promotion_model.predict(X_test)
288
+ accuracy = accuracy_score(y_test, y_pred)
289
+ logging.info(f"Promotion Prediction Model Accuracy: {accuracy:.4f}")
290
+
291
+ # Analyze content type impact
292
+ content_type_impact = solved_df.groupby('content_type')['promote'].mean().sort_values(ascending=False)
293
+ logging.info("Content Type Impact on Promotion:")
294
+ print(content_type_impact)
295
+
296
  logging.info("Analysis complete!")