Update app.py
Browse files
app.py
CHANGED
|
@@ -214,4 +214,83 @@ niche_trends = solved_df.groupby('content_type')['sentiment'].mean().sort_values
|
|
| 214 |
logging.info("Top Performing Content Types by Sentiment:")
|
| 215 |
print(niche_trends)
|
| 216 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
logging.info("Analysis complete!")
|
|
|
|
| 214 |
logging.info("Top Performing Content Types by Sentiment:")
|
| 215 |
print(niche_trends)
|
| 216 |
|
| 217 |
+
# Viral Potential of Posts
# Label the top 10% of posts by engagement rate as "viral", then train a
# classifier that predicts that label from caption/content features.
logging.info("Training model for viral potential prediction...")
viral_threshold = engagement_df['engagement_rate'].quantile(0.9)
# Vectorized comparison instead of a per-row lambda. NaN rates compare False
# and therefore become 0, exactly as the lambda produced.
engagement_df['viral'] = (engagement_df['engagement_rate'] >= viral_threshold).astype('int64')
# NOTE(review): column assignment between DataFrames aligns on index labels;
# this assumes solved_df and engagement_df share the same index -- confirm.
solved_df['viral'] = engagement_df['viral']

# Features for viral potential prediction
features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
X = solved_df[features]
y = solved_df['viral']

# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
viral_model = RandomForestClassifier(random_state=42)
viral_model.fit(X_train, y_train)

# Evaluate the model
# NOTE(review): with a 0.9-quantile threshold roughly 90% of rows are class 0,
# so a constant "not viral" prediction already scores about 0.90 accuracy.
# Consider a stratified split and precision/recall when interpreting this.
y_pred = viral_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
logging.info(f"Viral Potential Model Accuracy: {accuracy:.4f}")

# Feature importance: one log line per feature, in the order of `features`
importance = viral_model.feature_importances_
for feature, score in zip(features, importance):
    logging.info(f"Feature Importance - {feature}: {score:.4f}")
|
| 244 |
+
|
| 245 |
+
# Engagement Rate Predictions
# Regress the raw engagement rate on post features, this time including the
# encoded posting time.
logging.info("Training model for engagement rate prediction...")
features = [
    'caption_length',
    'hashtag_count',
    'sentiment',
    'content_type_encoded',
    'media_type_encoded',
    'posting_time_encoded',
]
# NOTE(review): X is taken from solved_df while y is taken from engagement_df;
# rows are presumably in one-to-one correspondence -- confirm.
X, y = solved_df[features], engagement_df['engagement_rate']

# Hold out 20% of the rows for evaluation (fixed seed)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit an XGBoost regressor on the training portion
engagement_model = XGBRegressor(random_state=42)
engagement_model.fit(X_train, y_train)

# Score the held-out rows with mean absolute error
y_pred = engagement_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
logging.info(f"Engagement Rate Prediction Model - MAE: {mae:.4f}")

# Log the importance the fitted model assigns to each feature
importance = engagement_model.feature_importances_
for feature, score in zip(features, importance):
    logging.info(f"Feature Importance - {feature}: {score:.4f}")
|
| 267 |
+
|
| 268 |
+
# Which Type of Posts Yield Greater Results When Promoted
# Label the top 20% of posts by engagement rate as worth promoting, train a
# classifier on that label, then summarize the label rate per content type.
logging.info("Training model for promotion prediction...")
promotion_threshold = engagement_df['engagement_rate'].quantile(0.8)
# Vectorized comparison instead of a per-row lambda. NaN rates compare False
# and therefore become 0, exactly as the lambda produced.
engagement_df['promote'] = (engagement_df['engagement_rate'] >= promotion_threshold).astype('int64')
# NOTE(review): column assignment between DataFrames aligns on index labels;
# this assumes solved_df and engagement_df share the same index -- confirm.
solved_df['promote'] = engagement_df['promote']

# Features for promotion prediction
features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
X = solved_df[features]
y = solved_df['promote']

# Split data into training and testing sets (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Logistic Regression Model
# The features are fed in unscaled, which can keep the solver from converging
# within the default 100 iterations; allow more so the fit is not cut short.
promotion_model = LogisticRegression(random_state=42, max_iter=1000)
promotion_model.fit(X_train, y_train)

# Evaluate the model
# NOTE(review): about 80% of rows are class 0 by construction, so accuracy
# should be read against a ~0.80 constant-prediction baseline.
y_pred = promotion_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
logging.info(f"Promotion Prediction Model Accuracy: {accuracy:.4f}")

# Analyze content type impact: share of posts labelled "promote" per content
# type, highest first
content_type_impact = solved_df.groupby('content_type')['promote'].mean().sort_values(ascending=False)
logging.info("Content Type Impact on Promotion:")
print(content_type_impact)

logging.info("Analysis complete!")
|