Update app.py
app.py CHANGED
@@ -49,6 +49,14 @@ except FileNotFoundError:
     logging.error("solved.json not found. Please ensure the file exists.")
     exit()
 
+# Load competitor data
+logging.info("Loading competitor data...")
+try:
+    competitor_df = pd.read_csv('competitors_data.csv')
+except FileNotFoundError:
+    logging.error("competitors_data.csv not found. Please ensure the file exists.")
+    exit()
+
 # Check for required columns in engagement data
 required_columns = ['posting_time', 'likes', 'comments', 'shares']
 missing_columns = [col for col in required_columns if col not in engagement_df.columns]
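The new loader mirrors the existing solved.json guard, but only the engagement frame gets the required-column check that follows. A minimal sketch of applying the same validation to the competitor frame; load_csv_or_exit is a hypothetical helper, and the column list is an assumption based on the fields this commit later reads from competitor_df:

import logging
import sys
import pandas as pd

def load_csv_or_exit(path, required):
    # Hypothetical helper, not part of app.py: load a CSV and
    # fail fast on a missing file or a missing column.
    try:
        df = pd.read_csv(path)
    except FileNotFoundError:
        logging.error(f"{path} not found. Please ensure the file exists.")
        sys.exit(1)
    missing = [col for col in required if col not in df.columns]
    if missing:
        logging.error(f"{path} is missing required columns: {missing}")
        sys.exit(1)
    return df

# Assumed schema: the columns the rest of this commit reads from competitor_df.
competitor_df = load_csv_or_exit(
    'competitors_data.csv',
    ['posting_time', 'caption', 'hashtags', 'likes', 'comments', 'shares', 'content_type'],
)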
@@ -118,37 +126,57 @@ engagement_summary = engagement_df.groupby('posting_time').agg({
 
 # Plot engagement rate over time
 plt.figure(figsize=(10, 6))
-plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'])
+plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'], label='Your Data')
 plt.title('Engagement Rate Over Time')
 plt.xlabel('Time')
 plt.ylabel('Engagement Rate')
+plt.legend()
 plt.show()
 
 # Time-Series Model: Optimal Posting Times (using Prophet)
 logging.info("Training time-series model for optimal posting times using Prophet...")
 time_series_data = engagement_summary[['posting_time', 'engagement_rate']].rename(columns={'posting_time': 'ds', 'engagement_rate': 'y'})
 
-#
-
-prophet_model.fit(time_series_data)
-
-#
-
-
-
-#
-
-
-
-
-
-
-#
-
-
-
-
-
+# Drop rows with NaN values in the target column
+time_series_data = time_series_data.dropna(subset=['y'])
+
+# Check if there is enough data to train the Prophet model
+if len(time_series_data) < 2:
+    logging.warning("Not enough data to train the Prophet model. Skipping time-series analysis.")
+else:
+    # Resample the data to a fixed frequency (e.g., daily)
+    time_series_data = time_series_data.set_index('ds').resample('D').mean().reset_index()
+
+    # Train Prophet model with hyperparameters
+    prophet_model = Prophet(
+        changepoint_prior_scale=0.05,  # Adjust sensitivity to trend changes
+        seasonality_prior_scale=10.0,  # Adjust seasonality strength
+        yearly_seasonality=True,  # Enable yearly seasonality
+        daily_seasonality=True  # Enable daily seasonality
+    )
+    prophet_model.fit(time_series_data)
+
+    # Make future predictions
+    future = prophet_model.make_future_dataframe(periods=30)  # Predict for the next 30 days
+    forecast = prophet_model.predict(future)
+
+    # Plot the forecast
+    fig = prophet_model.plot(forecast)
+    plt.title('Engagement Rate Forecast (Prophet)')
+    plt.xlabel('Date')
+    plt.ylabel('Engagement Rate')
+    plt.show()
+
+    # Evaluate the model
+    y_true = time_series_data['y']
+    y_pred = forecast.loc[:len(y_true)-1, 'yhat']  # Align predictions with true values
+
+    # Check for NaN values in y_true or y_pred
+    if y_true.isnull().any() or y_pred.isnull().any():
+        logging.warning("NaN values detected in true values or predictions. Skipping MAE calculation.")
+    else:
+        mae = mean_absolute_error(y_true, y_pred)
+        logging.info(f"Prophet Model - MAE: {mae:.4f}")
 
 # Ensure 'hashtags' column is properly formatted
 solved_df['hashtags'] = solved_df['hashtags'].apply(lambda x: x if isinstance(x, list) and len(x) > 0 else ['no_hashtag'])
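The new MAE check scores in-sample predictions and aligns them to the training rows by integer position, which can drift silently once the series has been resampled. Prophet ships a diagnostics module for rolling-origin evaluation on held-out windows; a sketch, assuming the series spans enough daily history for the chosen windows (the window sizes here are illustrative):

from prophet import Prophet
from prophet.diagnostics import cross_validation, performance_metrics

# Fit on the prepared ds/y frame, then score rolling holdout windows
# instead of in-sample rows. Tune the windows to the real data span.
model = Prophet(changepoint_prior_scale=0.05, seasonality_prior_scale=10.0)
model.fit(time_series_data)
df_cv = cross_validation(model, initial='60 days', period='7 days', horizon='14 days')
print(performance_metrics(df_cv)[['horizon', 'mae']])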
@@ -266,4 +294,157 @@ content_type_impact = solved_df.groupby('content_type')['promote'].mean().sort_v
 logging.info("Content Type Impact on Promotion:")
 print(content_type_impact)
 
-
+# Competitor Data Analysis
+logging.info("Analyzing competitor data...")
+
+# Preprocess competitor data
+competitor_df['caption'] = competitor_df['caption'].astype(str)
+competitor_df['hashtags'] = competitor_df['hashtags'].apply(lambda x: ast.literal_eval(x) if isinstance(x, str) else [])
+competitor_df['posting_time'] = pd.to_datetime(competitor_df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
+competitor_df = competitor_df[competitor_df['posting_time'].notna()]
+
+# Calculate engagement_rate for competitor data
+competitor_df['engagement_rate'] = competitor_df['likes'] + competitor_df['comments'] + competitor_df['shares']
+
+# Perform sentiment analysis on competitor captions
+logging.info("Performing sentiment analysis on competitor captions...")
+competitor_df['caption_sentiment'] = competitor_df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
+competitor_df['sentiment'] = competitor_df['caption_sentiment']
+
+# Feature engineering for competitor data
+logging.info("Performing feature engineering for competitor data...")
+competitor_df['caption_length'] = competitor_df['caption'].apply(len)
+competitor_df['hashtag_count'] = competitor_df['hashtags'].apply(len)
+
+# Analyze competitor engagement data
+logging.info("Analyzing competitor engagement data...")
+competitor_summary = competitor_df.groupby('posting_time').agg({
+    'likes': 'sum',
+    'comments': 'sum',
+    'shares': 'sum',
+    'engagement_rate': 'mean'
+}).reset_index()
+
+# Plot competitor engagement rate over time
+plt.figure(figsize=(10, 6))
+plt.plot(competitor_summary['posting_time'], competitor_summary['engagement_rate'], label='Competitor')
+plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'], label='Your Data')
+plt.title('Engagement Rate Over Time (Your Data vs Competitor)')
+plt.xlabel('Time')
+plt.ylabel('Engagement Rate')
+plt.legend()
+plt.show()
+
+# Analyze competitor niche trends
+logging.info("Analyzing competitor niche trends...")
+competitor_niche_trends = competitor_df.groupby('content_type')['sentiment'].mean().sort_values(ascending=False)
+logging.info("Top Performing Content Types by Sentiment (Competitor):")
+print(competitor_niche_trends)
+
+# Combine your data with competitor data for enhanced analysis
+logging.info("Combining your data with competitor data...")
+combined_df = pd.concat([solved_df, competitor_df], ignore_index=True)
+
+# Enhanced recommendation system using combined data
+logging.info("Training enhanced recommendation system using combined data...")
+combined_hashtags = combined_df['hashtags'].apply(lambda x: ' '.join(x))
+
+if combined_hashtags.str.strip().eq('').all():
+    logging.warning("The 'hashtags' column is empty or contains only stop words. Skipping recommendation system.")
+else:
+    vectorizer = TfidfVectorizer()
+    tfidf_matrix = vectorizer.fit_transform(combined_hashtags)
+    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
+
+    def recommend_hashtags(post_index, top_n=5):
+        sim_scores = list(enumerate(cosine_sim[post_index]))
+        sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
+        top_indices = [i[0] for i in sim_scores[1:top_n+1]]
+        return combined_df.iloc[top_indices]['hashtags']
+
+    # Example: Recommend hashtags for the first post
+    logging.info("Example Hashtag Recommendations (Combined Data):")
+    print(recommend_hashtags(0))
+
+# Enhanced niche trend analysis using combined data
+logging.info("Analyzing enhanced niche trends using combined data...")
+combined_niche_trends = combined_df.groupby('content_type')['sentiment'].mean().sort_values(ascending=False)
+logging.info("Top Performing Content Types by Sentiment (Combined Data):")
+print(combined_niche_trends)
+
+# Enhanced viral potential prediction using combined data
+logging.info("Training enhanced viral potential prediction model using combined data...")
+combined_viral_threshold = combined_df['engagement_rate'].quantile(0.9)
+combined_df['viral'] = combined_df['engagement_rate'].apply(lambda x: 1 if x >= combined_viral_threshold else 0)
+
+# Features for viral potential prediction
+features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
+X = combined_df[features]
+y = combined_df['viral']
+
+# Split data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train a Random Forest Classifier
+viral_model = RandomForestClassifier(random_state=42)
+viral_model.fit(X_train, y_train)
+
+# Evaluate the model
+y_pred = viral_model.predict(X_test)
+accuracy = accuracy_score(y_test, y_pred)
+logging.info(f"Enhanced Viral Potential Model Accuracy: {accuracy:.4f}")
+
+# Feature importance
+importance = viral_model.feature_importances_
+for feature, score in zip(features, importance):
+    logging.info(f"Feature Importance - {feature}: {score:.4f}")
+
+# Enhanced engagement rate predictions using combined data
+logging.info("Training enhanced engagement rate prediction model using combined data...")
+X = combined_df[features]
+y = combined_df['engagement_rate']
+
+# Split data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train an XGBoost Regressor
+engagement_model = XGBRegressor(random_state=42)
+engagement_model.fit(X_train, y_train)
+
+# Evaluate the model
+y_pred = engagement_model.predict(X_test)
+mae = mean_absolute_error(y_test, y_pred)
+logging.info(f"Enhanced Engagement Rate Prediction Model - MAE: {mae:.4f}")
+
+# Feature importance
+importance = engagement_model.feature_importances_
+for feature, score in zip(features, importance):
+    logging.info(f"Feature Importance - {feature}: {score:.4f}")
+
+# Enhanced promotion strategy using combined data
+logging.info("Training enhanced promotion prediction model using combined data...")
+promotion_threshold = combined_df['engagement_rate'].quantile(0.8)
+combined_df['promote'] = combined_df['engagement_rate'].apply(lambda x: 1 if x >= promotion_threshold else 0)
+
+# Features for promotion prediction
+X = combined_df[features]
+y = combined_df['promote']
+
+# Split data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train a Logistic Regression Model
+promotion_model = LogisticRegression(random_state=42)
+promotion_model.fit(X_train, y_train)
+
+# Evaluate the model
+y_pred = promotion_model.predict(X_test)
+accuracy = accuracy_score(y_test, y_pred)
+logging.info(f"Enhanced Promotion Prediction Model Accuracy: {accuracy:.4f}")
+
+# Analyze content type impact
+content_type_impact = combined_df.groupby('content_type')['promote'].mean().sort_values(ascending=False)
+logging.info("Enhanced Content Type Impact on Promotion:")
+print(content_type_impact)
+
+logging.info("Enhanced analysis complete!")
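The recommender in this hunk joins each post's hashtag list into one document, vectorizes the documents with TF-IDF, and ranks the other posts by cosine similarity, skipping the post itself. The same mechanics on toy data (the hashtags below are hypothetical, not drawn from either dataset):

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Toy hashtag documents, one per post.
docs = ['fitness gym health', 'travel beach sunset', 'gym workout health']
tfidf = TfidfVectorizer().fit_transform(docs)
sim = cosine_similarity(tfidf, tfidf)

# Rank the neighbours of post 0, excluding the post itself,
# mirroring the logic inside recommend_hashtags.
scores = sorted(enumerate(sim[0]), key=lambda s: s[1], reverse=True)
print([i for i, _ in scores[1:3]])  # [2, 1]: post 2 shares 'gym' and 'health'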
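One caveat on the combined models: the feature list includes content_type_encoded and media_type_encoded, but the competitor preprocessing above only derives caption_length, hashtag_count, and sentiment, so competitor rows in combined_df[features] would carry NaNs that the classifiers reject. A minimal sketch of encoding on the combined frame instead, assuming both sources carry raw content_type and media_type columns:

from sklearn.preprocessing import LabelEncoder

# Fit one encoder per categorical column on the combined frame so solved and
# competitor rows share the same integer codes (illustrative, not from app.py).
for col in ['content_type', 'media_type']:
    combined_df[f'{col}_encoded'] = LabelEncoder().fit_transform(
        combined_df[col].astype(str)
    )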