Spaces:

Fred808
/

Insta-AI

Paused

App Files Community

Fred808 committed on Jan 16, 2025

Commit

001f6ce

verified ·

1 Parent(s): 8d6e351

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -47

app.py CHANGED Viewed

@@ -29,25 +29,25 @@ def mean_absolute_percentage_error(y_true, y_pred):
     y_true, y_pred = np.array(y_true), np.array(y_pred)
     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
-# Load main dataset
-logging.info("Loading main dataset...")
-data = pd.read_csv('train_data.csv')
 # Load engagement_metrics.json
 logging.info("Loading engagement metrics...")
-with open('engagement_metrics.json', 'r') as f:
-    engagement_metrics = json.load(f)
-# Convert engagement metrics to DataFrame
-engagement_df = pd.json_normalize(engagement_metrics)
 # Load solved.json (hashtags and captions)
 logging.info("Loading solved.json...")
-with open('solved.json', 'r') as f:
-    solved_data = json.load(f)
-# Convert solved.json to DataFrame
-solved_df = pd.json_normalize(solved_data)
 # Check for required columns in engagement data
 required_columns = ['posting_time', 'likes', 'comments', 'shares']
@@ -69,15 +69,14 @@ engagement_df.fillna({
 # Calculate engagement_rate
 engagement_df['engagement_rate'] = engagement_df['likes'] + engagement_df['comments'] + engagement_df['shares']
-# Convert posting_time to datetime in both datasets
 logging.info("Converting posting_time to datetime...")
-data['posting_time'] = pd.to_datetime(data['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
 engagement_df['posting_time'] = pd.to_datetime(engagement_df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
-# Ensure 'caption' is treated as a string column in the main dataset
-data['caption'] = data['caption'].astype(str)
-# Extract hashtags from the caption column in the main dataset
 def extract_hashtags(caption):
     try:
         # Convert the caption string to a dictionary
@@ -89,38 +88,38 @@ def extract_hashtags(caption):
         return []
 # Apply the function to the caption column
-data['hashtags'] = data['caption'].apply(extract_hashtags)
-# Filter out rows with invalid posting_time in the main dataset
-data = data[data['posting_time'].notna()]
-# Convert to Unix timestamp in the main dataset (for time-based operations)
 logging.info("Converting posting_time to Unix timestamp...")
-data['posting_time_encoded'] = data['posting_time'].astype(int) / 10**9
-# Ensure required columns exist in the main dataset
-if 'content_type' not in data.columns:
-    data['content_type'] = 'photo'  # Default value (adjust based on your data)
-if 'media_type' not in data.columns:
-    data['media_type'] = 'image'  # Default value (adjust based on your data)
-# Encode categorical columns in the main dataset
 label_encoder = LabelEncoder()
-data['content_type_encoded'] = label_encoder.fit_transform(data['content_type'])
-data['media_type_encoded'] = label_encoder.fit_transform(data['media_type'])
-# Calculate sentiment for captions in the main dataset
 logging.info("Performing sentiment analysis on captions...")
-data['caption_sentiment'] = data['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
 # Use caption sentiment as the overall sentiment
-data['sentiment'] = data['caption_sentiment']
-# Feature Engineering in the main dataset
 logging.info("Performing feature engineering...")
-data['caption_length'] = data['caption'].apply(len)
-data['hashtag_count'] = data['hashtags'].apply(len)
 # Analyze engagement data separately
 logging.info("Analyzing engagement data separately...")
@@ -189,11 +188,11 @@ else:
             logging.info(f"ARIMA Model: MAPE: {mape:.4f}")
 # Ensure 'hashtags' column is properly formatted
-data['hashtags'] = data['hashtags'].apply(lambda x: x if isinstance(x, list) and len(x) > 0 else ['no_hashtag'])
-# Recommendation System: Hashtag and Keyword Recommendations (using main dataset)
 logging.info("Training recommendation system for hashtags...")
-hashtags = data['hashtags'].apply(lambda x: ' '.join(x))  # Convert list of hashtags to a single string
 # Check if hashtags are empty
 if hashtags.str.strip().eq('').all():
@@ -207,21 +206,21 @@ else:
         sim_scores = list(enumerate(cosine_sim[post_index]))
         sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
         top_indices = [i[0] for i in sim_scores[1:top_n+1]]
-        return data.iloc[top_indices]['hashtags']
     # Example: Recommend hashtags for the first post
     logging.info("Example Hashtag Recommendations:")
     print(recommend_hashtags(0))
-# Sentiment Analysis: Audience Reactions (using main dataset)
 logging.info("Performing sentiment analysis on captions...")
-data['sentiment_category'] = data['sentiment'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
 logging.info("Sentiment Analysis Results:")
-print(data['sentiment_category'].value_counts())
-# Niche Trend Analysis (using main dataset)
 logging.info("Analyzing niche trends...")
-niche_trends = data.groupby('content_type')['sentiment'].mean().sort_values(ascending=False)
 logging.info("Top Performing Content Types by Sentiment:")
 print(niche_trends)

     y_true, y_pred = np.array(y_true), np.array(y_pred)
     return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
 # Load engagement_metrics.json
 logging.info("Loading engagement metrics...")
+try:
+    with open('engagement_metrics.json', 'r') as f:
+        engagement_metrics = json.load(f)
+    engagement_df = pd.json_normalize(engagement_metrics)
+except FileNotFoundError:
+    logging.error("engagement_metrics.json not found. Please ensure the file exists.")
+    exit()
 # Load solved.json (hashtags and captions)
 logging.info("Loading solved.json...")
+try:
+    with open('solved.json', 'r') as f:
+        solved_data = json.load(f)
+    solved_df = pd.json_normalize(solved_data)
+except FileNotFoundError:
+    logging.error("solved.json not found. Please ensure the file exists.")
+    exit()
 # Check for required columns in engagement data
 required_columns = ['posting_time', 'likes', 'comments', 'shares']
 # Calculate engagement_rate
 engagement_df['engagement_rate'] = engagement_df['likes'] + engagement_df['comments'] + engagement_df['shares']
+# Convert posting_time to datetime in engagement data
 logging.info("Converting posting_time to datetime...")
 engagement_df['posting_time'] = pd.to_datetime(engagement_df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
+# Ensure 'caption' is treated as a string column in solved data
+solved_df['caption'] = solved_df['caption'].astype(str)
+# Extract hashtags from the caption column in solved data
 def extract_hashtags(caption):
     try:
         # Convert the caption string to a dictionary
         return []
 # Apply the function to the caption column
+solved_df['hashtags'] = solved_df['caption'].apply(extract_hashtags)
+# Filter out rows with invalid posting_time in engagement data
+engagement_df = engagement_df[engagement_df['posting_time'].notna()]
+# Convert posting_time to Unix timestamp in engagement data (for time-based operations)
 logging.info("Converting posting_time to Unix timestamp...")
+engagement_df['posting_time_encoded'] = engagement_df['posting_time'].astype(int) / 10**9
+# Ensure required columns exist in the solved dataset
+if 'content_type' not in solved_df.columns:
+    solved_df['content_type'] = 'photo'  # Default value (adjust based on your data)
+if 'media_type' not in solved_df.columns:
+    solved_df['media_type'] = 'image'  # Default value (adjust based on your data)
+# Encode categorical columns in the solved dataset
 label_encoder = LabelEncoder()
+solved_df['content_type_encoded'] = label_encoder.fit_transform(solved_df['content_type'])
+solved_df['media_type_encoded'] = label_encoder.fit_transform(solved_df['media_type'])
+# Calculate sentiment for captions in the solved dataset
 logging.info("Performing sentiment analysis on captions...")
+solved_df['caption_sentiment'] = solved_df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
 # Use caption sentiment as the overall sentiment
+solved_df['sentiment'] = solved_df['caption_sentiment']
+# Feature Engineering in the solved dataset
 logging.info("Performing feature engineering...")
+solved_df['caption_length'] = solved_df['caption'].apply(len)
+solved_df['hashtag_count'] = solved_df['hashtags'].apply(len)
 # Analyze engagement data separately
 logging.info("Analyzing engagement data separately...")
             logging.info(f"ARIMA Model: MAPE: {mape:.4f}")
 # Ensure 'hashtags' column is properly formatted
+solved_df['hashtags'] = solved_df['hashtags'].apply(lambda x: x if isinstance(x, list) and len(x) > 0 else ['no_hashtag'])
+# Recommendation System: Hashtag and Keyword Recommendations (using solved dataset)
 logging.info("Training recommendation system for hashtags...")
+hashtags = solved_df['hashtags'].apply(lambda x: ' '.join(x))  # Convert list of hashtags to a single string
 # Check if hashtags are empty
 if hashtags.str.strip().eq('').all():
         sim_scores = list(enumerate(cosine_sim[post_index]))
         sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
         top_indices = [i[0] for i in sim_scores[1:top_n+1]]
+        return solved_df.iloc[top_indices]['hashtags']
     # Example: Recommend hashtags for the first post
     logging.info("Example Hashtag Recommendations:")
     print(recommend_hashtags(0))
+# Sentiment Analysis: Audience Reactions (using solved dataset)
 logging.info("Performing sentiment analysis on captions...")
+solved_df['sentiment_category'] = solved_df['sentiment'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
 logging.info("Sentiment Analysis Results:")
+print(solved_df['sentiment_category'].value_counts())
+# Niche Trend Analysis (using solved dataset)
 logging.info("Analyzing niche trends...")
+niche_trends = solved_df.groupby('content_type')['sentiment'].mean().sort_values(ascending=False)
 logging.info("Top Performing Content Types by Sentiment:")
 print(niche_trends)