Spaces:

Fred808
/

Insta-AI

Paused

App Files Files Community

Sam Fred committed on Jan 19, 2025

Commit

40fb94f

1 Parent(s): 555c6af

Commit

Browse files

Files changed (15) hide show

app.py +22 -371
competitors_data.csv → data/processed/competitors_data.csv +0 -0
competitors_data.json → data/raw/competitors_data.json +0 -0
engagement_metrics.json → data/raw/engagement_metrics.json +0 -0
solved.json → data/raw/solved.json +0 -0
scripts/analyze_engagement.py +18 -0
scripts/analyze_image.py +25 -0
scripts/train_engagement_rate.py +26 -0
scripts/train_promotion_strategy.py +29 -0
scripts/train_time_series.py +22 -0
scripts/train_viral_potential.py +29 -0
utils/image_processing.py +43 -0
utils/logging_utils.py +5 -0
utils/preprocessing.py +43 -0
utils/visualization.py +20 -0

app.py CHANGED Viewed

@@ -1,375 +1,26 @@
-import os
-import pandas as pd
-import numpy as np
-import json
-import logging
-import re
-import requests
-from io import BytesIO
-from PIL import Image
-import pytesseract
-from textblob import TextBlob
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from xgboost import XGBRegressor
-from sklearn.linear_model import LogisticRegression
-from sklearn.metrics import mean_absolute_error, accuracy_score
-from sklearn.preprocessing import LabelEncoder
-import torch
-from torchvision import transforms
-import matplotlib.pyplot as plt
-import seaborn as sns
-from collections import Counter
-import pickle
-from transformers import ResNetForImageClassification
-from prophet import Prophet
 # Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Set the working directory to a writable location
-WORKING_DIR = "/app"  # Use /app for temporary files
-os.makedirs(WORKING_DIR, exist_ok=True)
-os.chdir(WORKING_DIR)
-# Verify the current directory
-logging.info(f"Current working directory: {os.getcwd()}")
-# Cache file to store extracted text
-CACHE_FILE = os.path.join(WORKING_DIR, "image_text_cache.pkl")
-# Load cache if it exists
-if os.path.exists(CACHE_FILE):
-    with open(CACHE_FILE, "rb") as f:
-        cache = pickle.load(f)
-else:
-    cache = {}
-# Define mean_absolute_percentage_error function
-def mean_absolute_percentage_error(y_true, y_pred):
-    y_true, y_pred = np.array(y_true), np.array(y_pred)
-    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100
-# Load engagement_metrics.json (your company's data)
-logging.info("Loading your company's engagement metrics...")
-try:
-    with open('engagement_metrics.json', 'r') as f:
-        engagement_metrics = json.load(f)
-    your_df = pd.json_normalize(engagement_metrics)
-except FileNotFoundError:
-    logging.error("engagement_metrics.json not found. Please ensure the file exists.")
-    exit()
-# Load solved.json (your company's hashtags and captions)
-logging.info("Loading your company's solved data...")
-try:
-    with open('solved.json', 'r') as f:
-        solved_data = json.load(f)
-    solved_df = pd.json_normalize(solved_data)
-except FileNotFoundError:
-    logging.error("solved.json not found. Please ensure the file exists.")
-    exit()
-# Load competitor data from JSON
-logging.info("Loading competitor data from JSON...")
-try:
-    with open('competitors_data.json', 'r') as f:
-        competitor_data = json.load(f)
-    competitor_df = pd.json_normalize(competitor_data['eazylancer_posts'])
-except FileNotFoundError:
-    logging.error("competitors_data.json not found. Please ensure the file exists.")
-    exit()
-# Ensure required columns exist in your company's data
-required_columns = ['likes', 'comments', 'shares', 'posting_time', 'caption', 'hashtags']
-missing_columns = [col for col in required_columns if col not in your_df.columns]
-if missing_columns:
-    logging.warning(f"Missing required columns in your company's data: {missing_columns}")
-    for col in missing_columns:
-        if col in ['likes', 'comments', 'shares']:
-            your_df[col] = 0  # Fill with default value (integer)
-        elif col == 'caption':
-            your_df[col] = ''  # Fill with default value (empty string)
-        elif col == 'hashtags':
-            your_df[col] = [[] for _ in range(len(your_df))]  # Fill with default value (list of empty lists)
-    logging.info("Default values added for missing columns.")
-# Ensure required columns exist in competitor data
-required_columns = ['caption', 'hashtags', 'likes', 'comments', 'date']
-missing_columns = [col for col in required_columns if col not in competitor_df.columns]
-if missing_columns:
-    logging.warning(f"Missing required columns in competitor data: {missing_columns}")
-    for col in missing_columns:
-        if col == 'caption':
-            competitor_df[col] = ''  # Fill with default value (empty string)
-        elif col == 'hashtags':
-            competitor_df[col] = [[] for _ in range(len(competitor_df))]  # Fill with default value (list of empty lists)
-        else:
-            competitor_df[col] = 0  # Fill with default value (integer)
-    logging.info("Default values added for missing columns.")
-# Process your company's data
-logging.info("Processing your company's data...")
-your_df['posting_time'] = pd.to_datetime(your_df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
-your_df = your_df[your_df['posting_time'].notna()]
-your_df['engagement_rate'] = your_df['likes'] + your_df['comments'] + your_df['shares']
-your_df['caption_length'] = your_df['caption'].apply(len)
-your_df['hashtag_count'] = your_df['hashtags'].apply(len)
-your_df['caption_sentiment'] = your_df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
-your_df['sentiment'] = your_df['caption_sentiment']
-# Process competitor data
-logging.info("Processing competitor data...")
-competitor_df['posting_time'] = pd.to_datetime(competitor_df['date'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
-competitor_df = competitor_df[competitor_df['posting_time'].notna()]
-competitor_df['engagement_rate'] = competitor_df['likes'] + competitor_df['comments']
-competitor_df['caption_length'] = competitor_df['caption'].apply(len)
-competitor_df['hashtag_count'] = competitor_df['hashtags'].apply(len)
-competitor_df['caption_sentiment'] = competitor_df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
-competitor_df['sentiment'] = competitor_df['caption_sentiment']
-# Combine your company's data and competitor data for model training
-logging.info("Combining your company's data and competitor data for model training...")
-combined_df = pd.concat([your_df, competitor_df], ignore_index=True)
-# Encode categorical columns if they exist
-if 'content_type' in combined_df.columns and 'media_type' in combined_df.columns:
-    logging.info("Encoding categorical columns...")
-    label_encoder = LabelEncoder()
-    combined_df['content_type_encoded'] = label_encoder.fit_transform(combined_df['content_type'])
-    combined_df['media_type_encoded'] = label_encoder.fit_transform(combined_df['media_type'])
-    features = ['caption_length', 'hashtag_count', 'sentiment', 'content_type_encoded', 'media_type_encoded']
-else:
-    logging.warning("'content_type' or 'media_type' columns not found. Skipping encoding.")
-    features = ['caption_length', 'hashtag_count', 'sentiment']
-# Log the features being used
-logging.info(f"Features for model training: {features}")
-# Viral Potential Prediction
-logging.info("Training viral potential prediction model...")
-combined_viral_threshold = combined_df['engagement_rate'].quantile(0.9)
-combined_df['viral'] = combined_df['engagement_rate'].apply(lambda x: 1 if x >= combined_viral_threshold else 0)
-X = combined_df[features]
-y = combined_df['viral']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-viral_model = RandomForestClassifier(random_state=42)
-viral_model.fit(X_train, y_train)
-y_pred = viral_model.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-logging.info(f"Viral Potential Model Accuracy: {accuracy:.4f}")
-# Engagement Rate Prediction
-logging.info("Training engagement rate prediction model...")
-X = combined_df[features]
-y = combined_df['engagement_rate']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-engagement_model = XGBRegressor(random_state=42)
-engagement_model.fit(X_train, y_train)
-y_pred = engagement_model.predict(X_test)
-mae = mean_absolute_error(y_test, y_pred)
-logging.info(f"Engagement Rate Prediction Model - MAE: {mae:.4f}")
-# Promotion Strategy
-logging.info("Training promotion prediction model...")
-promotion_threshold = combined_df['engagement_rate'].quantile(0.8)
-combined_df['promote'] = combined_df['engagement_rate'].apply(lambda x: 1 if x >= promotion_threshold else 0)
-X = combined_df[features]
-y = combined_df['promote']
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-promotion_model = LogisticRegression(random_state=42)
-promotion_model.fit(X_train, y_train)
-y_pred = promotion_model.predict(X_test)
-accuracy = accuracy_score(y_test, y_pred)
-logging.info(f"Promotion Prediction Model Accuracy: {accuracy:.4f}")
-# Sentiment Analysis
-logging.info("Performing sentiment analysis on captions...")
-combined_df['sentiment_category'] = combined_df['sentiment'].apply(lambda x: 'Positive' if x > 0 else 'Negative' if x < 0 else 'Neutral')
-logging.info("Sentiment Analysis Results:")
-print(combined_df['sentiment_category'].value_counts())
-# Niche Trend Analysis
-logging.info("Analyzing niche trends...")
-if 'content_type' in combined_df.columns:
-    niche_trends = combined_df.groupby('content_type')['sentiment'].mean().sort_values(ascending=False)
-    logging.info("Top Performing Content Types by Sentiment:")
-    print(niche_trends)
-else:
-    logging.warning("'content_type' column not found. Skipping niche trend analysis.")
-# Trending Hashtags
-logging.info("Analyzing trending hashtags...")
-trending_hashtags = combined_df['hashtags'].explode().value_counts().head(10)
-logging.info("Top 10 Trending Hashtags:")
-print(trending_hashtags)
-# Trending Keywords
-logging.info("Analyzing trending keywords in captions...")
-words = combined_df['caption'].apply(lambda x: re.findall(r'\b\w+\b', x.lower())).explode()
-trending_keywords = Counter(words).most_common(10)
-logging.info("Top 10 Trending Keywords in Captions:")
-print(trending_keywords)
-# Engagement Heatmap by Time of Day (using combined data)
-logging.info("Creating engagement heatmap by time of day...")
-combined_df['hour'] = combined_df['posting_time'].dt.hour
-engagement_by_hour = combined_df.groupby('hour')['engagement_rate'].mean().reset_index()
-plt.figure(figsize=(10, 6))
-sns.heatmap(engagement_by_hour.pivot_table(index='hour', values='engagement_rate'), annot=True, cmap='YlGnBu')
-plt.title('Engagement Heatmap by Time of Day')
-plt.xlabel('Engagement Rate')
-plt.ylabel('Hour of Day')
-plt.show()
-def resize_image(image, max_size=(800, 600)):
-    """Resize an image to the specified maximum size."""
-    image.thumbnail(max_size)
-    return image
-# Function to extract text from an image
-def extract_text_from_image(image):
-    """Extract text from an image using OCR."""
-    try:
-        # Resize the image
-        image = resize_image(image)
-        # Extract text using pytesseract
-        text = pytesseract.image_to_string(image)
-        return text
-    except Exception as e:
-        logging.error(f"Error extracting text from image: {e}")
-        return ""
-# Function to analyze image content using a pre-trained model
-def analyze_image(image):
-    """Analyze image content using a pre-trained model."""
-    try:
-        # Preprocess the image
-        preprocess = transforms.Compose([
-            transforms.Resize(256),
-            transforms.CenterCrop(224),
-            transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
-        ])
-        image_tensor = preprocess(image).unsqueeze(0)
-        # Use the pre-trained ResNet model
-        with torch.no_grad():
-            output = model(image_tensor)
-        return output
-    except Exception as e:
-        logging.error(f"Error analyzing image: {e}")
-        return None
-# Function to rate an image based on visual appeal and text quality
-def rate_image(image, caption):
-    """Rate an image based on visual appeal and text quality."""
-    # Analyze the image
-    image_analysis = analyze_image(image)
-    if image_analysis is None:
-        return 0.0  # Return a default score if analysis fails
-    # Visual appeal (placeholder for image analysis score)
-    visual_appeal = 0.5  # Replace with actual image analysis logic
-    # Text quality (based on caption sentiment and length)
-    text_quality = 0.3 * TextBlob(caption).sentiment.polarity + 0.2 * len(caption)
-    # Combine factors into a weighted score
-    score = 0.6 * visual_appeal + 0.4 * text_quality
-    return score
-# Example usage
 if __name__ == "__main__":
-    # Example user-uploaded image and caption
-    image_url = "https://instagram.flos5-1.fna.fbcdn.net/v/t51.2885-15/468436961_18431508154072851_4306676786324401005_n.jpg?stp=dst-jpg_e35_p1080x1080_sh0.08_tt6&_nc_ht=instagram.flos5-1.fna.fbcdn.net&_nc_cat=103&_nc_ohc=-9uxYx-M4WYQ7kNvgEWXUp_&_nc_gid=cfa5b625792446db8db41e38348d0aeb&edm=AEhyXUkBAAAA&ccb=7-5&ig_cache_key=MzUxMDU2MTc3OTc5NDcxOTY4NQ%3D%3D.3-ccb7-5&oh=00_AYC25Wc1tsFzU0DKvzQ5kLYcYx4KOIKAdOLSFki4xmvFUQ&oe=678FA376&_nc_sid=8f1549"  # Replace with actual image URL
-    caption = "This is a beautiful sunset!"  # Replace with actual caption
-    try:
-        # Download the image
-        response = requests.get(image_url)
-        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)
-        image = Image.open(BytesIO(response.content))
-        # Extract text from the image
-        extracted_text = extract_text_from_image(image)
-        logging.info(f"Extracted text: {extracted_text}")
-        # Rate the image
-        score = rate_image(image, caption)
-        logging.info(f"Image Rating: {score:.2f}")
-    except Exception as e:
-        logging.error(f"Error processing image: {e}")
-# Analyze engagement data separately
-logging.info("Analyzing engagement data separately...")
-engagement_summary = your_df.groupby('posting_time').agg({
-    'likes': 'sum',
-    'comments': 'sum',
-    'shares': 'sum',
-    'engagement_rate': 'mean'
-}).reset_index()
-# Convert posting_time to datetime in engagement data
-logging.info("Converting posting_time to datetime...")
-your_df['posting_time'] = pd.to_datetime(your_df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
-# Plot engagement rate over time
-plt.figure(figsize=(10, 6))
-plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'])
-plt.title('Engagement Rate Over Time')
-plt.xlabel('Time')
-plt.ylabel('Engagement Rate')
-plt.show()
-# Handle missing values in engagement data
-your_df.fillna({
-    'likes': 0,
-    'comments': 0,
-    'shares': 0
-}, inplace=True)
-# Calculate engagement_rate
-your_df['engagement_rate'] = your_df['likes'] + your_df['comments'] + your_df['shares']
- # Time-Series Model: Optimal Posting Times (using Prophet)
-logging.info("Training time-series model for optimal posting times using Prophet...")
-time_series_data = engagement_summary[['posting_time', 'engagement_rate']].rename(columns={'posting_time': 'ds', 'engagement_rate': 'y'})
-# Train Prophet model
-prophet_model = Prophet()
-prophet_model.fit(time_series_data)
-# Make future predictions
-future = prophet_model.make_future_dataframe(periods=30)  # Predict for the next 30 days
-forecast = prophet_model.predict(future)
-# Plot the forecast
-fig = prophet_model.plot(forecast)
-plt.title('Engagement Rate Forecast (Prophet)')
-plt.xlabel('Date')
-plt.ylabel('Engagement Rate')
-plt.show()
-# Evaluate the model
-from sklearn.metrics import mean_absolute_error
-y_true = time_series_data['y']
-y_pred = forecast.loc[:len(y_true)-1, 'yhat']  # Align predictions with true values
-mae = mean_absolute_error(y_true, y_pred)
-logging.info(f"Prophet Model - MAE: {mae:.4f}")
-logging.info("Analysis complete!")

+from utils.logging_utils import setup_logging
+from scripts.train_viral_potential import train_viral_potential
+from scripts.train_engagement_rate import train_engagement_rate
+from scripts.train_promotion_strategy import train_promotion_strategy
+from scripts.train_time_series import train_time_series
+from scripts.analyze_image import analyze_image_url
+from scripts.analyze_engagement import analyze_engagement
 # Set up logging
+setup_logging()
+# Main application logic
 if __name__ == "__main__":
+    # Train models (each trainer loads data/raw/engagement_metrics.json itself)
+    train_viral_potential()
+    train_engagement_rate()
+    train_promotion_strategy()
+    train_time_series()
+    # Analyze engagement data
+    analyze_engagement()
+    # Analyze an example image (placeholder URL and caption; replace with real values)
+    image_url = "https://example.com/path/to/image.jpg"
+    caption = "This is a beautiful sunset!"
+    analyze_image_url(image_url, caption)

competitors_data.csv → data/processed/competitors_data.csv RENAMED Viewed

File without changes

competitors_data.json → data/raw/competitors_data.json RENAMED Viewed

The diff for this file is too large to render. See raw diff

engagement_metrics.json → data/raw/engagement_metrics.json RENAMED Viewed

The diff for this file is too large to render. See raw diff

solved.json → data/raw/solved.json RENAMED Viewed

The diff for this file is too large to render. See raw diff

scripts/analyze_engagement.py ADDED Viewed

	@@ -0,0 +1,18 @@

+import pandas as pd
+from utils.visualization import plot_engagement_heatmap, plot_engagement_over_time
+from utils.preprocessing import preprocess_data
+def analyze_engagement():
+    """Analyze engagement data."""
+    # Load data
+    df = pd.read_json("data/raw/engagement_metrics.json")
+    df = preprocess_data(df)
+    # Group by hour for heatmap
+    df['hour'] = df['posting_time'].dt.hour
+    engagement_by_hour = df.groupby('hour')['engagement_rate'].mean().reset_index()
+    plot_engagement_heatmap(engagement_by_hour)
+    # Plot engagement over time
+    engagement_summary = df.groupby('posting_time').agg({'engagement_rate': 'mean'}).reset_index()
+    plot_engagement_over_time(engagement_summary)

scripts/analyze_image.py ADDED Viewed

	@@ -0,0 +1,25 @@

+import requests
+from PIL import Image
+from io import BytesIO
+from utils.image_processing import extract_text_from_image, analyze_image
+from utils.logging_utils import setup_logging
+def analyze_image_url(image_url, caption):
+    """Analyze an image from a URL."""
+    setup_logging()
+    try:
+        # Download the image
+        response = requests.get(image_url)
+        response.raise_for_status()
+        image = Image.open(BytesIO(response.content))
+        # Extract text from the image
+        extracted_text = extract_text_from_image(image)
+        logging.info(f"Extracted text: {extracted_text}")
+        # Analyze the image
+        image_analysis = analyze_image(image)
+        if image_analysis is not None:
+            logging.info("Image analysis completed successfully.")
+    except Exception as e:
+        logging.error(f"Error processing image: {e}")

scripts/train_engagement_rate.py ADDED Viewed

	@@ -0,0 +1,26 @@

+import pandas as pd
+from xgboost import XGBRegressor
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_absolute_error
+import joblib
+from utils.preprocessing import preprocess_data
+def train_engagement_rate():
+    """Train the engagement rate prediction model."""
+    # Load data
+    df = pd.read_json("data/raw/engagement_metrics.json")
+    df = preprocess_data(df)
+    # Train engagement rate model
+    X = df[['caption_length', 'hashtag_count', 'sentiment']]
+    y = df['engagement_rate']
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    engagement_model = XGBRegressor(random_state=42)
+    engagement_model.fit(X_train, y_train)
+    y_pred = engagement_model.predict(X_test)
+    mae = mean_absolute_error(y_test, y_pred)
+    print(f"Engagement Rate Prediction Model - MAE: {mae:.4f}")
+    # Save the model
+    joblib.dump(engagement_model, "models/engagement_rate_model.pkl")

scripts/train_promotion_strategy.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import pandas as pd
+from sklearn.linear_model import LogisticRegression
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+import joblib
+from utils.preprocessing import preprocess_data
+def train_promotion_strategy():
+    """Train the promotion strategy model."""
+    # Load data
+    df = pd.read_json("data/raw/engagement_metrics.json")
+    df = preprocess_data(df)
+    # Train promotion strategy model
+    promotion_threshold = df['engagement_rate'].quantile(0.8)
+    df['promote'] = df['engagement_rate'].apply(lambda x: 1 if x >= promotion_threshold else 0)
+    X = df[['caption_length', 'hashtag_count', 'sentiment']]
+    y = df['promote']
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    promotion_model = LogisticRegression(random_state=42)
+    promotion_model.fit(X_train, y_train)
+    y_pred = promotion_model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Promotion Prediction Model Accuracy: {accuracy:.4f}")
+    # Save the model
+    joblib.dump(promotion_model, "models/promotion_strategy_model.pkl")

scripts/train_time_series.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import pandas as pd
+from prophet import Prophet
+from sklearn.metrics import mean_absolute_error
+import joblib
+from utils.preprocessing import preprocess_data
+def train_time_series():
+    """Train the time-series model for optimal posting times."""
+    # Load data
+    df = pd.read_json("data/raw/engagement_metrics.json")
+    df = preprocess_data(df)
+    # Prepare time-series data
+    time_series_data = df.groupby('posting_time').agg({'engagement_rate': 'mean'}).reset_index()
+    time_series_data = time_series_data.rename(columns={'posting_time': 'ds', 'engagement_rate': 'y'})
+    # Train Prophet model
+    prophet_model = Prophet()
+    prophet_model.fit(time_series_data)
+    # Save the model
+    joblib.dump(prophet_model, "models/prophet_model.pkl")

scripts/train_viral_potential.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import pandas as pd
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import accuracy_score
+import joblib
+from utils.preprocessing import preprocess_data
+def train_viral_potential():
+    """Train the viral potential prediction model."""
+    # Load data
+    df = pd.read_json("data/raw/engagement_metrics.json")
+    df = preprocess_data(df)
+    # Train viral potential model
+    viral_threshold = df['engagement_rate'].quantile(0.9)
+    df['viral'] = df['engagement_rate'].apply(lambda x: 1 if x >= viral_threshold else 0)
+    X = df[['caption_length', 'hashtag_count', 'sentiment']]
+    y = df['viral']
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+    viral_model = RandomForestClassifier(random_state=42)
+    viral_model.fit(X_train, y_train)
+    y_pred = viral_model.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    print(f"Viral Potential Model Accuracy: {accuracy:.4f}")
+    # Save the model
+    joblib.dump(viral_model, "models/viral_potential_model.pkl")

utils/image_processing.py ADDED Viewed

	@@ -0,0 +1,43 @@

+from PIL import Image
+import pytesseract
+import torch
+from torchvision import transforms
+from transformers import ResNetForImageClassification
+import logging
+def resize_image(image, max_size=(800, 600)):
+    """Resize an image to the specified maximum size."""
+    image.thumbnail(max_size)
+    return image
+def extract_text_from_image(image):
+    """Extract text from an image using OCR."""
+    try:
+        image = resize_image(image)
+        text = pytesseract.image_to_string(image)
+        return text
+    except Exception as e:
+        logging.error(f"Error extracting text from image: {e}")
+        return ""
+def analyze_image(image):
+    """Analyze image content using a pre-trained model."""
+    try:
+        preprocess = transforms.Compose([
+            transforms.Resize(256),
+            transforms.CenterCrop(224),
+            transforms.ToTensor(),
+            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+        ])
+        image_tensor = preprocess(image).unsqueeze(0)
+        # Load ResNet model
+        model = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
+        model.eval()
+        with torch.no_grad():
+            output = model(image_tensor)
+        return output
+    except Exception as e:
+        logging.error(f"Error analyzing image: {e}")
+        return None

utils/logging_utils.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import logging
+def setup_logging():
+    """Set up logging configuration."""
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

utils/preprocessing.py ADDED Viewed

	@@ -0,0 +1,43 @@

+import pandas as pd
+from textblob import TextBlob
+from sklearn.preprocessing import LabelEncoder
+import logging
+def preprocess_data(df):
+    """Preprocess the input DataFrame."""
+    # Ensure required columns exist
+    required_columns = ['likes', 'comments', 'shares', 'posting_time', 'caption', 'hashtags']
+    missing_columns = [col for col in required_columns if col not in df.columns]
+    if missing_columns:
+        logging.warning(f"Missing required columns: {missing_columns}")
+        for col in missing_columns:
+            if col in ['likes', 'comments', 'shares']:
+                df[col] = 0  # Fill with default value (integer)
+            elif col == 'caption':
+                df[col] = ''  # Fill with default value (empty string)
+            elif col == 'hashtags':
+                df[col] = [[] for _ in range(len(df))]  # Fill with default value (list of empty lists)
+    # Convert posting_time to datetime
+    df['posting_time'] = pd.to_datetime(df['posting_time'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
+    df = df[df['posting_time'].notna()]
+    # Calculate engagement rate
+    df['engagement_rate'] = df['likes'] + df['comments'] + df['shares']
+    # Calculate caption length and hashtag count
+    df['caption_length'] = df['caption'].apply(len)
+    df['hashtag_count'] = df['hashtags'].apply(len)
+    # Calculate sentiment
+    df['caption_sentiment'] = df['caption'].apply(lambda x: TextBlob(x).sentiment.polarity)
+    df['sentiment'] = df['caption_sentiment']
+    # Encode categorical columns
+    if 'content_type' in df.columns and 'media_type' in df.columns:
+        label_encoder = LabelEncoder()
+        df['content_type_encoded'] = label_encoder.fit_transform(df['content_type'])
+        df['media_type_encoded'] = label_encoder.fit_transform(df['media_type'])
+    return df

utils/visualization.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import matplotlib.pyplot as plt
+import seaborn as sns
+def plot_engagement_heatmap(engagement_by_hour):
+    """Plot engagement heatmap by time of day."""
+    plt.figure(figsize=(10, 6))
+    sns.heatmap(engagement_by_hour.pivot_table(index='hour', values='engagement_rate'), annot=True, cmap='YlGnBu')
+    plt.title('Engagement Heatmap by Time of Day')
+    plt.xlabel('Engagement Rate')
+    plt.ylabel('Hour of Day')
+    plt.show()
+def plot_engagement_over_time(engagement_summary):
+    """Plot engagement rate over time."""
+    plt.figure(figsize=(10, 6))
+    plt.plot(engagement_summary['posting_time'], engagement_summary['engagement_rate'])
+    plt.title('Engagement Rate Over Time')
+    plt.xlabel('Time')
+    plt.ylabel('Engagement Rate')
+    plt.show()