reachify-ai-service / scripts /export_performance_data.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
# File: ai-service/scripts/export_performance_data.py
import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
# Root directory ko path mein add karein taaki .env file mil sake
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_DIR)
load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
def export_performance_data():
"""
Connects to the Supabase database, fetches data from approved submissions,
and saves it to a CSV file for training the performance prediction model.
"""
print("--- Starting Performance Data Export Process ---")
db_url = os.getenv("DATABASE_URL")
if not db_url:
print("πŸ”΄ ERROR: DATABASE_URL not found. Please check your .env file in the ai-service root.")
return
# Yeh SQL query hamare AI model ke liye 'khana' (training data) nikalegi.
sql_query = """
SELECT
cs.likes,
cs.comments,
cs.caption,
ip.follower_count,
CASE
WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
ELSE 'General'
END AS campaign_niche,
CASE
WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
ELSE 'Post'
END AS content_format
FROM
public.campaign_submissions cs
JOIN
public.campaigns c ON cs.campaign_id = c.id
JOIN
public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
WHERE
cs.status = 'approved' -- Sirf approved submissions se seekhein
AND cs.likes IS NOT NULL -- Jin par likes ka data ho
AND cs.comments IS NOT NULL -- Jin par comments ka data ho
AND cs.caption IS NOT NULL -- Jin par caption ho
AND ip.follower_count > 0; -- Jin influencers ke followers pata ho
"""
try:
print("Connecting to Supabase to fetch performance data...")
engine = create_engine(db_url)
with engine.connect() as connection:
df = pd.DataFrame(connection.execute(text(sql_query)))
print(f"βœ… Fetched {len(df)} approved submission records from the database.")
except Exception as e:
print(f"πŸ”΄ ERROR fetching data: {e}")
return
if df.empty:
print("⚠️ No valid training data found. A blank CSV will be created.")
else:
# Feature Engineering: Caption ki lambai (length) ko ek feature banayein
df['caption_length'] = df['caption'].str.len()
# Sirf zaroori columns ko CSV me save karein
columns_to_save = ['likes', 'comments', 'follower_count', 'caption_length', 'campaign_niche', 'content_format']
# Agar koi column na ho (khaali df ke case mein), toh use ignore karein
df_to_save = df.reindex(columns=columns_to_save).fillna(0)
# Data ko /data folder mein save karein
output_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv')
df_to_save.to_csv(output_path, index=False)
print(f"πŸŽ‰ Success! Performance data saved to {output_path}")
if __name__ == '__main__':
export_performance_data()