|
|
|
|
|
|
|
|
import pandas as pd |
|
|
from sqlalchemy import create_engine, text |
|
|
import os |
|
|
from dotenv import load_dotenv |
|
|
import sys |
|
|
|
|
|
|
|
|
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
|
|
sys.path.append(ROOT_DIR) |
|
|
load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env')) |
|
|
|
|
|
def export_performance_data(): |
|
|
""" |
|
|
Connects to the Supabase database, fetches data from approved submissions, |
|
|
and saves it to a CSV file for training the performance prediction model. |
|
|
""" |
|
|
print("--- Starting Performance Data Export Process ---") |
|
|
db_url = os.getenv("DATABASE_URL") |
|
|
if not db_url: |
|
|
print("π΄ ERROR: DATABASE_URL not found. Please check your .env file in the ai-service root.") |
|
|
return |
|
|
|
|
|
|
|
|
sql_query = """ |
|
|
SELECT |
|
|
cs.likes, |
|
|
cs.comments, |
|
|
cs.caption, |
|
|
ip.follower_count, |
|
|
CASE |
|
|
WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech' |
|
|
WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion' |
|
|
WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food' |
|
|
WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming' |
|
|
ELSE 'General' |
|
|
END AS campaign_niche, |
|
|
CASE |
|
|
WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel' |
|
|
WHEN c.content_guidelines ILIKE '%story%' THEN 'Story' |
|
|
ELSE 'Post' |
|
|
END AS content_format |
|
|
FROM |
|
|
public.campaign_submissions cs |
|
|
JOIN |
|
|
public.campaigns c ON cs.campaign_id = c.id |
|
|
JOIN |
|
|
public.influencer_profiles ip ON cs.influencer_id = ip.profile_id |
|
|
WHERE |
|
|
cs.status = 'approved' -- Sirf approved submissions se seekhein |
|
|
AND cs.likes IS NOT NULL -- Jin par likes ka data ho |
|
|
AND cs.comments IS NOT NULL -- Jin par comments ka data ho |
|
|
AND cs.caption IS NOT NULL -- Jin par caption ho |
|
|
AND ip.follower_count > 0; -- Jin influencers ke followers pata ho |
|
|
""" |
|
|
try: |
|
|
print("Connecting to Supabase to fetch performance data...") |
|
|
engine = create_engine(db_url) |
|
|
|
|
|
with engine.connect() as connection: |
|
|
df = pd.DataFrame(connection.execute(text(sql_query))) |
|
|
|
|
|
print(f"β
Fetched {len(df)} approved submission records from the database.") |
|
|
|
|
|
except Exception as e: |
|
|
print(f"π΄ ERROR fetching data: {e}") |
|
|
return |
|
|
|
|
|
if df.empty: |
|
|
print("β οΈ No valid training data found. A blank CSV will be created.") |
|
|
else: |
|
|
|
|
|
df['caption_length'] = df['caption'].str.len() |
|
|
|
|
|
|
|
|
columns_to_save = ['likes', 'comments', 'follower_count', 'caption_length', 'campaign_niche', 'content_format'] |
|
|
|
|
|
df_to_save = df.reindex(columns=columns_to_save).fillna(0) |
|
|
|
|
|
|
|
|
output_path = os.path.join(ROOT_DIR, 'data', 'performance_training_data.csv') |
|
|
df_to_save.to_csv(output_path, index=False) |
|
|
print(f"π Success! Performance data saved to {output_path}") |
|
|
|
|
|
if __name__ == '__main__': |
|
|
export_performance_data() |