reachify-ai-service / scripts /export_training_data.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
# File: ai-service/scripts/export_training_data.py (FINAL VERSION)
import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
# Get the root directory of the ai-service
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_DIR)
load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
def export_data():
"""Connects to the database and saves the training data to a CSV file."""
print("--- Starting Data Export Process ---")
db_url = os.getenv("DATABASE_URL")
if not db_url:
print("πŸ”΄ ERROR: DATABASE_URL not found.")
return
# SQL Query waisi hi rahegi
sql_query = """
SELECT
cs.engagement_rate, ip.follower_count, p.amount AS payment_amount,
CASE
WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
ELSE 'General'
END AS campaign_niche,
CASE
WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
ELSE 'Post'
END AS content_format
FROM public.campaign_submissions cs
JOIN public.campaigns c ON cs.campaign_id = c.id
JOIN public.payments p ON c.id = p.campaign_id AND cs.influencer_id = p.influencer_id
JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
WHERE cs.engagement_rate IS NOT NULL AND ip.follower_count > 0 AND p.amount IS NOT NULL;
"""
try:
print("Connecting to Supabase to fetch live data...")
engine = create_engine(db_url)
# === ✨ THE FIX IS HERE ✨ ===
# Hum ab seedhe connection ka istemal karke data nikalenge
with engine.connect() as connection:
# `text()` function zaroori hai SQLAlchemy ke naye versions ke liye
result = connection.execute(text(sql_query))
rows = result.fetchall() # Saari rows ko ek list mein nikaalo
# Agar koi data nahi mila
if not rows:
print("⚠️ WARNING: No training data found in the database. An empty CSV will be created.")
df = pd.DataFrame()
else:
# Us list of rows se DataFrame banao
df = pd.DataFrame(rows, columns=result.keys())
# === ✨ FIX ENDS HERE ✨ ===
print(f"βœ… Fetched {len(df)} records from the database.")
except Exception as e:
print(f"πŸ”΄ ERROR fetching data: {e}")
return
# Data ko /data folder mein save karo
output_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
df.to_csv(output_path, index=False)
print(f"πŸŽ‰ Success! Data saved to {output_path}")
if __name__ == '__main__':
export_data()