File size: 3,076 Bytes
0914e96 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File: ai-service/scripts/export_training_data.py (FINAL VERSION)
import pandas as pd
from sqlalchemy import create_engine, text
import os
from dotenv import load_dotenv
import sys
# Get the root directory of the ai-service
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(ROOT_DIR)
load_dotenv(dotenv_path=os.path.join(ROOT_DIR, '.env'))
def export_data():
"""Connects to the database and saves the training data to a CSV file."""
print("--- Starting Data Export Process ---")
db_url = os.getenv("DATABASE_URL")
if not db_url:
print("π΄ ERROR: DATABASE_URL not found.")
return
# SQL Query waisi hi rahegi
sql_query = """
SELECT
cs.engagement_rate, ip.follower_count, p.amount AS payment_amount,
CASE
WHEN c.title ILIKE '%tech%' OR c.description ILIKE '%tech%' THEN 'Tech'
WHEN c.title ILIKE '%fashion%' OR c.description ILIKE '%fashion%' THEN 'Fashion'
WHEN c.title ILIKE '%food%' OR c.description ILIKE '%food%' THEN 'Food'
WHEN c.title ILIKE '%gaming%' OR c.description ILIKE '%gaming%' THEN 'Gaming'
ELSE 'General'
END AS campaign_niche,
CASE
WHEN c.content_guidelines ILIKE '%reel%' THEN 'Reel'
WHEN c.content_guidelines ILIKE '%story%' THEN 'Story'
ELSE 'Post'
END AS content_format
FROM public.campaign_submissions cs
JOIN public.campaigns c ON cs.campaign_id = c.id
JOIN public.payments p ON c.id = p.campaign_id AND cs.influencer_id = p.influencer_id
JOIN public.influencer_profiles ip ON cs.influencer_id = ip.profile_id
WHERE cs.engagement_rate IS NOT NULL AND ip.follower_count > 0 AND p.amount IS NOT NULL;
"""
try:
print("Connecting to Supabase to fetch live data...")
engine = create_engine(db_url)
# === β¨ THE FIX IS HERE β¨ ===
# Hum ab seedhe connection ka istemal karke data nikalenge
with engine.connect() as connection:
# `text()` function zaroori hai SQLAlchemy ke naye versions ke liye
result = connection.execute(text(sql_query))
rows = result.fetchall() # Saari rows ko ek list mein nikaalo
# Agar koi data nahi mila
if not rows:
print("β οΈ WARNING: No training data found in the database. An empty CSV will be created.")
df = pd.DataFrame()
else:
# Us list of rows se DataFrame banao
df = pd.DataFrame(rows, columns=result.keys())
# === β¨ FIX ENDS HERE β¨ ===
print(f"β
Fetched {len(df)} records from the database.")
except Exception as e:
print(f"π΄ ERROR fetching data: {e}")
return
# Data ko /data folder mein save karo
output_path = os.path.join(ROOT_DIR, 'data', 'earnings_training_data.csv')
df.to_csv(output_path, index=False)
print(f"π Success! Data saved to {output_path}")
if __name__ == '__main__':
export_data() |