Spaces:
Sleeping
Sleeping
# Upload the rows of merged_yt_data.csv to the Supabase "youtube" table.
#
# Steps: load credentials, read and clean the CSV, insert the rows in
# batches behind a progress bar, then print the table's row count.
import os

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from supabase import Client, create_client
from tqdm import tqdm

# Load SUPABASE_URL / SUPABASE_KEY from a local .env file when one exists.
# load_dotenv() does not override variables already set in the environment.
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# Fail early with a clear message instead of passing None to create_client.
if not SUPABASE_URL or not SUPABASE_KEY:
    raise SystemExit(
        "SUPABASE_URL and SUPABASE_KEY must be set in the environment or in a .env file"
    )

# Initialize the Supabase client.
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)

# Read the source CSV; malformed lines are skipped rather than aborting the run.
df = pd.read_csv("merged_yt_data.csv", on_bad_lines='skip')

# Data cleaning.
df = df.replace([np.inf, -np.inf], np.nan)  # infinities are not JSON-serializable
# NOTE(review): fillna("") also turns missing values in numeric columns into
# empty strings; confirm the target table's column types accept that.
df = df.fillna("")
df = df.drop_duplicates(subset=["video_id"])  # keep the first row per video_id

# Convert the DataFrame to a list of dictionaries, one per row.
data = df.to_dict(orient="records")
print(f"Total rows to insert: {len(data)}")

# Insert in batches with a progress bar.
BATCH_SIZE = 1000
failed_rows = 0
for start in tqdm(range(0, len(data), BATCH_SIZE), desc="Uploading to Supabase", unit="batch"):
    batch = data[start : start + BATCH_SIZE]
    try:
        supabase.table("youtube").insert(batch).execute()
    except Exception as e:
        # Best effort: a failing batch is reported and the upload continues.
        failed_rows += len(batch)
        # tqdm.write prints without corrupting the active progress bar.
        tqdm.write(f"Error inserting batch {start}-{start + len(batch)}: {e}")

if failed_rows:
    print(f"Rows in failed batches (not inserted): {failed_rows}")

# Get the row count from Supabase. count="exact" reports the total number of
# matching rows; limit(1) avoids downloading a page of rows just to read it.
responses = supabase.table("youtube").select("*", count="exact").limit(1).execute()
print(f"Total rows in Supabase: {responses.count}")