"""Print a quick report of the cluster labels stored on the ``profiles`` table."""

import os
import sys
from collections import Counter

from dotenv import load_dotenv
from supabase import Client, create_client

# Add backend/src to path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))

# Load environment variables
load_dotenv()

# NOTE(review): the leading glyphs in the status strings below look like
# mis-encoded emoji. They are left untouched here because they are runtime
# output -- confirm the file's encoding before changing them.


def verify_labels() -> None:
    """Fetch labelled profiles and print a sample table plus per-label counts.

    Reads ``SUPABASE_URL`` and ``SUPABASE_SERVICE_ROLE_KEY`` (falling back to
    ``SUPABASE_KEY``) from the environment, queries the ``profiles`` table for
    rows whose ``cluster_label`` is not null (ordered by ``cluster_label``),
    then prints:

    * a table with the first 15 rows (name, headline, cluster label), and
    * every distinct cluster label with the number of rows carrying it.

    If the query yields no rows (or no usable response), a warning is printed
    and nothing else is shown.
    """
    print("šŸ” Fetching generated cluster labels from database...")

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
    client: Client = create_client(url, key)

    resp = (
        client.table("profiles")
        .select("full_name, headline, cluster_label")
        .not_.is_("cluster_label", "null")
        .order("cluster_label")
        .execute()
    )

    # Treat a missing response, a missing/None ``data`` attribute, and an
    # empty result list the same way: there is nothing to report.
    rows = getattr(resp, "data", None)
    if not rows:
        print("āš ļø No cluster labels found or database error.")
        return

    print(f"\n{'Name':<25} | {'Original Headline':<35} | {'Cluster Label'}")
    print("-" * 85)

    for profile in rows[:15]:  # Show first 15
        # Truncate to the column widths so the table stays aligned.
        name = (profile.get('full_name') or "Unknown")[:25]
        headline = (profile.get('headline') or "N/A")[:35]
        label = profile.get('cluster_label') or "Unknown"
        print(f"{name:<25} | {headline:<35} | {label}")

    # Count each non-empty label once, instead of re-scanning the list for
    # every distinct label.
    label_counts = Counter(
        profile.get('cluster_label')
        for profile in rows
        if profile.get('cluster_label')
    )

    print("\nšŸ“¦ Distinct Talent Pools (Clusters):")
    for idx, label in enumerate(sorted(label_counts), 1):
        print(f"{idx}. {label} ({label_counts[label]} candidates)")


if __name__ == "__main__":
    verify_labels()