File size: 1,584 Bytes
4b3a33f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import sys
import os
from supabase import create_client, Client
from dotenv import load_dotenv

# Make the directory two levels above this script importable.
sys.path.append(
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
    )
)

# Pull settings from a .env file (if one is found) into the environment.
load_dotenv()

def verify_labels(preview_limit: int = 15) -> None:
    """Print a preview and a per-label summary of stored cluster labels.

    Queries the ``profiles`` table for rows whose ``cluster_label`` is not
    null (ordered by label), prints up to ``preview_limit`` of them as a
    table, then lists every distinct label with the number of rows that
    carry it.

    Credentials come from the environment: ``SUPABASE_URL`` plus
    ``SUPABASE_SERVICE_ROLE_KEY``, falling back to ``SUPABASE_KEY``. When a
    credential is missing, or the query yields no rows, a warning is
    printed and the function returns early.

    Args:
        preview_limit: Maximum number of rows shown in the preview table.
            Defaults to 15; negative values are treated as 0.
    """
    # Local import keeps this function self-contained.
    from collections import Counter

    print("🔍 Fetching generated cluster labels from database...")
    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
    if not url or not key:
        # Fail with a readable message instead of passing None to the client.
        print("⚠️ Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY/SUPABASE_KEY in environment.")
        return
    client: Client = create_client(url, key)

    resp = (
        client.table("profiles")
        .select("full_name, headline, cluster_label")
        .not_.is_("cluster_label", "null")
        .order("cluster_label")
        .execute()
    )

    # Covers a missing response, a missing/None ``data`` attribute and an
    # empty result list, so an empty table header is never printed.
    rows = getattr(resp, "data", None)
    if not rows:
        print("⚠️ No cluster labels found or database error.")
        return

    print(f"\n{'Name':<25} | {'Original Headline':<35} | {'Cluster Label'}")
    print("-" * 85)
    for profile in rows[:max(preview_limit, 0)]:
        # Truncate to the column widths so the table stays aligned.
        name = (profile.get('full_name') or "Unknown")[:25]
        headline = (profile.get('headline') or "N/A")[:35]
        label = profile.get('cluster_label') or "Unknown"
        print(f"{name:<25} | {headline:<35} | {label}")

    # Count each non-empty label in a single pass over the rows.
    raw_labels = (profile.get('cluster_label') for profile in rows)
    label_counts = Counter(label for label in raw_labels if label)
    print("\n📦 Distinct Talent Pools (Clusters):")
    for idx, (label, count) in enumerate(sorted(label_counts.items()), 1):
        print(f"{idx}. {label} ({count} candidates)")

# Run the label check only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    verify_labels()