# File: iris_backend/backend/src/services/verify_labels.py
# Uploaded by: Saandraahh
# Commit 4b3a33f: "Implemented clustering"
import sys
import os
from supabase import create_client, Client
from dotenv import load_dotenv
# Append the directory two levels above this file's own folder to the import path.
# NOTE(review): the previous comment said "backend/src"; if this file lives in
# backend/src/services, '../../' resolves to backend/ instead — confirm intent.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
# Load variables from a .env file (if present) into os.environ before
# verify_labels() reads the Supabase credentials from it.
load_dotenv()
def verify_labels():
    """Print a report of the cluster labels stored in the ``profiles`` table.

    Credentials are read from the environment: ``SUPABASE_URL`` plus
    ``SUPABASE_SERVICE_ROLE_KEY`` (falling back to ``SUPABASE_KEY``). Every
    profile whose ``cluster_label`` is not null is fetched, ordered by label,
    and two sections are printed to stdout:

    * a table with the first 15 rows (name, headline, cluster label), and
    * each distinct label together with the number of profiles carrying it.

    Returns:
        None. The function returns early, after printing a warning, when the
        credentials are missing or when the query yields no rows.
    """
    # Local import keeps this function self-contained; Counter replaces the
    # repeated list.count() calls used to tally labels.
    from collections import Counter

    print("🔍 Fetching generated cluster labels from database...")

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")
    if not url or not key:
        # Fail with a clear message instead of handing None to create_client.
        print("⚠️ Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY / SUPABASE_KEY in environment.")
        return

    client: Client = create_client(url, key)
    resp = (
        client.table("profiles")
        .select("full_name, headline, cluster_label")
        .not_.is_("cluster_label", "null")
        .order("cluster_label")
        .execute()
    )

    # Treat a missing response, a missing/None payload and an empty result
    # list the same way: there is nothing to report.
    rows = getattr(resp, "data", None)
    if not rows:
        print("⚠️ No cluster labels found or database error.")
        return

    print(f"\n{'Name':<25} | {'Original Headline':<35} | {'Cluster Label'}")
    print("-" * 85)

    for row in rows[:15]:  # Show first 15
        # Truncate so long values do not break the column alignment.
        name = (row.get('full_name') or "Unknown")[:25]
        headline = (row.get('headline') or "N/A")[:35]
        label = row.get('cluster_label') or "Unknown"
        print(f"{name:<25} | {headline:<35} | {label}")

    # Tally every non-empty label in a single pass over the rows.
    label_counts = Counter(
        row.get('cluster_label') for row in rows if row.get('cluster_label')
    )

    print("\n📦 Distinct Talent Pools (Clusters):")
    for idx, label in enumerate(sorted(label_counts), 1):
        print(f"{idx}. {label} ({label_counts[label]} candidates)")
# Run the label report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    verify_labels()