"""Generate entity-wise embeddings for a job and store them in Supabase."""

import os
from typing import List

import numpy as np
from dotenv import load_dotenv
from supabase import create_client
from sentence_transformers import SentenceTransformer

# Load env
load_dotenv()

SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_SERVICE_ROLE_KEY") or os.environ.get("SUPABASE_KEY")

# Length of the all-zero fallback vector returned for empty input. It has to
# match the length of the vectors produced by the model loaded in get_model()
# (BAAI/bge-m3 dense embeddings are 1024-dimensional).
EMBEDDING_DIM = 1024

# Singleton model (same pattern as profile code)
_model = None


def get_model():
    """Return the shared SentenceTransformer, loading it on first use.

    The model is cached in the module-level ``_model`` so the (expensive)
    load happens at most once per process.
    """
    global _model
    if _model is None:
        print("📥 Loading BAAI/bge-m3 model for job embeddings...")
        _model = SentenceTransformer("BAAI/bge-m3")
    return _model


def get_supabase():
    """Create a Supabase client from the environment credentials.

    Returns:
        A client built with ``create_client``, or ``None`` (after printing an
        error) when ``SUPABASE_URL`` or the key is missing.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        print("❌ Missing Supabase credentials for job embeddings.")
        return None
    return create_client(SUPABASE_URL, SUPABASE_KEY)


# -------- Embedding helpers (IDENTICAL LOGIC) --------

def generate_embedding(text: str) -> List[float]:
    """Embed a single piece of text.

    Args:
        text: The text to embed.

    Returns:
        The normalized embedding as a list of floats, or a zero vector of
        length ``EMBEDDING_DIM`` when ``text`` is empty or whitespace-only.
    """
    if not text or not text.strip():
        return [0.0] * EMBEDDING_DIM

    model = get_model()
    embedding = model.encode(text, normalize_embeddings=True)
    return embedding.tolist()


def generate_list_embedding(items: List[str]) -> List[float]:
    """Embed a list of strings as the mean of their individual embeddings.

    Args:
        items: The strings to embed.

    Returns:
        The element-wise mean of the per-item normalized embeddings, or a
        zero vector of length ``EMBEDDING_DIM`` when ``items`` is empty.
        The mean itself is not re-normalized.
    """
    if not items:
        return [0.0] * EMBEDDING_DIM

    model = get_model()
    embeddings = model.encode(items, normalize_embeddings=True)
    mean_embedding = np.mean(embeddings, axis=0)
    return mean_embedding.tolist()

# ----------------------------------------------------


def _parse_list(val) -> List[str]:
    """Coerce a stored list field into a clean list of strings.

    A list is kept as-is except that non-string and blank entries are
    dropped (they would either break encoding or add a meaningless vector to
    the mean). A string is split on commas, with each part stripped and
    blanks dropped. Anything else, including ``None``, yields ``[]``.
    """
    if not val:
        return []
    if isinstance(val, list):
        return [x for x in val if isinstance(x, str) and x.strip()]
    if isinstance(val, str):
        return [x.strip() for x in val.split(",") if x.strip()]
    return []


def safe_generate_and_store_job_embeddings(client, job_id: str) -> None:
    """
    Fetches job entities, generates entity-wise embeddings,
    and upserts them into job_embeddings table.

    This is best-effort: a missing client, a failed fetch, a missing
    ``jobs_entities`` row, or a failure while embedding/upserting is reported
    with ``print`` and the function returns without raising.

    Args:
        client: Supabase client (as returned by ``get_supabase``); may be
            ``None`` when credentials are missing.
        job_id: Identifier used to look up the row in ``jobs_entities`` and
            stored as ``job_id`` in ``job_embeddings``.
    """
    print(f"🧬 Generating job embeddings for Job: {job_id}")

    # get_supabase() returns None when credentials are missing; bail out
    # instead of failing with AttributeError on the first table() call.
    if client is None:
        print("❌ Missing Supabase client for job embeddings.")
        return

    # 1. Fetch job entities
    try:
        resp = (
            client.table("jobs_entities")
            .select("*")
            .eq("job_id", job_id)
            .execute()
        )
    except Exception as e:
        # Keep the "safe" contract: a failed query is reported, not raised.
        print(f"❌ Failed to fetch job entities for job_id={job_id}: {e}")
        return

    if not resp.data:
        print(f"⚠️ Job entities not found for job_id={job_id}")
        return

    entities = resp.data[0]

    # 2. Parse list fields safely (same pattern)
    skills = _parse_list(entities.get("skills"))
    technical_skills = _parse_list(entities.get("technical_skills"))
    tools = _parse_list(entities.get("tools"))
    certifications = _parse_list(entities.get("certifications"))

    experience = entities.get("experience") or ""
    education = entities.get("education") or ""

    try:
        # 3. Generate embeddings (ENTITY-WISE)
        payload = {
            "job_id": job_id,
            "skills": generate_list_embedding(skills),
            "technical_skills": generate_list_embedding(technical_skills),
            "tools": generate_list_embedding(tools),
            "work_experience": generate_embedding(experience),
            "education": generate_embedding(education),
            "certifications": generate_list_embedding(certifications),
            # Sent as a plain string; presumably resolved by the database to
            # the current timestamp -- TODO confirm against the column type.
            "updated_at": "now()",
        }

        # 4. Upsert into job_embeddings
        client.table("job_embeddings").upsert(payload).execute()

        print(f"✅ Job embeddings stored for job_id={job_id}")

    except Exception as e:
        print(f"❌ Job embedding generation failed: {e}")