Spaces:
Sleeping
Sleeping
| import os | |
| import numpy as np | |
| from typing import List | |
| from dotenv import load_dotenv | |
| from supabase import create_client | |
| from sentence_transformers import SentenceTransformer | |
# Read configuration from the process environment (.env files supported).
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
# Prefer the service-role key; fall back to the generic key if absent.
SUPABASE_KEY = os.getenv("SUPABASE_SERVICE_ROLE_KEY") or os.getenv("SUPABASE_KEY")
# Lazily-created shared encoder instance (same pattern as the profile code).
_model = None


def get_model():
    """Return the shared BAAI/bge-m3 encoder, loading it on first use."""
    global _model
    if _model is not None:
        return _model
    print("π₯ Loading BAAI/bge-m3 model for job embeddings...")
    _model = SentenceTransformer("BAAI/bge-m3")
    return _model
def get_supabase():
    """Build a Supabase client, or return None when credentials are missing."""
    if SUPABASE_URL and SUPABASE_KEY:
        return create_client(SUPABASE_URL, SUPABASE_KEY)
    print("β Missing Supabase credentials for job embeddings.")
    return None
| # -------- Embedding helpers (IDENTICAL LOGIC) -------- | |
def generate_embedding(text: str) -> List[float]:
    """Encode a single text into a normalized vector.

    Blank or missing input maps to a 1024-dimensional zero vector
    (1024 matches the bge-m3 output size used throughout this module).
    """
    if not (text and text.strip()):
        return [0.0] * 1024
    return get_model().encode(text, normalize_embeddings=True).tolist()
def generate_list_embedding(items: List[str]) -> List[float]:
    """Encode each item and return the element-wise mean of the vectors.

    An empty or missing list maps to a 1024-dimensional zero vector.
    """
    if not items:
        return [0.0] * 1024
    vectors = get_model().encode(items, normalize_embeddings=True)
    return np.mean(vectors, axis=0).tolist()
| # ---------------------------------------------------- | |
def safe_generate_and_store_job_embeddings(client, job_id: str) -> None:
    """
    Fetches job entities, generates entity-wise embeddings,
    and upserts them into job_embeddings table.

    Best-effort: embedding/storage failures are logged, never raised.
    """
    print(f"𧬠Generating job embeddings for Job: {job_id}")

    # 1. Pull the extracted-entities row for this job.
    response = (
        client.table("jobs_entities")
        .select("*")
        .eq("job_id", job_id)
        .execute()
    )
    if not response.data:
        print(f"β οΈ Job entities not found for job_id={job_id}")
        return

    record = response.data[0]

    # 2. Normalize a stored field into a clean list of strings.
    def as_list(raw):
        # Accept a real list, a comma-separated string, or nothing at all.
        if not raw:
            return []
        if isinstance(raw, list):
            return raw
        if isinstance(raw, str):
            return [piece.strip() for piece in raw.split(",") if piece.strip()]
        return []

    skill_items = as_list(record.get("skills"))
    tech_items = as_list(record.get("technical_skills"))
    tool_items = as_list(record.get("tools"))
    cert_items = as_list(record.get("certifications"))
    experience_text = record.get("experience") or ""
    education_text = record.get("education") or ""

    try:
        # 3. Build one embedding per entity group.
        payload = {
            "job_id": job_id,
            "skills": generate_list_embedding(skill_items),
            "technical_skills": generate_list_embedding(tech_items),
            "tools": generate_list_embedding(tool_items),
            "work_experience": generate_embedding(experience_text),
            "education": generate_embedding(education_text),
            "certifications": generate_list_embedding(cert_items),
            # Postgres casts the literal string 'now()' to the current
            # timestamp on insert — presumably intentional; verify schema.
            "updated_at": "now()",
        }

        # 4. Insert-or-replace the row keyed on job_id.
        client.table("job_embeddings").upsert(payload).execute()
        print(f"β Job embeddings stored for job_id={job_id}")
    except Exception as e:
        print(f"β Job embedding generation failed: {e}")