"""
Candidate Sourcing Engine

Generates Google X-ray LinkedIn search queries from a job description.
Uses LLM to extract keywords and construct optimized boolean search queries.
"""

import urllib.parse
from typing import Optional

from .feature_extractor import LLMClient, _extract_json
from .prompts.sourcing import XRAY_QUERY_GENERATION_PROMPT
from .web_search import LinkedInSearcher


class CandidateSourcer:
    """Generates Google X-ray search queries to find candidates on LinkedIn."""

    def __init__(
        self,
        llm_client: Optional[LLMClient] = None,
        searcher: Optional[LinkedInSearcher] = None,
    ):
        """Store the collaborators, creating default instances when omitted.

        Args:
            llm_client: Client used to complete the query-generation prompt.
                A new ``LLMClient()`` is created when not supplied.
            searcher: Object used to run the live candidate search.
                A new ``LinkedInSearcher()`` is created when not supplied.
        """
        self.llm = llm_client or LLMClient()
        self.searcher = searcher or LinkedInSearcher()

    @staticmethod
    def _query_entries(result: dict) -> list:
        """Return the dict entries of ``result["queries"]``, or ``[]``.

        The value comes from parsed LLM output, so it is not guaranteed to be
        well formed: the key may be missing, explicitly null, not a list, or
        contain entries that are not objects. ``dict.get(key, [])`` only
        covers the "missing" case, so the shape is checked explicitly here.
        """
        queries = result.get("queries")
        if not isinstance(queries, list):
            return []
        return [entry for entry in queries if isinstance(entry, dict)]

    def generate_queries(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
    ) -> dict:
        """Generate X-ray search queries from a job description.

        Formats ``XRAY_QUERY_GENERATION_PROMPT`` with the arguments, sends it
        to the LLM at temperature 0.3 and parses the reply with
        ``_extract_json``.

        Args:
            job_description: Job description text inserted into the prompt.
            location: Location inserted into the prompt.
            industry: Industry; sent as "Not specified" when empty.
            compensation_band: Compensation band; sent as "Not specified"
                when empty.
            company_stage: Company stage; sent as "Not specified" when empty.

        Returns:
            The parsed response dict, expected to contain: analysis, queries
            (with google_url), boolean_strings, sourcing_tips. Each dict
            entry under ``queries`` that has a non-empty ``query`` gains a
            ``google_url`` key; malformed entries are left untouched.
        """
        prompt = XRAY_QUERY_GENERATION_PROMPT.format(
            job_description=job_description,
            location=location,
            industry=industry or "Not specified",
            compensation_band=compensation_band or "Not specified",
            company_stage=company_stage or "Not specified",
        )

        response = self.llm.complete(prompt, temperature=0.3)
        # NOTE(review): assumes _extract_json returns a dict -- confirm
        # against its implementation.
        result = _extract_json(response)

        # Enrich each query with a clickable Google search URL
        for entry in self._query_entries(result):
            raw_query = entry.get("query", "")
            if raw_query:
                entry["google_url"] = (
                    "https://www.google.com/search?q="
                    + urllib.parse.quote_plus(raw_query)
                )

        return result

    def find_candidates(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
        max_queries: int = 3,
    ) -> dict:
        """Generate queries AND run live web search to find candidate profiles.

        Args:
            job_description: Forwarded to ``generate_queries``.
            location: Forwarded to ``generate_queries``.
            industry: Forwarded to ``generate_queries``.
            compensation_band: Forwarded to ``generate_queries``.
            company_stage: Forwarded to ``generate_queries``.
            max_queries: Forwarded to the searcher's ``search_candidates``.

        Returns:
            The same dict as generate_queries() plus a 'candidates' list:
            [{name, title, company, linkedin_url, snippet, source_query,
            matched_queries}]
        """
        result = self.generate_queries(
            job_description=job_description,
            location=location,
            industry=industry,
            compensation_band=compensation_band,
            company_stage=company_stage,
        )

        # Extract raw query strings for web search, skipping malformed or
        # empty entries.
        raw_queries = [
            entry["query"]
            for entry in self._query_entries(result)
            if entry.get("query")
        ]

        candidates = self.searcher.search_candidates(
            queries=raw_queries,
            max_queries=max_queries,
        )

        result["candidates"] = candidates
        return result