| | """ |
| | Candidate Sourcing Engine |
| | |
| | Generates Google X-ray LinkedIn search queries from a job description. |
| | Uses LLM to extract keywords and construct optimized boolean search queries. |
| | """ |
| |
|
| | import urllib.parse |
| | from typing import Optional |
| |
|
| | from .feature_extractor import LLMClient, _extract_json |
| | from .prompts.sourcing import XRAY_QUERY_GENERATION_PROMPT |
| | from .web_search import LinkedInSearcher |
| |
|
| |
|
class CandidateSourcer:
    """Generates Google X-ray search queries to find candidates on LinkedIn.

    An LLM turns a job description into search queries; every usable query is
    paired with a ready-to-open Google search URL, and ``find_candidates``
    additionally hands the queries to a web searcher.
    """

    @staticmethod
    def _query_entries(result: dict) -> list:
        """Return the well-formed entries of ``result["queries"]``.

        The structure originates from LLM output, so its shape is not
        guaranteed: the key may be missing or null, entries may not be dicts,
        and "query" may be absent, empty, or not a string. Only dict entries
        whose "query" value is a non-empty string are returned. The returned
        dicts are the same objects held by ``result``, so callers may
        annotate them in place.
        """
        entries = result.get("queries")
        if not isinstance(entries, list):
            return []
        return [
            entry
            for entry in entries
            if isinstance(entry, dict)
            and isinstance(entry.get("query"), str)
            and entry["query"]
        ]

    @staticmethod
    def _google_url(raw_query: str) -> str:
        """Build the Google search URL for ``raw_query`` (form-encoded)."""
        return "https://www.google.com/search?q=" + urllib.parse.quote_plus(raw_query)

    def __init__(
        self,
        llm_client: "Optional[LLMClient]" = None,
        searcher: "Optional[LinkedInSearcher]" = None,
    ):
        """Store the collaborators, creating defaults for any not supplied.

        Args:
            llm_client: Client used to complete the query-generation prompt.
            searcher: Searcher used by ``find_candidates`` for live lookups.
        """
        # Any falsy argument (not only None) is replaced by a default instance.
        self.llm = llm_client or LLMClient()
        self.searcher = searcher or LinkedInSearcher()

    def generate_queries(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
    ) -> dict:
        """Generate X-ray search queries from a job description.

        Args:
            job_description: Text of the role to source candidates for.
            location: Location inserted into the prompt.
            industry: Optional industry; "Not specified" is sent when empty.
            compensation_band: Optional band; "Not specified" when empty.
            company_stage: Optional stage; "Not specified" when empty.

        Returns:
            dict with: analysis, queries (with google_url), boolean_strings,
            sourcing_tips. Malformed entries under "queries" are left
            untouched and simply receive no "google_url".
        """
        prompt = XRAY_QUERY_GENERATION_PROMPT.format(
            job_description=job_description,
            location=location,
            industry=industry or "Not specified",
            compensation_band=compensation_band or "Not specified",
            company_stage=company_stage or "Not specified",
        )

        response = self.llm.complete(prompt, temperature=0.3)
        # NOTE(review): assumes _extract_json returns a dict — confirm how it
        # reports unparseable responses.
        result = _extract_json(response)

        # Attach a clickable URL to each usable query, mutating it in place.
        for entry in self._query_entries(result):
            entry["google_url"] = self._google_url(entry["query"])

        return result

    def find_candidates(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
        max_queries: int = 3,
    ) -> dict:
        """Generate queries AND run live web search to find candidate profiles.

        Args:
            job_description: Text of the role to source candidates for.
            location: Location inserted into the prompt.
            industry: Optional industry hint for the prompt.
            compensation_band: Optional compensation hint for the prompt.
            company_stage: Optional company-stage hint for the prompt.
            max_queries: Forwarded to the searcher as ``max_queries``.

        Returns:
            The same dict as generate_queries() plus a 'candidates' list:
            [{name, title, company, linkedin_url, snippet, source_query,
            matched_queries}]
        """
        result = self.generate_queries(
            job_description=job_description,
            location=location,
            industry=industry,
            compensation_band=compensation_band,
            company_stage=company_stage,
        )

        # Only usable query strings are sent to the searcher; the same filter
        # decides which entries received a google_url above.
        raw_queries = [entry["query"] for entry in self._query_entries(result)]

        result["candidates"] = self.searcher.search_candidates(
            queries=raw_queries,
            max_queries=max_queries,
        )
        return result
| |
|