# Page header captured with this file (not code): Niketjain2002's picture
# Commit message: Add candidate sourcing live search: src/sourcing.py
# Commit: 58e2bc7 (verified)
"""
Candidate Sourcing Engine
Generates Google X-ray LinkedIn search queries from a job description.
Uses an LLM to extract keywords and construct optimized boolean search queries,
and can run those queries through a live web search to collect candidate profiles.
"""
import urllib.parse
from typing import Optional
from .feature_extractor import LLMClient, _extract_json
from .prompts.sourcing import XRAY_QUERY_GENERATION_PROMPT
from .web_search import LinkedInSearcher
class CandidateSourcer:
    """Generates Google X-ray search queries to find candidates on LinkedIn.

    Wraps an LLM client (used to turn a job description into search queries)
    and a LinkedIn searcher (used to run those queries against the web).
    """

    def __init__(self, llm_client: Optional[LLMClient] = None, searcher: Optional[LinkedInSearcher] = None):
        """Store the collaborators, default-constructing any that are not supplied.

        Args:
            llm_client: Client whose ``complete(prompt, temperature=...)`` is
                called to generate queries. A falsy value is replaced by
                ``LLMClient()``.
            searcher: Object whose ``search_candidates(queries=..., max_queries=...)``
                is called for live search. A falsy value is replaced by
                ``LinkedInSearcher()``.
        """
        self.llm = llm_client or LLMClient()
        self.searcher = searcher or LinkedInSearcher()

    @staticmethod
    def _query_entries(result: dict) -> list:
        """Return the usable entries of ``result["queries"]``.

        The value comes from LLM-produced JSON, so it may be missing, ``null``,
        or contain malformed items. Only dict entries carrying a non-empty
        string under ``"query"`` are returned; everything else is skipped
        instead of raising.
        """
        entries = result.get("queries")
        if not isinstance(entries, list):
            return []
        return [
            entry
            for entry in entries
            if isinstance(entry, dict)
            and isinstance(entry.get("query"), str)
            and entry["query"]
        ]

    def generate_queries(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
    ) -> dict:
        """Generate X-ray search queries from a job description.

        Formats ``XRAY_QUERY_GENERATION_PROMPT`` with the arguments, sends it to
        the LLM at temperature 0.3, and parses the response with
        ``_extract_json``.

        Args:
            job_description: Job description text inserted into the prompt.
            location: Location inserted into the prompt.
            industry: Inserted into the prompt; "Not specified" when empty.
            compensation_band: Inserted into the prompt; "Not specified" when empty.
            company_stage: Inserted into the prompt; "Not specified" when empty.

        Returns:
            The parsed LLM result. Expected keys (determined by the prompt, not
            enforced here): analysis, queries, boolean_strings, sourcing_tips.
            Every well-formed entry of ``queries`` additionally gets a
            ``google_url`` key holding a clickable Google search link.
        """
        prompt = XRAY_QUERY_GENERATION_PROMPT.format(
            job_description=job_description,
            location=location,
            industry=industry or "Not specified",
            compensation_band=compensation_band or "Not specified",
            company_stage=company_stage or "Not specified",
        )
        response = self.llm.complete(prompt, temperature=0.3)
        result = _extract_json(response)

        # Enrich each usable query in place with a clickable Google search URL.
        # Malformed entries are left untouched rather than aborting the call.
        for entry in self._query_entries(result):
            entry["google_url"] = (
                "https://www.google.com/search?q="
                + urllib.parse.quote_plus(entry["query"])
            )
        return result

    def find_candidates(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
        max_queries: int = 3,
    ) -> dict:
        """Generate queries AND run live web search to find candidate profiles.

        Args:
            job_description: Forwarded to ``generate_queries``.
            location: Forwarded to ``generate_queries``.
            industry: Forwarded to ``generate_queries``.
            compensation_band: Forwarded to ``generate_queries``.
            company_stage: Forwarded to ``generate_queries``.
            max_queries: Forwarded to the searcher's ``search_candidates``.

        Returns:
            The same dict as ``generate_queries()`` plus a ``candidates`` key
            holding whatever the searcher returns; expected shape is
            [{name, title, company, linkedin_url, snippet, source_query,
            matched_queries}].
        """
        result = self.generate_queries(
            job_description=job_description,
            location=location,
            industry=industry,
            compensation_band=compensation_band,
            company_stage=company_stage,
        )

        # Only well-formed, non-empty query strings are sent to the web search.
        raw_queries = [entry["query"] for entry in self._query_entries(result)]
        result["candidates"] = self.searcher.search_candidates(
            queries=raw_queries,
            max_queries=max_queries,
        )
        return result