File size: 3,005 Bytes
79f3cde
 
 
 
 
 
 
 
 
 
 
 
58e2bc7
79f3cde
 
 
 
 
58e2bc7
79f3cde
58e2bc7
79f3cde
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58e2bc7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
"""
Candidate Sourcing Engine

Generates Google X-ray LinkedIn search queries from a job description.
Uses LLM to extract keywords and construct optimized boolean search queries.
"""

import urllib.parse
from typing import Optional

from .feature_extractor import LLMClient, _extract_json
from .prompts.sourcing import XRAY_QUERY_GENERATION_PROMPT
from .web_search import LinkedInSearcher


class CandidateSourcer:
    """Generates Google X-ray search queries to find candidates on LinkedIn.

    Combines an LLM client (used to turn a job description into search
    queries) with a LinkedIn searcher (used to run those queries). Both
    collaborators can be injected; when omitted, default instances are built.
    """

    # Prefix of the clickable search link attached to each generated query.
    _GOOGLE_SEARCH_URL = "https://www.google.com/search?q="

    def __init__(self, llm_client: Optional[LLMClient] = None, searcher: Optional[LinkedInSearcher] = None):
        """Store the collaborators, constructing defaults for any not supplied."""
        self.llm = llm_client or LLMClient()
        self.searcher = searcher or LinkedInSearcher()

    @staticmethod
    def _usable_queries(result: dict) -> list:
        """Return the entries of ``result["queries"]`` that hold a usable query.

        The payload is produced by an LLM, so it is not trusted to follow the
        requested schema. ``dict.get`` only falls back to its default when the
        key is absent, so an explicit ``"queries": null`` must be handled here.
        Entries that are not dicts, or whose ``query`` is missing, not a
        string, or blank, are skipped instead of raising.
        """
        entries = result.get("queries")
        if not isinstance(entries, list):
            return []
        return [
            entry
            for entry in entries
            if isinstance(entry, dict)
            and isinstance(entry.get("query"), str)
            and entry["query"].strip()
        ]

    def generate_queries(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
    ) -> dict:
        """Generate X-ray search queries from a job description.

        Args:
            job_description: Text of the role to source candidates for.
            location: Target location substituted into the prompt.
            industry: Optional industry hint; sent as "Not specified" if empty.
            compensation_band: Optional compensation hint; sent as
                "Not specified" if empty.
            company_stage: Optional company-stage hint; sent as
                "Not specified" if empty.

        Returns:
            The parsed LLM response. It is expected to contain: analysis,
            queries (each usable entry gains a ``google_url``),
            boolean_strings, sourcing_tips. Entries without a usable query
            string are left untouched.
        """
        prompt = XRAY_QUERY_GENERATION_PROMPT.format(
            job_description=job_description,
            location=location,
            industry=industry or "Not specified",
            compensation_band=compensation_band or "Not specified",
            company_stage=company_stage or "Not specified",
        )

        response = self.llm.complete(prompt, temperature=0.3)
        # NOTE(review): assumes _extract_json returns a dict — confirm against
        # its implementation; a non-dict result is not handled here.
        result = _extract_json(response)

        # Enrich each usable query with a clickable Google search URL. The
        # entries are the same dict objects held by ``result``, so the update
        # is visible in the returned payload.
        for entry in self._usable_queries(result):
            entry["google_url"] = self._GOOGLE_SEARCH_URL + urllib.parse.quote_plus(entry["query"])

        return result

    def find_candidates(
        self,
        job_description: str,
        location: str = "Bangalore",
        industry: str = "",
        compensation_band: str = "",
        company_stage: str = "",
        max_queries: int = 3,
    ) -> dict:
        """Generate queries AND run live web search to find candidate profiles.

        Args:
            job_description: Text of the role to source candidates for.
            location: Target location substituted into the prompt.
            industry: Optional industry hint.
            compensation_band: Optional compensation hint.
            company_stage: Optional company-stage hint.
            max_queries: Forwarded to the searcher as its ``max_queries``
                argument.

        Returns:
            The same dict as generate_queries() plus a 'candidates' entry
            holding whatever the searcher returns; intended shape:
            [{name, title, company, linkedin_url, snippet, source_query, matched_queries}]
        """
        result = self.generate_queries(
            job_description=job_description,
            location=location,
            industry=industry,
            compensation_band=compensation_band,
            company_stage=company_stage,
        )

        # Only well-formed, non-blank query strings are sent to the searcher,
        # so malformed LLM entries neither crash the run nor waste a search.
        raw_queries = [entry["query"] for entry in self._usable_queries(result)]

        result["candidates"] = self.searcher.search_candidates(
            queries=raw_queries,
            max_queries=max_queries,
        )
        return result