# recruitment-intelligence/src/prompts/feature_extraction.py
# Niketjain2002: Upload src/prompts/feature_extraction.py with huggingface_hub
# (commit 7cbf44e, verified)
"""
LLM prompt templates for structured feature extraction.
These prompts are designed to extract factual, observable features
from job descriptions and resumes WITHOUT making subjective judgments.
The scoring happens in a separate layer.
"""
# Prompt template: extract structured, factual role features from a job description.
#
# Replacement fields (single braces): {job_description}, {company_stage},
# {industry}, {compensation_band}, {location}, {remote_type}.
# The doubled braces around the JSON schema are str.format() escapes; they
# render as single literal braces in the final prompt.
# NOTE(review): the rendering call site is not visible here -- confirm the
# template is filled via str.format() with exactly the six fields above.
#
# Top-level keys requested from the model: inferred_title, seniority_band,
# core_competencies, scope_complexity, domain_specificity, team_context,
# growth_trajectory_required, hard_requirements, location_requirement.
# Enumerated values are written as "a | b | c" alternatives inside the schema;
# the model is told to use null for anything it cannot determine.
ROLE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this job description.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
JOB DESCRIPTION:
{job_description}
COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Location: {location}
- Remote Type: {remote_type}
Extract the following as JSON. Use null for anything not determinable:
{{
"inferred_title": "string - job title",
"seniority_band": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
"core_competencies": [
{{
"skill": "string",
"proficiency_required": "basic | intermediate | advanced | expert",
"criticality": "must_have | strong_prefer | nice_to_have",
"years_implied": null or number
}}
],
"scope_complexity": {{
"system_scale": "small | medium | large | massive",
"ambiguity_level": "low | medium | high | very_high",
"cross_functional_dependency": "none | low | medium | high",
"decision_authority": "executor | contributor | driver | owner"
}},
"domain_specificity": 0.0 to 1.0,
"team_context": {{
"team_size": null or integer,
"team_maturity": "new | growing | established | restructuring",
"reporting_structure": "individual_contributor | tech_lead | people_manager"
}},
"growth_trajectory_required": true or false,
"hard_requirements": ["list of absolute must-haves mentioned"],
"location_requirement": "strict_onsite | hybrid_required | remote_ok | fully_remote"
}}
Rules:
- Only mark "must_have" if the JD explicitly says required/must/essential
- If years of experience mentioned, extract the number
- domain_specificity: 0.0 = generic role, 1.0 = requires deep niche expertise
- Be conservative: when uncertain between two levels, choose the lower one
"""
# Prompt template: extract structured, factual candidate features from a resume.
#
# Replacement field (single braces): {resume_text}.
# The doubled braces around the JSON schema are str.format() escapes; they
# render as single literal braces in the final prompt.
# NOTE(review): the rendering call site is not visible here -- confirm the
# template is filled via str.format(resume_text=...).
#
# Top-level keys requested from the model: candidate_name, experience_profile,
# skills_mentioned, education, certifications, trajectory_indicators,
# stability_indicators, impact_evidence, industry_experience,
# company_stage_experience, remote_experience_evident, employment_gaps_months.
#
# Each entry in the Rules section names the schema key it governs exactly as it
# appears in the schema (e.g. max_scope_of_impact), so the model can tie every
# rule to a concrete output field.
CANDIDATE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this resume.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
RESUME:
{resume_text}
Extract the following as JSON. Use null for anything not determinable.
For experience entries, extract ALL positions listed.
{{
"candidate_name": "string or null",
"experience_profile": {{
"total_years": number or null,
"positions": [
{{
"title": "string",
"company": "string",
"duration_months": number or null,
"start_year": number or null,
"end_year": number or null,
"is_current": boolean,
"seniority_estimate": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
"key_technologies": ["list"],
"quantified_achievements": ["list of achievements with numbers"],
"scope_indicators": ["team size", "user count", "revenue impact", etc.]
}}
]
}},
"skills_mentioned": ["complete list of technical and professional skills"],
"education": [
{{
"degree": "string",
"field": "string",
"institution": "string",
"year": number or null
}}
],
"certifications": ["list"],
"trajectory_indicators": {{
"career_velocity": "slow | steady | fast | exceptional",
"scope_progression": "flat | linear | accelerating | decelerating",
"role_type_consistency": "consistent | pivoting | scattered"
}},
"stability_indicators": {{
"avg_tenure_months": number,
"shortest_tenure_months": number,
"longest_tenure_months": number,
"tenure_trend": "decreasing | stable | increasing",
"total_moves": number
}},
"impact_evidence": {{
"quantified_achievements_count": number,
"max_scope_of_impact": "individual | team | org | company | industry",
"technical_depth_signals": number,
"leadership_signals": number
}},
"industry_experience": ["list of industries worked in"],
"company_stage_experience": ["startup | scaleup | enterprise | public | government | etc."],
"remote_experience_evident": boolean,
"employment_gaps_months": [list of gap durations, empty if none detected]
}}
Rules:
- Calculate tenure from dates when available, estimate otherwise
- career_velocity: based on how quickly titles/scope increased
- Count only EXPLICIT numbers as quantified_achievements
- Do not infer skills not mentioned
- For max_scope_of_impact, use the HIGHEST level evidenced
- Do not use university name/prestige as a signal (bias mitigation)
"""
# Prompt template: compare extracted role features against extracted candidate
# features and report matches, gaps, risks, and positive signals as JSON.
#
# Replacement fields (single braces): {role_features}, {candidate_features},
# {company_stage}, {industry}, {compensation_band}, {remote_type}.
# The doubled braces around the JSON schema are str.format() escapes; they
# render as single literal braces in the final prompt.
# NOTE(review): the rendering call site is not visible here -- confirm the
# template is filled via str.format() with exactly the six fields above, and
# how role_features / candidate_features are serialized before insertion.
#
# Top-level keys requested from the model: skill_match_analysis,
# seniority_alignment, experience_depth, context_fit, risk_flags,
# positive_signals, missing_information.
#
# NOTE(review): the coverage_ratio rule divides by total_must_haves and does not
# say what to return when the role lists no must-haves -- confirm how the
# downstream consumer expects that case to be reported.
# NOTE(review): "matched_preferred" is described as nice-to-have skills only;
# the role template's criticality values also include "strong_prefer" --
# confirm whether those are meant to be listed here too.
MATCH_ANALYSIS_PROMPT = """You are an analytical matching system. Given extracted features from a role and a candidate,
identify specific matches, gaps, and risks. Be factual and conservative.
ROLE FEATURES:
{role_features}
CANDIDATE FEATURES:
{candidate_features}
COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Remote Type: {remote_type}
Produce the following JSON:
{{
"skill_match_analysis": {{
"matched_must_haves": [
{{"skill": "...", "candidate_proficiency": "...", "required_proficiency": "...", "gap": "none | minor | significant"}}
],
"missing_must_haves": ["list of must-have skills not found in resume"],
"matched_preferred": ["list of nice-to-have skills found"],
"adjacent_skills": ["skills candidate has that are related but not exact match"],
"coverage_ratio": 0.0 to 1.0
}},
"seniority_alignment": {{
"role_seniority": "...",
"candidate_seniority": "...",
"alignment": "underqualified | slightly_under | aligned | slightly_over | overqualified",
"band_difference": integer
}},
"experience_depth": {{
"years_required": number or null,
"years_relevant": number,
"depth_assessment": "insufficient | adequate | strong | exceptional",
"recency_of_relevant_experience": "current | recent_2yr | moderate_5yr | stale"
}},
"context_fit": {{
"company_stage_fit": "no_experience | some_experience | strong_experience",
"industry_overlap": boolean,
"remote_fit": "incompatible | possible | compatible",
"compensation_alignment_estimate": "likely_below | unclear | likely_aligned | likely_above"
}},
"risk_flags": [
{{"risk": "string", "severity": "low | medium | high", "category": "shortlist | offer_acceptance | retention", "evidence": "string"}}
],
"positive_signals": [
{{"signal": "string", "strength": "moderate | strong | exceptional", "evidence": "string"}}
],
"missing_information": ["list of important data points that could not be determined"]
}}
Rules:
- coverage_ratio = matched_must_haves / total_must_haves
- Be strict: adjacent skills are NOT matches
- Only flag risks with specific evidence from the data
- Do NOT use demographic information, university prestige, or personal characteristics
- Focus on: skills, experience, trajectory, stability, scope, impact
"""