Upload src/prompts/feature_extraction.py with huggingface_hub
Browse files
src/prompts/feature_extraction.py
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM prompt templates for structured feature extraction.
|
| 3 |
+
|
| 4 |
+
These prompts are designed to extract factual, observable features
|
| 5 |
+
from job descriptions and resumes WITHOUT making subjective judgments.
|
| 6 |
+
The scoring happens in a separate layer.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
ROLE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this job description.
|
| 10 |
+
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
|
| 11 |
+
|
| 12 |
+
JOB DESCRIPTION:
|
| 13 |
+
{job_description}
|
| 14 |
+
|
| 15 |
+
COMPANY CONTEXT:
|
| 16 |
+
- Stage: {company_stage}
|
| 17 |
+
- Industry: {industry}
|
| 18 |
+
- Compensation Band: {compensation_band}
|
| 19 |
+
- Location: {location}
|
| 20 |
+
- Remote Type: {remote_type}
|
| 21 |
+
|
| 22 |
+
Extract the following as JSON. Use null for anything not determinable:
|
| 23 |
+
|
| 24 |
+
{{
|
| 25 |
+
"inferred_title": "string - job title",
|
| 26 |
+
"seniority_band": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
|
| 27 |
+
"core_competencies": [
|
| 28 |
+
{{
|
| 29 |
+
"skill": "string",
|
| 30 |
+
"proficiency_required": "basic | intermediate | advanced | expert",
|
| 31 |
+
"criticality": "must_have | strong_prefer | nice_to_have",
|
| 32 |
+
"years_implied": null or number
|
| 33 |
+
}}
|
| 34 |
+
],
|
| 35 |
+
"scope_complexity": {{
|
| 36 |
+
"system_scale": "small | medium | large | massive",
|
| 37 |
+
"ambiguity_level": "low | medium | high | very_high",
|
| 38 |
+
"cross_functional_dependency": "none | low | medium | high",
|
| 39 |
+
"decision_authority": "executor | contributor | driver | owner"
|
| 40 |
+
}},
|
| 41 |
+
"domain_specificity": 0.0 to 1.0,
|
| 42 |
+
"team_context": {{
|
| 43 |
+
"team_size": null or integer,
|
| 44 |
+
"team_maturity": "new | growing | established | restructuring",
|
| 45 |
+
"reporting_structure": "individual_contributor | tech_lead | people_manager"
|
| 46 |
+
}},
|
| 47 |
+
"growth_trajectory_required": true or false,
|
| 48 |
+
"hard_requirements": ["list of absolute must-haves mentioned"],
|
| 49 |
+
"location_requirement": "strict_onsite | hybrid_required | remote_ok | fully_remote"
|
| 50 |
+
}}
|
| 51 |
+
|
| 52 |
+
Rules:
|
| 53 |
+
- Only mark "must_have" if the JD explicitly says required/must/essential
|
| 54 |
+
- If years of experience mentioned, extract the number
|
| 55 |
+
- domain_specificity: 0.0 = generic role, 1.0 = requires deep niche expertise
|
| 56 |
+
- Be conservative: when uncertain between two levels, choose the lower one
|
| 57 |
+
"""
|
| 58 |
+
|
| 59 |
+
CANDIDATE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this resume.
|
| 60 |
+
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
|
| 61 |
+
|
| 62 |
+
RESUME:
|
| 63 |
+
{resume_text}
|
| 64 |
+
|
| 65 |
+
Extract the following as JSON. Use null for anything not determinable.
|
| 66 |
+
For experience entries, extract ALL positions listed.
|
| 67 |
+
|
| 68 |
+
{{
|
| 69 |
+
"candidate_name": "string or null",
|
| 70 |
+
"experience_profile": {{
|
| 71 |
+
"total_years": number or null,
|
| 72 |
+
"positions": [
|
| 73 |
+
{{
|
| 74 |
+
"title": "string",
|
| 75 |
+
"company": "string",
|
| 76 |
+
"duration_months": number or null,
|
| 77 |
+
"start_year": number or null,
|
| 78 |
+
"end_year": number or null,
|
| 79 |
+
"is_current": boolean,
|
| 80 |
+
"seniority_estimate": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
|
| 81 |
+
"key_technologies": ["list"],
|
| 82 |
+
"quantified_achievements": ["list of achievements with numbers"],
|
| 83 |
+
"scope_indicators": ["team size", "user count", "revenue impact", etc.]
|
| 84 |
+
}}
|
| 85 |
+
]
|
| 86 |
+
}},
|
| 87 |
+
"skills_mentioned": ["complete list of technical and professional skills"],
|
| 88 |
+
"education": [
|
| 89 |
+
{{
|
| 90 |
+
"degree": "string",
|
| 91 |
+
"field": "string",
|
| 92 |
+
"institution": "string",
|
| 93 |
+
"year": number or null
|
| 94 |
+
}}
|
| 95 |
+
],
|
| 96 |
+
"certifications": ["list"],
|
| 97 |
+
"trajectory_indicators": {{
|
| 98 |
+
"career_velocity": "slow | steady | fast | exceptional",
|
| 99 |
+
"scope_progression": "flat | linear | accelerating | decelerating",
|
| 100 |
+
"role_type_consistency": "consistent | pivoting | scattered"
|
| 101 |
+
}},
|
| 102 |
+
"stability_indicators": {{
|
| 103 |
+
"avg_tenure_months": number,
|
| 104 |
+
"shortest_tenure_months": number,
|
| 105 |
+
"longest_tenure_months": number,
|
| 106 |
+
"tenure_trend": "decreasing | stable | increasing",
|
| 107 |
+
"total_moves": number
|
| 108 |
+
}},
|
| 109 |
+
"impact_evidence": {{
|
| 110 |
+
"quantified_achievements_count": number,
|
| 111 |
+
"max_scope_of_impact": "individual | team | org | company | industry",
|
| 112 |
+
"technical_depth_signals": number,
|
| 113 |
+
"leadership_signals": number
|
| 114 |
+
}},
|
| 115 |
+
"industry_experience": ["list of industries worked in"],
|
| 116 |
+
"company_stage_experience": ["startup | scaleup | enterprise | public | government | etc."],
|
| 117 |
+
"remote_experience_evident": boolean,
|
| 118 |
+
"employment_gaps_months": [list of gap durations, empty if none detected]
|
| 119 |
+
}}
|
| 120 |
+
|
| 121 |
+
Rules:
|
| 122 |
+
- Calculate tenure from dates when available, estimate otherwise
|
| 123 |
+
- career_velocity: based on how quickly titles/scope increased
|
| 124 |
+
- Count only EXPLICIT numbers as quantified_achievements
|
| 125 |
+
- Do not infer skills not mentioned
|
| 126 |
+
- For max_scope_of_impact, use the HIGHEST level evidenced
|
| 127 |
+
- Do not use university name/prestige as a signal (bias mitigation)
|
| 128 |
+
"""
|
| 129 |
+
|
| 130 |
+
MATCH_ANALYSIS_PROMPT = """You are an analytical matching system. Given extracted features from a role and a candidate,
|
| 131 |
+
identify specific matches, gaps, and risks. Be factual and conservative.
|
| 132 |
+
|
| 133 |
+
ROLE FEATURES:
|
| 134 |
+
{role_features}
|
| 135 |
+
|
| 136 |
+
CANDIDATE FEATURES:
|
| 137 |
+
{candidate_features}
|
| 138 |
+
|
| 139 |
+
COMPANY CONTEXT:
|
| 140 |
+
- Stage: {company_stage}
|
| 141 |
+
- Industry: {industry}
|
| 142 |
+
- Compensation Band: {compensation_band}
|
| 143 |
+
- Remote Type: {remote_type}
|
| 144 |
+
|
| 145 |
+
Produce the following JSON:
|
| 146 |
+
|
| 147 |
+
{{
|
| 148 |
+
"skill_match_analysis": {{
|
| 149 |
+
"matched_must_haves": [
|
| 150 |
+
{{"skill": "...", "candidate_proficiency": "...", "required_proficiency": "...", "gap": "none | minor | significant"}}
|
| 151 |
+
],
|
| 152 |
+
"missing_must_haves": ["list of must-have skills not found in resume"],
|
| 153 |
+
"matched_preferred": ["list of nice-to-have skills found"],
|
| 154 |
+
"adjacent_skills": ["skills candidate has that are related but not exact match"],
|
| 155 |
+
"coverage_ratio": 0.0 to 1.0
|
| 156 |
+
}},
|
| 157 |
+
"seniority_alignment": {{
|
| 158 |
+
"role_seniority": "...",
|
| 159 |
+
"candidate_seniority": "...",
|
| 160 |
+
"alignment": "underqualified | slightly_under | aligned | slightly_over | overqualified",
|
| 161 |
+
"band_difference": integer
|
| 162 |
+
}},
|
| 163 |
+
"experience_depth": {{
|
| 164 |
+
"years_required": number or null,
|
| 165 |
+
"years_relevant": number,
|
| 166 |
+
"depth_assessment": "insufficient | adequate | strong | exceptional",
|
| 167 |
+
"recency_of_relevant_experience": "current | recent_2yr | moderate_5yr | stale"
|
| 168 |
+
}},
|
| 169 |
+
"context_fit": {{
|
| 170 |
+
"company_stage_fit": "no_experience | some_experience | strong_experience",
|
| 171 |
+
"industry_overlap": boolean,
|
| 172 |
+
"remote_fit": "incompatible | possible | compatible",
|
| 173 |
+
"compensation_alignment_estimate": "likely_below | unclear | likely_aligned | likely_above"
|
| 174 |
+
}},
|
| 175 |
+
"risk_flags": [
|
| 176 |
+
{{"risk": "string", "severity": "low | medium | high", "category": "shortlist | offer_acceptance | retention", "evidence": "string"}}
|
| 177 |
+
],
|
| 178 |
+
"positive_signals": [
|
| 179 |
+
{{"signal": "string", "strength": "moderate | strong | exceptional", "evidence": "string"}}
|
| 180 |
+
],
|
| 181 |
+
"missing_information": ["list of important data points that could not be determined"]
|
| 182 |
+
}}
|
| 183 |
+
|
| 184 |
+
Rules:
|
| 185 |
+
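- coverage_ratio = (number of entries in matched_must_haves) / (total number of must_have skills in the role features); if the role has no must_have skills, use 1.0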
|
| 186 |
+
- Be strict: adjacent skills are NOT matches
|
| 187 |
+
- Only flag risks with specific evidence from the data
|
| 188 |
+
- Do NOT use demographic information, university prestige, or personal characteristics
|
| 189 |
+
- Focus on: skills, experience, trajectory, stability, scope, impact
|
| 190 |
+
"""
|