Niketjain2002 committed on
Commit
7cbf44e
·
verified ·
1 Parent(s): 0d7b5b3

Upload src/prompts/feature_extraction.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. src/prompts/feature_extraction.py +190 -0
src/prompts/feature_extraction.py ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM prompt templates for structured feature extraction.
3
+
4
+ These prompts are designed to extract factual, observable features
5
+ from job descriptions and resumes WITHOUT making subjective judgments.
6
+ The scoring happens in a separate layer.
7
+ """
8
+
9
# Prompt template for pulling structured, factual features out of a job
# description. It is assembled from three sections (instructions + context,
# JSON schema, rules) so each part can be read on its own.
#
# Placeholders: {job_description}, {company_stage}, {industry},
# {compensation_band}, {location}, {remote_type}.
# Literal JSON braces are doubled ("{{" / "}}"), which is the str.format
# escape for a single brace -- presumably the caller renders this template
# with str.format; confirm against the call site.

# Section 1: task framing, the raw job description and the company context.
_ROLE_INSTRUCTIONS = """You are a structured data extraction system. Extract factual features from this job description.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.

JOB DESCRIPTION:
{job_description}

COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Location: {location}
- Remote Type: {remote_type}

"""

# Section 2: the JSON shape the model is asked to fill in.
_ROLE_SCHEMA = """Extract the following as JSON. Use null for anything not determinable:

{{
"inferred_title": "string - job title",
"seniority_band": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
"core_competencies": [
{{
"skill": "string",
"proficiency_required": "basic | intermediate | advanced | expert",
"criticality": "must_have | strong_prefer | nice_to_have",
"years_implied": null or number
}}
],
"scope_complexity": {{
"system_scale": "small | medium | large | massive",
"ambiguity_level": "low | medium | high | very_high",
"cross_functional_dependency": "none | low | medium | high",
"decision_authority": "executor | contributor | driver | owner"
}},
"domain_specificity": 0.0 to 1.0,
"team_context": {{
"team_size": null or integer,
"team_maturity": "new | growing | established | restructuring",
"reporting_structure": "individual_contributor | tech_lead | people_manager"
}},
"growth_trajectory_required": true or false,
"hard_requirements": ["list of absolute must-haves mentioned"],
"location_requirement": "strict_onsite | hybrid_required | remote_ok | fully_remote"
}}

"""

# Section 3: extraction rules that keep the output conservative.
_ROLE_RULES = """Rules:
- Only mark "must_have" if the JD explicitly says required/must/essential
- If years of experience mentioned, extract the number
- domain_specificity: 0.0 = generic role, 1.0 = requires deep niche expertise
- Be conservative: when uncertain between two levels, choose the lower one
"""

ROLE_FEATURE_EXTRACTION_PROMPT = _ROLE_INSTRUCTIONS + _ROLE_SCHEMA + _ROLE_RULES
58
+
59
# Prompt template for extracting structured, factual features from a resume.
#
# Placeholder: {resume_text}.
# Literal JSON braces are doubled ("{{" / "}}"), which is the str.format
# escape for a single brace -- presumably the caller renders this template
# with str.format; confirm against the call site.
#
# The rule about the highest evidenced impact level names the schema key
# "max_scope_of_impact" exactly, so the rule and the schema cannot be read as
# referring to two different fields.
CANDIDATE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this resume.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.

RESUME:
{resume_text}

Extract the following as JSON. Use null for anything not determinable.
For experience entries, extract ALL positions listed.

{{
"candidate_name": "string or null",
"experience_profile": {{
"total_years": number or null,
"positions": [
{{
"title": "string",
"company": "string",
"duration_months": number or null,
"start_year": number or null,
"end_year": number or null,
"is_current": boolean,
"seniority_estimate": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
"key_technologies": ["list"],
"quantified_achievements": ["list of achievements with numbers"],
"scope_indicators": ["team size", "user count", "revenue impact", etc.]
}}
]
}},
"skills_mentioned": ["complete list of technical and professional skills"],
"education": [
{{
"degree": "string",
"field": "string",
"institution": "string",
"year": number or null
}}
],
"certifications": ["list"],
"trajectory_indicators": {{
"career_velocity": "slow | steady | fast | exceptional",
"scope_progression": "flat | linear | accelerating | decelerating",
"role_type_consistency": "consistent | pivoting | scattered"
}},
"stability_indicators": {{
"avg_tenure_months": number,
"shortest_tenure_months": number,
"longest_tenure_months": number,
"tenure_trend": "decreasing | stable | increasing",
"total_moves": number
}},
"impact_evidence": {{
"quantified_achievements_count": number,
"max_scope_of_impact": "individual | team | org | company | industry",
"technical_depth_signals": number,
"leadership_signals": number
}},
"industry_experience": ["list of industries worked in"],
"company_stage_experience": ["startup | scaleup | enterprise | public | government | etc."],
"remote_experience_evident": boolean,
"employment_gaps_months": [list of gap durations, empty if none detected]
}}

Rules:
- Calculate tenure from dates when available, estimate otherwise
- career_velocity: based on how quickly titles/scope increased
- Count only EXPLICIT numbers as quantified_achievements
- Do not infer skills not mentioned
- For max_scope_of_impact, use the HIGHEST level evidenced
- Do not use university name/prestige as a signal (bias mitigation)
"""
129
+
130
# Prompt template that compares previously extracted role features with
# previously extracted candidate features and asks for matches, gaps and risks.
#
# Placeholders: {role_features}, {candidate_features}, {company_stage},
# {industry}, {compensation_band}, {remote_type}.
# Literal JSON braces are doubled ("{{" / "}}"), which is the str.format
# escape for a single brace -- presumably the caller renders this template
# with str.format; confirm against the call site.
#
# "matched_preferred" covers both non-mandatory criticality levels the role
# schema defines (strong_prefer and nice_to_have), and coverage_ratio is
# stated in terms of counts rather than dividing two lists.
# NOTE(review): coverage_ratio is undefined when the role has no must_have
# skills -- confirm what the scoring layer expects in that case.
MATCH_ANALYSIS_PROMPT = """You are an analytical matching system. Given extracted features from a role and a candidate,
identify specific matches, gaps, and risks. Be factual and conservative.

ROLE FEATURES:
{role_features}

CANDIDATE FEATURES:
{candidate_features}

COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Remote Type: {remote_type}

Produce the following JSON:

{{
"skill_match_analysis": {{
"matched_must_haves": [
{{"skill": "...", "candidate_proficiency": "...", "required_proficiency": "...", "gap": "none | minor | significant"}}
],
"missing_must_haves": ["list of must-have skills not found in resume"],
"matched_preferred": ["list of strong_prefer and nice_to_have skills found"],
"adjacent_skills": ["skills candidate has that are related but not exact match"],
"coverage_ratio": 0.0 to 1.0
}},
"seniority_alignment": {{
"role_seniority": "...",
"candidate_seniority": "...",
"alignment": "underqualified | slightly_under | aligned | slightly_over | overqualified",
"band_difference": integer
}},
"experience_depth": {{
"years_required": number or null,
"years_relevant": number,
"depth_assessment": "insufficient | adequate | strong | exceptional",
"recency_of_relevant_experience": "current | recent_2yr | moderate_5yr | stale"
}},
"context_fit": {{
"company_stage_fit": "no_experience | some_experience | strong_experience",
"industry_overlap": boolean,
"remote_fit": "incompatible | possible | compatible",
"compensation_alignment_estimate": "likely_below | unclear | likely_aligned | likely_above"
}},
"risk_flags": [
{{"risk": "string", "severity": "low | medium | high", "category": "shortlist | offer_acceptance | retention", "evidence": "string"}}
],
"positive_signals": [
{{"signal": "string", "strength": "moderate | strong | exceptional", "evidence": "string"}}
],
"missing_information": ["list of important data points that could not be determined"]
}}

Rules:
- coverage_ratio = number of matched_must_haves / total number of must_have skills in the role
- Be strict: adjacent skills are NOT matches
- Only flag risks with specific evidence from the data
- Do NOT use demographic information, university prestige, or personal characteristics
- Focus on: skills, experience, trajectory, stability, scope, impact
"""