Spaces:

Niketjain2002
/

recruitment-intelligence

Sleeping

App Files Files Community

Niketjain2002 committed 5 days ago

Commit

7cbf44e

verified ·

1 Parent(s): 0d7b5b3

Upload src/prompts/feature_extraction.py with huggingface_hub

Browse files

Files changed (1) hide show

src/prompts/feature_extraction.py +190 -0

src/prompts/feature_extraction.py ADDED Viewed

	@@ -0,0 +1,190 @@

"""
LLM prompt templates for structured feature extraction.
These prompts are designed to extract factual, observable features
from job descriptions and resumes WITHOUT making subjective judgments.
The scoring happens in a separate layer.
"""
# Prompt template that asks the model to turn a job description into a JSON
# object of role features (title, seniority band, competencies, scope, team
# context, hard requirements, location requirement).
#
# Placeholders to fill in: {job_description}, {company_stage}, {industry},
# {compensation_band}, {location}, {remote_type}.
#
# NOTE(review): the doubled braces are literal JSON braces escaped for
# str.format-style rendering -- confirm the call site renders it that way.
ROLE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this job description.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
JOB DESCRIPTION:
{job_description}
COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Location: {location}
- Remote Type: {remote_type}
Extract the following as JSON. Use null for anything not determinable:
{{
  "inferred_title": "string - job title",
  "seniority_band": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
  "core_competencies": [
    {{
      "skill": "string",
      "proficiency_required": "basic | intermediate | advanced | expert",
      "criticality": "must_have | strong_prefer | nice_to_have",
      "years_implied": null or number
    }}
  ],
  "scope_complexity": {{
    "system_scale": "small | medium | large | massive",
    "ambiguity_level": "low | medium | high | very_high",
    "cross_functional_dependency": "none | low | medium | high",
    "decision_authority": "executor | contributor | driver | owner"
  }},
  "domain_specificity": 0.0 to 1.0,
  "team_context": {{
    "team_size": null or integer,
    "team_maturity": "new | growing | established | restructuring",
    "reporting_structure": "individual_contributor | tech_lead | people_manager"
  }},
  "growth_trajectory_required": true or false,
  "hard_requirements": ["list of absolute must-haves mentioned"],
  "location_requirement": "strict_onsite | hybrid_required | remote_ok | fully_remote"
}}
Rules:
- Only mark "must_have" if the JD explicitly says required/must/essential
- If years of experience mentioned, extract the number
- domain_specificity: 0.0 = generic role, 1.0 = requires deep niche expertise
- Be conservative: when uncertain between two levels, choose the lower one
"""
# Prompt template that asks the model to turn a resume into a JSON object of
# candidate features (positions, skills, education, trajectory, stability,
# impact evidence, industries, gaps).
#
# Placeholder to fill in: {resume_text}.
#
# NOTE(review): the doubled braces are literal JSON braces escaped for
# str.format-style rendering -- confirm the call site renders it that way.
#
# Every key named in the "Rules" section must match a key in the schema above
# it, so the model cannot be pointed at a field that does not exist.
CANDIDATE_FEATURE_EXTRACTION_PROMPT = """You are a structured data extraction system. Extract factual features from this resume.
Do NOT make subjective quality judgments. Only extract what is explicitly stated or clearly implied.
RESUME:
{resume_text}
Extract the following as JSON. Use null for anything not determinable.
For experience entries, extract ALL positions listed.
{{
  "candidate_name": "string or null",
  "experience_profile": {{
    "total_years": number or null,
    "positions": [
      {{
        "title": "string",
        "company": "string",
        "duration_months": number or null,
        "start_year": number or null,
        "end_year": number or null,
        "is_current": boolean,
        "seniority_estimate": "IC1_entry | IC2_mid | IC3_senior | IC4_staff | IC5_principal | M1_manager | M2_director | M3_vp",
        "key_technologies": ["list"],
        "quantified_achievements": ["list of achievements with numbers"],
        "scope_indicators": ["team size", "user count", "revenue impact", etc.]
      }}
    ]
  }},
  "skills_mentioned": ["complete list of technical and professional skills"],
  "education": [
    {{
      "degree": "string",
      "field": "string",
      "institution": "string",
      "year": number or null
    }}
  ],
  "certifications": ["list"],
  "trajectory_indicators": {{
    "career_velocity": "slow | steady | fast | exceptional",
    "scope_progression": "flat | linear | accelerating | decelerating",
    "role_type_consistency": "consistent | pivoting | scattered"
  }},
  "stability_indicators": {{
    "avg_tenure_months": number,
    "shortest_tenure_months": number,
    "longest_tenure_months": number,
    "tenure_trend": "decreasing | stable | increasing",
    "total_moves": number
  }},
  "impact_evidence": {{
    "quantified_achievements_count": number,
    "max_scope_of_impact": "individual | team | org | company | industry",
    "technical_depth_signals": number,
    "leadership_signals": number
  }},
  "industry_experience": ["list of industries worked in"],
  "company_stage_experience": ["startup | scaleup | enterprise | public | government | etc."],
  "remote_experience_evident": boolean,
  "employment_gaps_months": [list of gap durations, empty if none detected]
}}
Rules:
- Calculate tenure from dates when available, estimate otherwise
- career_velocity: based on how quickly titles/scope increased
- Count only EXPLICIT numbers as quantified_achievements
- Do not infer skills not mentioned
- For max_scope_of_impact, use the HIGHEST level evidenced
- Do not use university name/prestige as a signal (bias mitigation)
"""
# Prompt template that asks the model to compare previously extracted role
# features with candidate features and report matches, gaps, risks and
# positive signals as JSON.
#
# Placeholders to fill in: {role_features}, {candidate_features},
# {company_stage}, {industry}, {compensation_band}, {remote_type}.
#
# NOTE(review): the doubled braces are literal JSON braces escaped for
# str.format-style rendering -- confirm the call site renders it that way.
MATCH_ANALYSIS_PROMPT = """You are an analytical matching system. Given extracted features from a role and a candidate,
identify specific matches, gaps, and risks. Be factual and conservative.
ROLE FEATURES:
{role_features}
CANDIDATE FEATURES:
{candidate_features}
COMPANY CONTEXT:
- Stage: {company_stage}
- Industry: {industry}
- Compensation Band: {compensation_band}
- Remote Type: {remote_type}
Produce the following JSON:
{{
  "skill_match_analysis": {{
    "matched_must_haves": [
      {{"skill": "...", "candidate_proficiency": "...", "required_proficiency": "...", "gap": "none | minor | significant"}}
    ],
    "missing_must_haves": ["list of must-have skills not found in resume"],
    "matched_preferred": ["list of nice-to-have skills found"],
    "adjacent_skills": ["skills candidate has that are related but not exact match"],
    "coverage_ratio": 0.0 to 1.0
  }},
  "seniority_alignment": {{
    "role_seniority": "...",
    "candidate_seniority": "...",
    "alignment": "underqualified | slightly_under | aligned | slightly_over | overqualified",
    "band_difference": integer
  }},
  "experience_depth": {{
    "years_required": number or null,
    "years_relevant": number,
    "depth_assessment": "insufficient | adequate | strong | exceptional",
    "recency_of_relevant_experience": "current | recent_2yr | moderate_5yr | stale"
  }},
  "context_fit": {{
    "company_stage_fit": "no_experience | some_experience | strong_experience",
    "industry_overlap": boolean,
    "remote_fit": "incompatible | possible | compatible",
    "compensation_alignment_estimate": "likely_below | unclear | likely_aligned | likely_above"
  }},
  "risk_flags": [
    {{"risk": "string", "severity": "low | medium | high", "category": "shortlist | offer_acceptance | retention", "evidence": "string"}}
  ],
  "positive_signals": [
    {{"signal": "string", "strength": "moderate | strong | exceptional", "evidence": "string"}}
  ],
  "missing_information": ["list of important data points that could not be determined"]
}}
Rules:
- coverage_ratio = matched_must_haves / total_must_haves
- Be strict: adjacent skills are NOT matches
- Only flag risks with specific evidence from the data
- Do NOT use demographic information, university prestige, or personal characteristics
- Focus on: skills, experience, trajectory, stability, scope, impact
"""