| """ |
| Prompt templates for Gemini Flash 2.5 API interactions |
| Temperature settings and structured prompts for different analysis stages |
| """ |
|
|
CLAIM_EXTRACTION_PROMPT = """
You are an expert technical recruiter analyzing a CV for factual claims and credibility.
Temperature: 0.1 for precision

Section Type: {section_type}
Section Text: {section_text}
Seniority Level: {seniority_level}

Task: Extract ALL factual claims with extreme precision. Focus on:

1. Work Experience Claims:
   - Job titles, companies, dates, team sizes
   - Specific responsibilities and technologies used
   - Quantifiable achievements (metrics, percentages, timelines)
   - Leadership/architectural claims

2. Project Claims:
   - Project names, descriptions, outcomes
   - Technical stack used, deployment status
   - Team role and contribution level
   - Measurable results (users, performance gains, cost savings)

3. Skills Claims (EXCLUDING EDUCATION):
   - Programming languages with proficiency levels
   - Frameworks, tools, platforms
   - Certifications with dates
   - Domain expertise claims

4. Research/Publication Claims:
   - Paper titles, conference venues, citations
   - SOTA claims with specific metrics
   - Patents, open-source contributions

Output JSON format:
{{
  "claims": [
    {{
      "claim_id": "unique_id",
      "claim_text": "exact text from CV",
      "category": "work_experience|project|skill|research",
      "subcategory": "specific_type",
      "quantifiable_metrics": ["list of numbers/percentages/dates"],
      "technologies_mentioned": ["tech1", "tech2"],
      "time_period": {{
        "start_date": "YYYY-MM or null",
        "end_date": "YYYY-MM or null",
        "duration_months": "number or null"
      }},
      "seniority_claim": "junior|mid|senior|lead|architect|none",
      "verifiability_level": "high|medium|low",
      "evidence_present": "direct|contextual|none",
      "links_artifacts": ["URLs if any"],
      "needs_clarification": ["specific points to verify"]
    }}
  ],
  "metadata": {{
    "total_claims": "number",
    "buzzword_density": "0.0-1.0",
    "specificity_score": "0.0-1.0"
  }}
}}

IMPORTANT:
- Extract ONLY explicit claims, not inferences
- Mark vague claims ("worked on cutting-edge AI") with low verifiability
- Flag role-achievement mismatches for interview
- Note if metrics seem unrealistic for the timeframe
- Skip education verification completely
"""


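# Illustrative usage (a minimal sketch, not part of the prompt set): format
# CLAIM_EXTRACTION_PROMPT and request JSON back at the temperature noted in
# the prompt. Assumes the google-generativeai SDK with genai.configure()
# already called; the model id and this helper are stand-ins for however the
# surrounding pipeline actually calls Gemini. Note that the doubled braces in
# the template survive .format() as literal JSON braces.
def _example_extract_claims(section_type, section_text, seniority_level):
    import json

    import google.generativeai as genai

    prompt = CLAIM_EXTRACTION_PROMPT.format(
        section_type=section_type,
        section_text=section_text,
        seniority_level=seniority_level,
    )
    model = genai.GenerativeModel("gemini-2.5-flash")  # assumed model id
    response = model.generate_content(
        prompt,
        generation_config=genai.GenerationConfig(
            temperature=0.1,  # mirrors "Temperature: 0.1 for precision"
            response_mime_type="application/json",  # ask for raw JSON output
        ),
    )
    return json.loads(response.text)

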
EVIDENCE_VALIDATION_PROMPT = """
You are validating evidence for CV claims.
Temperature: 0.2 for balanced analysis

Claims to validate:
{claims_json}

Full CV text for cross-reference:
{full_cv_text}

For EACH claim, assess:

1. Direct Evidence:
   - Links to repositories, portfolios, demos (check if provided)
   - Certificates, publications (with identifiers)
   - Company/project websites mentioned

2. Contextual Evidence:
   - Technical depth in descriptions
   - Specific tool versions, configurations
   - Problem-solution narratives with details

3. Cross-Section Validation:
   - Skills mentioned MUST appear in at least one project/work entry
   - Dates must be consistent across sections
   - Technologies should align with timeframes (no React before 2013)

4. Metric Sanity:
   - Is "500% growth in 1 month" realistic?
   - Do team sizes match achievement scope?
   - Are research metrics within known SOTA bounds?

Output JSON:
{{
  "validations": [
    {{
      "claim_id": "from_input",
      "evidence_score": 0.0-1.0,
      "evidence_type": "direct|contextual|cross_referenced|missing",
      "supporting_sections": ["list of CV sections with evidence"],
      "artifacts_found": [
        {{
          "type": "github|publication|certificate|website",
          "url": "if_present",
          "needs_verification": true/false
        }}
      ],
      "cross_validation": {{
        "skill_used_in_project": true/false,
        "dates_consistent": true/false,
        "tech_timeline_valid": true/false
      }},
      "metric_analysis": {{
        "realistic": true/false,
        "explanation": "why realistic or not"
      }},
      "triangulation_result": "verified|partial|unverified|red_flag"
    }}
  ],
  "consistency_score": 0.0-1.0
}}
"""


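# Sketch of the "tech_timeline_valid" cross-check above: flag any claim that
# places a technology before its public release. The release-year table is a
# small illustrative sample (the dates are public knowledge), not an
# exhaustive map.
TECH_RELEASE_YEARS = {
    "react": 2013,
    "docker": 2013,
    "kubernetes": 2014,
    "tensorflow": 2015,
    "pytorch": 2016,
}


def _tech_timeline_valid(technology, claim_start_year):
    """Return False when a claim predates the technology's public release."""
    released = TECH_RELEASE_YEARS.get(technology.lower())
    if released is None or claim_start_year is None:
        return True  # unknown tech or undated claim: nothing to contradict
    return claim_start_year >= released

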
RED_FLAG_DETECTION_PROMPT = """
You are detecting credibility red flags in CV claims.
Temperature: 0.2 for pattern detection

Analyzed claims with validation:
{validated_claims_json}

Seniority Level: {seniority_level}
Role Type: {role_type}

Detect these RED FLAGS:

1. Role-Achievement Mismatch:
   - "Led/Architected" in junior roles or <6-month tenure
   - Senior achievements with entry-level titles
   - Sole credit for large team projects

2. Timeline Issues:
   - Overlapping full-time positions
   - Technologies used before their public release
   - Impossibly short project durations for the scope

3. Metric Implausibility:
   - Extreme percentages without context (500%+ improvements)
   - SOTA claims exceeding published benchmarks
   - Unrealistic user numbers or scale claims

4. Vagueness Patterns:
   - High buzzword density without specifics
   - Missing metrics on all achievements
   - No technical depth for "expert" claims

5. Over-claiming:
   - Too many "expert"-level skills (>15)
   - All projects "successful" with no challenges
   - Pattern of superlatives without evidence

Output JSON:
{{
  "red_flags": [
    {{
      "flag_id": "unique_id",
      "severity": "critical|high|medium|low",
      "category": "timeline|implausible|vague|overclaim|mismatch",
      "affected_claims": ["claim_ids"],
      "description": "specific issue",
      "interview_probe": "suggested question to clarify",
      "auto_reject": true/false,
      "requires_proof": true/false
    }}
  ],
  "credibility_score": 0-100,
  "seniority_adjustment": "applied adjustment based on level",
  "risk_assessment": "low|medium|high|critical"
}}
"""


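# Sketch of the "overlapping full-time positions" check above. Positions are
# ("YYYY-MM", "YYYY-MM" or None) pairs, matching the extraction schema's date
# format; None stands for an open-ended or missing date.
def _find_overlapping_positions(positions):
    """Yield (i, j) index pairs of positions whose date ranges overlap."""
    from datetime import date

    def parse(ym, default):
        if ym is None:
            return default
        year, month = (int(part) for part in ym.split("-"))
        return date(year, month, 1)

    spans = [(parse(s, date.min), parse(e, date.max)) for s, e in positions]
    for i in range(len(spans)):
        for j in range(i + 1, len(spans)):
            if spans[i][0] <= spans[j][1] and spans[j][0] <= spans[i][1]:
                yield (i, j)

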
SOTA_VERIFICATION_PROMPT = """
You are verifying research and technical achievement claims against known benchmarks.
Temperature: 0.1 for factual accuracy

Research/Technical claims:
{research_claims_json}

Verify against known SOTA (state-of-the-art) as of {current_date}:

For each claim:
1. Identify the benchmark/dataset/metric
2. Check if the numbers exceed published SOTA
3. Look for required context (dataset, conditions, hardware)
4. Assess if the improvement magnitude is plausible

Known SOTA baselines to reference:
- ImageNet top-1 accuracy: ~92% (2024)
- BERT-large F1 on SQuAD 1.1: ~93%
- GPT-3 perplexity: varies by dataset
- Object detection mAP: ~60-65% on COCO

Output JSON:
{{
  "sota_validations": [
    {{
      "claim_id": "from_input",
      "benchmark": "identified benchmark/dataset",
      "claimed_metric": "number",
      "known_sota": "published baseline",
      "exceeds_sota": true/false,
      "has_context": true/false,
      "missing_details": ["dataset", "evaluation protocol", "hardware"],
      "plausibility": "plausible|unlikely|impossible",
      "verification_status": "needs_clarification|likely_valid|red_flag",
      "interview_questions": ["specific technical questions"]
    }}
  ]
}}
"""


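# Sketch of the plausibility triage above: compare a claimed metric against
# the baselines listed in the prompt. Both the baseline table and the 2-point
# tolerance are illustrative assumptions, not calibrated thresholds.
KNOWN_SOTA_BASELINES = {
    "imagenet_top1": 92.0,
    "squad_f1": 93.0,
    "coco_map": 65.0,
}


def _metric_plausibility(benchmark, claimed_value, tolerance=2.0):
    """Map a claimed metric onto the prompt's plausible|unlikely|impossible scale."""
    baseline = KNOWN_SOTA_BASELINES.get(benchmark)
    if baseline is None:
        return "plausible"  # unknown benchmark: defer to the model's judgement
    if claimed_value <= baseline:
        return "plausible"
    if claimed_value <= baseline + tolerance:
        return "unlikely"  # just above SOTA: demand dataset and protocol details
    return "impossible"

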
REPOSITORY_ANALYSIS_PROMPT = """
Analyze repository evidence for verification.
Temperature: 0.1

Repository URL: {repo_url}
Repository metrics: {repo_metrics}
Claimed contributions: {claimed_contributions}

Assess:
1. Commit density and authorship
2. First commit date vs. claimed start date
3. README quality and documentation depth
4. Issues/PRs linked to claimed features
5. Code complexity matching claimed scope
6. Dependencies matching claimed tech stack

Output a credibility score and specific findings.
"""


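# Sketch of gathering the {repo_metrics} input for the prompt above via the
# public GitHub REST API (GET /repos/{owner}/{repo}). The endpoint and field
# names are GitHub's; which fields to keep is an assumption about what the
# analysis needs. Unauthenticated calls are rate-limited.
def _fetch_repo_metrics(owner, repo):
    import json
    import urllib.request

    url = f"https://api.github.com/repos/{owner}/{repo}"
    with urllib.request.urlopen(url) as response:
        data = json.load(response)
    return {
        "stars": data["stargazers_count"],
        "forks": data["forks_count"],
        "open_issues": data["open_issues_count"],
        "created_at": data["created_at"],  # compare against claimed start date
        "pushed_at": data["pushed_at"],  # recency of activity
    }

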
SCORING_CONFIG = {
    "weights": {
        "credibility": 0.6,
        "consistency": 0.4
    },
    "seniority_thresholds": {
        "senior": {
            "min_evidence_score": 0.7,
            "max_buzzword_density": 0.2,
            "min_specificity": 0.8
        },
        "mid": {
            "min_evidence_score": 0.5,
            "max_buzzword_density": 0.3,
            "min_specificity": 0.6
        },
        "junior": {
            "min_evidence_score": 0.3,
            "max_buzzword_density": 0.4,
            "min_specificity": 0.4
        },
        "intern": {
            "min_evidence_score": 0.2,
            "max_buzzword_density": 0.5,
            "min_specificity": 0.3
        }
    },
    "evidence_tier_weights": {
        "doi_arxiv": 1.0,
        "github_active": 0.9,
        "company_blog": 0.8,
        "personal_blog": 0.6,
        "no_artifact": 0.3
    },
    "red_flag_severity_scores": {
        "critical": -30,
        "high": -20,
        "medium": -10,
        "low": -5
    }
}


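# Sketch of one way to combine SCORING_CONFIG into a final 0-100 score: a
# weighted blend of credibility (0-100, from red-flag detection) and
# consistency (0.0-1.0, from evidence validation), then the flat per-flag
# deductions. The blending scheme itself is an assumption; only the numbers
# come from the config above.
def _final_score(credibility_score, consistency_score, red_flag_severities):
    weights = SCORING_CONFIG["weights"]
    score = (
        credibility_score * weights["credibility"]
        + consistency_score * 100 * weights["consistency"]  # rescale to 0-100
    )
    for severity in red_flag_severities:
        score += SCORING_CONFIG["red_flag_severity_scores"].get(severity, 0)
    return max(0, min(100, round(score)))

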
FAIRNESS_CONFIG = {
    "protected_attributes": [
        "school", "university", "college", "age", "gender",
        "nationality", "ethnicity", "religion", "marital status"
    ],
    "pii_patterns": {
        # Require at least 9 characters, starting and ending with a digit, so
        # the pattern cannot match stray whitespace or lone digits.
        "phone": r"\+?\d[\d\s\-()]{7,}\d",
        "email": r"[\w\.-]+@[\w\.-]+\.\w+",
        "address": r"\d+\s+[\w\s,]+\d{5}",
        "ssn": r"\d{3}-\d{2}-\d{4}"
    }
}


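# Sketch of scrubbing PII with the patterns above before any CV text reaches
# the model. Running the broad phone pattern last, so it cannot swallow digits
# belonging to SSNs or addresses, is an assumed implementation detail rather
# than something the config mandates.
def _redact_pii(text):
    import re

    for label in ("ssn", "email", "address", "phone"):
        pattern = FAIRNESS_CONFIG["pii_patterns"][label]
        text = re.sub(pattern, f"[{label.upper()} REDACTED]", text)
    return text

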
INTERVIEW_TEMPLATES = {
    "unverified_claim": "You mentioned {claim}. Can you provide more details about {specific_aspect}?",
    "metric_clarification": "You achieved {metric}. What was the baseline and methodology?",
    "timeline_gap": "Can you walk me through your activities between {start} and {end}?",
    "tech_depth": "You listed {technology} expertise. Can you describe a specific challenge you solved with it?",
    "sole_credit": "You mentioned {achievement}. Who else was involved and what was your specific contribution?",
    "sota_claim": "Your research shows {metric} performance. How does this compare to published baselines?"
}
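

# Sketch of turning a detected red flag into a follow-up question. The
# category-to-template mapping is an assumed convention, not defined by the
# templates themselves; callers must supply the placeholder fields each
# template expects, e.g. _build_probe("timeline", start="2021-06", end="2022-01").
def _build_probe(category, **fields):
    key = {
        "vague": "unverified_claim",
        "implausible": "metric_clarification",
        "timeline": "timeline_gap",
        "overclaim": "sole_credit",
    }.get(category, "unverified_claim")
    return INTERVIEW_TEMPLATES[key].format(**fields)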