"""
Skills APIs - compute skill-related metrics from actual data.

AUTO-GENERATED by scripts/generate_hf.sh - DO NOT EDIT DIRECTLY
Edit skills.py in main repo and regenerate.
"""

from typing import Dict, List, Any, Optional, Union
import pandas as pd
from loguru import logger
from data_loader import get_data_loader
from models import (
    RequisitionNotFoundResponse,
    SkillAnalysisResponse,
    SkillImpactFillRateResponse,
    SkillImpactSLAResponse,
    SkillRelevanceResponse,
    SuccessfulPostingResponse,
    DataSourcesResponse,
    SkillJustificationData,
    SkillJustificationImpact,
    SuccessCriteria,
)
# Module-level switch for API-call tracing; hard-coded off for deployment.
BPO_LOG_API_CALLS = False


def _log_api_call(msg: str) -> None:
    """Emit ``msg`` at INFO level when API-call logging is switched on.

    Does nothing while ``BPO_LOG_API_CALLS`` is falsy.

    Args:
        msg: The already-formatted log line.
    """
    if not BPO_LOG_API_CALLS:
        return
    logger.info(msg)


def _check_requisition_valid(requisition_id: str) -> Optional[RequisitionNotFoundResponse]:
    """
    Validate a requisition ID against the data loader.

    Args:
        requisition_id: The job requisition ID to look up.

    Returns:
        None when the loader recognises the ID; otherwise a
        RequisitionNotFoundResponse carrying the loader's suggested IDs.
    """
    data_loader = get_data_loader()

    # Happy path first: a known ID produces no error payload.
    if data_loader.is_valid_requisition(requisition_id):
        return None

    return RequisitionNotFoundResponse(
        error="requisition_not_found",
        message=f"No job can be found with the ID {requisition_id}.",
        suggested_requisition_ids=data_loader.get_suggested_requisitions(requisition_id),
    )


def _classify_sla_difference(diff: float) -> str:
    """Map an SLA-rate difference (with skill minus without) to a text label.

    Differences beyond +/-0.10 are labelled "highly", smaller non-zero
    differences "slightly", and anything else (including NaN, which fails
    every comparison) "no impact on SLA".
    """
    if diff < -0.10:
        return "highly negative impact on SLA"
    if diff < 0:
        return "slightly negative impact on SLA"
    if diff > 0.10:
        return "highly positive impact on SLA"
    if diff > 0:
        return "slightly positive impact on SLA"
    return "no impact on SLA"


def get_skill_analysis(requisition_id: str) -> Union[SkillAnalysisResponse, RequisitionNotFoundResponse]:
    """
    Provides statistical indicators for each skill associated with the requisition,
    enabling an LLM or analyst to decide whether a skill should be retained,
    removed, or reconsidered.

    Args:
        requisition_id: The job requisition ID.

    Returns:
        A SkillAnalysisResponse holding the number of distinct requisitions in
        the similar-requisition data and, for each of the ten most frequent
        skills, its occurrence count and an SLA correlation label. A
        RequisitionNotFoundResponse is returned instead when the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_analysis(requisition_id={requisition_id})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    # Flatten every candidate's skill list so occurrences can be counted
    all_skills = [skill for skills_list in data['skills_parsed'] for skill in skills_list]
    skill_counts = pd.Series(all_skills).value_counts()

    # SLA only applies to reviewed candidates. This subset does not depend on
    # the skill being analysed, so it is built once rather than per skill.
    reviewed_data = data[data['reviewed']]

    historical_skills = []
    for skill, count in skill_counts.head(10).items():  # Top 10 skills
        # One membership pass per skill; the "without" group is its complement.
        has_skill_mask = pd.Series(
            [skill in skills for skills in reviewed_data['skills_parsed']],
            index=reviewed_data.index,
            dtype=bool,
        )
        has_skill = reviewed_data[has_skill_mask]
        no_skill = reviewed_data[~has_skill_mask]

        # An empty group contributes a rate of 0, so the label then reflects
        # the other group's rate alone.
        sla_with = has_skill['sla_met'].mean() if len(has_skill) > 0 else 0
        sla_without = no_skill['sla_met'].mean() if len(no_skill) > 0 else 0

        historical_skills.append({
            "name": skill,
            "skill_occurrence": int(count),
            "correlation": _classify_sla_difference(sla_with - sla_without),
        })

    num_jobs = data['requisition_id'].nunique()

    return SkillAnalysisResponse(
        historical_jobs=num_jobs,
        input_skills=[],  # Would come from requisition details
        historical_skills_with_analysis=historical_skills,
    )


def get_skill_impact_fill_rate(requisition_id: str, skill_name: str) -> Union[SkillImpactFillRateResponse, RequisitionNotFoundResponse]:
    """
    Evaluates how the inclusion of a specific skill affects requisition
    fill-rate metrics and candidate pool size.

    Requisitions in the similar-requisition data are split into those with at
    least one row whose parsed skills contain ``skill_name`` and all the rest;
    the same three metrics are computed for each group.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill to evaluate.

    Returns:
        A SkillImpactFillRateResponse with metrics for requisitions that
        include the skill (``impact``) and for those that do not
        (``compared_to_baseline``), or a RequisitionNotFoundResponse when the
        ID is not valid.
    """
    _log_api_call(f"API call: get_skill_impact_fill_rate(requisition_id={requisition_id}, skill_name={skill_name})")

    # Check if requisition ID is valid
    error = _check_requisition_valid(requisition_id)
    if error:
        return error

    loader = get_data_loader()
    data = loader.get_similar_requisitions(requisition_id)

    # Split data by whether requisitions included this skill
    has_skill_reqs = data[data['skills_parsed'].apply(lambda x: skill_name in x)]['requisition_id'].unique()
    no_skill_reqs = data[~data['requisition_id'].isin(has_skill_reqs)]['requisition_id'].unique()

    def calc_metrics(req_ids):
        """Compute fill rate, time to fill and pool size for a group of requisition IDs."""
        # req_ids comes from .unique(), so test its length rather than its truthiness
        if len(req_ids) == 0:
            return {"fill_rate_percentage": 0, "time_to_fill_days": 0, "candidate_pool_size": 0}

        req_data = data[data['requisition_id'].isin(req_ids)]
        hired = req_data[req_data['hired']]

        # Fill rate: % of reqs that got at least one hire
        fill_rate = hired['requisition_id'].nunique() / len(req_ids) * 100

        # Time to fill: average days from applied to hired
        time_to_fill = 0
        if len(hired) > 0:
            mean_days = (hired['hire_date'] - hired['applied_at']).dt.days.mean()
            # The mean is NaN when no hired row has both dates; int(NaN) would
            # raise ValueError, so fall back to 0 as for the no-hire case.
            if not pd.isna(mean_days):
                time_to_fill = mean_days

        return {
            "fill_rate_percentage": round(fill_rate, 1),
            "time_to_fill_days": int(time_to_fill),
            # Candidate pool size: every row belonging to the group
            "candidate_pool_size": len(req_data),
        }

    with_skill = calc_metrics(has_skill_reqs)
    without_skill = calc_metrics(no_skill_reqs)

    return SkillImpactFillRateResponse(
        skill_name=skill_name,
        impact=with_skill,
        compared_to_baseline=without_skill,
    )


def get_skill_impact_sla(requisition_id: str, skill_name: str) -> Union[SkillImpactSLAResponse, RequisitionNotFoundResponse]:
    """
    Compares SLA achievement between reviewed candidates who list a skill
    and reviewed candidates who do not.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill being analyzed.

    Returns:
        A SkillImpactSLAResponse with the rounded SLA percentage for each
        group and their difference (with minus without), or a
        RequisitionNotFoundResponse when the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_impact_sla(requisition_id={requisition_id}, skill_name={skill_name})")

    # Bail out early on an unknown requisition
    not_found = _check_requisition_valid(requisition_id)
    if not_found:
        return not_found

    similar = get_data_loader().get_similar_requisitions(requisition_id)

    # SLA is only defined for candidates that were reviewed
    reviewed = similar[similar['reviewed']]
    skill_lists = reviewed['skills_parsed']

    def sla_percentage(candidates):
        """Mean of ``sla_met`` as a rounded percentage; 0 for an empty group."""
        if len(candidates) > 0:
            return round(candidates['sla_met'].mean() * 100)
        return 0

    with_pct = sla_percentage(reviewed[skill_lists.apply(lambda skills: skill_name in skills)])
    without_pct = sla_percentage(reviewed[skill_lists.apply(lambda skills: skill_name not in skills)])

    return SkillImpactSLAResponse(
        requisition_id=requisition_id,
        skill_name=skill_name,
        sla_achievement_with_skill=with_pct,
        sla_achievement_without_skill=without_pct,
        delta=with_pct - without_pct,
    )


def get_skill_relevance_justification(requisition_id: str, skill_name: str) -> Union[SkillRelevanceResponse, RequisitionNotFoundResponse]:
    """
    Decides whether a skill is relevant for a requisition and bundles the
    SLA and fill-rate figures that back the decision.

    The skill counts as relevant when its SLA delta exceeds 5 or when the
    fill rate with the skill is more than 1.2 times the baseline fill rate.

    Args:
        requisition_id: The job requisition ID.
        skill_name: The skill being justified.

    Returns:
        A SkillRelevanceResponse with the relevance flag and supporting
        metrics, or a RequisitionNotFoundResponse when the ID is not valid.
    """
    _log_api_call(f"API call: get_skill_relevance_justification(requisition_id={requisition_id}, skill_name={skill_name})")

    # Bail out early on an unknown requisition
    not_found = _check_requisition_valid(requisition_id)
    if not_found:
        return not_found

    # Both impact views feed the decision and the justification payload
    sla_impact = get_skill_impact_sla(requisition_id, skill_name)
    fill_impact = get_skill_impact_fill_rate(requisition_id, skill_name)

    is_relevant = bool(
        sla_impact.delta > 5
        or fill_impact.impact.fill_rate_percentage
        > fill_impact.compared_to_baseline.fill_rate_percentage * 1.2
    )

    def as_justification_impact(metrics):
        """Copy the three fill-rate metrics into a SkillJustificationImpact."""
        return SkillJustificationImpact(
            fill_rate_percentage=metrics.fill_rate_percentage,
            time_to_fill_days=metrics.time_to_fill_days,
            candidate_pool_size=metrics.candidate_pool_size,
        )

    return SkillRelevanceResponse(
        requisition_id=requisition_id,
        skill_name=skill_name,
        is_relevant=is_relevant,
        justification=SkillJustificationData(
            requisition_id=requisition_id,
            skill_name=skill_name,
            sla_achievement_with_skill=sla_impact.sla_achievement_with_skill,
            sla_achievement_without_skill=sla_impact.sla_achievement_without_skill,
            delta=sla_impact.delta,
            impact=as_justification_impact(fill_impact.impact),
            compared_to_baseline=as_justification_impact(fill_impact.compared_to_baseline),
        ),
    )


def get_successful_posting_criteria() -> SuccessfulPostingResponse:
    """
    Returns the fixed set of thresholds that define a successful job posting.

    The values are constants in this function; no data is loaded.

    Returns:
        A SuccessfulPostingResponse wrapping the SuccessCriteria thresholds
        and a short justification string.
    """
    _log_api_call("API call: get_successful_posting_criteria()")

    thresholds = SuccessCriteria(
        time_to_fill_threshold_days=90,
        offer_acceptance_rate_min=50,
        sla_compliance_min=80,
        candidate_quality_rating_avg=3.5,
    )
    rationale = "Based on historical performance benchmarks and industry standards"

    return SuccessfulPostingResponse(criteria=thresholds, justification=rationale)


def get_data_sources_used(requisition_id: str) -> Union[DataSourcesResponse, RequisitionNotFoundResponse]:
    """
    Reports the datasets and ML models behind hiring recommendations
    for a requisition.

    The dataset and model names are fixed lists; only the requisition ID
    varies between responses.

    Args:
        requisition_id: The job requisition ID.

    Returns:
        A DataSourcesResponse echoing the requisition ID with the dataset and
        model names, or a RequisitionNotFoundResponse when the ID is not valid.
    """
    _log_api_call(f"API call: get_data_sources_used(requisition_id={requisition_id})")

    # Bail out early on an unknown requisition
    not_found = _check_requisition_valid(requisition_id)
    if not_found:
        return not_found

    datasets = [
        "Historical hiring success data",
        "Requisition skill tagging",
        "Funnel conversion metrics",
        "Candidate quality feedback",
    ]
    models = [
        "Skill relevance classifier",
        "SLA impact regression model",
        "Funnel conversion recommender",
    ]

    return DataSourcesResponse(
        requisition_id=requisition_id,
        datasets_used=datasets,
        models_involved=models,
    )