from __future__ import annotations

from collections import defaultdict
import math
from typing import Any

from app.ai.embeddings import CodeEmbeddingService
from app.ai.scoring import ScoringEngine
from app.clients.github_graphql import GitHubGraphQLClient
from app.core.config import settings
from app.clients.streak import compute_streak_from_calendar
from app.graph.state import ProfileState


def _extract_features(user: dict[str, Any]) -> tuple[dict[str, int], dict[str, int], list[str]]:
    repositories = user.get("repositories", {}).get("nodes", [])
    language_sizes: dict[str, int] = defaultdict(int)
    repo_snippets: list[str] = []
    total_commits = 0
    total_stars = 0
    total_forks = 0

    for repo in repositories:
        name = repo.get("name") or ""
        desc = repo.get("description") or ""
        primary = (repo.get("primaryLanguage") or {}).get("name") or ""
        repo_snippets.append(f"repo:{name} lang:{primary} desc:{desc}")
        total_stars += int(repo.get("stargazerCount", 0))
        total_forks += int(repo.get("forkCount", 0))

        default_branch = repo.get("defaultBranchRef") or {}
        history = (default_branch.get("target") or {}).get("history", {})
        total_commits += int(history.get("totalCount", 0))

        for edge in repo.get("languages", {}).get("edges", []):
            lang_name = edge.get("node", {}).get("name") or "Unknown"
            language_sizes[lang_name] += int(edge.get("size", 0))

    merged_prs = int(user.get("pullRequests", {}).get("totalCount", 0))
    contributions = user.get("contributionsCollection", {})
    public_activity = user.get("publicActivity", {})
    public_commits = int(public_activity.get("publicCommits", contributions.get("totalCommitContributions", 0)) or 0)
    public_prs_created = int(public_activity.get("publicPRsCreated", contributions.get("totalPullRequestContributions", 0)) or 0)
    total_contributions = int(contributions.get("contributionCalendar", {}).get("totalContributions", 0))
    followers = int(user.get("followers", {}).get("totalCount", 0))
    metrics = {
        "repo_count": len(repositories),
        "total_commits": total_commits,
        "merged_prs": merged_prs,
        "public_commits": public_commits,
        "public_prs_created": public_prs_created,
        "total_contributions": total_contributions,
        "total_stars": total_stars,
        "total_forks": total_forks,
        "followers": followers,
    }
    return dict(language_sizes), metrics, repo_snippets


def _scale_log(value: int, weight: int, factor: float = 2.0) -> int:
    return min(weight, int(math.log1p(max(value, 0)) * factor))


def _normalize_activity(metrics: dict[str, int], data_source: str) -> int:
    """Combine the activity metrics into a single score in ``0..100``.

    Each component is capped; the caps (30 + 18 + 17 + 12 + 10 + 6 + 7) add
    up to 100.

    Args:
        metrics: Must contain ``total_contributions``, ``repo_count``,
            ``total_commits``, ``merged_prs``, ``total_stars``,
            ``total_forks`` and ``followers``.
        data_source: ``"rest-public"`` selects a divisor of 12 for the
            contribution component; any other value uses 25.

    Returns:
        The summed component scores, clamped to ``0..100``.
    """
    # NOTE(review): presumably the REST-only source reports fewer
    # contributions, hence the smaller divisor — confirm with the client.
    if data_source == "rest-public":
        contributions_per_point = 12
    else:
        contributions_per_point = 25

    components = (
        min(30, metrics["total_contributions"] // contributions_per_point),
        min(18, metrics["repo_count"] * 2),
        min(17, metrics["total_commits"] // 80),
        min(12, metrics["merged_prs"] // 3),
        _scale_log(metrics["total_stars"], 10, 2.2),
        _scale_log(metrics["total_forks"], 6, 2.0),
        _scale_log(metrics["followers"], 7, 1.8),
    )
    combined = sum(components)
    return int(max(0, min(100, combined)))


def _normalize_consistency(current_streak: int, longest_streak: int) -> int:
    if longest_streak <= 0:
        return 0
    active_now = min(35, current_streak * 5)
    proven_consistency = min(65, longest_streak * 3)
    return int(max(0, min(100, active_now + proven_consistency)))


def _language_breakdown(language_sizes: dict[str, int]) -> tuple[str, dict[str, int]]:
    if not language_sizes:
        return "Unknown", {}
    strongest = max(language_sizes.items(), key=lambda x: x[1])[0]
    total = sum(language_sizes.values()) or 1
    breakdown = {lang: int((size / total) * 100) for lang, size in sorted(language_sizes.items(), key=lambda x: x[1], reverse=True)}
    return strongest, breakdown


class AnalyzerWorkflow:
    """Fetch a GitHub profile, derive scores from it and assemble a report."""

    def __init__(self) -> None:
        """Create the GitHub client, the embedder and a scorer sized to the embedder."""
        self._github = GitHubGraphQLClient()
        self._embedder = CodeEmbeddingService()
        self._scorer = ScoringEngine(input_dim=self._embedder.embedding_dim)

    async def run(self, username: str) -> ProfileState:
        """Analyze ``username`` and return the populated state.

        The raw client response is stored under ``"graphql_data"`` and the
        assembled report under ``"final_report"``.
        """
        state: ProfileState = {"username": username}
        response = await self._github.analyze_user(username)
        state["graphql_data"] = response

        profile = response["data"]["user"]
        language_sizes, metrics, repo_snippets = _extract_features(profile)
        top_language, language_shares = _language_breakdown(language_sizes)

        # Streaks are computed from the weekly contribution calendar.
        calendar = profile.get("contributionsCollection", {}).get("contributionCalendar", {})
        streak = compute_streak_from_calendar(calendar.get("weeks", []))
        consistency = _normalize_consistency(streak.current_streak, streak.longest_streak)

        embedding = self._embedder.embed_repository_signals(repo_snippets)
        # The client may tag the response with its origin; default to GraphQL.
        data_source = response.get("source", "graphql")
        activity = _normalize_activity(metrics, data_source)
        verdict = self._scorer.infer(embedding, activity, consistency)

        public_activity = {
            "public_commits": metrics["public_commits"],
            "public_prs_created": metrics["public_prs_created"],
        }
        graphql_signals = {
            "total_commits": metrics["total_commits"],
            "merged_prs": metrics["merged_prs"],
            "total_contributions": metrics["total_contributions"],
        }
        streak_data = {
            "current_streak": streak.current_streak,
            "longest_streak": streak.longest_streak,
        }

        if self._embedder.ready:
            embedding_backend = "transformers"
        else:
            embedding_backend = "deterministic-fallback"

        model_info = {
            # The model name is a fixed label, not read from the embedder.
            "embedding_model": "microsoft/codebert-base",
            "scoring_model": settings.scoring_backend,
            "embedding_dim": self._embedder.embedding_dim,
            "embedding_backend": embedding_backend,
            "data_source": data_source,
            "public_metrics": {
                "repositories": metrics["repo_count"],
                "stars": metrics["total_stars"],
                "forks": metrics["total_forks"],
                "followers": metrics["followers"],
            },
        }

        # "rating_score" and "hiring_readiness_score" both carry the scorer's
        # hiring score.
        state["final_report"] = {
            "username": username,
            "rating_score": verdict.hiring_score,
            "developer_level": verdict.level,
            "confidence": verdict.confidence,
            "strongest_language": top_language,
            "language_breakdown": language_shares,
            "hiring_readiness_score": verdict.hiring_score,
            "consistency_score": consistency,
            "public_activity": public_activity,
            "graphql_signals": graphql_signals,
            "streak_data": streak_data,
            "model_info": model_info,
        }
        return state