import json
import re
from collections import defaultdict
import os
import sys

# Put the directory two levels above this file at the front of sys.path.
# NOTE(review): presumably this is the project root, so that the
# `compiler.aliases` import below resolves when the script is run directly — confirm.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))

from compiler.aliases import (
    CONDITION_SEMANTIC_SPECIAL_CASES,
    CONDITION_TRUE_ALIASES,
    EFFECT_GRAMMAR_CONVENIENCES,
    EFFECT_SEMANTIC_SPECIAL_CASES,
    EFFECT_TRUE_ALIASES,
    IGNORED_CONDITIONS,
    KEYWORD_CONDITIONS,
    TRIGGER_ALIASES,
)


# Line prefixes scanned in ability pseudocode, each mapped to the compiled
# pattern that captures the "|"-separated keyword list following the prefix.
# Compiled once here instead of being re-specified inside the scanning loop.
_KEYWORD_PATTERNS = {
    "EFFECT": re.compile(r"EFFECT:\s*([\w|]+)"),
    "CONDITION": re.compile(r"CONDITION:\s*([\w|]+)"),
    "TRIGGER": re.compile(r"TRIGGER:\s*([\w|]+)"),
    "COST": re.compile(r"COST:\s*([\w|]+)"),
}

# Targets are written as "-> KEYWORD" and may appear several times on one line.
_TARGET_PATTERN = re.compile(r"->\s*([\w|]+)")

# Keywords excluded from the CONDITION and TARGET gap reports.
_IGNORED_KEYWORDS = frozenset(
    {
        "COUNT_VAL",
        "PLAYER",
        "SELF",
        "OPPONENT",
        "VARIABLE",
        "PER_CARD",
        "AND",
        "OR",
        "NOT",
        "TRUE",
        "FALSE",
        "IF",
        "THEN",
        "ELSE",
        "VALUE_GT",
        "VALUE_GE",
        "VALUE_LT",
        "VALUE_LE",
        "VALUE_EQ",
        "VALUE_NE",
    }
)

# Maximum number of example card identifiers printed per missing keyword.
_MAX_SAMPLE_CARDS = 5


def _load_json(path):
    """Return the parsed contents of the UTF-8 encoded JSON file at *path*."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def _collect_keyword_usage(members):
    """Scan every ability's pseudocode and tally the keywords it uses.

    Args:
        members: Mapping of member id to member record. Each record may hold
            an "abilities" list whose items may carry a "pseudocode" string,
            and a "card_no" used to identify the card (the member id is used
            when "card_no" is absent).

    Returns:
        A ``(usage, usage_cards)`` tuple, both keyed by category ("EFFECT",
        "CONDITION", "TRIGGER", "COST", "TARGET"):
        ``usage[category][keyword]`` is incremented once per ability for every
        pseudocode line on which the keyword is found, and
        ``usage_cards[category][keyword]`` is the set of card identifiers
        whose abilities use the keyword.
    """
    # Group card identifiers by identical pseudocode so each distinct text is
    # parsed once; the number of abilities sharing it is the list length.
    pseudo_to_cards = defaultdict(list)
    for member_id, member in members.items():
        for ability in member.get("abilities", []):
            pseudo = ability.get("pseudocode", "")
            if pseudo:
                pseudo_to_cards[pseudo].append(member.get("card_no", member_id))

    categories = (*_KEYWORD_PATTERNS, "TARGET")
    usage = {category: defaultdict(int) for category in categories}
    usage_cards = {category: defaultdict(set) for category in categories}

    def record(category, keyword_list, cards):
        # A captured group such as "A|B" names several alternative keywords.
        for keyword in keyword_list.split("|"):
            usage[category][keyword] += len(cards)
            usage_cards[category][keyword].update(cards)

    for pseudo, cards in pseudo_to_cards.items():
        for line in pseudo.split("\n"):
            # Only the first occurrence of each prefix on a line is considered.
            for category, pattern in _KEYWORD_PATTERNS.items():
                match = pattern.search(line)
                if match:
                    record(category, match.group(1), cards)
            # Every "-> ..." occurrence on the line contributes targets.
            for keyword_list in _TARGET_PATTERN.findall(line):
                record("TARGET", keyword_list, cards)

    return usage, usage_cards


def _print_gaps(category, found, known, aliases, cards_by_keyword, ignore=frozenset()):
    """Print the keywords of one category that nothing accounts for.

    A keyword is reported when it is not in *known*, not in *aliases*, not in
    *ignore*, and is not purely numeric. Keywords are listed by descending
    usage count; ties keep the order in which the keywords were first seen.

    Args:
        category: Category label shown in the section header.
        found: Mapping of keyword to usage count.
        known: Keywords declared in the metadata for this category.
        aliases: Keywords the parser resolves through its alias tables.
        cards_by_keyword: Mapping of keyword to the set of card identifiers
            using it; up to ``_MAX_SAMPLE_CARDS`` of them are printed.
        ignore: Additional keywords to leave out of the report.
    """
    print(f"\n=== {category} GAPS (Not in Metadata AND Not Aliased) ===")
    missing = [
        (kw, count)
        for kw, count in sorted(found.items(), key=lambda item: -item[1])
        if kw not in known and kw not in aliases and kw not in ignore and not kw.isdigit()
    ]

    if not missing:
        print("No gaps found.")
        return

    for kw, count in missing:
        sample = sorted(cards_by_keyword[kw])[:_MAX_SAMPLE_CARDS]
        print(f"{kw:<25} Usage: {count:>3}  Cards: {', '.join(sample)}")


def analyze():
    """Report pseudocode keywords missing from both metadata and parser aliases.

    Reads ``data/metadata.json`` and ``data/cards_compiled.json`` (paths are
    relative to the current working directory), extracts the trigger, effect,
    condition, cost and target keywords used by the abilities under
    "member_db", and prints one gap section per category to stdout.

    Raises:
        OSError: If either data file cannot be opened.
        json.JSONDecodeError: If either data file is not valid JSON.
    """
    metadata = _load_json("data/metadata.json")
    compiled = _load_json("data/cards_compiled.json")

    usage, usage_cards = _collect_keyword_usage(compiled.get("member_db", {}))

    trigger_aliases = set(TRIGGER_ALIASES)
    effect_aliases = set(EFFECT_TRUE_ALIASES) | set(EFFECT_GRAMMAR_CONVENIENCES) | set(EFFECT_SEMANTIC_SPECIAL_CASES)
    ignored_conditions = set(IGNORED_CONDITIONS)
    condition_aliases = (
        set(CONDITION_TRUE_ALIASES)
        | set(CONDITION_SEMANTIC_SPECIAL_CASES)
        | set(KEYWORD_CONDITIONS)
        | ignored_conditions
    )

    # (category, metadata section, parser aliases, extra ignored keywords),
    # listed in the order the sections are printed. Costs and targets have no
    # alias tables, so they are checked against the metadata only.
    reports = (
        ("TRIGGER", "triggers", trigger_aliases, frozenset()),
        ("EFFECT", "opcodes", effect_aliases, frozenset()),
        ("CONDITION", "conditions", condition_aliases, _IGNORED_KEYWORDS | ignored_conditions),
        ("COST", "costs", frozenset(), frozenset()),
        ("TARGET", "targets", frozenset(), _IGNORED_KEYWORDS),
    )
    for category, meta_key, aliases, ignore in reports:
        known = set(metadata.get(meta_key, {}))
        _print_gaps(category, usage[category], known, aliases, usage_cards[category], ignore)


# Run the gap analysis only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    analyze()