# rabukasim/tools/debug/analyze_metadata_needs.py
# Uploaded via huggingface_hub by trioskosmos (commit 463f868, verified).
import json
import re
from collections import defaultdict
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))
from compiler.aliases import (
CONDITION_SEMANTIC_SPECIAL_CASES,
CONDITION_TRUE_ALIASES,
EFFECT_GRAMMAR_CONVENIENCES,
EFFECT_SEMANTIC_SPECIAL_CASES,
EFFECT_TRUE_ALIASES,
IGNORED_CONDITIONS,
KEYWORD_CONDITIONS,
TRIGGER_ALIASES,
)
def _load_json(path):
    """Read a UTF-8 encoded JSON file and return the parsed object."""
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


# Pre-compiled patterns, one per "<CATEGORY>: KW|KW|..." pseudocode prefix.
# Compiled once at module level so the per-line extraction loop stays cheap.
_CATEGORY_PATTERNS = {
    "EFFECT": re.compile(r"EFFECT:\s*([\w|]+)"),
    "CONDITION": re.compile(r"CONDITION:\s*([\w|]+)"),
    "TRIGGER": re.compile(r"TRIGGER:\s*([\w|]+)"),
    "COST": re.compile(r"COST:\s*([\w|]+)"),
}
# Targets are written as "-> KW" (several may appear on one line).
_TARGET_PATTERN = re.compile(r"->\s*([\w|]+)")


def _collect_keyword_usage(pseudo_to_count, pseudo_to_cards):
    """Tally keyword usage across all distinct pseudocode strings.

    Args:
        pseudo_to_count: mapping of pseudocode string -> number of abilities
            using it.
        pseudo_to_cards: mapping of pseudocode string -> list of card ids
            using it.

    Returns:
        (counts, cards) where
        counts: {category: {keyword: total usage count}}
        cards:  {category: {keyword: set of card ids using the keyword}}
        Categories are EFFECT, CONDITION, TRIGGER, COST and TARGET.
    """
    categories = (*_CATEGORY_PATTERNS, "TARGET")
    counts = {cat: defaultdict(int) for cat in categories}
    cards_by_kw = {cat: defaultdict(set) for cat in categories}

    for pseudo, count in pseudo_to_count.items():
        cards = pseudo_to_cards[pseudo]
        for line in pseudo.split("\n"):
            line = line.strip()
            # One data-driven pass replaces four copy-pasted stanzas.
            for category, pattern in _CATEGORY_PATTERNS.items():
                if f"{category}:" not in line:
                    continue
                match = pattern.search(line)
                if not match:
                    continue
                for kw in match.group(1).split("|"):
                    counts[category][kw] += count
                    cards_by_kw[category][kw].update(cards)
            # Targets are extracted from every line, independent of prefix.
            for group in _TARGET_PATTERN.findall(line):
                for kw in group.split("|"):
                    counts["TARGET"][kw] += count
                    cards_by_kw["TARGET"][kw].update(cards)

    return counts, cards_by_kw


def analyze():
    """Report pseudocode keywords missing from metadata and parser aliases.

    Loads data/metadata.json and data/cards_compiled.json, extracts every
    EFFECT/CONDITION/TRIGGER/COST/TARGET keyword used in compiled ability
    pseudocode, and prints per category the keywords that are neither
    declared in the metadata nor handled by a parser alias.

    Raises:
        FileNotFoundError / json.JSONDecodeError: if any data file is
        missing or malformed.
    """
    metadata = _load_json("data/metadata.json")

    parser_trigger_aliases = set(TRIGGER_ALIASES)
    parser_effect_aliases = (
        set(EFFECT_TRUE_ALIASES)
        | set(EFFECT_GRAMMAR_CONVENIENCES)
        | set(EFFECT_SEMANTIC_SPECIAL_CASES)
    )
    parser_condition_aliases = (
        set(CONDITION_TRUE_ALIASES)
        | set(CONDITION_SEMANTIC_SPECIAL_CASES)
        | set(KEYWORD_CONDITIONS)
        | set(IGNORED_CONDITIONS)
    )
    parser_ignored_conds = set(IGNORED_CONDITIONS)

    # Loaded for parity with the other data files (and to fail fast if it
    # is missing); its content is not otherwise used below.
    _load_json("data/consolidated_abilities.json")

    compiled = _load_json("data/cards_compiled.json")
    all_members = compiled.get("member_db", {})

    # Map each distinct pseudocode string to its usage count and card ids.
    pseudo_to_count = defaultdict(int)
    pseudo_to_cards = defaultdict(list)
    for mid, mdata in all_members.items():
        for ability in mdata.get("abilities", []):
            pseudo = ability.get("pseudocode", "")
            if pseudo:
                pseudo_to_count[pseudo] += 1
                pseudo_to_cards[pseudo].append(mdata.get("card_no", mid))

    counts, keyword_usage_cards = _collect_keyword_usage(
        pseudo_to_count, pseudo_to_cards
    )

    # Keyword sets declared in the metadata, keyed by extraction category.
    meta_keywords = {
        "EFFECT": set(metadata.get("opcodes", {})),
        "CONDITION": set(metadata.get("conditions", {})),
        "TRIGGER": set(metadata.get("triggers", {})),
        "COST": set(metadata.get("costs", {})),
        "TARGET": set(metadata.get("targets", {})),
    }

    # Keywords handled structurally by the parser (logic operators, value
    # comparisons, meta placeholders) that never need metadata entries.
    ignored_keywords = {
        "COUNT_VAL", "PLAYER", "SELF", "OPPONENT", "VARIABLE", "PER_CARD",
        "AND", "OR", "NOT", "TRUE", "FALSE", "IF", "THEN", "ELSE",
        "VALUE_GT", "VALUE_GE", "VALUE_LT", "VALUE_LE", "VALUE_EQ",
        "VALUE_NE",
    }

    # frozenset() default instead of the original mutable `ignore=set()`.
    def print_gaps(category, parser_aliases, ignore=frozenset()):
        """Print keywords of *category* absent from metadata and aliases."""
        print(f"\n=== {category} GAPS (Not in Metadata AND Not Aliased) ===")
        missing = [
            (kw, count)
            for kw, count in sorted(
                counts[category].items(), key=lambda item: -item[1]
            )
            if kw not in meta_keywords[category]
            and kw not in parser_aliases
            and kw not in ignore
            and not kw.isdigit()
        ]
        if not missing:
            print("No gaps found.")
            return
        for kw, count in missing:
            sample = sorted(keyword_usage_cards[category][kw])[:5]
            print(f"{kw:<25} Usage: {count:>3} Cards: {', '.join(sample)}")

    print_gaps("TRIGGER", parser_trigger_aliases)
    print_gaps("EFFECT", parser_effect_aliases)
    print_gaps(
        "CONDITION",
        parser_condition_aliases,
        ignored_keywords | parser_ignored_conds,
    )
    print_gaps("COST", set())  # Costs don't have aliases usually
    print_gaps("TARGET", set(), ignored_keywords)
# Run the gap analysis only when executed as a script, not on import.
if __name__ == "__main__":
    analyze()