Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files- core/logging.py +70 -0
- core/rule_engine.py +1 -1
- core/validation.py +172 -0
core/logging.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Structured Logging for Design System Extractor
================================================

Provides consistent logging across the application using loguru.
Falls back to standard logging if loguru is not available.
"""

# Hoisted to module level (was previously imported only inside the
# ImportError fallback): `get_logger` references the global name `logging`,
# so it must be defined regardless of whether loguru is installed.
import logging
import sys
from typing import Optional

try:
    from loguru import logger as _loguru_logger

    # Remove default handler so we fully control format and sinks.
    _loguru_logger.remove()

    # Add structured console handler (INFO and above, colorized).
    _loguru_logger.add(
        sys.stderr,
        format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{extra[module]}</cyan> | {message}",
        level="INFO",
        colorize=True,
    )

    # Add file handler for debugging (rotated).
    # loguru creates the logs/ directory and handles rotation/retention
    # itself; compressed archives are kept for 7 days.
    _loguru_logger.add(
        "logs/extractor_{time:YYYY-MM-DD}.log",
        format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {extra[module]} | {message}",
        level="DEBUG",
        rotation="10 MB",
        retention="7 days",
        compression="gz",
        catch=True,  # Don't crash on log errors
    )

    HAS_LOGURU = True

except ImportError:
    # loguru not installed: configure the stdlib root logger with a
    # similar "time | level | name | message" layout.
    HAS_LOGURU = False
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
        datefmt="%H:%M:%S",
    )
def get_logger(module_name: str = "app"):
    """
    Get a logger instance for a specific module.

    Args:
        module_name: Name of the module (e.g., 'rule_engine', 'aurora', 'app')

    Returns:
        Logger instance with module context
    """
    # Guard clause: without loguru, hand back a plain stdlib logger.
    if not HAS_LOGURU:
        return logging.getLogger(module_name)
    # loguru path: bind the module name so the {extra[module]} format
    # field is populated on every record.
    return _loguru_logger.bind(module=module_name)
# Pre-configured loggers for common modules.  Importing these avoids
# repeating get_logger(...) at every use site; each is bound to the
# module name shown in the log format's {extra[module]} field.
app_logger = get_logger("app")
rule_engine_logger = get_logger("rule_engine")
agent_logger = get_logger("agents")
extraction_logger = get_logger("extraction")
core/rule_engine.py
CHANGED
|
@@ -619,7 +619,7 @@ def analyze_spacing_grid(spacing_tokens: dict) -> SpacingGridAnalysis:
|
|
| 619 |
px_val = int(float(str(px).replace('px', '')))
|
| 620 |
if px_val > 0:
|
| 621 |
values.append(px_val)
|
| 622 |
-
except:
|
| 623 |
continue
|
| 624 |
|
| 625 |
if not values:
|
|
|
|
| 619 |
px_val = int(float(str(px).replace('px', '')))
|
| 620 |
if px_val > 0:
|
| 621 |
values.append(px_val)
|
| 622 |
+
except (ValueError, TypeError):
|
| 623 |
continue
|
| 624 |
|
| 625 |
if not values:
|
core/validation.py
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Agent Output Validation
========================

JSON schemas for validating LLM agent outputs.
Ensures data integrity between pipeline stages.
"""

from typing import Any, Optional

try:
    from jsonschema import validate, ValidationError

    HAS_JSONSCHEMA = True
except ImportError:
    # jsonschema is optional: when absent, validate_agent_output falls
    # back to a manual required-field check (_manual_validate).
    HAS_JSONSCHEMA = False

from core.logging import get_logger

# Module-scoped logger bound to the "validation" context.
logger = get_logger("validation")
# =============================================================================
# SCHEMAS
# =============================================================================

# Output contract for the brand-identification agent ("aurora").
# Note: brand colors allow JSON null, so "required" only means the key
# must be present — not that it must be non-null.
BRAND_IDENTIFICATION_SCHEMA = {
    "type": "object",
    "properties": {
        "brand_primary": {"type": ["string", "null"]},
        "brand_secondary": {"type": ["string", "null"]},
        "brand_accent": {"type": ["string", "null"]},
        "palette_strategy": {"type": "string"},
        "cohesion_score": {"type": ["number", "integer"]},
        "cohesion_notes": {"type": "string"},
        "semantic_names": {"type": "object"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["brand_primary", "palette_strategy"],
}

# Output contract for the benchmark-advisor agent ("atlas").
BENCHMARK_ADVICE_SCHEMA = {
    "type": "object",
    "properties": {
        "recommended_benchmark": {"type": "string"},
        "recommended_benchmark_name": {"type": "string"},
        "reasoning": {"type": "string"},
        "alignment_changes": {"type": "array"},
        "pros_of_alignment": {"type": "array"},
        "cons_of_alignment": {"type": "array"},
        "alternative_benchmarks": {"type": "array"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["recommended_benchmark", "reasoning"],
}

# Output contract for the best-practices audit agent ("sentinel").
BEST_PRACTICES_SCHEMA = {
    "type": "object",
    "properties": {
        "overall_score": {"type": ["number", "integer"]},
        "checks": {"type": "array"},
        "priority_fixes": {"type": "array"},
        "passing_practices": {"type": "array"},
        "failing_practices": {"type": "array"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["overall_score", "priority_fixes"],
}

# Output contract for the head-synthesis agent ("nexus").
HEAD_SYNTHESIS_SCHEMA = {
    "type": "object",
    "properties": {
        "executive_summary": {"type": "string"},
        "scores": {"type": "object"},
        "benchmark_fit": {"type": "object"},
        "brand_analysis": {"type": "object"},
        "top_3_actions": {"type": "array"},
        "color_recommendations": {"type": "array"},
        "type_scale_recommendation": {"type": "object"},
        "spacing_recommendation": {"type": "object"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["executive_summary", "top_3_actions"],
}

# Map agent names to schemas
# Each schema is registered under both its codename (e.g. "aurora") and a
# descriptive alias (e.g. "brand_identifier"); lookup is lowercase.
AGENT_SCHEMAS = {
    "aurora": BRAND_IDENTIFICATION_SCHEMA,
    "brand_identifier": BRAND_IDENTIFICATION_SCHEMA,
    "atlas": BENCHMARK_ADVICE_SCHEMA,
    "benchmark_advisor": BENCHMARK_ADVICE_SCHEMA,
    "sentinel": BEST_PRACTICES_SCHEMA,
    "best_practices": BEST_PRACTICES_SCHEMA,
    "nexus": HEAD_SYNTHESIS_SCHEMA,
    "head_synthesizer": HEAD_SYNTHESIS_SCHEMA,
}
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# =============================================================================
|
| 100 |
+
# VALIDATION FUNCTIONS
|
| 101 |
+
# =============================================================================
|
| 102 |
+
|
def validate_agent_output(data: Any, agent_name: str) -> tuple[bool, Optional[str]]:
    """
    Validate an agent's output against its expected schema.

    Args:
        data: The output data (dict or dataclass with to_dict())
        agent_name: Name of the agent (e.g., 'aurora', 'nexus')

    Returns:
        (is_valid, error_message) tuple
    """
    schema = AGENT_SCHEMAS.get(agent_name.lower().strip())

    # Unknown agent: warn but pass — validation must not block the pipeline.
    if not schema:
        logger.warning(f"No schema found for agent: {agent_name}")
        return True, None

    # Normalize the payload to a plain dict before checking it.
    if hasattr(data, "to_dict"):
        payload = data.to_dict()
    elif hasattr(data, "__dataclass_fields__"):
        from dataclasses import asdict

        payload = asdict(data)
    elif isinstance(data, dict):
        payload = data
    else:
        return False, f"Cannot validate: unexpected type {type(data)}"

    # Without jsonschema installed, fall back to a required-field check.
    if not HAS_JSONSCHEMA:
        return _manual_validate(payload, schema, agent_name)

    try:
        validate(instance=payload, schema=schema)
    except ValidationError as e:
        error_msg = f"Validation failed for {agent_name}: {e.message}"
        logger.warning(error_msg)
        return False, error_msg
    logger.debug(f"Validation passed for {agent_name}")
    return True, None
| 144 |
+
|
| 145 |
+
|
def _manual_validate(data: dict, schema: dict, agent_name: str) -> tuple[bool, Optional[str]]:
    """Fallback validation without jsonschema library."""
    # Only the schema's "required" list is enforced in this mode.
    missing = [name for name in schema.get("required", []) if name not in data]
    if not missing:
        return True, None
    error_msg = f"{agent_name} output missing required fields: {missing}"
    logger.warning(error_msg)
    return False, error_msg
| 157 |
+
|
| 158 |
+
|
def validate_all_agents(outputs: dict) -> dict[str, tuple[bool, Optional[str]]]:
    """
    Validate all agent outputs at once.

    Args:
        outputs: Dict mapping agent_name → output data

    Returns:
        Dict mapping agent_name → (is_valid, error_message)
    """
    # Delegate each entry to validate_agent_output, keyed by agent name.
    return {
        name: validate_agent_output(payload, name)
        for name, payload in outputs.items()
    }