riazmo committed on
Commit
a2498f7
·
verified ·
1 Parent(s): f7fb352

Upload 3 files

Browse files
Files changed (3) hide show
  1. core/logging.py +70 -0
  2. core/rule_engine.py +1 -1
  3. core/validation.py +172 -0
core/logging.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Structured Logging for Design System Extractor
3
+ ================================================
4
+
5
+ Provides consistent logging across the application using loguru.
6
+ Falls back to standard logging if loguru is not available.
7
+ """
8
+
9
+ import sys
10
+ from typing import Optional
11
+
12
+ try:
13
+ from loguru import logger as _loguru_logger
14
+
15
+ # Remove default handler
16
+ _loguru_logger.remove()
17
+
18
+ # Add structured console handler
19
+ _loguru_logger.add(
20
+ sys.stderr,
21
+ format="<green>{time:HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{extra[module]}</cyan> | {message}",
22
+ level="INFO",
23
+ colorize=True,
24
+ )
25
+
26
+ # Add file handler for debugging (rotated)
27
+ _loguru_logger.add(
28
+ "logs/extractor_{time:YYYY-MM-DD}.log",
29
+ format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {extra[module]} | {message}",
30
+ level="DEBUG",
31
+ rotation="10 MB",
32
+ retention="7 days",
33
+ compression="gz",
34
+ catch=True, # Don't crash on log errors
35
+ )
36
+
37
+ HAS_LOGURU = True
38
+
39
+ except ImportError:
40
+ import logging
41
+
42
+ HAS_LOGURU = False
43
+ logging.basicConfig(
44
+ level=logging.INFO,
45
+ format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
46
+ datefmt="%H:%M:%S",
47
+ )
48
+
49
+
50
def get_logger(module_name: str = "app"):
    """
    Get a logger instance for a specific module.

    Args:
        module_name: Name of the module (e.g., 'rule_engine', 'aurora', 'app')

    Returns:
        Logger instance with module context: a loguru logger bound with
        ``extra["module"]`` when loguru is installed, otherwise a standard
        ``logging.Logger`` named after the module.
    """
    if HAS_LOGURU:
        # bind() attaches the module name so the {extra[module]} field in the
        # configured handler formats resolves for every record.
        return _loguru_logger.bind(module=module_name)
    # Import locally: at module scope `logging` is only bound inside the
    # ImportError fallback branch, so referencing it here unqualified would
    # be a latent NameError if the branches ever change. A local import
    # keeps this function self-contained either way.
    import logging
    return logging.getLogger(module_name)
64
+
65
+
66
# Pre-configured loggers for common modules, importable directly, e.g.
# `from core.logging import app_logger`.
app_logger = get_logger("app")
rule_engine_logger = get_logger("rule_engine")
agent_logger = get_logger("agents")
extraction_logger = get_logger("extraction")
core/rule_engine.py CHANGED
@@ -619,7 +619,7 @@ def analyze_spacing_grid(spacing_tokens: dict) -> SpacingGridAnalysis:
619
  px_val = int(float(str(px).replace('px', '')))
620
  if px_val > 0:
621
  values.append(px_val)
622
- except:
623
  continue
624
 
625
  if not values:
 
619
  px_val = int(float(str(px).replace('px', '')))
620
  if px_val > 0:
621
  values.append(px_val)
622
+ except (ValueError, TypeError):
623
  continue
624
 
625
  if not values:
core/validation.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Output Validation
3
+ ========================
4
+
5
+ JSON schemas for validating LLM agent outputs.
6
+ Ensures data integrity between pipeline stages.
7
+ """
8
+
9
+ from typing import Any, Optional
10
+
11
+ try:
12
+ from jsonschema import validate, ValidationError
13
+
14
+ HAS_JSONSCHEMA = True
15
+ except ImportError:
16
+ HAS_JSONSCHEMA = False
17
+
18
+ from core.logging import get_logger
19
+
20
+ logger = get_logger("validation")
21
+
22
+
23
# =============================================================================
# SCHEMAS
# =============================================================================

# Output of the brand-identification agent ("aurora"): detected brand colors
# plus palette/cohesion metadata. Only the primary color and strategy are
# mandatory.
BRAND_IDENTIFICATION_SCHEMA = {
    "type": "object",
    "properties": {
        "brand_primary": {"type": ["string", "null"]},
        "brand_secondary": {"type": ["string", "null"]},
        "brand_accent": {"type": ["string", "null"]},
        "palette_strategy": {"type": "string"},
        "cohesion_score": {"type": ["number", "integer"]},
        "cohesion_notes": {"type": "string"},
        "semantic_names": {"type": "object"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["brand_primary", "palette_strategy"],
}

# Output of the benchmark-advisor agent ("atlas"): a recommended benchmark
# with reasoning and alignment trade-offs.
BENCHMARK_ADVICE_SCHEMA = {
    "type": "object",
    "properties": {
        "recommended_benchmark": {"type": "string"},
        "recommended_benchmark_name": {"type": "string"},
        "reasoning": {"type": "string"},
        "alignment_changes": {"type": "array"},
        "pros_of_alignment": {"type": "array"},
        "cons_of_alignment": {"type": "array"},
        "alternative_benchmarks": {"type": "array"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["recommended_benchmark", "reasoning"],
}

# Output of the best-practices auditor ("sentinel"): an overall score plus
# lists of individual checks and fixes.
BEST_PRACTICES_SCHEMA = {
    "type": "object",
    "properties": {
        "overall_score": {"type": ["number", "integer"]},
        "checks": {"type": "array"},
        "priority_fixes": {"type": "array"},
        "passing_practices": {"type": "array"},
        "failing_practices": {"type": "array"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["overall_score", "priority_fixes"],
}

# Output of the head synthesizer ("nexus"): the final cross-agent summary and
# concrete recommendations.
HEAD_SYNTHESIS_SCHEMA = {
    "type": "object",
    "properties": {
        "executive_summary": {"type": "string"},
        "scores": {"type": "object"},
        "benchmark_fit": {"type": "object"},
        "brand_analysis": {"type": "object"},
        "top_3_actions": {"type": "array"},
        "color_recommendations": {"type": "array"},
        "type_scale_recommendation": {"type": "object"},
        "spacing_recommendation": {"type": "object"},
        "self_evaluation": {"type": "object"},
    },
    "required": ["executive_summary", "top_3_actions"],
}

# Map agent names to schemas. Each schema is reachable both under the agent's
# codename and its descriptive alias; lookups are done on the lowercased,
# stripped agent name (see validate_agent_output).
AGENT_SCHEMAS = {
    "aurora": BRAND_IDENTIFICATION_SCHEMA,
    "brand_identifier": BRAND_IDENTIFICATION_SCHEMA,
    "atlas": BENCHMARK_ADVICE_SCHEMA,
    "benchmark_advisor": BENCHMARK_ADVICE_SCHEMA,
    "sentinel": BEST_PRACTICES_SCHEMA,
    "best_practices": BEST_PRACTICES_SCHEMA,
    "nexus": HEAD_SYNTHESIS_SCHEMA,
    "head_synthesizer": HEAD_SYNTHESIS_SCHEMA,
}
97
+
98
+
99
+ # =============================================================================
100
+ # VALIDATION FUNCTIONS
101
+ # =============================================================================
102
+
103
def validate_agent_output(data: Any, agent_name: str) -> tuple[bool, Optional[str]]:
    """
    Validate an agent's output against its expected schema.

    Args:
        data: The output data (a dict, an object exposing to_dict(), or a
            dataclass instance)
        agent_name: Name of the agent (e.g., 'aurora', 'nexus'); lookup is
            case-insensitive and ignores surrounding whitespace

    Returns:
        (is_valid, error_message) tuple; error_message is None on success.
    """
    agent_key = agent_name.lower().strip()
    schema = AGENT_SCHEMAS.get(agent_key)

    # Check `is None` rather than falsiness: an intentionally-empty schema
    # ({} = "accept anything") would be falsy and was previously skipped
    # as if no schema existed.
    if schema is None:
        logger.warning(f"No schema found for agent: {agent_name}")
        return True, None  # No schema = pass (don't block)

    # Normalize the payload to a plain dict before validating.
    if hasattr(data, "to_dict"):
        data_dict = data.to_dict()
    elif hasattr(data, "__dataclass_fields__"):
        from dataclasses import asdict
        data_dict = asdict(data)
    elif isinstance(data, dict):
        data_dict = data
    else:
        return False, f"Cannot validate: unexpected type {type(data)}"

    if not HAS_JSONSCHEMA:
        # Fallback: manual required-field check
        return _manual_validate(data_dict, schema, agent_name)

    try:
        validate(instance=data_dict, schema=schema)
        logger.debug(f"Validation passed for {agent_name}")
        return True, None
    except ValidationError as e:
        error_msg = f"Validation failed for {agent_name}: {e.message}"
        logger.warning(error_msg)
        return False, error_msg
144
+
145
+
146
+ def _manual_validate(data: dict, schema: dict, agent_name: str) -> tuple[bool, Optional[str]]:
147
+ """Fallback validation without jsonschema library."""
148
+ required = schema.get("required", [])
149
+ missing = [field for field in required if field not in data]
150
+
151
+ if missing:
152
+ error_msg = f"{agent_name} output missing required fields: {missing}"
153
+ logger.warning(error_msg)
154
+ return False, error_msg
155
+
156
+ return True, None
157
+
158
+
159
def validate_all_agents(outputs: dict) -> dict[str, tuple[bool, Optional[str]]]:
    """
    Validate all agent outputs at once.

    Args:
        outputs: Dict mapping agent_name to that agent's output data

    Returns:
        Dict mapping agent_name to the (is_valid, error_message) tuple
        produced by validate_agent_output for that agent.
    """
    return {
        name: validate_agent_output(payload, name)
        for name, payload in outputs.items()
    }