# Paramify-test / structure_analysis.py
# bluestpanda
# 3rd
# a9f051b
"""
Structure analysis utilities for detecting fields in JSON data.
"""
from typing import Dict, Any, List
def detect_summary_fields(data: Dict[str, Any]) -> List[str]:
    """
    Detect summary/aggregate fields in the data structure.

    Three sources are combined:

    1. Every key of ``data['results']['summary']`` (when both are dicts),
       reported as ``summary.<key>``.
    2. Every key of a top-level ``data['summary']`` dict, also reported as
       ``summary.<key>``.
    3. Every key anywhere in the structure whose lowercased name contains
       an aggregate marker (``total``, ``count``, ``sum``, ...) and whose
       value is a number, reported by its dotted path from the root.

    Only the first element of a non-empty list is inspected during the
    walk; its keys are reported under the list's own path.

    Args:
        data: Parsed JSON-like mapping to inspect.

    Returns:
        Unique field paths, in the order they were first discovered.
    """
    # Substrings that mark a key as an aggregate; matched case-insensitively.
    aggregate_patterns = (
        'total', 'count', 'sum', 'average', 'avg', 'percent', 'percentage',
    )
    summary_fields: List[str] = []

    # Check for 'summary' in results
    results = data['results'] if 'results' in data else None
    if isinstance(results, dict):
        summary_data = results.get('summary')
        if isinstance(summary_data, dict):
            summary_fields.extend(f"summary.{key}" for key in summary_data)

    # Check for top-level 'summary'
    if 'summary' in data and isinstance(data['summary'], dict):
        summary_fields.extend(f"summary.{key}" for key in data['summary'])

    # Look for aggregate patterns in field names
    def find_aggregate_fields(obj, path=""):
        if isinstance(obj, dict):
            for key, value in obj.items():
                current_path = f"{path}.{key}" if path else key
                lowered = key.lower()
                # bool is a subclass of int, so exclude it explicitly:
                # a true/false flag is not an aggregate value.
                is_number = (
                    isinstance(value, (int, float))
                    and not isinstance(value, bool)
                )
                if is_number and any(p in lowered for p in aggregate_patterns):
                    summary_fields.append(current_path)
                # Recurse into every value, whether or not the key matched.
                find_aggregate_fields(value, current_path)
        elif isinstance(obj, list) and obj:
            # The first element stands in for the whole list.
            find_aggregate_fields(obj[0], path)

    find_aggregate_fields(data)

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (a set would yield an arbitrary order).
    return list(dict.fromkeys(summary_fields))
def classify_data_structure(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Walk the structure and sort its field paths into categories.

    Returns a dict with two lists of dotted paths:

    * ``config_fields`` - keys whose lowercased name contains one of
      ``config``, ``compliance``, ``enabled``, ``enforced`` or ``policy``.
    * ``object_arrays`` - keys whose value is a non-empty list with a dict
      as its first element.

    Only the first element of a non-empty list is descended into, and its
    keys are reported under the list's own path.
    """
    markers = ('config', 'compliance', 'enabled', 'enforced', 'policy')

    def iter_entries(node, prefix=""):
        # Yield (dotted_path, key, value) for each dict entry, parent
        # entries before their children.
        if isinstance(node, list):
            if len(node) > 0:
                yield from iter_entries(node[0], prefix)
            return
        if not isinstance(node, dict):
            return
        for name, child in node.items():
            dotted = f"{prefix}.{name}" if prefix else name
            yield dotted, name, child
            yield from iter_entries(child, dotted)

    config_paths = []
    array_paths = []
    for dotted, name, child in iter_entries(data):
        lowered = name.lower()
        # Config/compliance style field names.
        if any(marker in lowered for marker in markers):
            config_paths.append(dotted)
        # Lists of objects, judged by the first element only.
        if isinstance(child, list) and len(child) > 0 and isinstance(child[0], dict):
            array_paths.append(dotted)

    return {
        'config_fields': config_paths,
        'object_arrays': array_paths,
    }
def get_hierarchy_summary(data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Report whether the data carries a summary section.

    A summary is present when ``data`` has a ``'summary'`` key (of any
    value type), or when ``data['results']`` is a dict containing a
    ``'summary'`` key. The result holds ``has_summary`` and ``levels``,
    where ``levels`` is 2 with a summary and 1 without.
    """
    # Summary nested under a dict-valued 'results' section.
    nested_summary = (
        'results' in data
        and isinstance(data['results'], dict)
        and 'summary' in data['results']
    )
    # Summary directly at the top level.
    top_level_summary = 'summary' in data

    if nested_summary or top_level_summary:
        return {'has_summary': True, 'levels': 2}
    return {'has_summary': False, 'levels': 1}