# Spaces:
# Sleeping
# Sleeping
"""
Utilities for creating optimized Pydantic schemas for LLM usage.
"""
import copy
from typing import Any

from pydantic import BaseModel
class SchemaOptimizer:
    """Namespace of static helpers that turn a Pydantic model into a compact JSON schema.

    The produced schema has every resolvable ``$ref`` inlined, metadata titles
    removed, and is shaped for OpenAI strict mode: every object node carries
    ``additionalProperties: false`` and lists all of its properties as required.
    """

    # Keywords removed from every schema node: `$defs` is redundant once references
    # are inlined, `title` is metadata only, and `additionalProperties` is re-added
    # as `False` on every object node.
    _DROPPED_KEYWORDS = frozenset({'additionalProperties', '$defs', 'title'})

    # Keywords whose values are plain JSON data rather than sub-schemas. They are
    # copied verbatim so that, for example, a dict-valued `default` containing a
    # `title` or `type` key is never rewritten as if it were a schema.
    _LITERAL_KEYWORDS = frozenset({'default', 'enum', 'const', 'examples', 'required'})

    @staticmethod
    def create_optimized_json_schema(model: type[BaseModel]) -> dict[str, Any]:
        """
        Create the most optimized schema by flattening all $ref/$defs while preserving
        FULL descriptions and ALL action definitions. Also ensures OpenAI strict mode compatibility.

        Args:
            model: The Pydantic model to optimize

        Returns:
            Optimized schema with all resolvable $refs inlined, `additionalProperties: false`
            on every object and every property listed in `required`

        Raises:
            ValueError: If the flattened root is not a dictionary, or if a definition
                refers to itself (a recursive schema cannot be inlined).
        """
        original_schema = model.model_json_schema()

        # `$defs` is only needed as a lookup table; it is dropped from the output.
        defs_lookup = original_schema.get('$defs', {})

        optimized_schema = SchemaOptimizer._flatten(original_schema, defs_lookup, frozenset())

        # Ensure we have a dictionary (should always be the case for a schema root)
        if not isinstance(optimized_schema, dict):
            raise ValueError('Optimized schema result is not a dictionary')

        SchemaOptimizer._make_strict_compatible(optimized_schema)
        return optimized_schema

    @staticmethod
    def _flatten(node: Any, defs: dict[str, Any], resolving: frozenset[str]) -> Any:
        """
        Return a flattened copy of a schema node; the input is never mutated.

        Args:
            node: A schema fragment (dict), a list of fragments, or a scalar
            defs: The root schema's `$defs` table used to resolve references
            resolving: Names of the definitions currently being inlined, used to
                detect reference cycles

        Returns:
            The flattened fragment (same container kind as `node`)

        Raises:
            ValueError: If a `$ref` points back to a definition that is being inlined
        """
        if isinstance(node, list):
            return [SchemaOptimizer._flatten(item, defs, resolving) for item in node]
        if not isinstance(node, dict):
            return node

        flattened: dict[str, Any] = {}
        inlined: dict[str, Any] | None = None

        for key, value in node.items():
            if key in SchemaOptimizer._DROPPED_KEYWORDS:
                continue
            if key == 'description':
                # Preserve FULL descriptions without truncation, skip empty ones
                if value:
                    flattened[key] = value
            elif key == '$ref' and isinstance(value, str):
                inlined = SchemaOptimizer._inline_ref(value, defs, resolving)
                if inlined is None:
                    # Unresolvable reference: keep it rather than silently
                    # emitting an empty schema.
                    flattened[key] = value
            elif key in SchemaOptimizer._LITERAL_KEYWORDS:
                flattened[key] = copy.deepcopy(value)
            elif key == 'properties' and isinstance(value, dict):
                # Keys of a `properties` map are field names, not schema keywords,
                # so a field called e.g. `type`, `title` or `required` must not be
                # interpreted as one. Only the values are schemas.
                flattened[key] = {
                    name: SchemaOptimizer._flatten(sub_schema, defs, resolving)
                    for name, sub_schema in value.items()
                }
            else:
                # anyOf / items / validation keywords / anything else: recurse into
                # containers, pass scalars through.
                flattened[key] = SchemaOptimizer._flatten(value, defs, resolving)

        if inlined is None:
            result = flattened
        else:
            # The inlined definition is the base; sibling keywords override it,
            # except that a description coming from the definition is kept.
            result = inlined
            for key, value in flattened.items():
                if key != 'description' or 'description' not in result:
                    result[key] = value

        # CRITICAL: OpenAI strict mode requires additionalProperties: false on ALL objects
        if result.get('type') == 'object':
            result['additionalProperties'] = False
        return result

    @staticmethod
    def _inline_ref(ref: str, defs: dict[str, Any], resolving: frozenset[str]) -> dict[str, Any] | None:
        """
        Resolve a `$ref` such as "#/$defs/SomeName" to its flattened definition.

        Args:
            ref: The raw `$ref` string; only its last path segment is used as the name
            defs: The root schema's `$defs` table
            resolving: Names of the definitions currently being inlined

        Returns:
            The flattened definition, or None when `defs` holds no dict under that name

        Raises:
            ValueError: If the definition is already being inlined (reference cycle)
        """
        name = ref.rsplit('/', 1)[-1]
        definition = defs.get(name)
        if not isinstance(definition, dict):
            return None
        if name in resolving:
            # Inlining a self-referential definition would never terminate.
            raise ValueError(
                f'Cannot flatten recursive schema reference {ref!r}: definition {name!r} refers to itself'
            )
        return SchemaOptimizer._flatten(definition, defs, resolving | {name})

    @staticmethod
    def _make_strict_compatible(schema: dict[str, Any] | list[Any]) -> None:
        """
        Ensure all properties are required for OpenAI strict mode.

        Walks the schema in place and, for every node with `type == 'object'` and a
        `properties` map, replaces `required` with the full list of property names.

        Args:
            schema: A schema fragment or list of fragments; mutated in place
        """
        if isinstance(schema, list):
            for item in schema:
                SchemaOptimizer._make_strict_compatible(item)
            return
        if not isinstance(schema, dict):
            return

        # First recursively apply to nested schemas
        for key, value in schema.items():
            if key in SchemaOptimizer._LITERAL_KEYWORDS:
                # Literal data (including `required` itself) holds no sub-schemas.
                continue
            if key == 'properties' and isinstance(value, dict):
                # Visit each field's schema directly so that field names which
                # collide with keywords (e.g. a field called `required`) are not skipped.
                for sub_schema in value.values():
                    SchemaOptimizer._make_strict_compatible(sub_schema)
            elif isinstance(value, (dict, list)):
                SchemaOptimizer._make_strict_compatible(value)

        # Then update required for this level
        properties = schema.get('properties')
        if schema.get('type') == 'object' and isinstance(properties, dict):
            schema['required'] = list(properties)

    @staticmethod
    def create_gemini_optimized_schema(model: type[BaseModel]) -> dict[str, Any]:
        """
        Create the schema used for Gemini structured output.

        This currently delegates to `create_optimized_json_schema`, so the result is
        identical: all references are inlined and every property of every object is
        listed in `required` (explicit `required` arrays from the model are replaced,
        not preserved).

        Args:
            model: The Pydantic model to optimize

        Returns:
            Optimized schema suitable for Gemini structured output
        """
        return SchemaOptimizer.create_optimized_json_schema(model)