Spaces:
Build error
Build error
| from datetime import datetime | |
| from open_webui.utils.misc import sanitize_text_for_db | |
| KEYS_TO_EXCLUDE = ['content', 'pages', 'tables', 'paragraphs', 'sections', 'figures'] | |
| def filter_metadata(metadata: dict[str, any]) -> dict[str, any]: | |
| # Removes large/redundant fields from metadata dict. | |
| metadata = {key: value for key, value in metadata.items() if key not in KEYS_TO_EXCLUDE} | |
| return metadata | |
| def process_metadata( | |
| metadata: dict[str, any], | |
| ) -> dict[str, any]: | |
| # Removes large fields, converts non-serializable types (datetime, list, dict) to strings, | |
| # and sanitizes strings for database storage (strips null bytes and invalid surrogates). | |
| result = {} | |
| for key, value in metadata.items(): | |
| # Skip large fields | |
| if key in KEYS_TO_EXCLUDE: | |
| continue | |
| # Convert non-serializable fields to strings | |
| if isinstance(value, (datetime, list, dict)): | |
| result[key] = sanitize_text_for_db(str(value)) | |
| else: | |
| result[key] = sanitize_text_for_db(value) | |
| return result | |