Spaces:
Runtime error
Runtime error
| """ | |
| Shared formatting utility functions for both dialectic and deriver modules. | |
| This module contains helper functions for processing observations, formatting context, | |
| handling temporal metadata, and string escaping for the reasoning system. | |
| """ | |
| from datetime import datetime, timezone | |
ILIKE_ESCAPE_CHAR = "\\"

# Single-pass translation table: each character that is special inside a
# LIKE/ILIKE pattern (the escape character itself, '%' and '_') maps to the
# escape character followed by that character.
_ILIKE_ESCAPE_TABLE = str.maketrans(
    {char: ILIKE_ESCAPE_CHAR + char for char in (ILIKE_ESCAPE_CHAR, "%", "_")}
)


def escape_ilike_pattern(text: str) -> str:
    r"""
    Escape SQL ILIKE/LIKE pattern special characters in user-provided text.

    SQL LIKE/ILIKE patterns treat '%' as "match any sequence" and '_' as
    "match any single character". Without escaping, a user searching for
    "100%" would match "100" followed by anything, not the literal "100%".
    This function prefixes '%', '_' and the escape character itself
    (backslash) with ILIKE_ESCAPE_CHAR so the input is matched literally.

    The docstring is a raw string so the backslashes in the examples below
    read exactly as Python would print them.

    Args:
        text: User-provided search text that may contain %, _, or backslash

    Returns:
        Escaped text safe for use in ILIKE patterns. Use with escape='\\' parameter.

    Example:
        >>> escape_ilike_pattern("100%")
        '100\\%'
        >>> escape_ilike_pattern("file_name")
        'file\\_name'
        >>> escape_ilike_pattern("path\\to\\file")
        'path\\\\to\\\\file'
    """
    # translate() rewrites every character independently in one pass, so an
    # escape character inserted for '%' or '_' can never be escaped a second
    # time (the chained-replace form had to rely on replacing backslashes first).
    return text.translate(_ILIKE_ESCAPE_TABLE)
def format_datetime_utc(dt: datetime) -> str:
    """
    Format datetime to ISO 8601 string with Z suffix for UTC timezone.

    Naive datetimes are assumed to already be in UTC; aware datetimes are
    converted to UTC. Subsecond precision is dropped (truncated, not rounded).

    Args:
        dt: datetime object (should be timezone-aware)

    Returns:
        ISO 8601 formatted string with Z suffix for UTC

    Example:
        >>> dt = datetime(2023, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        >>> format_datetime_utc(dt)
        '2023-01-01T12:00:00Z'
    """
    # A datetime is naive when it has no tzinfo OR when its tzinfo reports no
    # offset; utcoffset() returns None in both cases. Checking only
    # `tzinfo is None` would send the second kind to astimezone(), which
    # interprets naive values as machine-local time instead of UTC.
    if dt.utcoffset() is None:
        utc_dt = dt.replace(tzinfo=timezone.utc)
    else:
        utc_dt = dt.astimezone(timezone.utc)

    # After normalisation the offset is always rendered as "+00:00"; swap it
    # for the "Z" designator.
    return utc_dt.replace(microsecond=0).isoformat().replace("+00:00", "Z")
def utc_now_iso() -> str:
    """
    Return the current UTC time as an ISO 8601 string ending in Z.

    The timestamp is produced by format_datetime_utc, so subsecond
    precision is removed.

    Returns:
        Current UTC time in ISO 8601 format with Z suffix

    Example:
        >>> utc_now_iso()
        '2023-01-01T12:34:56Z'
    """
    now = datetime.now(timezone.utc)
    return format_datetime_utc(now)
def parse_datetime_iso(iso_string: str) -> datetime:
    """
    Parse an ISO 8601 datetime string into a timezone-aware datetime.

    Accepts a trailing Z/z designator, explicit UTC offsets, and naive
    timestamps (which are taken to be UTC). The input is coerced with str()
    and validated before parsing: null bytes, line breaks and any other
    control character below 0x20 except tab are rejected, and surrounding
    whitespace is stripped.

    Args:
        iso_string: ISO 8601 formatted datetime string

    Returns:
        datetime object with timezone information

    Raises:
        ValueError: If the input is empty, contains rejected characters, or
            cannot be parsed by datetime.fromisoformat

    Example:
        >>> parse_datetime_iso('2023-01-01T12:00:00Z')
        datetime.datetime(2023, 1, 1, 12, 0, tzinfo=datetime.timezone.utc)
        >>> parse_datetime_iso('2023-01-01T12:00:00+05:00')
        datetime.datetime(2023, 1, 1, 12, 0, tzinfo=datetime.timezone(datetime.timedelta(seconds=18000)))
    """
    # Falsy input (None, "", 0, ...) is rejected before any coercion.
    if not iso_string:
        raise ValueError("Invalid input: must be a non-empty string")

    raw = str(iso_string)

    # Null bytes and line breaks get their own, more specific message.
    if any(forbidden in raw for forbidden in ("\x00", "\r", "\n")):
        raise ValueError("Invalid input: contains null bytes or line breaks")

    # Remaining ASCII control characters are rejected; tab is tolerated.
    for ch in raw:
        if ch != "\t" and ord(ch) < 32:
            raise ValueError("Invalid input: contains non-printable characters")

    candidate = raw.strip()
    if candidate == "":
        raise ValueError("Invalid input: empty after stripping whitespace")

    # Rewrite a trailing Z/z as an explicit zero offset before parsing.
    if candidate[-1] in ("Z", "z"):
        candidate = candidate[:-1] + "+00:00"

    try:
        parsed = datetime.fromisoformat(candidate)
    except ValueError as e:
        raise ValueError(f"Invalid ISO 8601 datetime format: {e}") from e

    # Timestamps without an offset are treated as UTC.
    if parsed.tzinfo is None:
        return parsed.replace(tzinfo=timezone.utc)
    return parsed
def format_new_turn_with_timestamp(
    new_turn: str, current_time: datetime, speaker: str
) -> str:
    """
    Render a conversation turn as "<timestamp> <speaker>: <message>".

    The timestamp is always included and is formatted as
    YYYY-MM-DD HH:MM:SS from current_time as given (no timezone conversion
    is applied).

    Args:
        new_turn: The message content
        current_time: Message timestamp
        speaker: The speaker's name

    Returns:
        Formatted string like "2023-05-08 13:56:00 speaker: hello"
    """
    stamp = current_time.strftime("%Y-%m-%d %H:%M:%S")
    return "{} {}: {}".format(stamp, speaker, new_turn)