Spaces:
Sleeping
Sleeping
| import re | |
def copy_run_formatting(source_run, target_run):
    """Copy the character formatting of ``source_run`` onto ``target_run``.

    Bold, italic and underline are always copied as-is (including ``None``).
    Font size, font name, RGB colour and highlight colour are copied only
    when the source has an explicit value, so an unset source attribute never
    overwrites whatever the target already has.

    Args:
        source_run: Run to read formatting from (presumably a python-docx
            ``Run``; anything exposing the same attributes works).
        target_run: Run whose formatting is updated in place.

    Returns:
        None. ``target_run`` is mutated.
    """
    target_run.bold = source_run.bold
    target_run.italic = source_run.italic
    target_run.underline = source_run.underline

    source_font = source_run.font
    target_font = target_run.font

    # Compare against None rather than relying on truthiness: a value can be
    # explicitly set and still be falsy (e.g. a highlight colour whose enum
    # value is 0), and such values must be copied too.
    if source_font.size is not None:
        target_font.size = source_font.size
    if source_font.name is not None:
        target_font.name = source_font.name

    source_rgb = source_font.color.rgb
    if source_rgb is not None:
        target_font.color.rgb = source_rgb

    if source_font.highlight_color is not None:
        target_font.highlight_color = source_font.highlight_color
def segment_text(text):
    """Split text into segments that are translatable or not.

    The text is scanned for tokens that must be preserved verbatim: runs of
    three or more underscores, phone numbers, e-mail addresses, URLs, times,
    time ranges and plain numbers. Everything between those tokens is marked
    as translatable.

    Args:
        text: The string to split.

    Returns:
        A list of ``(translatable, segment)`` tuples in document order, where
        ``translatable`` is ``False`` for preserved tokens and ``True`` for
        the text between them. Concatenating every ``segment`` reproduces
        ``text``; an empty string yields an empty list.
    """
    # Characters accepted between the two ends of a time range: ASCII hyphen
    # (kept first so it is literal inside the character class), then hyphen,
    # non-breaking hyphen, figure dash, en dash, em dash, horizontal bar and
    # minus sign. Written as escapes so the set survives re-encoding.
    dash_chars = "-\u2010\u2011\u2012\u2013\u2014\u2015\u2212"
    # Matches times like '9:15a', '10:30a', '1:00p', '10:30am'
    time_pattern = r'\b\d{1,2}:\d{2}[aApP][mM]?\b'
    # Matches ranges like '10:00a - 10:30a' with any of the dashes above
    time_range_pattern = fr'{time_pattern}\s*[{dash_chars}]\s*{time_pattern}'
    email_pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+'
    phone_pattern = r'\+?\d[\d\s\-]{7,}\d\b'
    # URLs match until whitespace or comma, so trailing commas are excluded
    url_pattern = r'https?://[^\s,]+|www\.[^\s,]+'
    number_pattern = r'\b\d+(?:\.\d+)?\b'

    # Order matters: alternatives are tried left to right at each position,
    # so the more specific patterns must come before the generic ones
    # (time range before single time, everything before bare numbers).
    patterns = [
        r'_{3,}',            # Three or more underscores
        phone_pattern,       # Phone numbers
        email_pattern,       # Email addresses
        url_pattern,         # URLs
        time_range_pattern,  # Time ranges
        time_pattern,        # Times
        number_pattern,      # Numbers
    ]
    regex = re.compile('|'.join(patterns), re.IGNORECASE)

    segments = []
    last_end = 0
    for match in regex.finditer(text):
        start, end = match.span()
        if start > last_end:
            # Text between the previous token and this one is translatable.
            segments.append((True, text[last_end:start]))
        segments.append((False, text[start:end]))
        last_end = end
    if last_end < len(text):
        # Trailing text after the final preserved token.
        segments.append((True, text[last_end:]))
    return segments
def is_phone_number(text):
    """Return True when the whole of ``text`` looks like a phone number.

    Surrounding whitespace is ignored. The remainder must be an optional
    leading ``+``, a digit, at least three characters drawn from digits,
    hyphens, whitespace and parentheses, and a final digit.
    """
    candidate = text.strip()
    found = re.match(r'^\+?\d[\d\-\s\(\)]{3,}\d$', candidate)
    return bool(found)