TTBook / fix_regex.py
Ruben Tsui
Revise index.qmd for academic tone and fix formatting
ba550b1
"""Wrap bare regex snippets in a Quarto document in inline-code backticks.

The script reads ``file_path``, replaces a fixed list of literal snippets
(apparently the Markdown-escaped spellings of regex examples) with the same
snippet wrapped in backticks, and writes the result back to the same file.
All matching is literal ``str.replace``; no regular expressions are compiled.
"""

file_path = 'QuartoProject/src/index.qmd'

# Literal snippet -> backticked replacement. Several keys are prefixes or
# substrings of other keys (e.g. r'\\d+' inside r'\\d+(?:\.\d+)?'), so the
# order in which they are applied matters; see apply_replacements().
replacements = {
    r'\\d+': r'`\d+`',
    r'\\d+(?:\.\d+)?': r'`\d+(?:\.\d+)?`',
    r'\\d+(\.\d+)?\s?(km\|GB\|%)': r'`\d+(\.\d+)?\s?(km|GB|%)`',
    r'\[A-Z\]': r'`[A-Z]`',
    r'\\d ': r'`\d` ',
    r'\\s ': r'`\s` ',
    r'\\w ': r'`\w` ',
    r'\\t ': r'`\t` ',
    r'\\n ': r'`\n` ',
    r'\\r\\n': r'`\r\n`',
    r'\\b ': r'`\b` ',
    r'\\p{Han}': r'`\p{Han}`',
    r'\[一-龥\]': r'`[一-龥]`',
    r'\[\u4E00-\u9FFF\]': r'`[\u4E00-\u9FFF]`',
    r'\[\x{4E00}-\x{9FFF}\]': r'`[\x{4E00}-\x{9FFF}]`',
    r'\[\^aeiou\]': r'`[^aeiou]`',
    r'\D、\S、\W、\P{Han}': r'`\D`、`\S`、`\W`、`\P{Han}`',
    r'\D \S \W': r'`\D` `\S` `\W`',
    r'\P{Han}\\': r'`\P{Han}`',
    r'\\{\[^}]+\\}': r'`\{[^}]+\}`',
    r'\\{\\{\[^}]+\\}\\}': r'`\{\{[^}]+\}\}`',
    r'(Chapter\|Section)\s+\d+': r'`(Chapter|Section)\s+\d+`',
    r'(?<=\第)\\d+(?=\條)': r'`(?<=第)\d+(?=條)`',
    r'(?=\()': r'`(?=()`',
    r'([A-Za-z][A-Za-z0-9 ._-]+)(([^)]+))': r'`([A-Za-z][A-Za-z0-9 ._-]+)(([^)]+))`',
    r'\2(\1)': r'`\2(\1)`',
    r'([,。!?;:])\1+': r'`([,。!?;:])\1+`',
    r'\1、\2': r'`\1`、`\2`',
    r'thr[eo]w(s\|ing)? .+? under the bus': r'`thr[eo]w(s|ing)? .+? under the bus`',
    r'(\p{Han})\1(\p{Han})\2': r'`(\p{Han})\1(\p{Han})\2`',
    r'句點 . 代表': r'句點 `.` 代表',
    r'星號 \* 代表': r'星號 `*` 代表',
    r'加號 + 代表': r'加號 `+` 代表',
    r'問號 ? 代表': r'問號 `?` 代表',
    r'方括號 \[\] 用來': r'方括號 `[]` 用來',
    r'脫字符號 \^ 與錢字號 \$ 常用來': r'脫字符號 `^` 與錢字號 `$` 常用來',
    r' !!、,, 或。。': r' `!!`、`,,` 或 `。。`',
}

# Handle table items (some literal strings and regexes in the table).
# Applied after `replacements`, in the order listed.
_TABLE_REPLACEMENTS = (
    (r' \t ', r' `\t` '),
    (r' \n ', r' `\n` '),
    (r' \s ', r' `\s` '),
    (r' \b ', r' `\b` '),
    (r' \d ', r' `\d` '),
    (r' \w ', r' `\w` '),
    (r'| \^ |', r'| `^` |'),
    (r'| \$ |', r'| `$` |'),
    (r'| \+ |', r'| `+` |'),
    (r'| \* |', r'| `*` |'),
    (r'| \? |', r'| `?` |'),
    (r'| \.(*句點*) |', r'| `.`(*句點*) |'),
    (r'| \\ |', r'| `\` |'),
    (r'| \| |', r'| `|` |'),
    (r'| () |', r'| `()` |'),
    (r'| \[\] |', r'| `[]` |'),
    (r'| {} \* ? + |', r'| `{}` `*` `?` `+` |'),
    (r'| \^ \$ |', r'| `^` `$` |'),
)


def apply_replacements(text: str) -> str:
    """Return *text* with every known snippet wrapped in backticks.

    The entries of ``replacements`` are applied longest key first. Applying
    them in insertion order let a short key such as r'\\d+' rewrite part of
    the text that a longer key (r'\\d+(?:\.\d+)?') needed, so the longer
    entry could never match and the snippet ended up only partly wrapped.
    ``sorted`` is stable, so keys of equal length keep their original
    relative order. The table replacements then run in their listed order.
    """
    longest_first = sorted(
        replacements.items(),
        key=lambda pair: len(pair[0]),
        reverse=True,
    )
    for old, new in longest_first:
        text = text.replace(old, new)
    for old, new in _TABLE_REPLACEMENTS:
        text = text.replace(old, new)
    return text


def main() -> None:
    """Rewrite ``file_path`` in place with the replacements applied."""
    with open(file_path, 'r', encoding='utf-8') as f:
        original = f.read()
    # Compute the result before reopening for writing, so an error during
    # processing cannot leave the file truncated.
    updated = apply_replacements(original)
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(updated)


# Guarded so that importing this module does not rewrite the file.
if __name__ == "__main__":
    main()