| import os |
| import re |
|
|
# Absolute path of the HTML document that audit_file() reads and rewrites
# in place.
filepath = r'd:\aicoding\kaiyuan\v2\index.html'
|
|
| |
# Literal substitutions applied to the whole document: every occurrence of a
# key is replaced by its value, and the entries are applied in the order they
# are listed here.
#
# NOTE(review): many entries map a string to itself (key == value) and are
# therefore no-ops as written. Presumably the keys were once mis-encoded
# sequences that got lost when this script itself was re-encoded -- restore
# the garbled keys or drop those entries. TODO confirm.
GLOBAL_REPLACEMENTS = {
    # Flag emoji.
    '🇺🇸': '🇺🇸',
    'ðŸ‡Â🇰': '🇭🇰',
    '🇪🇸': '🇪🇸',
    '🇫🇷': '🇫🇷',
    '🇩🇪': '🇩🇪',
    '🇯🇵': '🇯🇵',
    '🇰🇷': '🇰🇷',
    '🇸🇦': '🇸🇦',
    '🇵🇹': '🇵🇹',
    # Other pictographs.
    '🌎': '🌍',
    '📢': '📢',
    '🌠': '🌍',
    '📠': '📄',
    # Drop-down arrow. 'â–?' has to be tried before its prefix 'â–',
    # otherwise the shorter key would consume it and leave a stray '?'.
    '■': '▾',
    '▾': '▾',
    'â–?': '▾',
    'â–': '▾',
    # Language names.
    'Français': 'Français',
    'Español': 'Español',
    'Português': 'Português',
    '日本誠': '日本語',
    'ÕœêµÂ얠': '한국어',
    '繠體ä¸Â文': '繁體中文',
    'العربية': 'العربية',
    # Punctuation and symbols.
    '©': '©',
    '•': '•',
    '—': '—',
    # Second garbled spelling of the Hong Kong flag.
    'ðŸ‡ðŸ‡°': '🇭🇰',
}
|
|
def audit_file(path=None, replacements=None):
    """Repair mis-encoded text and damaged closing tags in an HTML file, in place.

    The file is read as bytes, decoded (UTF-8, falling back to latin-1),
    cleaned up and written back to the same location as UTF-8. Line endings
    present in the decoded text are written back without translation.

    Args:
        path: File to repair. Defaults to the module-level ``filepath``.
        replacements: Mapping of literal ``old -> new`` substitutions, applied
            in iteration order. Defaults to the module-level
            ``GLOBAL_REPLACEMENTS``.

    Returns:
        None. A ``<div`` / ``</div`` tally and a completion message are
        printed.

    Raises:
        OSError: If the file cannot be read or written.
    """
    # The module-level defaults are only looked up when no override is given.
    target = filepath if path is None else path
    table = GLOBAL_REPLACEMENTS if replacements is None else replacements

    with open(target, 'rb') as f:
        data = f.read()

    content = _repair_markup(_decode_bytes(data), table)
    _report_div_balance(content)

    # newline='\n' disables newline translation on write.
    with open(target, 'w', encoding='utf-8', newline='\n') as f:
        f.write(content)

    print('Deep Audit and Cleanup Complete.')


def _decode_bytes(data):
    """Decode *data* as UTF-8, falling back to latin-1 when that fails."""
    try:
        return data.decode('utf-8')
    except UnicodeDecodeError:
        # latin-1 maps every byte to a code point, so this always succeeds,
        # but multi-byte text comes out garbled; make the fallback visible.
        print('WARNING: File is not valid UTF-8; decoding as latin-1.')
        return data.decode('latin-1')


def _repair_markup(content, replacements):
    """Apply the replacement table, then the fixed text and tag repairs."""
    for old, new in replacements.items():
        content = content.replace(old, new)

    # Garbled Simplified-Chinese fragments, applied in this order.
    chinese_fixes = (
        ('å¦你', '学位'),
        ('大å¦', '大学'),
        ('戠绩å ?', '成绩单'),
        ('诠书', '证书'),
        ('造堇', '造假'),
    )
    for old, new in chinese_fixes:
        content = content.replace(old, new)

    # Declare the encoding once, right after the first <head>.
    if '<meta charset="UTF-8">' not in content:
        content = content.replace('<head>', '<head>\n <meta charset="UTF-8">', 1)

    # A closing tag whose '<' was turned into '?' ("?/div>", "? /button>").
    content = re.sub(r'\?\s*/(div|button)>', r'</\1>', content)

    # NOTE(review): the previous version also replaced '🔍', '🛡️' and '🌐'
    # with themselves here; those calls changed nothing and were removed.
    return content


def _report_div_balance(content):
    """Print the '<div' / '</div' tally and warn when openers outnumber closers."""
    open_divs = content.count('<div')
    close_divs = content.count('</div')
    print(f'Div count check: Open={open_divs}, Close={close_divs}')

    if open_divs > close_divs:
        # Report only: nothing here tries to balance the tags.
        print('WARNING: Unclosed divs detected (not fixed automatically).')
|
|
# Run the cleanup only when this file is executed as a script.
if __name__ == '__main__':
    audit_file()
|
|