# One-off repair script: rewrites the HTML file at `filepath` in place as clean UTF-8.
#
# Steps, as far as the visible text shows:
#   * Read the file in binary mode and decode it as UTF-8 with errors='ignore'.
#     Byte sequences that are not valid UTF-8 are dropped silently, so this
#     step is lossy by design ("temporarily", per the inline comment).
#   * Step 1: conditionally insert a meta-charset declaration via str.replace.
#   * Step 2: compile a regex with re.DOTALL (so `.` also matches newlines) and
#     substitute `new_lang_selector` for every match.
#   * Step 3: replace a mojibake sequence with the dropdown arrow character.
#   * Write the result back with encoding='utf-8' and newline='\n' (no
#     platform newline translation), then print a confirmation message.
#
# NOTE(review): this text appears to have been flattened onto a few physical
# lines, and several string literals look truncated or emptied -- presumably
# HTML tags were stripped before this copy was captured. Confirm each item
# below against the intact original before running:
#   * As displayed, the step 1 test is `'' not in text`, which is False for
#     every str, so the replace can never run; the literals presumably held
#     the meta tag and the opening head tag.
#   * `lang_selector_start` starts a string literal that is never visibly
#     closed, no assignment to `new_lang_selector` is visible, and the regex
#     pattern body is cut off.
#   * The two chained .replace() calls in step 3 show the same search string,
#     which would make the second a no-op; presumably they originally
#     targeted two different corrupted byte sequences.
#   * The "Final safety check on characters" comment has no code after it.
#   * `os` is imported but not used anywhere in the visible text.
import os filepath = r'd:\aicoding\kaiyuan\v2\index.html' # Read the entire file as binary to avoid encoding confusion with open(filepath, 'rb') as f: raw_data = f.read() # Convert to string, ignoring errors temporarily to find boundaries text = raw_data.decode('utf-8', errors='ignore') # 1. Ensure Meta Charset is at the top of head if '' not in text: text = text.replace('', '\n ') # 2. Reconstruct the Language Selector block with perfect UTF-8 lang_selector_start = '
🇺🇸 ''' # Find the old block and replace it # We need to be careful with the corrupted text import re # Regex to find the corrupted lang-selector block pattern = re.compile(r'
', re.DOTALL) text = pattern.sub(new_lang_selector, text) # 3. Fix the dropdown arrow if corrupted text = text.replace('â–?', '▾').replace('â–?', '▾') # 4. Final safety check on characters # Write back as clean UTF-8 with open(filepath, 'w', encoding='utf-8', newline='\n') as f: f.write(text) print('index.html fully reconstructed with clean UTF-8')