import os
# Absolute location of the HTML page that this script repairs in place.
filepath = r'd:\aicoding\kaiyuan\v2\index.html'

# Load the file as raw bytes so that no implicit platform codec is applied
# while reading.
with open(filepath, 'rb') as f:
    raw_data = f.read()

# Lenient decode: any byte sequence that is not valid UTF-8 is silently
# dropped (errors='ignore'), so `text` may be shorter than the file content.
# NOTE(review): the same path is overwritten at the end of the script, so the
# dropped bytes are not recoverable afterwards -- keep a backup if that matters.
text = str(raw_data, encoding='utf-8', errors='ignore')
# 1. Ensure Meta Charset is at the top of head
# NOTE(review): the string literals in this step look truncated. As written,
# the membership test checks for the empty string, which is contained in every
# str, so the condition is always False and the branch can never run. The
# replace() target is also split across two lines, which is not a valid
# single-quoted literal. Presumably these held the charset meta tag and the
# opening head tag -- TODO restore from the original script and confirm.
# The body of the `if` also needs to be indented.
if '' not in text:
text = text.replace('
', '\n ')
# 2. Reconstruct the Language Selector block with perfect UTF-8
# NOTE(review): only `lang_selector_start` is assigned here, yet the
# substitution further down reads `new_lang_selector`, which is never defined
# in the visible code. The literal below is also unterminated/mismatched
# (opens with one quote, ends with three). It presumably contained the
# replacement markup for the language selector -- TODO restore and confirm.
lang_selector_start = '
🇺🇸
'''
# Find the old block and replace it
# We need to be careful with the corrupted text
import re
# Regex to find the corrupted lang-selector block
# NOTE(review): the pattern literal is empty apart from a line break, so the
# expression that was meant to match the old block is missing. re.DOTALL only
# changes how '.' matches (it then also matches newlines), so the lost pattern
# presumably used '.' to span several lines -- TODO restore and confirm.
pattern = re.compile(r'
', re.DOTALL)
# Replaces every match of `pattern` in `text` with `new_lang_selector`.
text = pattern.sub(new_lang_selector, text)
# 3. Fix the dropdown arrow if corrupted
# Swap the garbled character sequence for the intended arrow glyph. A single
# replace() call already rewrites every occurrence; the original chained the
# identical call twice, and the second pass could never match anything.
# NOTE(review): if a second, differently-corrupted variant of the arrow was
# meant to be handled, add it as its own replace() with that exact sequence.
text = text.replace('â–?', '▾')

# 4. Write back as clean UTF-8
# No further character validation is performed before writing. The file is
# overwritten in place; newline='\n' disables newline translation so line
# endings are written exactly as they appear in `text`.
with open(filepath, 'w', encoding='utf-8', newline='\n') as f:
    f.write(text)

print('index.html fully reconstructed with clean UTF-8')