aegis-graph / scratch /deep_audit.py
ACLASCollege's picture
FINAL PROFESSIONALIZATION: Synchronized with GitHub sovereign standards.
05a5750 verified
import os
import re
# Absolute path of the HTML file that audit_file() reads and then overwrites in place.
filepath = r'd:\aicoding\kaiyuan\v2\index.html'
# Common corruptions to fix globally.
# Each key is an artifact substring and each value is its replacement.
# audit_file() applies the pairs one after another with str.replace, in
# insertion order, so an artifact that contains a shorter one must be listed
# first (e.g. 'â–?' before 'â–').
# NOTE(review): as this file renders, many pairs map a string to itself and the
# flag entries (plus '▾') appear more than once. The original keys may have
# been mojibake that was normalised later -- verify against the raw bytes
# before pruning anything.
GLOBAL_REPLACEMENTS = {
# Flag emoji
'🇺🇸': '🇺🇸',
'🇭🇰': '🇭🇰',
'🇪🇸': '🇪🇸',
'🇫🇷': '🇫🇷',
'🇩🇪': '🇩🇪',
'🇯🇵': '🇯🇵',
'🇰🇷': '🇰🇷',
'🇸🇦': '🇸🇦',
'🇵🇹': '🇵🇹',
# Pictographs and dropdown arrows
'🌎': '🌍',
'📢': '📢',
'🌠': '🌍',
'📠': '📄',
'■': '▾',
'▾': '▾',
'â–?': '▾',
'â–': '▾',
# Language names
'Français': 'Français',
'Español': 'Español',
'Português': 'Português',
'日本誠': '日本語',
'한국얠': '한국어',
'繠體中文': '繁體中文',
'العربية': 'العربية',
# Punctuation
'©': '©',
'•': '•',
'—': '—',
'▾': '▾',
# Flag emoji (second group; see the NOTE(review) above the dict)
'🇺🇸': '🇺🇸',
'🇭🇰': '🇭🇰',
'🇪🇸': '🇪🇸',
'🇫🇷': '🇫🇷',
'🇩🇪': '🇩🇪',
'🇯🇵': '🇯🇵',
'🇰🇷': '🇰🇷',
'🇸🇦': '🇸🇦',
'🇵🇹': '🇵🇹'
}
def _repair_markup(content, replacements):
    """Return *content* with known encoding artifacts and markup damage repaired.

    Args:
        content: The decoded HTML text.
        replacements: Mapping of artifact substring -> replacement, applied
            sequentially in the mapping's iteration order.

    Returns:
        The repaired text.
    """
    # 1. Global replacement of known artifacts.
    for old, new in replacements.items():
        # An empty key would match between every character, and an identical
        # pair is a no-op, so both are skipped.
        if old and old != new:
            content = content.replace(old, new)

    # 2. Fix academic keywords and other script-level corruptions.
    # NOTE(review): the '大学' pair reads as a no-op as rendered -- confirm
    # against the original bytes.
    content = content.replace('学你', '学位').replace('大学', '大学')
    content = content.replace('戠绩å ?', '成绩单').replace('诠书', '证书')
    content = content.replace('造堇', '造假')

    # 3. Make sure the document declares UTF-8. The check is case- and
    # quote-insensitive so an existing equivalent declaration such as
    # <meta charset="utf-8"> does not get a duplicate tag.
    has_utf8_meta = re.search(
        r'<meta\s+charset\s*=\s*["\']?utf-?8', content, re.IGNORECASE
    )
    if not has_utf8_meta:
        # Insert after the first opening <head ...> tag only; the word
        # boundary keeps <header> from matching.
        content = re.sub(
            r'<head\b[^>]*>',
            lambda m: m.group(0) + '\n <meta charset="UTF-8">',
            content,
            count=1,
            flags=re.IGNORECASE,
        )

    # Fix closing tags whose '<' was corrupted into '?' (?/div>, ?/button>).
    content = re.sub(r'\?\s*/div>', '</div>', content)
    content = re.sub(r'\?\s*/button>', '</button>', content)

    # 4. Standardize all emojis.
    # NOTE(review): these pairs read as identity replacements as rendered.
    content = content.replace('🔍', '🔍').replace('🛡️', '🛡️').replace('🌐', '🌐')
    return content


def audit_file(path=None, replacements=None):
    """Clean encoding artifacts out of an HTML file and rewrite it as UTF-8.

    The file is read as bytes, decoded (UTF-8 first, latin-1 as a fallback),
    passed through the replacement table and the markup repairs, checked for
    div balance (report only), and written back in place.

    Args:
        path: File to clean. Defaults to the module-level ``filepath``.
        replacements: Mapping of artifact -> replacement. Defaults to the
            module-level ``GLOBAL_REPLACEMENTS``.

    Returns:
        None. The result is written to *path*; progress is printed.

    Raises:
        OSError: Propagated from ``open`` when the file cannot be read or
            written (e.g. ``FileNotFoundError``).
    """
    target = filepath if path is None else path
    table = GLOBAL_REPLACEMENTS if replacements is None else replacements

    with open(target, 'rb') as f:
        data = f.read()

    # Try UTF-8 first. latin-1 maps every byte to a code point, so the
    # fallback cannot fail.
    try:
        content = data.decode('utf-8')
    except UnicodeDecodeError:
        content = data.decode('latin-1')

    content = _repair_markup(content, table)

    # Rudimentary structural check. The word boundary keeps tags that merely
    # start with "div" (e.g. <divider>) out of the count.
    open_divs = len(re.findall(r'<div\b', content))
    close_divs = len(re.findall(r'</div\b', content))
    print(f'Div count check: Open={open_divs}, Close={close_divs}')
    if open_divs > close_divs:
        # NOTE: despite the wording, nothing is rebalanced; this only reports.
        print('WARNING: Unclosed divs detected. Attempting to balance...')

    # Write back clean. newline='\n' disables newline translation on write.
    with open(target, 'w', encoding='utf-8', newline='\n') as f:
        f.write(content)
    print('Deep Audit and Cleanup Complete.')
# Script entry point: run the audit only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    audit_file()