import os

# Characters produced when UTF-8 kana (lead bytes E3 81 / E3 82 / E3 83) are
# mis-decoded as CP932. They are derived from raw bytes rather than typed
# literally so that running this tool over its own source cannot damage them.
# NOTE(review): the literal in the file had degraded to "???", which matched
# every file containing a question mark; these three markers are the presumed
# original intent -- confirm against version history.
_MOJIBAKE_MARKERS = b"\xe3\x81\xe3\x82\xe3\x83".decode("cp932")

# A directory is skipped when its name contains any of these substrings.
_SKIP_DIR_MARKERS = (".git", "node_modules", ".venv")

# Only files with these extensions are examined.
_TARGET_EXTENSIONS = (".py", ".json", ".html", ".css", ".js", ".md")


def _write_utf8(path, text):
    """Overwrite *path* with *text* encoded as UTF-8."""
    # newline="" disables newline translation, so the line endings that were
    # decoded from the original bytes are written back exactly as they were.
    with open(path, "w", encoding="utf-8", newline="") as f:
        f.write(text)


def _is_skipped_dir(name):
    """Return True if the directory *name* must not be descended into."""
    # Substring match on purpose: it skips the same directories the previous
    # `marker in root` test did (e.g. ".github" is skipped along with ".git").
    return any(marker in name for marker in _SKIP_DIR_MARKERS)


def _repair_mojibake(path, text):
    """Try to undo "UTF-8 read as CP932" mojibake in *text* and save it.

    Returns True if the file at *path* was rewritten, False otherwise.
    """
    try:
        # Reverse the damage: encode back to the CP932 bytes (which are the
        # original UTF-8 bytes) and decode them as UTF-8.
        recovered = text.encode("cp932").decode("utf-8")
    except UnicodeError:
        # The exact round trip is impossible (some characters cannot be
        # encoded, or the bytes are not valid UTF-8). Fall back to a lossy
        # best-effort pass that substitutes replacement characters.
        recovered = text.encode("cp932", errors="replace").decode(
            "utf-8", errors="replace"
        )
        label = "PARTIAL FIX"
    else:
        label = "FIXED"

    if recovered == text:
        return False
    print(f" [{label}] {path}")
    _write_utf8(path, recovered)
    return True


def fix_file(path):
    """Repair the text encoding of the file at *path* in place.

    Three cases are handled:

    * valid UTF-8 that contains mojibake marker characters: the
      CP932 -> UTF-8 round trip is reversed (exactly if possible, otherwise
      with replacement characters);
    * bytes that are not valid UTF-8: they are decoded as CP932 (Shift_JIS)
      and the file is rewritten as UTF-8;
    * anything else is left untouched.

    Returns:
        True if the file was rewritten, False if it was left alone or an
        error occurred (errors are printed, never raised).
    """
    try:
        with open(path, "rb") as f:
            raw_bytes = f.read()

        try:
            text = raw_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8, so it might be raw Shift_JIS.
            try:
                text = raw_bytes.decode("cp932")
            except UnicodeDecodeError:
                return False
            print(f" [CONVERTED SJIS->UTF8] {path}")
            _write_utf8(path, text)
            return True

        # Only touch files that actually show mojibake; healthy UTF-8 files
        # must never reach the lossy fallback inside _repair_mojibake.
        if any(c in text for c in _MOJIBAKE_MARKERS):
            return _repair_mojibake(path, text)
        return False
    except Exception as e:
        # Per-file boundary of a batch tool: report the problem and carry on
        # with the remaining files instead of aborting the whole run.
        print(f" Error processing {path}: {e}")
        return False


def main():
    """Walk the target directories, repair matching files, print a summary."""
    target_dirs = ["engine", "ai", "compiler", "tools", "."]
    files_fixed = 0
    # "." also contains the other target directories, so remember what has
    # been handled to avoid processing (and counting) a file twice.
    seen = set()
    for d in target_dirs:
        if not os.path.exists(d):
            continue
        for root, dirs, files in os.walk(d):
            # Prune in place so os.walk never descends into skipped trees.
            dirs[:] = [name for name in dirs if not _is_skipped_dir(name)]
            for file in files:
                if not file.endswith(_TARGET_EXTENSIONS):
                    continue
                path = os.path.join(root, file)
                key = os.path.realpath(path)
                if key in seen:
                    continue
                seen.add(key)
                if fix_file(path):
                    files_fixed += 1
    print(f"\nDone. Fixed {files_fixed} files.")


if __name__ == "__main__":
    main()