Spaces:
Sleeping
Sleeping
File size: 2,422 Bytes
import os
def _write_utf8(path, text):
    """Overwrite *path* with *text* encoded as UTF-8."""
    # Binary mode on purpose: *text* was produced by decoding the file's raw
    # bytes, so it still carries the file's own line endings. A text-mode
    # write would translate "\n" again on Windows and turn "\r\n" into
    # "\r\r\n".
    with open(path, "wb") as out:
        out.write(text.encode("utf-8"))


def fix_file(path):
    """Try to repair the text encoding of the file at *path* in place.

    The file is read as raw bytes and handled in one of two ways:

    * If the bytes are not valid UTF-8 they are decoded as CP932 (Shift-JIS)
      and the file is rewritten as UTF-8.
    * If the bytes are valid UTF-8 and the text matches the mojibake
      heuristic, the "UTF-8 read as CP932" mistake is reversed by encoding
      the text as CP932 and decoding the result as UTF-8. When the strict
      round trip fails, a lossy one (``errors="replace"`` on both steps) is
      used instead.

    Args:
        path: Path of the file to inspect and possibly rewrite.

    Returns:
        True if the file was rewritten, False if it was left untouched or
        could not be processed. I/O and other unexpected errors are reported
        on stdout and never propagate to the caller.
    """
    try:
        with open(path, "rb") as src:
            raw_bytes = src.read()

        try:
            text = raw_bytes.decode("utf-8")
        except UnicodeDecodeError:
            # Not valid UTF-8, so it might be raw SJIS.
            try:
                text = raw_bytes.decode("cp932")
            except UnicodeDecodeError:
                return False
            print(f" [CONVERTED SJIS->UTF8] {path}")
            _write_utf8(path, text)
            return True

        # Heuristic: only touch files that contain a mojibake marker.
        # NOTE(review): the literal is three identical "?" characters, so this
        # test is equivalent to `"?" in text`. It looks like the intended
        # marker characters were lost at some point -- confirm and restore.
        if not any(marker in text for marker in "???"):
            return False

        try:
            # Reverse mojibake: UTF-8 interpreted as CP932
            # -> encode as CP932 -> decode as UTF-8.
            recovered = text.encode("cp932").decode("utf-8")
            label = "FIXED"
        except UnicodeError:
            # Strict recovery failed; fall back to a best-effort round trip.
            # NOTE(review): this is lossy -- characters that cannot be encoded
            # become "?" and byte runs that are not UTF-8 become U+FFFD. Text
            # that was never mojibake but matches the heuristic above is
            # degraded by this path; consider tightening the heuristic.
            recovered = text.encode("cp932", errors="replace").decode(
                "utf-8", errors="replace"
            )
            label = "PARTIAL FIX"

        if recovered == text:
            return False
        print(f" [{label}] {path}")
        _write_utf8(path, recovered)
        return True
    except Exception as e:
        # Per-file boundary: report the failure and let the caller carry on
        # with the remaining files.
        print(f" Error processing {path}: {e}")
        return False
def main():
    """Run ``fix_file`` over the project's source files and report a count.

    Walks each existing entry of the hard-coded target directory list
    (relative to the current working directory), passes every file with one
    of the handled extensions to ``fix_file``, and prints how many files it
    reported as rewritten.
    """
    target_dirs = ["engine", "ai", "compiler", "tools", "."]
    skip_markers = (".git", "node_modules", ".venv")
    extensions = (".py", ".json", ".html", ".css", ".js", ".md")

    # "." also contains the other target directories, so remember which files
    # were already handled; otherwise each of them would be processed (and
    # possibly counted or rewritten) twice.
    seen = set()
    files_fixed = 0
    for d in target_dirs:
        if not os.path.exists(d):
            continue
        for root, dirs, files in os.walk(d):
            # Prune skipped directories in place so os.walk never descends
            # into them. A directory is skipped when its name contains one of
            # the markers (substring match, so e.g. ".github" is skipped too).
            dirs[:] = [
                name
                for name in dirs
                if not any(marker in name for marker in skip_markers)
            ]
            for file in files:
                if not file.endswith(extensions):
                    continue
                path = os.path.join(root, file)
                key = os.path.normpath(path)  # "./engine/x" == "engine/x"
                if key in seen:
                    continue
                seen.add(key)
                if fix_file(path):
                    files_fixed += 1
    print(f"\nDone. Fixed {files_fixed} files.")
if __name__ == "__main__":
    # Script entry point: run the repair pass only when this file is executed
    # directly, not when it is imported as a module.
    main()
|