# model_tools/json_reverter.py
# Residue from the Hugging Face file viewer (kept for provenance):
#   "Naphula's picture" / "Upload json_reverter.py" / commit fac182a (verified)
import os
import requests
import argparse
from colorama import init, Fore, Style
from tqdm import tqdm
init()
# Names of the JSON files that are fetched from the Hugging Face repository
# and written back into each model folder.
TARGET_FILES = [
    "config.json",
    "tokenizer.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
    "generation_config.json",
]
def download_original(repo_id, filename):
    """Fetch the text of ``filename`` from the ``main`` branch of a Hugging Face repo.

    Args:
        repo_id: Repository identifier in ``author/model`` form.
        filename: Path of the file inside the repository.

    Returns:
        The response body decoded as UTF-8 when the server answers with
        HTTP 200, otherwise ``None``. ``None`` is also returned when the
        request itself fails (connection error, timeout, invalid URL, ...),
        so callers cannot distinguish "missing" from "unreachable".
    """
    url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        # Best-effort download: network-level failures are reported as
        # "nothing fetched" instead of aborting the whole run. Only
        # requests' own errors are swallowed so real bugs still surface.
        return None

    if response.status_code != 200:
        return None

    # JSON is UTF-8 by specification. Without an explicit charset header
    # requests may fall back to ISO-8859-1 or a guessed codec, which would
    # corrupt non-ASCII tokens, so pin the decoding explicitly.
    response.encoding = "utf-8"
    return response.text
def _next_backup_path(file_path):
    """Return ``file_path + ".bak"``, or the first unused ``.bak.N`` variant."""
    candidate = file_path + ".bak"
    counter = 1
    while os.path.exists(candidate):
        candidate = f"{file_path}.bak.{counter}"
        counter += 1
    return candidate


def _read_text_or_none(file_path):
    """Return the UTF-8 text of ``file_path`` without newline translation, or ``None`` if unreadable."""
    try:
        with open(file_path, "r", encoding="utf-8", newline="") as handle:
            return handle.read()
    except (OSError, UnicodeDecodeError):
        return None


def _restore_file(file_path, original_content):
    """Write ``original_content`` to ``file_path``, backing up a differing existing file first.

    Returns:
        ``True`` when the file was (re)written, ``False`` when the file on
        disk already matched ``original_content`` and nothing was touched.

    Raises:
        OSError: If the backup rename or the write fails.
    """
    if os.path.exists(file_path):
        if _read_text_or_none(file_path) == original_content:
            # Already identical: rewriting would only replace a meaningful
            # backup with a copy of the original.
            return False
        # Never overwrite an earlier backup; it may hold the only copy of
        # the user's modified file from a previous run.
        os.replace(file_path, _next_backup_path(file_path))

    # newline="" keeps the downloaded line endings as-is (no "\n" -> "\r\n"
    # translation on Windows), so the restored file matches the original.
    with open(file_path, "w", encoding="utf-8", newline="") as handle:
        handle.write(original_content)
    return True


def main():
    """Restore the tracked JSON files of every ``!models--author--model`` folder.

    Command line:
        base_dir: Directory that contains the ``!models--*`` folders.

    For each such folder the repo id ``author/model`` is derived from the
    folder name, every entry of ``TARGET_FILES`` is downloaded through
    ``download_original`` and written into the folder (or into its ``fixed``
    subdirectory when that exists). An existing file whose content differs
    is renamed to a ``.bak`` backup before being replaced.
    """
    parser = argparse.ArgumentParser(description="Revert modified JSON files to their HF originals.")
    parser.add_argument("base_dir", help="Directory to scan (e.g., B:\\12B)")
    args = parser.parse_args()

    # isdir (not exists): a regular file would make os.listdir() raise below.
    if not os.path.isdir(args.base_dir):
        print(f"{Fore.RED}Error: Path {args.base_dir} does not exist or is not a directory.{Style.RESET_ALL}")
        return

    # Only real directories whose name starts with !models-- are candidates.
    model_folders = sorted(
        name
        for name in os.listdir(args.base_dir)
        if name.startswith("!models--") and os.path.isdir(os.path.join(args.base_dir, name))
    )

    print(f"{Fore.CYAN}--- JSON REVERTER: RESTORING ORIGINALS ---{Style.RESET_ALL}")
    print(f"Found {len(model_folders)} potential model folders.\n")

    failures = 0
    for folder in tqdm(model_folders, desc="Processing Models"):
        # Parse repo ID: !models--author--model -> author/model
        # Split on '--' and skip the first part (!models).
        parts = folder.split("--")
        if len(parts) < 3:
            continue

        author = parts[1]
        model_name = "--".join(parts[2:])  # Rejoin in case the model name itself contains '--'
        if not author or not model_name:
            # e.g. "!models--author--" cannot be mapped to a valid repo id.
            continue
        repo_id = f"{author}/{model_name}"

        full_path = os.path.join(args.base_dir, folder)

        # Prefer the 'fixed' subdirectory when the model folder has one.
        fixed_dir = os.path.join(full_path, "fixed")
        target_dir = fixed_dir if os.path.isdir(fixed_dir) else full_path

        tqdm.write(f"\n{Fore.YELLOW}Restoring: {repo_id}{Style.RESET_ALL}")

        for filename in TARGET_FILES:
            original_content = download_original(repo_id, filename)
            if not original_content:
                # Not every model ships all five files (e.g. missing tokenizer.json).
                # download_original also returns None on network errors, so this
                # branch covers failed downloads as well.
                tqdm.write(f" {Fore.LIGHTBLACK_EX}- Skipped {filename} (Not found on HF){Style.RESET_ALL}")
                continue

            file_path = os.path.join(target_dir, filename)
            try:
                changed = _restore_file(file_path, original_content)
            except OSError as exc:
                # One unwritable file should not abort the remaining models.
                failures += 1
                tqdm.write(f" {Fore.RED}✗ Failed {filename} ({exc}){Style.RESET_ALL}")
                continue

            if changed:
                tqdm.write(f" {Fore.GREEN}✓ Restored {filename}{Style.RESET_ALL}")
            else:
                tqdm.write(f" {Fore.GREEN}✓ {filename} already matches original{Style.RESET_ALL}")

    print(f"\n{Fore.CYAN}--- REVERSION COMPLETE ---{Style.RESET_ALL}")
    if failures:
        print(f"{Fore.YELLOW}Warning: {failures} file(s) could not be restored (see messages above).{Style.RESET_ALL}")
    else:
        print("All modified JSONs have been replaced with originals (Backups saved as .bak).")
    print("You can now run eos_scanner.py to start fresh.")
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()