"""Revert locally modified Hugging Face JSON files to their upstream originals.

Scans a base directory for folders named ``!models--<author>--<model>``,
downloads each file listed in ``TARGET_FILES`` from the matching repository's
``main`` branch, and writes it over the local copy (keeping a ``.bak`` of the
file that was replaced).
"""

import argparse
import os
from typing import Optional

import requests
from colorama import Fore, Style, init
from tqdm import tqdm

init()

TARGET_FILES = [
    "config.json",
    "tokenizer.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
    "generation_config.json",
]


def download_original(repo_id: str, filename: str) -> Optional[str]:
    """Download one file from the ``main`` branch of a Hugging Face repo.

    Args:
        repo_id: Repository identifier in ``author/model`` form.
        filename: Name of the file, relative to the repository root.

    Returns:
        The response body as text when the server answers with status 200;
        ``None`` for any other status or when the request itself fails
        (connection error, timeout, ...).
    """
    url = f"https://huggingface.co/{repo_id}/resolve/main/{filename}"
    try:
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # The target files are JSON, which is UTF-8. Pin the encoding so the text
    # does not depend on what requests infers from the response headers.
    response.encoding = "utf-8"
    return response.text


def _repo_id_from_folder(folder: str) -> Optional[str]:
    """Map ``!models--author--model`` to ``author/model`` (None if malformed)."""
    parts = folder.split("--")
    if len(parts) < 3:
        return None
    author = parts[1]
    # Rejoin the tail in case the model name itself contains "--".
    model_name = "--".join(parts[2:])
    return f"{author}/{model_name}"


def _restore_file(file_path: str, original_content: str) -> bool:
    """Write ``original_content`` to ``file_path``, backing up what it replaces.

    An existing file that already matches the original (ignoring CRLF/LF
    differences) is left untouched, so re-running the script does not
    overwrite the ``.bak`` of the user's modified version with a copy of the
    original.

    Args:
        file_path: Destination path of the file to restore.
        original_content: Upstream file content as text.

    Returns:
        ``True`` if the file was written, ``False`` if it already matched.

    Raises:
        OSError: If reading, renaming, or writing the file fails.
    """
    payload = original_content.encode("utf-8")
    if os.path.exists(file_path):
        with open(file_path, "rb") as existing:
            current = existing.read()
        if current.replace(b"\r\n", b"\n") == payload.replace(b"\r\n", b"\n"):
            return False
        os.replace(file_path, file_path + ".bak")
    # Binary mode: text mode would translate "\n" to "\r\n" on Windows, so the
    # restored file would no longer be the upstream bytes.
    with open(file_path, "wb") as out:
        out.write(payload)
    return True


def main() -> None:
    """Parse the command line and restore the target JSON files for each model.

    Takes one positional argument, ``base_dir``. Every sub-directory of it
    whose name starts with ``!models--`` is processed; if that directory has a
    ``fixed`` sub-directory, files are restored there instead of at its root.
    """
    parser = argparse.ArgumentParser(
        description="Revert modified JSON files to their HF originals."
    )
    parser.add_argument("base_dir", help="Directory to scan (e.g., B:\\12B)")
    args = parser.parse_args()

    # isdir (not exists): a path to a regular file would make listdir fail.
    if not os.path.isdir(args.base_dir):
        print(
            f"{Fore.RED}Error: Path {args.base_dir} does not exist "
            f"or is not a directory.{Style.RESET_ALL}"
        )
        return

    # Only directories starting with !models-- are model folders.
    model_folders = [
        entry
        for entry in os.listdir(args.base_dir)
        if entry.startswith("!models--")
        and os.path.isdir(os.path.join(args.base_dir, entry))
    ]

    print(f"{Fore.CYAN}--- JSON REVERTER: RESTORING ORIGINALS ---{Style.RESET_ALL}")
    print(f"Found {len(model_folders)} potential model folders.\n")

    failures = 0
    for folder in tqdm(model_folders, desc="Processing Models"):
        repo_id = _repo_id_from_folder(folder)
        if repo_id is None:
            continue

        full_path = os.path.join(args.base_dir, folder)

        # Prefer the 'fixed' subdirectory when the model folder has one.
        fixed_dir = os.path.join(full_path, "fixed")
        target_dir = fixed_dir if os.path.isdir(fixed_dir) else full_path

        tqdm.write(f"\n{Fore.YELLOW}Restoring: {repo_id}{Style.RESET_ALL}")

        for filename in TARGET_FILES:
            original_content = download_original(repo_id, filename)
            if not original_content:
                # Not every repo ships every target file (e.g. no
                # tokenizer.json). download_original also returns None when
                # the request itself failed, which lands here as well.
                tqdm.write(
                    f"  {Fore.LIGHTBLACK_EX}- Skipped {filename} "
                    f"(Not found on HF){Style.RESET_ALL}"
                )
                continue

            file_path = os.path.join(target_dir, filename)
            try:
                written = _restore_file(file_path, original_content)
            except OSError as exc:
                # Keep going: one unwritable file should not abort the batch.
                failures += 1
                tqdm.write(
                    f"  {Fore.RED}✗ Failed {filename}: {exc}{Style.RESET_ALL}"
                )
                continue

            if written:
                tqdm.write(f"  {Fore.GREEN}✓ Restored {filename}{Style.RESET_ALL}")
            else:
                tqdm.write(
                    f"  {Fore.GREEN}= Unchanged {filename} "
                    f"(already matches HF){Style.RESET_ALL}"
                )

    print(f"\n{Fore.CYAN}--- REVERSION COMPLETE ---{Style.RESET_ALL}")
    if failures:
        print(
            f"{Fore.RED}{failures} file(s) could not be restored; "
            f"see the messages above.{Style.RESET_ALL}"
        )
    else:
        print(
            "All modified JSONs have been replaced with originals "
            "(Backups saved as .bak)."
        )
    print("You can now run eos_scanner.py to start fresh.")


if __name__ == "__main__":
    main()